Commit f4566f04854d78acfc74b9acb029744acde9d033

Authored by Nadia Derbey
Committed by Linus Torvalds
1 parent 2802831313

ipc: fix wrong comments

This patch fixes wrong and obsolete comments in the ipc code.  It also adds
a missing lock around ipc_get_maxid() in shm_get_stat().

Signed-off-by: Nadia Derbey <Nadia.Derbey@bull.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
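
The shm_get_stat() half of the change is not visible below (ipc/shm.c is among
the omitted files), so here is a rough, hypothetical sketch of what "a missing
lock around ipc_get_maxid()" means. It assumes the fix follows the same pattern
as the IPC_INFO branch of sys_msgctl() in the ipc/msg.c hunk, where
ipc_get_maxid() is only ever called with the ids mutex held; shm_ids() and the
exact placement inside ipc/shm.c are assumptions by analogy with msg_ids(),
not the literal hunk:

	/*
	 * Sketch only: ipc_get_maxid() walks the IDR of an ipc_ids, so it
	 * must not race with ipc_addid()/ipc_rmid(). Hold the ids mutex
	 * around the call (placement inside shm_get_stat() is assumed).
	 */
	mutex_lock(&shm_ids(ns).mutex);
	max_id = ipc_get_maxid(&shm_ids(ns));
	mutex_unlock(&shm_ids(ns).mutex);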

Showing 5 changed files with 118 additions and 45 deletions; ipc/msg.c and the start of ipc/sem.c are reproduced below.

--- a/ipc/msg.c
+++ b/ipc/msg.c
 /*
  * linux/ipc/msg.c
  * Copyright (C) 1992 Krishna Balasubramanian
  *
  * Removed all the remaining kerneld mess
  * Catch the -EFAULT stuff properly
  * Use GFP_KERNEL for messages as in 1.2
  * Fixed up the unchecked user space derefs
  * Copyright (C) 1998 Alan Cox & Andi Kleen
  *
  * /proc/sysvipc/msg support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
  *
  * mostly rewritten, threaded and wake-one semantics added
  * MSGMAX limit removed, sysctl's added
  * (c) 1999 Manfred Spraul <manfred@colorfullife.com>
  *
  * support for audit of ipc object properties and permission changes
  * Dustin Kirkland <dustin.kirkland@us.ibm.com>
  *
  * namespaces support
  * OpenVZ, SWsoft Inc.
  * Pavel Emelianov <xemul@openvz.org>
  */

 #include <linux/capability.h>
 #include <linux/slab.h>
 #include <linux/msg.h>
 #include <linux/spinlock.h>
 #include <linux/init.h>
 #include <linux/proc_fs.h>
 #include <linux/list.h>
 #include <linux/security.h>
 #include <linux/sched.h>
 #include <linux/syscalls.h>
 #include <linux/audit.h>
 #include <linux/seq_file.h>
 #include <linux/mutex.h>
 #include <linux/nsproxy.h>

 #include <asm/current.h>
 #include <asm/uaccess.h>
 #include "util.h"

 /*
  * one msg_receiver structure for each sleeping receiver:
  */
 struct msg_receiver {
 	struct list_head r_list;
 	struct task_struct *r_tsk;

 	int r_mode;
 	long r_msgtype;
 	long r_maxsize;

 	struct msg_msg *volatile r_msg;
 };

 /* one msg_sender for each sleeping sender */
 struct msg_sender {
 	struct list_head list;
 	struct task_struct *tsk;
 };

 #define SEARCH_ANY 1
 #define SEARCH_EQUAL 2
 #define SEARCH_NOTEQUAL 3
 #define SEARCH_LESSEQUAL 4

 static atomic_t msg_bytes = ATOMIC_INIT(0);
 static atomic_t msg_hdrs = ATOMIC_INIT(0);

 static struct ipc_ids init_msg_ids;

 #define msg_ids(ns) (*((ns)->ids[IPC_MSG_IDS]))

 #define msg_unlock(msq) ipc_unlock(&(msq)->q_perm)
 #define msg_buildid(ns, id, seq) \
 	ipc_buildid(&msg_ids(ns), id, seq)

 static void freeque(struct ipc_namespace *, struct msg_queue *);
 static int newque(struct ipc_namespace *, struct ipc_params *);
 #ifdef CONFIG_PROC_FS
 static int sysvipc_msg_proc_show(struct seq_file *s, void *it);
 #endif

 static void __msg_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids)
 {
 	ns->ids[IPC_MSG_IDS] = ids;
 	ns->msg_ctlmax = MSGMAX;
 	ns->msg_ctlmnb = MSGMNB;
 	ns->msg_ctlmni = MSGMNI;
 	ipc_init_ids(ids);
 }

 int msg_init_ns(struct ipc_namespace *ns)
 {
 	struct ipc_ids *ids;

 	ids = kmalloc(sizeof(struct ipc_ids), GFP_KERNEL);
 	if (ids == NULL)
 		return -ENOMEM;

 	__msg_init_ns(ns, ids);
 	return 0;
 }

 void msg_exit_ns(struct ipc_namespace *ns)
 {
 	struct msg_queue *msq;
 	int next_id;
 	int total, in_use;

 	mutex_lock(&msg_ids(ns).mutex);

 	in_use = msg_ids(ns).in_use;

 	for (total = 0, next_id = 0; total < in_use; next_id++) {
 		msq = idr_find(&msg_ids(ns).ipcs_idr, next_id);
 		if (msq == NULL)
 			continue;
 		ipc_lock_by_ptr(&msq->q_perm);
 		freeque(ns, msq);
 		total++;
 	}
 	mutex_unlock(&msg_ids(ns).mutex);

 	kfree(ns->ids[IPC_MSG_IDS]);
 	ns->ids[IPC_MSG_IDS] = NULL;
 }

 void __init msg_init(void)
 {
 	__msg_init_ns(&init_ipc_ns, &init_msg_ids);
 	ipc_init_proc_interface("sysvipc/msg",
 			" key msqid perms cbytes qnum lspid lrpid uid gid cuid cgid stime rtime ctime\n",
 			IPC_MSG_IDS, sysvipc_msg_proc_show);
 }

 static inline struct msg_queue *msg_lock(struct ipc_namespace *ns, int id)
 {
 	struct kern_ipc_perm *ipcp = ipc_lock(&msg_ids(ns), id);

 	return container_of(ipcp, struct msg_queue, q_perm);
 }

 static inline struct msg_queue *msg_lock_check(struct ipc_namespace *ns,
 						int id)
 {
 	struct kern_ipc_perm *ipcp = ipc_lock_check(&msg_ids(ns), id);

 	return container_of(ipcp, struct msg_queue, q_perm);
 }

 static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s)
 {
 	ipc_rmid(&msg_ids(ns), &s->q_perm);
 }

+/**
+ * newque - Create a new msg queue
+ * @ns: namespace
+ * @params: ptr to the structure that contains the key and msgflg
+ *
+ * Called with msg_ids.mutex held
+ */
 static int newque(struct ipc_namespace *ns, struct ipc_params *params)
 {
 	struct msg_queue *msq;
 	int id, retval;
 	key_t key = params->key;
 	int msgflg = params->flg;

 	msq = ipc_rcu_alloc(sizeof(*msq));
 	if (!msq)
 		return -ENOMEM;

 	msq->q_perm.mode = msgflg & S_IRWXUGO;
 	msq->q_perm.key = key;

 	msq->q_perm.security = NULL;
 	retval = security_msg_queue_alloc(msq);
 	if (retval) {
 		ipc_rcu_putref(msq);
 		return retval;
 	}

 	/*
 	 * ipc_addid() locks msq
 	 */
 	id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
 	if (id == -1) {
 		security_msg_queue_free(msq);
 		ipc_rcu_putref(msq);
 		return -ENOSPC;
 	}

 	msq->q_perm.id = msg_buildid(ns, id, msq->q_perm.seq);
 	msq->q_stime = msq->q_rtime = 0;
 	msq->q_ctime = get_seconds();
 	msq->q_cbytes = msq->q_qnum = 0;
 	msq->q_qbytes = ns->msg_ctlmnb;
 	msq->q_lspid = msq->q_lrpid = 0;
 	INIT_LIST_HEAD(&msq->q_messages);
 	INIT_LIST_HEAD(&msq->q_receivers);
 	INIT_LIST_HEAD(&msq->q_senders);

 	msg_unlock(msq);

 	return msq->q_perm.id;
 }

 static inline void ss_add(struct msg_queue *msq, struct msg_sender *mss)
 {
 	mss->tsk = current;
 	current->state = TASK_INTERRUPTIBLE;
 	list_add_tail(&mss->list, &msq->q_senders);
 }

 static inline void ss_del(struct msg_sender *mss)
 {
 	if (mss->list.next != NULL)
 		list_del(&mss->list);
 }

 static void ss_wakeup(struct list_head *h, int kill)
 {
 	struct list_head *tmp;

 	tmp = h->next;
 	while (tmp != h) {
 		struct msg_sender *mss;

 		mss = list_entry(tmp, struct msg_sender, list);
 		tmp = tmp->next;
 		if (kill)
 			mss->list.next = NULL;
 		wake_up_process(mss->tsk);
 	}
 }

 static void expunge_all(struct msg_queue *msq, int res)
 {
 	struct list_head *tmp;

 	tmp = msq->q_receivers.next;
 	while (tmp != &msq->q_receivers) {
 		struct msg_receiver *msr;

 		msr = list_entry(tmp, struct msg_receiver, r_list);
 		tmp = tmp->next;
 		msr->r_msg = NULL;
 		wake_up_process(msr->r_tsk);
 		smp_mb();
 		msr->r_msg = ERR_PTR(res);
 	}
 }

 /*
  * freeque() wakes up waiters on the sender and receiver waiting queue,
- * removes the message queue from message queue ID
- * IDR, and cleans up all the messages associated with this queue.
+ * removes the message queue from message queue ID IDR, and cleans up all the
+ * messages associated with this queue.
  *
  * msg_ids.mutex and the spinlock for this message queue are held
  * before freeque() is called. msg_ids.mutex remains locked on exit.
  */
 static void freeque(struct ipc_namespace *ns, struct msg_queue *msq)
 {
 	struct list_head *tmp;

 	expunge_all(msq, -EIDRM);
 	ss_wakeup(&msq->q_senders, 1);
 	msg_rmid(ns, msq);
 	msg_unlock(msq);

 	tmp = msq->q_messages.next;
 	while (tmp != &msq->q_messages) {
 		struct msg_msg *msg = list_entry(tmp, struct msg_msg, m_list);

 		tmp = tmp->next;
 		atomic_dec(&msg_hdrs);
 		free_msg(msg);
 	}
 	atomic_sub(msq->q_cbytes, &msg_bytes);
 	security_msg_queue_free(msq);
 	ipc_rcu_putref(msq);
 }

+/*
+ * Called with msg_ids.mutex and ipcp locked.
+ */
 static inline int msg_security(struct kern_ipc_perm *ipcp, int msgflg)
 {
 	struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);

 	return security_msg_queue_associate(msq, msgflg);
 }

 asmlinkage long sys_msgget(key_t key, int msgflg)
 {
 	struct ipc_namespace *ns;
 	struct ipc_ops msg_ops;
 	struct ipc_params msg_params;

 	ns = current->nsproxy->ipc_ns;

 	msg_ops.getnew = newque;
 	msg_ops.associate = msg_security;
 	msg_ops.more_checks = NULL;

 	msg_params.key = key;
 	msg_params.flg = msgflg;

 	return ipcget(ns, &msg_ids(ns), &msg_ops, &msg_params);
 }

 static inline unsigned long
 copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version)
 {
 	switch(version) {
 	case IPC_64:
 		return copy_to_user(buf, in, sizeof(*in));
 	case IPC_OLD:
 	{
 		struct msqid_ds out;

 		memset(&out, 0, sizeof(out));

 		ipc64_perm_to_ipc_perm(&in->msg_perm, &out.msg_perm);

 		out.msg_stime = in->msg_stime;
 		out.msg_rtime = in->msg_rtime;
 		out.msg_ctime = in->msg_ctime;

 		if (in->msg_cbytes > USHRT_MAX)
 			out.msg_cbytes = USHRT_MAX;
 		else
 			out.msg_cbytes = in->msg_cbytes;
 		out.msg_lcbytes = in->msg_cbytes;

 		if (in->msg_qnum > USHRT_MAX)
 			out.msg_qnum = USHRT_MAX;
 		else
 			out.msg_qnum = in->msg_qnum;

 		if (in->msg_qbytes > USHRT_MAX)
 			out.msg_qbytes = USHRT_MAX;
 		else
 			out.msg_qbytes = in->msg_qbytes;
 		out.msg_lqbytes = in->msg_qbytes;

 		out.msg_lspid = in->msg_lspid;
 		out.msg_lrpid = in->msg_lrpid;

 		return copy_to_user(buf, &out, sizeof(out));
 	}
 	default:
 		return -EINVAL;
 	}
 }

 struct msq_setbuf {
 	unsigned long qbytes;
 	uid_t uid;
 	gid_t gid;
 	mode_t mode;
 };

 static inline unsigned long
 copy_msqid_from_user(struct msq_setbuf *out, void __user *buf, int version)
 {
 	switch(version) {
 	case IPC_64:
 	{
 		struct msqid64_ds tbuf;

 		if (copy_from_user(&tbuf, buf, sizeof(tbuf)))
 			return -EFAULT;

 		out->qbytes = tbuf.msg_qbytes;
 		out->uid = tbuf.msg_perm.uid;
 		out->gid = tbuf.msg_perm.gid;
 		out->mode = tbuf.msg_perm.mode;

 		return 0;
 	}
 	case IPC_OLD:
 	{
 		struct msqid_ds tbuf_old;

 		if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
 			return -EFAULT;

 		out->uid = tbuf_old.msg_perm.uid;
 		out->gid = tbuf_old.msg_perm.gid;
 		out->mode = tbuf_old.msg_perm.mode;

 		if (tbuf_old.msg_qbytes == 0)
 			out->qbytes = tbuf_old.msg_lqbytes;
 		else
 			out->qbytes = tbuf_old.msg_qbytes;

 		return 0;
 	}
 	default:
 		return -EINVAL;
 	}
 }

 asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
 {
 	struct kern_ipc_perm *ipcp;
 	struct msq_setbuf uninitialized_var(setbuf);
 	struct msg_queue *msq;
 	int err, version;
 	struct ipc_namespace *ns;

 	if (msqid < 0 || cmd < 0)
 		return -EINVAL;

 	version = ipc_parse_version(&cmd);
 	ns = current->nsproxy->ipc_ns;

 	switch (cmd) {
 	case IPC_INFO:
 	case MSG_INFO:
 	{
 		struct msginfo msginfo;
 		int max_id;

 		if (!buf)
 			return -EFAULT;
 		/*
 		 * We must not return kernel stack data.
 		 * due to padding, it's not enough
 		 * to set all member fields.
 		 */
 		err = security_msg_queue_msgctl(NULL, cmd);
 		if (err)
 			return err;

 		memset(&msginfo, 0, sizeof(msginfo));
 		msginfo.msgmni = ns->msg_ctlmni;
 		msginfo.msgmax = ns->msg_ctlmax;
 		msginfo.msgmnb = ns->msg_ctlmnb;
 		msginfo.msgssz = MSGSSZ;
 		msginfo.msgseg = MSGSEG;
 		mutex_lock(&msg_ids(ns).mutex);
 		if (cmd == MSG_INFO) {
 			msginfo.msgpool = msg_ids(ns).in_use;
 			msginfo.msgmap = atomic_read(&msg_hdrs);
 			msginfo.msgtql = atomic_read(&msg_bytes);
 		} else {
 			msginfo.msgmap = MSGMAP;
 			msginfo.msgpool = MSGPOOL;
 			msginfo.msgtql = MSGTQL;
 		}
 		max_id = ipc_get_maxid(&msg_ids(ns));
 		mutex_unlock(&msg_ids(ns).mutex);
 		if (copy_to_user(buf, &msginfo, sizeof(struct msginfo)))
 			return -EFAULT;
 		return (max_id < 0) ? 0 : max_id;
 	}
 	case MSG_STAT: /* msqid is an index rather than a msg queue id */
 	case IPC_STAT:
 	{
 		struct msqid64_ds tbuf;
 		int success_return;

 		if (!buf)
 			return -EFAULT;

 		if (cmd == MSG_STAT) {
 			msq = msg_lock(ns, msqid);
 			if (IS_ERR(msq))
 				return PTR_ERR(msq);
 			success_return = msq->q_perm.id;
 		} else {
 			msq = msg_lock_check(ns, msqid);
 			if (IS_ERR(msq))
 				return PTR_ERR(msq);
 			success_return = 0;
 		}
 		err = -EACCES;
 		if (ipcperms(&msq->q_perm, S_IRUGO))
 			goto out_unlock;

 		err = security_msg_queue_msgctl(msq, cmd);
 		if (err)
 			goto out_unlock;

 		memset(&tbuf, 0, sizeof(tbuf));

 		kernel_to_ipc64_perm(&msq->q_perm, &tbuf.msg_perm);
 		tbuf.msg_stime = msq->q_stime;
 		tbuf.msg_rtime = msq->q_rtime;
 		tbuf.msg_ctime = msq->q_ctime;
 		tbuf.msg_cbytes = msq->q_cbytes;
 		tbuf.msg_qnum = msq->q_qnum;
 		tbuf.msg_qbytes = msq->q_qbytes;
 		tbuf.msg_lspid = msq->q_lspid;
 		tbuf.msg_lrpid = msq->q_lrpid;
 		msg_unlock(msq);
 		if (copy_msqid_to_user(buf, &tbuf, version))
 			return -EFAULT;
 		return success_return;
 	}
 	case IPC_SET:
 		if (!buf)
 			return -EFAULT;
 		if (copy_msqid_from_user(&setbuf, buf, version))
 			return -EFAULT;
 		break;
 	case IPC_RMID:
 		break;
 	default:
 		return -EINVAL;
 	}

 	mutex_lock(&msg_ids(ns).mutex);
 	msq = msg_lock_check(ns, msqid);
 	if (IS_ERR(msq)) {
 		err = PTR_ERR(msq);
 		goto out_up;
 	}

 	ipcp = &msq->q_perm;

 	err = audit_ipc_obj(ipcp);
 	if (err)
 		goto out_unlock_up;
 	if (cmd == IPC_SET) {
 		err = audit_ipc_set_perm(setbuf.qbytes, setbuf.uid, setbuf.gid,
 					 setbuf.mode);
 		if (err)
 			goto out_unlock_up;
 	}

 	err = -EPERM;
 	if (current->euid != ipcp->cuid &&
 	    current->euid != ipcp->uid && !capable(CAP_SYS_ADMIN))
 		/* We _could_ check for CAP_CHOWN above, but we don't */
 		goto out_unlock_up;

 	err = security_msg_queue_msgctl(msq, cmd);
 	if (err)
 		goto out_unlock_up;

 	switch (cmd) {
 	case IPC_SET:
 	{
 		err = -EPERM;
 		if (setbuf.qbytes > ns->msg_ctlmnb && !capable(CAP_SYS_RESOURCE))
 			goto out_unlock_up;

 		msq->q_qbytes = setbuf.qbytes;

 		ipcp->uid = setbuf.uid;
 		ipcp->gid = setbuf.gid;
 		ipcp->mode = (ipcp->mode & ~S_IRWXUGO) |
 			     (S_IRWXUGO & setbuf.mode);
 		msq->q_ctime = get_seconds();
 		/* sleeping receivers might be excluded by
 		 * stricter permissions.
 		 */
 		expunge_all(msq, -EAGAIN);
 		/* sleeping senders might be able to send
 		 * due to a larger queue size.
 		 */
 		ss_wakeup(&msq->q_senders, 0);
 		msg_unlock(msq);
 		break;
 	}
 	case IPC_RMID:
 		freeque(ns, msq);
 		break;
 	}
 	err = 0;
 out_up:
 	mutex_unlock(&msg_ids(ns).mutex);
 	return err;
 out_unlock_up:
 	msg_unlock(msq);
 	goto out_up;
 out_unlock:
 	msg_unlock(msq);
 	return err;
 }

 static int testmsg(struct msg_msg *msg, long type, int mode)
 {
 	switch(mode)
 	{
 		case SEARCH_ANY:
 			return 1;
 		case SEARCH_LESSEQUAL:
 			if (msg->m_type <=type)
 				return 1;
 			break;
 		case SEARCH_EQUAL:
 			if (msg->m_type == type)
 				return 1;
 			break;
 		case SEARCH_NOTEQUAL:
 			if (msg->m_type != type)
 				return 1;
 			break;
 	}
 	return 0;
 }

 static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg)
 {
 	struct list_head *tmp;

 	tmp = msq->q_receivers.next;
 	while (tmp != &msq->q_receivers) {
 		struct msg_receiver *msr;

 		msr = list_entry(tmp, struct msg_receiver, r_list);
 		tmp = tmp->next;
 		if (testmsg(msg, msr->r_msgtype, msr->r_mode) &&
 		    !security_msg_queue_msgrcv(msq, msg, msr->r_tsk,
 					       msr->r_msgtype, msr->r_mode)) {

 			list_del(&msr->r_list);
 			if (msr->r_maxsize < msg->m_ts) {
 				msr->r_msg = NULL;
 				wake_up_process(msr->r_tsk);
 				smp_mb();
 				msr->r_msg = ERR_PTR(-E2BIG);
 			} else {
 				msr->r_msg = NULL;
 				msq->q_lrpid = task_pid_vnr(msr->r_tsk);
 				msq->q_rtime = get_seconds();
 				wake_up_process(msr->r_tsk);
 				smp_mb();
 				msr->r_msg = msg;

 				return 1;
 			}
 		}
 	}
 	return 0;
 }

 long do_msgsnd(int msqid, long mtype, void __user *mtext,
 		size_t msgsz, int msgflg)
 {
 	struct msg_queue *msq;
 	struct msg_msg *msg;
 	int err;
 	struct ipc_namespace *ns;

 	ns = current->nsproxy->ipc_ns;

 	if (msgsz > ns->msg_ctlmax || (long) msgsz < 0 || msqid < 0)
 		return -EINVAL;
 	if (mtype < 1)
 		return -EINVAL;

 	msg = load_msg(mtext, msgsz);
 	if (IS_ERR(msg))
 		return PTR_ERR(msg);

 	msg->m_type = mtype;
 	msg->m_ts = msgsz;

 	msq = msg_lock_check(ns, msqid);
 	if (IS_ERR(msq)) {
 		err = PTR_ERR(msq);
 		goto out_free;
 	}

 	for (;;) {
 		struct msg_sender s;

 		err = -EACCES;
 		if (ipcperms(&msq->q_perm, S_IWUGO))
 			goto out_unlock_free;

 		err = security_msg_queue_msgsnd(msq, msg, msgflg);
 		if (err)
 			goto out_unlock_free;

 		if (msgsz + msq->q_cbytes <= msq->q_qbytes &&
 				1 + msq->q_qnum <= msq->q_qbytes) {
 			break;
 		}

 		/* queue full, wait: */
 		if (msgflg & IPC_NOWAIT) {
 			err = -EAGAIN;
 			goto out_unlock_free;
 		}
 		ss_add(msq, &s);
 		ipc_rcu_getref(msq);
 		msg_unlock(msq);
 		schedule();

 		ipc_lock_by_ptr(&msq->q_perm);
 		ipc_rcu_putref(msq);
 		if (msq->q_perm.deleted) {
 			err = -EIDRM;
 			goto out_unlock_free;
 		}
 		ss_del(&s);

 		if (signal_pending(current)) {
 			err = -ERESTARTNOHAND;
 			goto out_unlock_free;
 		}
 	}

 	msq->q_lspid = task_tgid_vnr(current);
 	msq->q_stime = get_seconds();

 	if (!pipelined_send(msq, msg)) {
 		/* noone is waiting for this message, enqueue it */
 		list_add_tail(&msg->m_list, &msq->q_messages);
 		msq->q_cbytes += msgsz;
 		msq->q_qnum++;
 		atomic_add(msgsz, &msg_bytes);
 		atomic_inc(&msg_hdrs);
 	}

 	err = 0;
 	msg = NULL;

 out_unlock_free:
 	msg_unlock(msq);
 out_free:
 	if (msg != NULL)
 		free_msg(msg);
 	return err;
 }

 asmlinkage long
 sys_msgsnd(int msqid, struct msgbuf __user *msgp, size_t msgsz, int msgflg)
 {
 	long mtype;

 	if (get_user(mtype, &msgp->mtype))
 		return -EFAULT;
 	return do_msgsnd(msqid, mtype, msgp->mtext, msgsz, msgflg);
 }

 static inline int convert_mode(long *msgtyp, int msgflg)
 {
 	/*
 	 * find message of correct type.
 	 * msgtyp = 0 => get first.
 	 * msgtyp > 0 => get first message of matching type.
 	 * msgtyp < 0 => get message with least type must be < abs(msgtype).
 	 */
 	if (*msgtyp == 0)
 		return SEARCH_ANY;
 	if (*msgtyp < 0) {
 		*msgtyp = -*msgtyp;
 		return SEARCH_LESSEQUAL;
 	}
 	if (msgflg & MSG_EXCEPT)
 		return SEARCH_NOTEQUAL;
 	return SEARCH_EQUAL;
 }

 long do_msgrcv(int msqid, long *pmtype, void __user *mtext,
 		size_t msgsz, long msgtyp, int msgflg)
 {
 	struct msg_queue *msq;
 	struct msg_msg *msg;
 	int mode;
 	struct ipc_namespace *ns;

 	if (msqid < 0 || (long) msgsz < 0)
 		return -EINVAL;
 	mode = convert_mode(&msgtyp, msgflg);
 	ns = current->nsproxy->ipc_ns;

 	msq = msg_lock_check(ns, msqid);
 	if (IS_ERR(msq))
 		return PTR_ERR(msq);

 	for (;;) {
 		struct msg_receiver msr_d;
 		struct list_head *tmp;

 		msg = ERR_PTR(-EACCES);
 		if (ipcperms(&msq->q_perm, S_IRUGO))
 			goto out_unlock;

 		msg = ERR_PTR(-EAGAIN);
 		tmp = msq->q_messages.next;
 		while (tmp != &msq->q_messages) {
 			struct msg_msg *walk_msg;

 			walk_msg = list_entry(tmp, struct msg_msg, m_list);
 			if (testmsg(walk_msg, msgtyp, mode) &&
 			    !security_msg_queue_msgrcv(msq, walk_msg, current,
 						       msgtyp, mode)) {

 				msg = walk_msg;
 				if (mode == SEARCH_LESSEQUAL &&
 						walk_msg->m_type != 1) {
 					msg = walk_msg;
 					msgtyp = walk_msg->m_type - 1;
 				} else {
 					msg = walk_msg;
 					break;
 				}
 			}
 			tmp = tmp->next;
 		}
 		if (!IS_ERR(msg)) {
 			/*
 			 * Found a suitable message.
 			 * Unlink it from the queue.
 			 */
 			if ((msgsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) {
 				msg = ERR_PTR(-E2BIG);
 				goto out_unlock;
 			}
 			list_del(&msg->m_list);
 			msq->q_qnum--;
 			msq->q_rtime = get_seconds();
 			msq->q_lrpid = task_tgid_vnr(current);
 			msq->q_cbytes -= msg->m_ts;
 			atomic_sub(msg->m_ts, &msg_bytes);
 			atomic_dec(&msg_hdrs);
 			ss_wakeup(&msq->q_senders, 0);
 			msg_unlock(msq);
 			break;
 		}
 		/* No message waiting. Wait for a message */
 		if (msgflg & IPC_NOWAIT) {
 			msg = ERR_PTR(-ENOMSG);
 			goto out_unlock;
 		}
 		list_add_tail(&msr_d.r_list, &msq->q_receivers);
 		msr_d.r_tsk = current;
 		msr_d.r_msgtype = msgtyp;
 		msr_d.r_mode = mode;
 		if (msgflg & MSG_NOERROR)
 			msr_d.r_maxsize = INT_MAX;
 		else
 			msr_d.r_maxsize = msgsz;
 		msr_d.r_msg = ERR_PTR(-EAGAIN);
 		current->state = TASK_INTERRUPTIBLE;
 		msg_unlock(msq);

 		schedule();

 		/* Lockless receive, part 1:
 		 * Disable preemption. We don't hold a reference to the queue
 		 * and getting a reference would defeat the idea of a lockless
 		 * operation, thus the code relies on rcu to guarantee the
 		 * existance of msq:
 		 * Prior to destruction, expunge_all(-EIRDM) changes r_msg.
 		 * Thus if r_msg is -EAGAIN, then the queue not yet destroyed.
 		 * rcu_read_lock() prevents preemption between reading r_msg
 		 * and the spin_lock() inside ipc_lock_by_ptr().
 		 */
 		rcu_read_lock();

 		/* Lockless receive, part 2:
 		 * Wait until pipelined_send or expunge_all are outside of
 		 * wake_up_process(). There is a race with exit(), see
 		 * ipc/mqueue.c for the details.
 		 */
 		msg = (struct msg_msg*)msr_d.r_msg;
 		while (msg == NULL) {
 			cpu_relax();
 			msg = (struct msg_msg *)msr_d.r_msg;
 		}

 		/* Lockless receive, part 3:
 		 * If there is a message or an error then accept it without
 		 * locking.
 		 */
 		if (msg != ERR_PTR(-EAGAIN)) {
 			rcu_read_unlock();
 			break;
 		}

 		/* Lockless receive, part 3:
 		 * Acquire the queue spinlock.
 		 */
 		ipc_lock_by_ptr(&msq->q_perm);
 		rcu_read_unlock();

 		/* Lockless receive, part 4:
 		 * Repeat test after acquiring the spinlock.
 		 */
 		msg = (struct msg_msg*)msr_d.r_msg;
 		if (msg != ERR_PTR(-EAGAIN))
 			goto out_unlock;

 		list_del(&msr_d.r_list);
 		if (signal_pending(current)) {
 			msg = ERR_PTR(-ERESTARTNOHAND);
 out_unlock:
 			msg_unlock(msq);
 			break;
 		}
 	}
 	if (IS_ERR(msg))
 		return PTR_ERR(msg);

 	msgsz = (msgsz > msg->m_ts) ? msg->m_ts : msgsz;
 	*pmtype = msg->m_type;
 	if (store_msg(mtext, msg, msgsz))
 		msgsz = -EFAULT;

 	free_msg(msg);

 	return msgsz;
 }

 asmlinkage long sys_msgrcv(int msqid, struct msgbuf __user *msgp, size_t msgsz,
 			   long msgtyp, int msgflg)
 {
 	long err, mtype;

 	err = do_msgrcv(msqid, &mtype, msgp->mtext, msgsz, msgtyp, msgflg);
 	if (err < 0)
 		goto out;

 	if (put_user(mtype, &msgp->mtype))
 		err = -EFAULT;
 out:
 	return err;
 }

 #ifdef CONFIG_PROC_FS
 static int sysvipc_msg_proc_show(struct seq_file *s, void *it)
 {
 	struct msg_queue *msq = it;

 	return seq_printf(s,
 			"%10d %10d %4o %10lu %10lu %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu\n",
 			msq->q_perm.key,
 			msq->q_perm.id,
 			msq->q_perm.mode,
 			msq->q_cbytes,
 			msq->q_qnum,
 			msq->q_lspid,
 			msq->q_lrpid,
 			msq->q_perm.uid,
 			msq->q_perm.gid,
 			msq->q_perm.cuid,
 			msq->q_perm.cgid,
 			msq->q_stime,
 			msq->q_rtime,
 			msq->q_ctime);
 }
 #endif

--- a/ipc/sem.c
+++ b/ipc/sem.c
 /*
  * linux/ipc/sem.c
  * Copyright (C) 1992 Krishna Balasubramanian
  * Copyright (C) 1995 Eric Schenk, Bruno Haible
  *
  * IMPLEMENTATION NOTES ON CODE REWRITE (Eric Schenk, January 1995):
  * This code underwent a massive rewrite in order to solve some problems
  * with the original code. In particular the original code failed to
  * wake up processes that were waiting for semval to go to 0 if the
  * value went to 0 and was then incremented rapidly enough. In solving
  * this problem I have also modified the implementation so that it
  * processes pending operations in a FIFO manner, thus give a guarantee
  * that processes waiting for a lock on the semaphore won't starve
  * unless another locking process fails to unlock.
  * In addition the following two changes in behavior have been introduced:
  * - The original implementation of semop returned the value
  *   last semaphore element examined on success. This does not
  *   match the manual page specifications, and effectively
  *   allows the user to read the semaphore even if they do not
  *   have read permissions. The implementation now returns 0
  *   on success as stated in the manual page.
  * - There is some confusion over whether the set of undo adjustments
  *   to be performed at exit should be done in an atomic manner.
  *   That is, if we are attempting to decrement the semval should we queue
  *   up and wait until we can do so legally?
  *   The original implementation attempted to do this.
  *   The current implementation does not do so. This is because I don't
  *   think it is the right thing (TM) to do, and because I couldn't
  *   see a clean way to get the old behavior with the new design.
  *   The POSIX standard and SVID should be consulted to determine
  *   what behavior is mandated.
  *
  * Further notes on refinement (Christoph Rohland, December 1998):
  * - The POSIX standard says, that the undo adjustments simply should
  *   redo. So the current implementation is o.K.
  * - The previous code had two flaws:
  *   1) It actively gave the semaphore to the next waiting process
  *      sleeping on the semaphore. Since this process did not have the
  *      cpu this led to many unnecessary context switches and bad
  *      performance. Now we only check which process should be able to
  *      get the semaphore and if this process wants to reduce some
  *      semaphore value we simply wake it up without doing the
  *      operation. So it has to try to get it later. Thus e.g. the
  *      running process may reacquire the semaphore during the current
  *      time slice. If it only waits for zero or increases the semaphore,
  *      we do the operation in advance and wake it up.
  *   2) It did not wake up all zero waiting processes. We try to do
  *      better but only get the semops right which only wait for zero or
  *      increase. If there are decrement operations in the operations
  *      array we do the same as before.
  *
  * With the incarnation of O(1) scheduler, it becomes unnecessary to perform
  * check/retry algorithm for waking up blocked processes as the new scheduler
  * is better at handling thread switch than the old one.
  *
  * /proc/sysvipc/sem support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
  *
  * SMP-threaded, sysctl's added
  * (c) 1999 Manfred Spraul <manfred@colorfullife.com>
  * Enforced range limit on SEM_UNDO
  * (c) 2001 Red Hat Inc <alan@redhat.com>
  * Lockless wakeup
  * (c) 2003 Manfred Spraul <manfred@colorfullife.com>
  *
  * support for audit of ipc object properties and permission changes
  * Dustin Kirkland <dustin.kirkland@us.ibm.com>
  *
  * namespaces support
  * OpenVZ, SWsoft Inc.
  * Pavel Emelianov <xemul@openvz.org>
  */

 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/init.h>
 #include <linux/proc_fs.h>
 #include <linux/time.h>
 #include <linux/security.h>
 #include <linux/syscalls.h>
 #include <linux/audit.h>
 #include <linux/capability.h>
 #include <linux/seq_file.h>
 #include <linux/mutex.h>
 #include <linux/nsproxy.h>

 #include <asm/uaccess.h>
 #include "util.h"

 #define sem_ids(ns) (*((ns)->ids[IPC_SEM_IDS]))

 #define sem_unlock(sma) ipc_unlock(&(sma)->sem_perm)
 #define sem_checkid(ns, sma, semid) \
 	ipc_checkid(&sem_ids(ns),&sma->sem_perm,semid)
 #define sem_buildid(ns, id, seq) \
 	ipc_buildid(&sem_ids(ns), id, seq)

 static struct ipc_ids init_sem_ids;

 static int newary(struct ipc_namespace *, struct ipc_params *);
 static void freeary(struct ipc_namespace *, struct sem_array *);
 #ifdef CONFIG_PROC_FS
 static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
 #endif

 #define SEMMSL_FAST 256 /* 512 bytes on stack */
 #define SEMOPM_FAST 64  /* ~ 372 bytes on stack */

 /*
  * linked list protection:
  *	sem_undo.id_next,
  *	sem_array.sem_pending{,last},
  *	sem_array.sem_undo: sem_lock() for read/write
  *	sem_undo.proc_next: only "current" is allowed to read/write that field.
  *
  */

 #define sc_semmsl sem_ctls[0]
 #define sc_semmns sem_ctls[1]
 #define sc_semopm sem_ctls[2]
 #define sc_semmni sem_ctls[3]

 static void __sem_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids)
 {
 	ns->ids[IPC_SEM_IDS] = ids;
 	ns->sc_semmsl = SEMMSL;
 	ns->sc_semmns = SEMMNS;
 	ns->sc_semopm = SEMOPM;
 	ns->sc_semmni = SEMMNI;
 	ns->used_sems = 0;
 	ipc_init_ids(ids);
 }

 int sem_init_ns(struct ipc_namespace *ns)
 {
 	struct ipc_ids *ids;

 	ids = kmalloc(sizeof(struct ipc_ids), GFP_KERNEL);
 	if (ids == NULL)
 		return -ENOMEM;

 	__sem_init_ns(ns, ids);
 	return 0;
 }

 void sem_exit_ns(struct ipc_namespace *ns)
 {
 	struct sem_array *sma;
 	int next_id;
 	int total, in_use;

 	mutex_lock(&sem_ids(ns).mutex);

 	in_use = sem_ids(ns).in_use;

 	for (total = 0, next_id = 0; total < in_use; next_id++) {
 		sma = idr_find(&sem_ids(ns).ipcs_idr, next_id);
 		if (sma == NULL)
 			continue;
 		ipc_lock_by_ptr(&sma->sem_perm);
 		freeary(ns, sma);
 		total++;
 	}
 	mutex_unlock(&sem_ids(ns).mutex);

 	kfree(ns->ids[IPC_SEM_IDS]);
 	ns->ids[IPC_SEM_IDS] = NULL;
 }

 void __init sem_init (void)
 {
 	__sem_init_ns(&init_ipc_ns, &init_sem_ids);
 	ipc_init_proc_interface("sysvipc/sem",
 			" key semid perms nsems uid gid cuid cgid otime ctime\n",
 			IPC_SEM_IDS, sysvipc_sem_proc_show);
 }

 static inline struct sem_array *sem_lock(struct ipc_namespace *ns, int id)
 {
 	struct kern_ipc_perm *ipcp = ipc_lock(&sem_ids(ns), id);

 	return container_of(ipcp, struct sem_array, sem_perm);
 }

 static inline struct sem_array *sem_lock_check(struct ipc_namespace *ns,
 						int id)
 {
 	struct kern_ipc_perm *ipcp = ipc_lock_check(&sem_ids(ns), id);
188 188
189 return container_of(ipcp, struct sem_array, sem_perm); 189 return container_of(ipcp, struct sem_array, sem_perm);
190 } 190 }
191 191
192 static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s) 192 static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
193 { 193 {
194 ipc_rmid(&sem_ids(ns), &s->sem_perm); 194 ipc_rmid(&sem_ids(ns), &s->sem_perm);
195 } 195 }
196 196
197 /* 197 /*
198 * Lockless wakeup algorithm: 198 * Lockless wakeup algorithm:
199 * Without the check/retry algorithm a lockless wakeup is possible: 199 * Without the check/retry algorithm a lockless wakeup is possible:
200 * - queue.status is initialized to -EINTR before blocking. 200 * - queue.status is initialized to -EINTR before blocking.
201 * - wakeup is performed by 201 * - wakeup is performed by
202 * * unlinking the queue entry from sma->sem_pending 202 * * unlinking the queue entry from sma->sem_pending
203 * * setting queue.status to IN_WAKEUP 203 * * setting queue.status to IN_WAKEUP
204 * This is the notification for the blocked thread that a 204 * This is the notification for the blocked thread that a
205 * result value is imminent. 205 * result value is imminent.
206 * * call wake_up_process 206 * * call wake_up_process
207 * * set queue.status to the final value. 207 * * set queue.status to the final value.
208 * - the previously blocked thread checks queue.status: 208 * - the previously blocked thread checks queue.status:
209 * * if it's IN_WAKEUP, then it must wait until the value changes 209 * * if it's IN_WAKEUP, then it must wait until the value changes
210 * * if it's not -EINTR, then the operation was completed by 210 * * if it's not -EINTR, then the operation was completed by
211 * update_queue. semtimedop can return queue.status without 211 * update_queue. semtimedop can return queue.status without
212 * performing any operation on the sem array. 212 * performing any operation on the sem array.
213 * * otherwise it must acquire the spinlock and check what's up. 213 * * otherwise it must acquire the spinlock and check what's up.
214 * 214 *
215 * The two-stage algorithm is necessary to protect against the following 215 * The two-stage algorithm is necessary to protect against the following
216 * races: 216 * races:
217 * - if queue.status is set after wake_up_process, then the woken up idle 217 * - if queue.status is set after wake_up_process, then the woken up idle
218 * thread could race forward and try (and fail) to acquire sma->lock 218 * thread could race forward and try (and fail) to acquire sma->lock
219 * before update_queue had a chance to set queue.status 219 * before update_queue had a chance to set queue.status
220 * - if queue.status is written before wake_up_process and if the 220 * - if queue.status is written before wake_up_process and if the
221 * blocked process is woken up by a signal between writing 221 * blocked process is woken up by a signal between writing
222 * queue.status and the wake_up_process, then the woken up 222 * queue.status and the wake_up_process, then the woken up
223 * process could return from semtimedop and die by calling 223 * process could return from semtimedop and die by calling
224 * sys_exit before wake_up_process is called. Then wake_up_process 224 * sys_exit before wake_up_process is called. Then wake_up_process
225 * will oops, because the task structure is already invalid. 225 * will oops, because the task structure is already invalid.
226 * (yes, this happened on s390 with sysv msg). 226 * (yes, this happened on s390 with sysv msg).
227 * 227 *
228 */ 228 */
229 #define IN_WAKEUP 1 229 #define IN_WAKEUP 1
230 230
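The comment above describes the protocol in prose; a minimal user-space sketch of the same two-stage handshake, using C11 atomics, may help. All names here (fake_queue, waker, sleeper_read) are hypothetical stand-ins for sem_queue, update_queue and the wait loop in semtimedop.

    #include <stdatomic.h>
    #include <sched.h>

    #define IN_WAKEUP      1
    #define STILL_BLOCKED  (-4)            /* plays the role of -EINTR */

    struct fake_queue {
            _Atomic int status;            /* plays the role of queue.status */
    };

    /* Waker side: mark the entry, wake the sleeper, then publish the result. */
    static void waker(struct fake_queue *q, int result)
    {
            atomic_store(&q->status, IN_WAKEUP);
            /* wake_up_process(q->sleeper) would happen here */
            atomic_store(&q->status, result);
    }

    /* Sleeper side: spin past the transient IN_WAKEUP marker. */
    static int sleeper_read(struct fake_queue *q)
    {
            int s = atomic_load(&q->status);

            while (s == IN_WAKEUP) {       /* the final value is imminent */
                    sched_yield();         /* user-space cpu_relax() analogue */
                    s = atomic_load(&q->status);
            }
            return s;     /* STILL_BLOCKED means: take the lock and recheck */
    }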
231 /**
232 * newary - Create a new semaphore set
233 * @ns: namespace
234 * @params: ptr to the structure that contains key, semflg and nsems
235 *
236 * Called with sem_ids.mutex held
237 */
238
231 static int newary(struct ipc_namespace *ns, struct ipc_params *params) 239 static int newary(struct ipc_namespace *ns, struct ipc_params *params)
232 { 240 {
233 int id; 241 int id;
234 int retval; 242 int retval;
235 struct sem_array *sma; 243 struct sem_array *sma;
236 int size; 244 int size;
237 key_t key = params->key; 245 key_t key = params->key;
238 int nsems = params->u.nsems; 246 int nsems = params->u.nsems;
239 int semflg = params->flg; 247 int semflg = params->flg;
240 248
241 if (!nsems) 249 if (!nsems)
242 return -EINVAL; 250 return -EINVAL;
243 if (ns->used_sems + nsems > ns->sc_semmns) 251 if (ns->used_sems + nsems > ns->sc_semmns)
244 return -ENOSPC; 252 return -ENOSPC;
245 253
246 size = sizeof (*sma) + nsems * sizeof (struct sem); 254 size = sizeof (*sma) + nsems * sizeof (struct sem);
247 sma = ipc_rcu_alloc(size); 255 sma = ipc_rcu_alloc(size);
248 if (!sma) { 256 if (!sma) {
249 return -ENOMEM; 257 return -ENOMEM;
250 } 258 }
251 memset (sma, 0, size); 259 memset (sma, 0, size);
252 260
253 sma->sem_perm.mode = (semflg & S_IRWXUGO); 261 sma->sem_perm.mode = (semflg & S_IRWXUGO);
254 sma->sem_perm.key = key; 262 sma->sem_perm.key = key;
255 263
256 sma->sem_perm.security = NULL; 264 sma->sem_perm.security = NULL;
257 retval = security_sem_alloc(sma); 265 retval = security_sem_alloc(sma);
258 if (retval) { 266 if (retval) {
259 ipc_rcu_putref(sma); 267 ipc_rcu_putref(sma);
260 return retval; 268 return retval;
261 } 269 }
262 270
263 id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni); 271 id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
264 if(id == -1) { 272 if(id == -1) {
265 security_sem_free(sma); 273 security_sem_free(sma);
266 ipc_rcu_putref(sma); 274 ipc_rcu_putref(sma);
267 return -ENOSPC; 275 return -ENOSPC;
268 } 276 }
269 ns->used_sems += nsems; 277 ns->used_sems += nsems;
270 278
271 sma->sem_perm.id = sem_buildid(ns, id, sma->sem_perm.seq); 279 sma->sem_perm.id = sem_buildid(ns, id, sma->sem_perm.seq);
272 sma->sem_base = (struct sem *) &sma[1]; 280 sma->sem_base = (struct sem *) &sma[1];
273 /* sma->sem_pending = NULL; */ 281 /* sma->sem_pending = NULL; */
274 sma->sem_pending_last = &sma->sem_pending; 282 sma->sem_pending_last = &sma->sem_pending;
275 /* sma->undo = NULL; */ 283 /* sma->undo = NULL; */
276 sma->sem_nsems = nsems; 284 sma->sem_nsems = nsems;
277 sma->sem_ctime = get_seconds(); 285 sma->sem_ctime = get_seconds();
278 sem_unlock(sma); 286 sem_unlock(sma);
279 287
280 return sma->sem_perm.id; 288 return sma->sem_perm.id;
281 } 289 }
282 290
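newary() sizes a single allocation to hold the sem_array header plus its nsems struct sem entries, then points sem_base just past the header ((struct sem *)&sma[1]). A stand-alone sketch of that layout trick, with hypothetical names:

    #include <stdlib.h>
    #include <string.h>

    struct demo_sem   { int semval; int sempid; };
    struct demo_array { int nsems; struct demo_sem *sem_base; };

    static struct demo_array *demo_alloc(int nsems)
    {
            size_t size = sizeof(struct demo_array)
                          + nsems * sizeof(struct demo_sem);
            struct demo_array *a = malloc(size);

            if (!a)
                    return NULL;
            memset(a, 0, size);
            a->nsems = nsems;
            a->sem_base = (struct demo_sem *)&a[1];  /* trailing array */
            return a;
    }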
283 291
292 /*
293 * Called with sem_ids.mutex and ipcp locked.
294 */
284 static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg) 295 static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg)
285 { 296 {
286 struct sem_array *sma; 297 struct sem_array *sma;
287 298
288 sma = container_of(ipcp, struct sem_array, sem_perm); 299 sma = container_of(ipcp, struct sem_array, sem_perm);
289 return security_sem_associate(sma, semflg); 300 return security_sem_associate(sma, semflg);
290 } 301 }
291 302
303 /*
304 * Called with sem_ids.mutex and ipcp locked.
305 */
292 static inline int sem_more_checks(struct kern_ipc_perm *ipcp, 306 static inline int sem_more_checks(struct kern_ipc_perm *ipcp,
293 struct ipc_params *params) 307 struct ipc_params *params)
294 { 308 {
295 struct sem_array *sma; 309 struct sem_array *sma;
296 310
297 sma = container_of(ipcp, struct sem_array, sem_perm); 311 sma = container_of(ipcp, struct sem_array, sem_perm);
298 if (params->u.nsems > sma->sem_nsems) 312 if (params->u.nsems > sma->sem_nsems)
299 return -EINVAL; 313 return -EINVAL;
300 314
301 return 0; 315 return 0;
302 } 316 }
303 317
304 asmlinkage long sys_semget(key_t key, int nsems, int semflg) 318 asmlinkage long sys_semget(key_t key, int nsems, int semflg)
305 { 319 {
306 struct ipc_namespace *ns; 320 struct ipc_namespace *ns;
307 struct ipc_ops sem_ops; 321 struct ipc_ops sem_ops;
308 struct ipc_params sem_params; 322 struct ipc_params sem_params;
309 323
310 ns = current->nsproxy->ipc_ns; 324 ns = current->nsproxy->ipc_ns;
311 325
312 if (nsems < 0 || nsems > ns->sc_semmsl) 326 if (nsems < 0 || nsems > ns->sc_semmsl)
313 return -EINVAL; 327 return -EINVAL;
314 328
315 sem_ops.getnew = newary; 329 sem_ops.getnew = newary;
316 sem_ops.associate = sem_security; 330 sem_ops.associate = sem_security;
317 sem_ops.more_checks = sem_more_checks; 331 sem_ops.more_checks = sem_more_checks;
318 332
319 sem_params.key = key; 333 sem_params.key = key;
320 sem_params.flg = semflg; 334 sem_params.flg = semflg;
321 sem_params.u.nsems = nsems; 335 sem_params.u.nsems = nsems;
322 336
323 return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params); 337 return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params);
324 } 338 }
325 339
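From user space this path is reached through semget(2). A hedged example that creates or opens a three-semaphore set (the key is an arbitrary example value):

    #include <sys/types.h>
    #include <sys/ipc.h>
    #include <sys/sem.h>

    static int open_set(void)
    {
            /* fails with EINVAL if the set already exists with fewer than
             * three semaphores -- that check is sem_more_checks() above */
            return semget((key_t)0x1234, 3, IPC_CREAT | 0600);
    }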
326 /* Manage the doubly linked list sma->sem_pending as a FIFO: 340 /* Manage the doubly linked list sma->sem_pending as a FIFO:
327 * insert new queue elements at the tail sma->sem_pending_last. 341 * insert new queue elements at the tail sma->sem_pending_last.
328 */ 342 */
329 static inline void append_to_queue (struct sem_array * sma, 343 static inline void append_to_queue (struct sem_array * sma,
330 struct sem_queue * q) 344 struct sem_queue * q)
331 { 345 {
332 *(q->prev = sma->sem_pending_last) = q; 346 *(q->prev = sma->sem_pending_last) = q;
333 *(sma->sem_pending_last = &q->next) = NULL; 347 *(sma->sem_pending_last = &q->next) = NULL;
334 } 348 }
335 349
336 static inline void prepend_to_queue (struct sem_array * sma, 350 static inline void prepend_to_queue (struct sem_array * sma,
337 struct sem_queue * q) 351 struct sem_queue * q)
338 { 352 {
339 q->next = sma->sem_pending; 353 q->next = sma->sem_pending;
340 *(q->prev = &sma->sem_pending) = q; 354 *(q->prev = &sma->sem_pending) = q;
341 if (q->next) 355 if (q->next)
342 q->next->prev = &q->next; 356 q->next->prev = &q->next;
343 else /* sma->sem_pending_last == &sma->sem_pending */ 357 else /* sma->sem_pending_last == &sma->sem_pending */
344 sma->sem_pending_last = &q->next; 358 sma->sem_pending_last = &q->next;
345 } 359 }
346 360
347 static inline void remove_from_queue (struct sem_array * sma, 361 static inline void remove_from_queue (struct sem_array * sma,
348 struct sem_queue * q) 362 struct sem_queue * q)
349 { 363 {
350 *(q->prev) = q->next; 364 *(q->prev) = q->next;
351 if (q->next) 365 if (q->next)
352 q->next->prev = q->prev; 366 q->next->prev = q->prev;
353 else /* sma->sem_pending_last == &q->next */ 367 else /* sma->sem_pending_last == &q->next */
354 sma->sem_pending_last = q->prev; 368 sma->sem_pending_last = q->prev;
355 q->prev = NULL; /* mark as removed */ 369 q->prev = NULL; /* mark as removed */
356 } 370 }
357 371
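Note that q->prev is not a pointer to the previous element but to the previous link: the prior element's next field, or sma->sem_pending itself. That is why remove_from_queue() needs no head-of-list special case. A self-contained sketch of the idiom, with hypothetical names:

    struct node { struct node *next, **prev; };

    static void push_front(struct node **head, struct node *n)
    {
            n->next = *head;
            n->prev = head;
            if (n->next)
                    n->next->prev = &n->next;
            *head = n;
    }

    static void unlink_node(struct node *n)
    {
            *n->prev = n->next;            /* works even for the first element */
            if (n->next)
                    n->next->prev = n->prev;
            n->prev = NULL;                /* mark as removed */
    }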
358 /* 372 /*
359 * Determine whether a sequence of semaphore operations would succeed 373 * Determine whether a sequence of semaphore operations would succeed
360 * all at once. Return 0 if yes, 1 if the caller needs to sleep, else an error code. 374 * all at once. Return 0 if yes, 1 if the caller needs to sleep, else an error code.
361 */ 375 */
362 376
363 static int try_atomic_semop (struct sem_array * sma, struct sembuf * sops, 377 static int try_atomic_semop (struct sem_array * sma, struct sembuf * sops,
364 int nsops, struct sem_undo *un, int pid) 378 int nsops, struct sem_undo *un, int pid)
365 { 379 {
366 int result, sem_op; 380 int result, sem_op;
367 struct sembuf *sop; 381 struct sembuf *sop;
368 struct sem * curr; 382 struct sem * curr;
369 383
370 for (sop = sops; sop < sops + nsops; sop++) { 384 for (sop = sops; sop < sops + nsops; sop++) {
371 curr = sma->sem_base + sop->sem_num; 385 curr = sma->sem_base + sop->sem_num;
372 sem_op = sop->sem_op; 386 sem_op = sop->sem_op;
373 result = curr->semval; 387 result = curr->semval;
374 388
375 if (!sem_op && result) 389 if (!sem_op && result)
376 goto would_block; 390 goto would_block;
377 391
378 result += sem_op; 392 result += sem_op;
379 if (result < 0) 393 if (result < 0)
380 goto would_block; 394 goto would_block;
381 if (result > SEMVMX) 395 if (result > SEMVMX)
382 goto out_of_range; 396 goto out_of_range;
383 if (sop->sem_flg & SEM_UNDO) { 397 if (sop->sem_flg & SEM_UNDO) {
384 int undo = un->semadj[sop->sem_num] - sem_op; 398 int undo = un->semadj[sop->sem_num] - sem_op;
385 /* 399 /*
386 * Exceeding the undo range is an error. 400 * Exceeding the undo range is an error.
387 */ 401 */
388 if (undo < (-SEMAEM - 1) || undo > SEMAEM) 402 if (undo < (-SEMAEM - 1) || undo > SEMAEM)
389 goto out_of_range; 403 goto out_of_range;
390 } 404 }
391 curr->semval = result; 405 curr->semval = result;
392 } 406 }
393 407
394 sop--; 408 sop--;
395 while (sop >= sops) { 409 while (sop >= sops) {
396 sma->sem_base[sop->sem_num].sempid = pid; 410 sma->sem_base[sop->sem_num].sempid = pid;
397 if (sop->sem_flg & SEM_UNDO) 411 if (sop->sem_flg & SEM_UNDO)
398 un->semadj[sop->sem_num] -= sop->sem_op; 412 un->semadj[sop->sem_num] -= sop->sem_op;
399 sop--; 413 sop--;
400 } 414 }
401 415
402 sma->sem_otime = get_seconds(); 416 sma->sem_otime = get_seconds();
403 return 0; 417 return 0;
404 418
405 out_of_range: 419 out_of_range:
406 result = -ERANGE; 420 result = -ERANGE;
407 goto undo; 421 goto undo;
408 422
409 would_block: 423 would_block:
410 if (sop->sem_flg & IPC_NOWAIT) 424 if (sop->sem_flg & IPC_NOWAIT)
411 result = -EAGAIN; 425 result = -EAGAIN;
412 else 426 else
413 result = 1; 427 result = 1;
414 428
415 undo: 429 undo:
416 sop--; 430 sop--;
417 while (sop >= sops) { 431 while (sop >= sops) {
418 sma->sem_base[sop->sem_num].semval -= sop->sem_op; 432 sma->sem_base[sop->sem_num].semval -= sop->sem_op;
419 sop--; 433 sop--;
420 } 434 }
421 435
422 return result; 436 return result;
423 } 437 }
424 438
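This all-or-nothing contract is what user space relies on for deadlock avoidance: a single semop(2) call either applies every operation in the array or blocks without applying any. A hedged example taking two "locks" in one atomic step:

    #include <sys/types.h>
    #include <sys/ipc.h>
    #include <sys/sem.h>

    static int lock_both(int semid)
    {
            struct sembuf ops[2] = {
                    { .sem_num = 0, .sem_op = -1, .sem_flg = SEM_UNDO },
                    { .sem_num = 1, .sem_op = -1, .sem_flg = SEM_UNDO },
            };

            /* blocks until both decrements can succeed together */
            return semop(semid, ops, 2);
    }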
425 /* Go through the pending queue for the indicated semaphore 439 /* Go through the pending queue for the indicated semaphore
426 * looking for tasks that can be completed. 440 * looking for tasks that can be completed.
427 */ 441 */
428 static void update_queue (struct sem_array * sma) 442 static void update_queue (struct sem_array * sma)
429 { 443 {
430 int error; 444 int error;
431 struct sem_queue * q; 445 struct sem_queue * q;
432 446
433 q = sma->sem_pending; 447 q = sma->sem_pending;
434 while(q) { 448 while(q) {
435 error = try_atomic_semop(sma, q->sops, q->nsops, 449 error = try_atomic_semop(sma, q->sops, q->nsops,
436 q->undo, q->pid); 450 q->undo, q->pid);
437 451
438 /* Does q->sleeper still need to sleep? */ 452 /* Does q->sleeper still need to sleep? */
439 if (error <= 0) { 453 if (error <= 0) {
440 struct sem_queue *n; 454 struct sem_queue *n;
441 remove_from_queue(sma,q); 455 remove_from_queue(sma,q);
442 q->status = IN_WAKEUP; 456 q->status = IN_WAKEUP;
443 /* 457 /*
444 * Continue scanning. The next operation 458 * Continue scanning. The next operation
445 * that must be checked depends on the type of the 459 * that must be checked depends on the type of the
446 * completed operation: 460 * completed operation:
447 * - if the operation modified the array, then 461 * - if the operation modified the array, then
448 * restart from the head of the queue and 462 * restart from the head of the queue and
449 * check for threads that might be waiting 463 * check for threads that might be waiting
450 * for semaphore values to become 0. 464 * for semaphore values to become 0.
451 * - if the operation didn't modify the array, 465 * - if the operation didn't modify the array,
452 * then just continue. 466 * then just continue.
453 */ 467 */
454 if (q->alter) 468 if (q->alter)
455 n = sma->sem_pending; 469 n = sma->sem_pending;
456 else 470 else
457 n = q->next; 471 n = q->next;
458 wake_up_process(q->sleeper); 472 wake_up_process(q->sleeper);
459 /* hands-off: q will disappear immediately after 473 /* hands-off: q will disappear immediately after
460 * writing q->status. 474 * writing q->status.
461 */ 475 */
462 smp_wmb(); 476 smp_wmb();
463 q->status = error; 477 q->status = error;
464 q = n; 478 q = n;
465 } else { 479 } else {
466 q = q->next; 480 q = q->next;
467 } 481 }
468 } 482 }
469 } 483 }
470 484
471 /* The following counts are associated with each semaphore: 485 /* The following counts are associated with each semaphore:
472 * semncnt number of tasks waiting on semval being nonzero 486 * semncnt number of tasks waiting on semval being nonzero
473 * semzcnt number of tasks waiting on semval being zero 487 * semzcnt number of tasks waiting on semval being zero
474 * This model assumes that a task waits on exactly one semaphore. 488 * This model assumes that a task waits on exactly one semaphore.
475 * Since semaphore operations are to be performed atomically, tasks actually 489 * Since semaphore operations are to be performed atomically, tasks actually
476 * wait on a whole sequence of semaphores simultaneously. 490 * wait on a whole sequence of semaphores simultaneously.
477 * The counts we return here are a rough approximation, but still 491 * The counts we return here are a rough approximation, but still
478 * warrant that semncnt+semzcnt>0 if the task is on the pending queue. 492 * warrant that semncnt+semzcnt>0 if the task is on the pending queue.
479 */ 493 */
480 static int count_semncnt (struct sem_array * sma, ushort semnum) 494 static int count_semncnt (struct sem_array * sma, ushort semnum)
481 { 495 {
482 int semncnt; 496 int semncnt;
483 struct sem_queue * q; 497 struct sem_queue * q;
484 498
485 semncnt = 0; 499 semncnt = 0;
486 for (q = sma->sem_pending; q; q = q->next) { 500 for (q = sma->sem_pending; q; q = q->next) {
487 struct sembuf * sops = q->sops; 501 struct sembuf * sops = q->sops;
488 int nsops = q->nsops; 502 int nsops = q->nsops;
489 int i; 503 int i;
490 for (i = 0; i < nsops; i++) 504 for (i = 0; i < nsops; i++)
491 if (sops[i].sem_num == semnum 505 if (sops[i].sem_num == semnum
492 && (sops[i].sem_op < 0) 506 && (sops[i].sem_op < 0)
493 && !(sops[i].sem_flg & IPC_NOWAIT)) 507 && !(sops[i].sem_flg & IPC_NOWAIT))
494 semncnt++; 508 semncnt++;
495 } 509 }
496 return semncnt; 510 return semncnt;
497 } 511 }
498 static int count_semzcnt (struct sem_array * sma, ushort semnum) 512 static int count_semzcnt (struct sem_array * sma, ushort semnum)
499 { 513 {
500 int semzcnt; 514 int semzcnt;
501 struct sem_queue * q; 515 struct sem_queue * q;
502 516
503 semzcnt = 0; 517 semzcnt = 0;
504 for (q = sma->sem_pending; q; q = q->next) { 518 for (q = sma->sem_pending; q; q = q->next) {
505 struct sembuf * sops = q->sops; 519 struct sembuf * sops = q->sops;
506 int nsops = q->nsops; 520 int nsops = q->nsops;
507 int i; 521 int i;
508 for (i = 0; i < nsops; i++) 522 for (i = 0; i < nsops; i++)
509 if (sops[i].sem_num == semnum 523 if (sops[i].sem_num == semnum
510 && (sops[i].sem_op == 0) 524 && (sops[i].sem_op == 0)
511 && !(sops[i].sem_flg & IPC_NOWAIT)) 525 && !(sops[i].sem_flg & IPC_NOWAIT))
512 semzcnt++; 526 semzcnt++;
513 } 527 }
514 return semzcnt; 528 return semzcnt;
515 } 529 }
516 530
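User space reads these approximate counts through semctl(2). A small hedged sketch (error handling abbreviated):

    #include <sys/sem.h>
    #include <stdio.h>

    static void show_counts(int semid, int semnum)
    {
            int ncnt = semctl(semid, semnum, GETNCNT); /* waiting for semval to grow */
            int zcnt = semctl(semid, semnum, GETZCNT); /* waiting for semval == 0 */

            if (ncnt >= 0 && zcnt >= 0)
                    printf("semncnt=%d semzcnt=%d\n", ncnt, zcnt);
    }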
517 /* Free a semaphore set. freeary() is called with sem_ids.mutex locked and 531 /* Free a semaphore set. freeary() is called with sem_ids.mutex locked and
518 * the spinlock for this semaphore set held. sem_ids.mutex remains locked 532 * the spinlock for this semaphore set held. sem_ids.mutex remains locked
519 * on exit. 533 * on exit.
520 */ 534 */
521 static void freeary(struct ipc_namespace *ns, struct sem_array *sma) 535 static void freeary(struct ipc_namespace *ns, struct sem_array *sma)
522 { 536 {
523 struct sem_undo *un; 537 struct sem_undo *un;
524 struct sem_queue *q; 538 struct sem_queue *q;
525 539
526 /* Invalidate the existing undo structures for this semaphore set. 540 /* Invalidate the existing undo structures for this semaphore set.
527 * (They will be freed without any further action in exit_sem() 541 * (They will be freed without any further action in exit_sem()
528 * or during the next semop.) 542 * or during the next semop.)
529 */ 543 */
530 for (un = sma->undo; un; un = un->id_next) 544 for (un = sma->undo; un; un = un->id_next)
531 un->semid = -1; 545 un->semid = -1;
532 546
533 /* Wake up all pending processes and let them fail with EIDRM. */ 547 /* Wake up all pending processes and let them fail with EIDRM. */
534 q = sma->sem_pending; 548 q = sma->sem_pending;
535 while(q) { 549 while(q) {
536 struct sem_queue *n; 550 struct sem_queue *n;
537 /* lazy remove_from_queue: we are killing the whole queue */ 551 /* lazy remove_from_queue: we are killing the whole queue */
538 q->prev = NULL; 552 q->prev = NULL;
539 n = q->next; 553 n = q->next;
540 q->status = IN_WAKEUP; 554 q->status = IN_WAKEUP;
541 wake_up_process(q->sleeper); /* doesn't sleep */ 555 wake_up_process(q->sleeper); /* doesn't sleep */
542 smp_wmb(); 556 smp_wmb();
543 q->status = -EIDRM; /* hands-off q */ 557 q->status = -EIDRM; /* hands-off q */
544 q = n; 558 q = n;
545 } 559 }
546 560
547 /* Remove the semaphore set from the IDR */ 561 /* Remove the semaphore set from the IDR */
548 sem_rmid(ns, sma); 562 sem_rmid(ns, sma);
549 sem_unlock(sma); 563 sem_unlock(sma);
550 564
551 ns->used_sems -= sma->sem_nsems; 565 ns->used_sems -= sma->sem_nsems;
552 security_sem_free(sma); 566 security_sem_free(sma);
553 ipc_rcu_putref(sma); 567 ipc_rcu_putref(sma);
554 } 568 }
555 569
556 static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version) 570 static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version)
557 { 571 {
558 switch(version) { 572 switch(version) {
559 case IPC_64: 573 case IPC_64:
560 return copy_to_user(buf, in, sizeof(*in)); 574 return copy_to_user(buf, in, sizeof(*in));
561 case IPC_OLD: 575 case IPC_OLD:
562 { 576 {
563 struct semid_ds out; 577 struct semid_ds out;
564 578
565 ipc64_perm_to_ipc_perm(&in->sem_perm, &out.sem_perm); 579 ipc64_perm_to_ipc_perm(&in->sem_perm, &out.sem_perm);
566 580
567 out.sem_otime = in->sem_otime; 581 out.sem_otime = in->sem_otime;
568 out.sem_ctime = in->sem_ctime; 582 out.sem_ctime = in->sem_ctime;
569 out.sem_nsems = in->sem_nsems; 583 out.sem_nsems = in->sem_nsems;
570 584
571 return copy_to_user(buf, &out, sizeof(out)); 585 return copy_to_user(buf, &out, sizeof(out));
572 } 586 }
573 default: 587 default:
574 return -EINVAL; 588 return -EINVAL;
575 } 589 }
576 } 590 }
577 591
578 static int semctl_nolock(struct ipc_namespace *ns, int semid, int semnum, 592 static int semctl_nolock(struct ipc_namespace *ns, int semid, int semnum,
579 int cmd, int version, union semun arg) 593 int cmd, int version, union semun arg)
580 { 594 {
581 int err = -EINVAL; 595 int err = -EINVAL;
582 struct sem_array *sma; 596 struct sem_array *sma;
583 597
584 switch(cmd) { 598 switch(cmd) {
585 case IPC_INFO: 599 case IPC_INFO:
586 case SEM_INFO: 600 case SEM_INFO:
587 { 601 {
588 struct seminfo seminfo; 602 struct seminfo seminfo;
589 int max_id; 603 int max_id;
590 604
591 err = security_sem_semctl(NULL, cmd); 605 err = security_sem_semctl(NULL, cmd);
592 if (err) 606 if (err)
593 return err; 607 return err;
594 608
595 memset(&seminfo,0,sizeof(seminfo)); 609 memset(&seminfo,0,sizeof(seminfo));
596 seminfo.semmni = ns->sc_semmni; 610 seminfo.semmni = ns->sc_semmni;
597 seminfo.semmns = ns->sc_semmns; 611 seminfo.semmns = ns->sc_semmns;
598 seminfo.semmsl = ns->sc_semmsl; 612 seminfo.semmsl = ns->sc_semmsl;
599 seminfo.semopm = ns->sc_semopm; 613 seminfo.semopm = ns->sc_semopm;
600 seminfo.semvmx = SEMVMX; 614 seminfo.semvmx = SEMVMX;
601 seminfo.semmnu = SEMMNU; 615 seminfo.semmnu = SEMMNU;
602 seminfo.semmap = SEMMAP; 616 seminfo.semmap = SEMMAP;
603 seminfo.semume = SEMUME; 617 seminfo.semume = SEMUME;
604 mutex_lock(&sem_ids(ns).mutex); 618 mutex_lock(&sem_ids(ns).mutex);
605 if (cmd == SEM_INFO) { 619 if (cmd == SEM_INFO) {
606 seminfo.semusz = sem_ids(ns).in_use; 620 seminfo.semusz = sem_ids(ns).in_use;
607 seminfo.semaem = ns->used_sems; 621 seminfo.semaem = ns->used_sems;
608 } else { 622 } else {
609 seminfo.semusz = SEMUSZ; 623 seminfo.semusz = SEMUSZ;
610 seminfo.semaem = SEMAEM; 624 seminfo.semaem = SEMAEM;
611 } 625 }
612 max_id = ipc_get_maxid(&sem_ids(ns)); 626 max_id = ipc_get_maxid(&sem_ids(ns));
613 mutex_unlock(&sem_ids(ns).mutex); 627 mutex_unlock(&sem_ids(ns).mutex);
614 if (copy_to_user (arg.__buf, &seminfo, sizeof(struct seminfo))) 628 if (copy_to_user (arg.__buf, &seminfo, sizeof(struct seminfo)))
615 return -EFAULT; 629 return -EFAULT;
616 return (max_id < 0) ? 0: max_id; 630 return (max_id < 0) ? 0: max_id;
617 } 631 }
618 case SEM_STAT: 632 case SEM_STAT:
619 { 633 {
620 struct semid64_ds tbuf; 634 struct semid64_ds tbuf;
621 int id; 635 int id;
622 636
623 sma = sem_lock(ns, semid); 637 sma = sem_lock(ns, semid);
624 if (IS_ERR(sma)) 638 if (IS_ERR(sma))
625 return PTR_ERR(sma); 639 return PTR_ERR(sma);
626 640
627 err = -EACCES; 641 err = -EACCES;
628 if (ipcperms (&sma->sem_perm, S_IRUGO)) 642 if (ipcperms (&sma->sem_perm, S_IRUGO))
629 goto out_unlock; 643 goto out_unlock;
630 644
631 err = security_sem_semctl(sma, cmd); 645 err = security_sem_semctl(sma, cmd);
632 if (err) 646 if (err)
633 goto out_unlock; 647 goto out_unlock;
634 648
635 id = sma->sem_perm.id; 649 id = sma->sem_perm.id;
636 650
637 memset(&tbuf, 0, sizeof(tbuf)); 651 memset(&tbuf, 0, sizeof(tbuf));
638 652
639 kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm); 653 kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm);
640 tbuf.sem_otime = sma->sem_otime; 654 tbuf.sem_otime = sma->sem_otime;
641 tbuf.sem_ctime = sma->sem_ctime; 655 tbuf.sem_ctime = sma->sem_ctime;
642 tbuf.sem_nsems = sma->sem_nsems; 656 tbuf.sem_nsems = sma->sem_nsems;
643 sem_unlock(sma); 657 sem_unlock(sma);
644 if (copy_semid_to_user (arg.buf, &tbuf, version)) 658 if (copy_semid_to_user (arg.buf, &tbuf, version))
645 return -EFAULT; 659 return -EFAULT;
646 return id; 660 return id;
647 } 661 }
648 default: 662 default:
649 return -EINVAL; 663 return -EINVAL;
650 } 664 }
651 return err; 665 return err;
652 out_unlock: 666 out_unlock:
653 sem_unlock(sma); 667 sem_unlock(sma);
654 return err; 668 return err;
655 } 669 }
656 670
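The IPC_INFO/SEM_INFO branch above is how the sysctl limits reach user space. A hedged example; note that semctl(2) requires the caller to define union semun itself, and struct seminfo is exposed by glibc only under _GNU_SOURCE:

    #define _GNU_SOURCE
    #include <sys/sem.h>
    #include <stdio.h>

    union semun {
            int val;
            struct semid_ds *buf;
            unsigned short *array;
            struct seminfo *__buf;
    };

    static int print_limits(void)
    {
            struct seminfo info;
            union semun arg = { .__buf = &info };
            int max_id = semctl(0, 0, IPC_INFO, arg); /* semid is ignored here */

            if (max_id < 0)
                    return -1;
            printf("semmsl=%d semmns=%d semopm=%d semmni=%d\n",
                   info.semmsl, info.semmns, info.semopm, info.semmni);
            return 0;
    }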
657 static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, 671 static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
658 int cmd, int version, union semun arg) 672 int cmd, int version, union semun arg)
659 { 673 {
660 struct sem_array *sma; 674 struct sem_array *sma;
661 struct sem* curr; 675 struct sem* curr;
662 int err; 676 int err;
663 ushort fast_sem_io[SEMMSL_FAST]; 677 ushort fast_sem_io[SEMMSL_FAST];
664 ushort* sem_io = fast_sem_io; 678 ushort* sem_io = fast_sem_io;
665 int nsems; 679 int nsems;
666 680
667 sma = sem_lock_check(ns, semid); 681 sma = sem_lock_check(ns, semid);
668 if (IS_ERR(sma)) 682 if (IS_ERR(sma))
669 return PTR_ERR(sma); 683 return PTR_ERR(sma);
670 684
671 nsems = sma->sem_nsems; 685 nsems = sma->sem_nsems;
672 686
673 err = -EACCES; 687 err = -EACCES;
674 if (ipcperms (&sma->sem_perm, (cmd==SETVAL||cmd==SETALL)?S_IWUGO:S_IRUGO)) 688 if (ipcperms (&sma->sem_perm, (cmd==SETVAL||cmd==SETALL)?S_IWUGO:S_IRUGO))
675 goto out_unlock; 689 goto out_unlock;
676 690
677 err = security_sem_semctl(sma, cmd); 691 err = security_sem_semctl(sma, cmd);
678 if (err) 692 if (err)
679 goto out_unlock; 693 goto out_unlock;
680 694
681 err = -EACCES; 695 err = -EACCES;
682 switch (cmd) { 696 switch (cmd) {
683 case GETALL: 697 case GETALL:
684 { 698 {
685 ushort __user *array = arg.array; 699 ushort __user *array = arg.array;
686 int i; 700 int i;
687 701
688 if(nsems > SEMMSL_FAST) { 702 if(nsems > SEMMSL_FAST) {
689 ipc_rcu_getref(sma); 703 ipc_rcu_getref(sma);
690 sem_unlock(sma); 704 sem_unlock(sma);
691 705
692 sem_io = ipc_alloc(sizeof(ushort)*nsems); 706 sem_io = ipc_alloc(sizeof(ushort)*nsems);
693 if(sem_io == NULL) { 707 if(sem_io == NULL) {
694 ipc_lock_by_ptr(&sma->sem_perm); 708 ipc_lock_by_ptr(&sma->sem_perm);
695 ipc_rcu_putref(sma); 709 ipc_rcu_putref(sma);
696 sem_unlock(sma); 710 sem_unlock(sma);
697 return -ENOMEM; 711 return -ENOMEM;
698 } 712 }
699 713
700 ipc_lock_by_ptr(&sma->sem_perm); 714 ipc_lock_by_ptr(&sma->sem_perm);
701 ipc_rcu_putref(sma); 715 ipc_rcu_putref(sma);
702 if (sma->sem_perm.deleted) { 716 if (sma->sem_perm.deleted) {
703 sem_unlock(sma); 717 sem_unlock(sma);
704 err = -EIDRM; 718 err = -EIDRM;
705 goto out_free; 719 goto out_free;
706 } 720 }
707 } 721 }
708 722
709 for (i = 0; i < sma->sem_nsems; i++) 723 for (i = 0; i < sma->sem_nsems; i++)
710 sem_io[i] = sma->sem_base[i].semval; 724 sem_io[i] = sma->sem_base[i].semval;
711 sem_unlock(sma); 725 sem_unlock(sma);
712 err = 0; 726 err = 0;
713 if(copy_to_user(array, sem_io, nsems*sizeof(ushort))) 727 if(copy_to_user(array, sem_io, nsems*sizeof(ushort)))
714 err = -EFAULT; 728 err = -EFAULT;
715 goto out_free; 729 goto out_free;
716 } 730 }
717 case SETALL: 731 case SETALL:
718 { 732 {
719 int i; 733 int i;
720 struct sem_undo *un; 734 struct sem_undo *un;
721 735
722 ipc_rcu_getref(sma); 736 ipc_rcu_getref(sma);
723 sem_unlock(sma); 737 sem_unlock(sma);
724 738
725 if(nsems > SEMMSL_FAST) { 739 if(nsems > SEMMSL_FAST) {
726 sem_io = ipc_alloc(sizeof(ushort)*nsems); 740 sem_io = ipc_alloc(sizeof(ushort)*nsems);
727 if(sem_io == NULL) { 741 if(sem_io == NULL) {
728 ipc_lock_by_ptr(&sma->sem_perm); 742 ipc_lock_by_ptr(&sma->sem_perm);
729 ipc_rcu_putref(sma); 743 ipc_rcu_putref(sma);
730 sem_unlock(sma); 744 sem_unlock(sma);
731 return -ENOMEM; 745 return -ENOMEM;
732 } 746 }
733 } 747 }
734 748
735 if (copy_from_user (sem_io, arg.array, nsems*sizeof(ushort))) { 749 if (copy_from_user (sem_io, arg.array, nsems*sizeof(ushort))) {
736 ipc_lock_by_ptr(&sma->sem_perm); 750 ipc_lock_by_ptr(&sma->sem_perm);
737 ipc_rcu_putref(sma); 751 ipc_rcu_putref(sma);
738 sem_unlock(sma); 752 sem_unlock(sma);
739 err = -EFAULT; 753 err = -EFAULT;
740 goto out_free; 754 goto out_free;
741 } 755 }
742 756
743 for (i = 0; i < nsems; i++) { 757 for (i = 0; i < nsems; i++) {
744 if (sem_io[i] > SEMVMX) { 758 if (sem_io[i] > SEMVMX) {
745 ipc_lock_by_ptr(&sma->sem_perm); 759 ipc_lock_by_ptr(&sma->sem_perm);
746 ipc_rcu_putref(sma); 760 ipc_rcu_putref(sma);
747 sem_unlock(sma); 761 sem_unlock(sma);
748 err = -ERANGE; 762 err = -ERANGE;
749 goto out_free; 763 goto out_free;
750 } 764 }
751 } 765 }
752 ipc_lock_by_ptr(&sma->sem_perm); 766 ipc_lock_by_ptr(&sma->sem_perm);
753 ipc_rcu_putref(sma); 767 ipc_rcu_putref(sma);
754 if (sma->sem_perm.deleted) { 768 if (sma->sem_perm.deleted) {
755 sem_unlock(sma); 769 sem_unlock(sma);
756 err = -EIDRM; 770 err = -EIDRM;
757 goto out_free; 771 goto out_free;
758 } 772 }
759 773
760 for (i = 0; i < nsems; i++) 774 for (i = 0; i < nsems; i++)
761 sma->sem_base[i].semval = sem_io[i]; 775 sma->sem_base[i].semval = sem_io[i];
762 for (un = sma->undo; un; un = un->id_next) 776 for (un = sma->undo; un; un = un->id_next)
763 for (i = 0; i < nsems; i++) 777 for (i = 0; i < nsems; i++)
764 un->semadj[i] = 0; 778 un->semadj[i] = 0;
765 sma->sem_ctime = get_seconds(); 779 sma->sem_ctime = get_seconds();
766 /* maybe some queued-up processes were waiting for this */ 780 /* maybe some queued-up processes were waiting for this */
767 update_queue(sma); 781 update_queue(sma);
768 err = 0; 782 err = 0;
769 goto out_unlock; 783 goto out_unlock;
770 } 784 }
771 case IPC_STAT: 785 case IPC_STAT:
772 { 786 {
773 struct semid64_ds tbuf; 787 struct semid64_ds tbuf;
774 memset(&tbuf,0,sizeof(tbuf)); 788 memset(&tbuf,0,sizeof(tbuf));
775 kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm); 789 kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm);
776 tbuf.sem_otime = sma->sem_otime; 790 tbuf.sem_otime = sma->sem_otime;
777 tbuf.sem_ctime = sma->sem_ctime; 791 tbuf.sem_ctime = sma->sem_ctime;
778 tbuf.sem_nsems = sma->sem_nsems; 792 tbuf.sem_nsems = sma->sem_nsems;
779 sem_unlock(sma); 793 sem_unlock(sma);
780 if (copy_semid_to_user (arg.buf, &tbuf, version)) 794 if (copy_semid_to_user (arg.buf, &tbuf, version))
781 return -EFAULT; 795 return -EFAULT;
782 return 0; 796 return 0;
783 } 797 }
784 /* GETVAL, GETPID, GETNCNT, GETZCNT, SETVAL: fall-through */ 798 /* GETVAL, GETPID, GETNCNT, GETZCNT, SETVAL: fall-through */
785 } 799 }
786 err = -EINVAL; 800 err = -EINVAL;
787 if(semnum < 0 || semnum >= nsems) 801 if(semnum < 0 || semnum >= nsems)
788 goto out_unlock; 802 goto out_unlock;
789 803
790 curr = &sma->sem_base[semnum]; 804 curr = &sma->sem_base[semnum];
791 805
792 switch (cmd) { 806 switch (cmd) {
793 case GETVAL: 807 case GETVAL:
794 err = curr->semval; 808 err = curr->semval;
795 goto out_unlock; 809 goto out_unlock;
796 case GETPID: 810 case GETPID:
797 err = curr->sempid; 811 err = curr->sempid;
798 goto out_unlock; 812 goto out_unlock;
799 case GETNCNT: 813 case GETNCNT:
800 err = count_semncnt(sma,semnum); 814 err = count_semncnt(sma,semnum);
801 goto out_unlock; 815 goto out_unlock;
802 case GETZCNT: 816 case GETZCNT:
803 err = count_semzcnt(sma,semnum); 817 err = count_semzcnt(sma,semnum);
804 goto out_unlock; 818 goto out_unlock;
805 case SETVAL: 819 case SETVAL:
806 { 820 {
807 int val = arg.val; 821 int val = arg.val;
808 struct sem_undo *un; 822 struct sem_undo *un;
809 err = -ERANGE; 823 err = -ERANGE;
810 if (val > SEMVMX || val < 0) 824 if (val > SEMVMX || val < 0)
811 goto out_unlock; 825 goto out_unlock;
812 826
813 for (un = sma->undo; un; un = un->id_next) 827 for (un = sma->undo; un; un = un->id_next)
814 un->semadj[semnum] = 0; 828 un->semadj[semnum] = 0;
815 curr->semval = val; 829 curr->semval = val;
816 curr->sempid = task_tgid_vnr(current); 830 curr->sempid = task_tgid_vnr(current);
817 sma->sem_ctime = get_seconds(); 831 sma->sem_ctime = get_seconds();
818 /* maybe some queued-up processes were waiting for this */ 832 /* maybe some queued-up processes were waiting for this */
819 update_queue(sma); 833 update_queue(sma);
820 err = 0; 834 err = 0;
821 goto out_unlock; 835 goto out_unlock;
822 } 836 }
823 } 837 }
824 out_unlock: 838 out_unlock:
825 sem_unlock(sma); 839 sem_unlock(sma);
826 out_free: 840 out_free:
827 if(sem_io != fast_sem_io) 841 if(sem_io != fast_sem_io)
828 ipc_free(sem_io, sizeof(ushort)*nsems); 842 ipc_free(sem_io, sizeof(ushort)*nsems);
829 return err; 843 return err;
830 } 844 }
831 845
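Note that the SETALL path above also zeroes every undo adjustment for the set, so a reset leaves no stale semadj values behind. A hedged user-space counterpart (union semun defined as in the earlier sketch):

    #include <stdlib.h>
    #include <sys/sem.h>

    union semun { int val; struct semid_ds *buf; unsigned short *array; };

    static int reset_all(int semid, int nsems)
    {
            unsigned short *vals = calloc(nsems, sizeof(*vals));
            union semun arg = { .array = vals };
            int err;

            if (!vals)
                    return -1;
            err = semctl(semid, 0, SETALL, arg); /* semvals -> 0, semadj -> 0 */
            free(vals);
            return err;
    }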
832 struct sem_setbuf { 846 struct sem_setbuf {
833 uid_t uid; 847 uid_t uid;
834 gid_t gid; 848 gid_t gid;
835 mode_t mode; 849 mode_t mode;
836 }; 850 };
837 851
838 static inline unsigned long copy_semid_from_user(struct sem_setbuf *out, void __user *buf, int version) 852 static inline unsigned long copy_semid_from_user(struct sem_setbuf *out, void __user *buf, int version)
839 { 853 {
840 switch(version) { 854 switch(version) {
841 case IPC_64: 855 case IPC_64:
842 { 856 {
843 struct semid64_ds tbuf; 857 struct semid64_ds tbuf;
844 858
845 if(copy_from_user(&tbuf, buf, sizeof(tbuf))) 859 if(copy_from_user(&tbuf, buf, sizeof(tbuf)))
846 return -EFAULT; 860 return -EFAULT;
847 861
848 out->uid = tbuf.sem_perm.uid; 862 out->uid = tbuf.sem_perm.uid;
849 out->gid = tbuf.sem_perm.gid; 863 out->gid = tbuf.sem_perm.gid;
850 out->mode = tbuf.sem_perm.mode; 864 out->mode = tbuf.sem_perm.mode;
851 865
852 return 0; 866 return 0;
853 } 867 }
854 case IPC_OLD: 868 case IPC_OLD:
855 { 869 {
856 struct semid_ds tbuf_old; 870 struct semid_ds tbuf_old;
857 871
858 if(copy_from_user(&tbuf_old, buf, sizeof(tbuf_old))) 872 if(copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
859 return -EFAULT; 873 return -EFAULT;
860 874
861 out->uid = tbuf_old.sem_perm.uid; 875 out->uid = tbuf_old.sem_perm.uid;
862 out->gid = tbuf_old.sem_perm.gid; 876 out->gid = tbuf_old.sem_perm.gid;
863 out->mode = tbuf_old.sem_perm.mode; 877 out->mode = tbuf_old.sem_perm.mode;
864 878
865 return 0; 879 return 0;
866 } 880 }
867 default: 881 default:
868 return -EINVAL; 882 return -EINVAL;
869 } 883 }
870 } 884 }
871 885
872 static int semctl_down(struct ipc_namespace *ns, int semid, int semnum, 886 static int semctl_down(struct ipc_namespace *ns, int semid, int semnum,
873 int cmd, int version, union semun arg) 887 int cmd, int version, union semun arg)
874 { 888 {
875 struct sem_array *sma; 889 struct sem_array *sma;
876 int err; 890 int err;
877 struct sem_setbuf uninitialized_var(setbuf); 891 struct sem_setbuf uninitialized_var(setbuf);
878 struct kern_ipc_perm *ipcp; 892 struct kern_ipc_perm *ipcp;
879 893
880 if(cmd == IPC_SET) { 894 if(cmd == IPC_SET) {
881 if(copy_semid_from_user (&setbuf, arg.buf, version)) 895 if(copy_semid_from_user (&setbuf, arg.buf, version))
882 return -EFAULT; 896 return -EFAULT;
883 } 897 }
884 sma = sem_lock_check(ns, semid); 898 sma = sem_lock_check(ns, semid);
885 if (IS_ERR(sma)) 899 if (IS_ERR(sma))
886 return PTR_ERR(sma); 900 return PTR_ERR(sma);
887 901
888 ipcp = &sma->sem_perm; 902 ipcp = &sma->sem_perm;
889 903
890 err = audit_ipc_obj(ipcp); 904 err = audit_ipc_obj(ipcp);
891 if (err) 905 if (err)
892 goto out_unlock; 906 goto out_unlock;
893 907
894 if (cmd == IPC_SET) { 908 if (cmd == IPC_SET) {
895 err = audit_ipc_set_perm(0, setbuf.uid, setbuf.gid, setbuf.mode); 909 err = audit_ipc_set_perm(0, setbuf.uid, setbuf.gid, setbuf.mode);
896 if (err) 910 if (err)
897 goto out_unlock; 911 goto out_unlock;
898 } 912 }
899 if (current->euid != ipcp->cuid && 913 if (current->euid != ipcp->cuid &&
900 current->euid != ipcp->uid && !capable(CAP_SYS_ADMIN)) { 914 current->euid != ipcp->uid && !capable(CAP_SYS_ADMIN)) {
901 err=-EPERM; 915 err=-EPERM;
902 goto out_unlock; 916 goto out_unlock;
903 } 917 }
904 918
905 err = security_sem_semctl(sma, cmd); 919 err = security_sem_semctl(sma, cmd);
906 if (err) 920 if (err)
907 goto out_unlock; 921 goto out_unlock;
908 922
909 switch(cmd){ 923 switch(cmd){
910 case IPC_RMID: 924 case IPC_RMID:
911 freeary(ns, sma); 925 freeary(ns, sma);
912 err = 0; 926 err = 0;
913 break; 927 break;
914 case IPC_SET: 928 case IPC_SET:
915 ipcp->uid = setbuf.uid; 929 ipcp->uid = setbuf.uid;
916 ipcp->gid = setbuf.gid; 930 ipcp->gid = setbuf.gid;
917 ipcp->mode = (ipcp->mode & ~S_IRWXUGO) 931 ipcp->mode = (ipcp->mode & ~S_IRWXUGO)
918 | (setbuf.mode & S_IRWXUGO); 932 | (setbuf.mode & S_IRWXUGO);
919 sma->sem_ctime = get_seconds(); 933 sma->sem_ctime = get_seconds();
920 sem_unlock(sma); 934 sem_unlock(sma);
921 err = 0; 935 err = 0;
922 break; 936 break;
923 default: 937 default:
924 sem_unlock(sma); 938 sem_unlock(sma);
925 err = -EINVAL; 939 err = -EINVAL;
926 break; 940 break;
927 } 941 }
928 return err; 942 return err;
929 943
930 out_unlock: 944 out_unlock:
931 sem_unlock(sma); 945 sem_unlock(sma);
932 return err; 946 return err;
933 } 947 }
934 948
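semctl_down() is the privileged half: IPC_SET and IPC_RMID are allowed only for the creator, the owner, or CAP_SYS_ADMIN. Removing a set from user space is then a one-liner; the semnum and value arguments are ignored for IPC_RMID:

    #include <sys/sem.h>

    static int remove_set(int semid)
    {
            /* wakes every sleeper with EIDRM and invalidates undo entries */
            return semctl(semid, 0, IPC_RMID);
    }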
935 asmlinkage long sys_semctl (int semid, int semnum, int cmd, union semun arg) 949 asmlinkage long sys_semctl (int semid, int semnum, int cmd, union semun arg)
936 { 950 {
937 int err = -EINVAL; 951 int err = -EINVAL;
938 int version; 952 int version;
939 struct ipc_namespace *ns; 953 struct ipc_namespace *ns;
940 954
941 if (semid < 0) 955 if (semid < 0)
942 return -EINVAL; 956 return -EINVAL;
943 957
944 version = ipc_parse_version(&cmd); 958 version = ipc_parse_version(&cmd);
945 ns = current->nsproxy->ipc_ns; 959 ns = current->nsproxy->ipc_ns;
946 960
947 switch(cmd) { 961 switch(cmd) {
948 case IPC_INFO: 962 case IPC_INFO:
949 case SEM_INFO: 963 case SEM_INFO:
950 case SEM_STAT: 964 case SEM_STAT:
951 err = semctl_nolock(ns,semid,semnum,cmd,version,arg); 965 err = semctl_nolock(ns,semid,semnum,cmd,version,arg);
952 return err; 966 return err;
953 case GETALL: 967 case GETALL:
954 case GETVAL: 968 case GETVAL:
955 case GETPID: 969 case GETPID:
956 case GETNCNT: 970 case GETNCNT:
957 case GETZCNT: 971 case GETZCNT:
958 case IPC_STAT: 972 case IPC_STAT:
959 case SETVAL: 973 case SETVAL:
960 case SETALL: 974 case SETALL:
961 err = semctl_main(ns,semid,semnum,cmd,version,arg); 975 err = semctl_main(ns,semid,semnum,cmd,version,arg);
962 return err; 976 return err;
963 case IPC_RMID: 977 case IPC_RMID:
964 case IPC_SET: 978 case IPC_SET:
965 mutex_lock(&sem_ids(ns).mutex); 979 mutex_lock(&sem_ids(ns).mutex);
966 err = semctl_down(ns,semid,semnum,cmd,version,arg); 980 err = semctl_down(ns,semid,semnum,cmd,version,arg);
967 mutex_unlock(&sem_ids(ns).mutex); 981 mutex_unlock(&sem_ids(ns).mutex);
968 return err; 982 return err;
969 default: 983 default:
970 return -EINVAL; 984 return -EINVAL;
971 } 985 }
972 } 986 }
973 987
974 static inline void lock_semundo(void) 988 static inline void lock_semundo(void)
975 { 989 {
976 struct sem_undo_list *undo_list; 990 struct sem_undo_list *undo_list;
977 991
978 undo_list = current->sysvsem.undo_list; 992 undo_list = current->sysvsem.undo_list;
979 if (undo_list) 993 if (undo_list)
980 spin_lock(&undo_list->lock); 994 spin_lock(&undo_list->lock);
981 } 995 }
982 996
983 /* This code has an interaction with copy_semundo(). 997 /* This code has an interaction with copy_semundo().
984 * Consider; two tasks are sharing the undo_list. task1 998 * Consider; two tasks are sharing the undo_list. task1
985 * acquires the undo_list lock in lock_semundo(). If task2 now 999 * acquires the undo_list lock in lock_semundo(). If task2 now
986 * exits before task1 releases the lock (by calling 1000 * exits before task1 releases the lock (by calling
987 * unlock_semundo()), then task1 will never call spin_unlock(). 1001 * unlock_semundo()), then task1 will never call spin_unlock().
988 * This leaves the sem_undo_list in a locked state. If task1 now creates task3 1002 * This leaves the sem_undo_list in a locked state. If task1 now creates task3
989 * and once again shares the sem_undo_list, the sem_undo_list will still be 1003 * and once again shares the sem_undo_list, the sem_undo_list will still be
990 * locked, and future SEM_UNDO operations will deadlock. This case is 1004 * locked, and future SEM_UNDO operations will deadlock. This case is
991 * dealt with in copy_semundo() by having it reinitialize the spin lock when 1005 * dealt with in copy_semundo() by having it reinitialize the spin lock when
992 * the refcnt goes from 1 to 2. 1006 * the refcnt goes from 1 to 2.
993 */ 1007 */
994 static inline void unlock_semundo(void) 1008 static inline void unlock_semundo(void)
995 { 1009 {
996 struct sem_undo_list *undo_list; 1010 struct sem_undo_list *undo_list;
997 1011
998 undo_list = current->sysvsem.undo_list; 1012 undo_list = current->sysvsem.undo_list;
999 if (undo_list) 1013 if (undo_list)
1000 spin_unlock(&undo_list->lock); 1014 spin_unlock(&undo_list->lock);
1001 } 1015 }
1002 1016
1003 1017
1004 /* If the task doesn't already have an undo_list, then allocate one 1018 /* If the task doesn't already have an undo_list, then allocate one
1005 * here. We guarantee there is only one thread using this undo list, 1019 * here. We guarantee there is only one thread using this undo list,
1006 * and current is THE ONE 1020 * and current is THE ONE
1007 * 1021 *
1008 * If this allocation and assignment succeeds, but later 1022 * If this allocation and assignment succeeds, but later
1009 * portions of this code fail, there is no need to free the sem_undo_list. 1023 * portions of this code fail, there is no need to free the sem_undo_list.
1010 * Just let it stay associated with the task, and it'll be freed later 1024 * Just let it stay associated with the task, and it'll be freed later
1011 * at exit time. 1025 * at exit time.
1012 * 1026 *
1013 * This can block, so callers must hold no locks. 1027 * This can block, so callers must hold no locks.
1014 */ 1028 */
1015 static inline int get_undo_list(struct sem_undo_list **undo_listp) 1029 static inline int get_undo_list(struct sem_undo_list **undo_listp)
1016 { 1030 {
1017 struct sem_undo_list *undo_list; 1031 struct sem_undo_list *undo_list;
1018 1032
1019 undo_list = current->sysvsem.undo_list; 1033 undo_list = current->sysvsem.undo_list;
1020 if (!undo_list) { 1034 if (!undo_list) {
1021 undo_list = kzalloc(sizeof(*undo_list), GFP_KERNEL); 1035 undo_list = kzalloc(sizeof(*undo_list), GFP_KERNEL);
1022 if (undo_list == NULL) 1036 if (undo_list == NULL)
1023 return -ENOMEM; 1037 return -ENOMEM;
1024 spin_lock_init(&undo_list->lock); 1038 spin_lock_init(&undo_list->lock);
1025 atomic_set(&undo_list->refcnt, 1); 1039 atomic_set(&undo_list->refcnt, 1);
1026 current->sysvsem.undo_list = undo_list; 1040 current->sysvsem.undo_list = undo_list;
1027 } 1041 }
1028 *undo_listp = undo_list; 1042 *undo_listp = undo_list;
1029 return 0; 1043 return 0;
1030 } 1044 }
1031 1045
1032 static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid) 1046 static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid)
1033 { 1047 {
1034 struct sem_undo **last, *un; 1048 struct sem_undo **last, *un;
1035 1049
1036 last = &ulp->proc_list; 1050 last = &ulp->proc_list;
1037 un = *last; 1051 un = *last;
1038 while(un != NULL) { 1052 while(un != NULL) {
1039 if(un->semid==semid) 1053 if(un->semid==semid)
1040 break; 1054 break;
1041 if(un->semid==-1) { 1055 if(un->semid==-1) {
1042 *last=un->proc_next; 1056 *last=un->proc_next;
1043 kfree(un); 1057 kfree(un);
1044 } else { 1058 } else {
1045 last=&un->proc_next; 1059 last=&un->proc_next;
1046 } 1060 }
1047 un=*last; 1061 un=*last;
1048 } 1062 }
1049 return un; 1063 return un;
1050 } 1064 }
1051 1065
1052 static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid) 1066 static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid)
1053 { 1067 {
1054 struct sem_array *sma; 1068 struct sem_array *sma;
1055 struct sem_undo_list *ulp; 1069 struct sem_undo_list *ulp;
1056 struct sem_undo *un, *new; 1070 struct sem_undo *un, *new;
1057 int nsems; 1071 int nsems;
1058 int error; 1072 int error;
1059 1073
1060 error = get_undo_list(&ulp); 1074 error = get_undo_list(&ulp);
1061 if (error) 1075 if (error)
1062 return ERR_PTR(error); 1076 return ERR_PTR(error);
1063 1077
1064 lock_semundo(); 1078 lock_semundo();
1065 un = lookup_undo(ulp, semid); 1079 un = lookup_undo(ulp, semid);
1066 unlock_semundo(); 1080 unlock_semundo();
1067 if (likely(un!=NULL)) 1081 if (likely(un!=NULL))
1068 goto out; 1082 goto out;
1069 1083
1070 /* no undo structure around - allocate one. */ 1084 /* no undo structure around - allocate one. */
1071 sma = sem_lock_check(ns, semid); 1085 sma = sem_lock_check(ns, semid);
1072 if (IS_ERR(sma)) 1086 if (IS_ERR(sma))
1073 return ERR_PTR(PTR_ERR(sma)); 1087 return ERR_PTR(PTR_ERR(sma));
1074 1088
1075 nsems = sma->sem_nsems; 1089 nsems = sma->sem_nsems;
1076 ipc_rcu_getref(sma); 1090 ipc_rcu_getref(sma);
1077 sem_unlock(sma); 1091 sem_unlock(sma);
1078 1092
1079 new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL); 1093 new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
1080 if (!new) { 1094 if (!new) {
1081 ipc_lock_by_ptr(&sma->sem_perm); 1095 ipc_lock_by_ptr(&sma->sem_perm);
1082 ipc_rcu_putref(sma); 1096 ipc_rcu_putref(sma);
1083 sem_unlock(sma); 1097 sem_unlock(sma);
1084 return ERR_PTR(-ENOMEM); 1098 return ERR_PTR(-ENOMEM);
1085 } 1099 }
1086 new->semadj = (short *) &new[1]; 1100 new->semadj = (short *) &new[1];
1087 new->semid = semid; 1101 new->semid = semid;
1088 1102
1089 lock_semundo(); 1103 lock_semundo();
1090 un = lookup_undo(ulp, semid); 1104 un = lookup_undo(ulp, semid);
1091 if (un) { 1105 if (un) {
1092 unlock_semundo(); 1106 unlock_semundo();
1093 kfree(new); 1107 kfree(new);
1094 ipc_lock_by_ptr(&sma->sem_perm); 1108 ipc_lock_by_ptr(&sma->sem_perm);
1095 ipc_rcu_putref(sma); 1109 ipc_rcu_putref(sma);
1096 sem_unlock(sma); 1110 sem_unlock(sma);
1097 goto out; 1111 goto out;
1098 } 1112 }
1099 ipc_lock_by_ptr(&sma->sem_perm); 1113 ipc_lock_by_ptr(&sma->sem_perm);
1100 ipc_rcu_putref(sma); 1114 ipc_rcu_putref(sma);
1101 if (sma->sem_perm.deleted) { 1115 if (sma->sem_perm.deleted) {
1102 sem_unlock(sma); 1116 sem_unlock(sma);
1103 unlock_semundo(); 1117 unlock_semundo();
1104 kfree(new); 1118 kfree(new);
1105 un = ERR_PTR(-EIDRM); 1119 un = ERR_PTR(-EIDRM);
1106 goto out; 1120 goto out;
1107 } 1121 }
1108 new->proc_next = ulp->proc_list; 1122 new->proc_next = ulp->proc_list;
1109 ulp->proc_list = new; 1123 ulp->proc_list = new;
1110 new->id_next = sma->undo; 1124 new->id_next = sma->undo;
1111 sma->undo = new; 1125 sma->undo = new;
1112 sem_unlock(sma); 1126 sem_unlock(sma);
1113 un = new; 1127 un = new;
1114 unlock_semundo(); 1128 unlock_semundo();
1115 out: 1129 out:
1116 return un; 1130 return un;
1117 } 1131 }
1118 1132
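find_undo() backs the SEM_UNDO flag: the per-process adjustments recorded in un->semadj are re-applied at exit, so operations marked SEM_UNDO are rolled back if the process dies. A hedged illustration:

    #include <sys/types.h>
    #include <sys/ipc.h>
    #include <sys/sem.h>

    static int take_lock_robustly(int semid)
    {
            struct sembuf op = { .sem_num = 0, .sem_op = -1,
                                 .sem_flg = SEM_UNDO };

            /* the kernel records +1 in semadj[0]; exit_sem() re-applies it
             * if this process exits while still holding the semaphore */
            return semop(semid, &op, 1);
    }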
1119 asmlinkage long sys_semtimedop(int semid, struct sembuf __user *tsops, 1133 asmlinkage long sys_semtimedop(int semid, struct sembuf __user *tsops,
1120 unsigned nsops, const struct timespec __user *timeout) 1134 unsigned nsops, const struct timespec __user *timeout)
1121 { 1135 {
1122 int error = -EINVAL; 1136 int error = -EINVAL;
1123 struct sem_array *sma; 1137 struct sem_array *sma;
1124 struct sembuf fast_sops[SEMOPM_FAST]; 1138 struct sembuf fast_sops[SEMOPM_FAST];
1125 struct sembuf* sops = fast_sops, *sop; 1139 struct sembuf* sops = fast_sops, *sop;
1126 struct sem_undo *un; 1140 struct sem_undo *un;
1127 int undos = 0, alter = 0, max; 1141 int undos = 0, alter = 0, max;
1128 struct sem_queue queue; 1142 struct sem_queue queue;
1129 unsigned long jiffies_left = 0; 1143 unsigned long jiffies_left = 0;
1130 struct ipc_namespace *ns; 1144 struct ipc_namespace *ns;
1131 1145
1132 ns = current->nsproxy->ipc_ns; 1146 ns = current->nsproxy->ipc_ns;
1133 1147
1134 if (nsops < 1 || semid < 0) 1148 if (nsops < 1 || semid < 0)
1135 return -EINVAL; 1149 return -EINVAL;
1136 if (nsops > ns->sc_semopm) 1150 if (nsops > ns->sc_semopm)
1137 return -E2BIG; 1151 return -E2BIG;
1138 if(nsops > SEMOPM_FAST) { 1152 if(nsops > SEMOPM_FAST) {
1139 sops = kmalloc(sizeof(*sops)*nsops,GFP_KERNEL); 1153 sops = kmalloc(sizeof(*sops)*nsops,GFP_KERNEL);
1140 if(sops==NULL) 1154 if(sops==NULL)
1141 return -ENOMEM; 1155 return -ENOMEM;
1142 } 1156 }
1143 if (copy_from_user (sops, tsops, nsops * sizeof(*tsops))) { 1157 if (copy_from_user (sops, tsops, nsops * sizeof(*tsops))) {
1144 error=-EFAULT; 1158 error=-EFAULT;
1145 goto out_free; 1159 goto out_free;
1146 } 1160 }
1147 if (timeout) { 1161 if (timeout) {
1148 struct timespec _timeout; 1162 struct timespec _timeout;
1149 if (copy_from_user(&_timeout, timeout, sizeof(*timeout))) { 1163 if (copy_from_user(&_timeout, timeout, sizeof(*timeout))) {
1150 error = -EFAULT; 1164 error = -EFAULT;
1151 goto out_free; 1165 goto out_free;
1152 } 1166 }
1153 if (_timeout.tv_sec < 0 || _timeout.tv_nsec < 0 || 1167 if (_timeout.tv_sec < 0 || _timeout.tv_nsec < 0 ||
1154 _timeout.tv_nsec >= 1000000000L) { 1168 _timeout.tv_nsec >= 1000000000L) {
1155 error = -EINVAL; 1169 error = -EINVAL;
1156 goto out_free; 1170 goto out_free;
1157 } 1171 }
1158 jiffies_left = timespec_to_jiffies(&_timeout); 1172 jiffies_left = timespec_to_jiffies(&_timeout);
1159 } 1173 }
1160 max = 0; 1174 max = 0;
1161 for (sop = sops; sop < sops + nsops; sop++) { 1175 for (sop = sops; sop < sops + nsops; sop++) {
1162 if (sop->sem_num >= max) 1176 if (sop->sem_num >= max)
1163 max = sop->sem_num; 1177 max = sop->sem_num;
1164 if (sop->sem_flg & SEM_UNDO) 1178 if (sop->sem_flg & SEM_UNDO)
1165 undos = 1; 1179 undos = 1;
1166 if (sop->sem_op != 0) 1180 if (sop->sem_op != 0)
1167 alter = 1; 1181 alter = 1;
1168 } 1182 }
1169 1183
1170 retry_undos: 1184 retry_undos:
1171 if (undos) { 1185 if (undos) {
1172 un = find_undo(ns, semid); 1186 un = find_undo(ns, semid);
1173 if (IS_ERR(un)) { 1187 if (IS_ERR(un)) {
1174 error = PTR_ERR(un); 1188 error = PTR_ERR(un);
1175 goto out_free; 1189 goto out_free;
1176 } 1190 }
1177 } else 1191 } else
1178 un = NULL; 1192 un = NULL;
1179 1193
1180 sma = sem_lock_check(ns, semid); 1194 sma = sem_lock_check(ns, semid);
1181 if (IS_ERR(sma)) { 1195 if (IS_ERR(sma)) {
1182 error = PTR_ERR(sma); 1196 error = PTR_ERR(sma);
1183 goto out_free; 1197 goto out_free;
1184 } 1198 }
1185 1199
1186 /* 1200 /*
1187 * semid identifiers are not unique - find_undo may have 1201 * semid identifiers are not unique - find_undo may have
1188 * allocated an undo structure, it was invalidated by an RMID 1202 * allocated an undo structure, it was invalidated by an RMID
1189 * and now a new array has received the same id. Check and retry. 1203 * and now a new array has received the same id. Check and retry.
1190 */ 1204 */
1191 if (un && un->semid == -1) { 1205 if (un && un->semid == -1) {
1192 sem_unlock(sma); 1206 sem_unlock(sma);
1193 goto retry_undos; 1207 goto retry_undos;
1194 } 1208 }
1195 error = -EFBIG; 1209 error = -EFBIG;
1196 if (max >= sma->sem_nsems) 1210 if (max >= sma->sem_nsems)
1197 goto out_unlock_free; 1211 goto out_unlock_free;
1198 1212
1199 error = -EACCES; 1213 error = -EACCES;
1200 if (ipcperms(&sma->sem_perm, alter ? S_IWUGO : S_IRUGO)) 1214 if (ipcperms(&sma->sem_perm, alter ? S_IWUGO : S_IRUGO))
1201 goto out_unlock_free; 1215 goto out_unlock_free;
1202 1216
1203 error = security_sem_semop(sma, sops, nsops, alter); 1217 error = security_sem_semop(sma, sops, nsops, alter);
1204 if (error) 1218 if (error)
1205 goto out_unlock_free; 1219 goto out_unlock_free;
1206 1220
1207 error = try_atomic_semop (sma, sops, nsops, un, task_tgid_vnr(current)); 1221 error = try_atomic_semop (sma, sops, nsops, un, task_tgid_vnr(current));
1208 if (error <= 0) { 1222 if (error <= 0) {
1209 if (alter && error == 0) 1223 if (alter && error == 0)
1210 update_queue (sma); 1224 update_queue (sma);
1211 goto out_unlock_free; 1225 goto out_unlock_free;
1212 } 1226 }
1213 1227
1214 /* We need to sleep on this operation, so we put the current 1228 /* We need to sleep on this operation, so we put the current
1215 * task into the pending queue and go to sleep. 1229 * task into the pending queue and go to sleep.
1216 */ 1230 */
1217 1231
1218 queue.sma = sma; 1232 queue.sma = sma;
1219 queue.sops = sops; 1233 queue.sops = sops;
1220 queue.nsops = nsops; 1234 queue.nsops = nsops;
1221 queue.undo = un; 1235 queue.undo = un;
1222 queue.pid = task_tgid_vnr(current); 1236 queue.pid = task_tgid_vnr(current);
1223 queue.id = semid; 1237 queue.id = semid;
1224 queue.alter = alter; 1238 queue.alter = alter;
1225 if (alter) 1239 if (alter)
1226 append_to_queue(sma ,&queue); 1240 append_to_queue(sma ,&queue);
1227 else 1241 else
1228 prepend_to_queue(sma ,&queue); 1242 prepend_to_queue(sma ,&queue);
1229 1243
1230 queue.status = -EINTR; 1244 queue.status = -EINTR;
1231 queue.sleeper = current; 1245 queue.sleeper = current;
1232 current->state = TASK_INTERRUPTIBLE; 1246 current->state = TASK_INTERRUPTIBLE;
1233 sem_unlock(sma); 1247 sem_unlock(sma);
1234 1248
1235 if (timeout) 1249 if (timeout)
1236 jiffies_left = schedule_timeout(jiffies_left); 1250 jiffies_left = schedule_timeout(jiffies_left);
1237 else 1251 else
1238 schedule(); 1252 schedule();
1239 1253
1240 error = queue.status; 1254 error = queue.status;
1241 while(unlikely(error == IN_WAKEUP)) { 1255 while(unlikely(error == IN_WAKEUP)) {
1242 cpu_relax(); 1256 cpu_relax();
1243 error = queue.status; 1257 error = queue.status;
1244 } 1258 }
1245 1259
1246 if (error != -EINTR) { 1260 if (error != -EINTR) {
1247 /* fast path: update_queue already obtained all requested 1261 /* fast path: update_queue already obtained all requested
1248 * resources */ 1262 * resources */
1249 goto out_free; 1263 goto out_free;
1250 } 1264 }
1251 1265
1252 sma = sem_lock(ns, semid); 1266 sma = sem_lock(ns, semid);
1253 if (IS_ERR(sma)) { 1267 if (IS_ERR(sma)) {
1254 BUG_ON(queue.prev != NULL); 1268 BUG_ON(queue.prev != NULL);
1255 error = -EIDRM; 1269 error = -EIDRM;
1256 goto out_free; 1270 goto out_free;
1257 } 1271 }
1258 1272
1259 /* 1273 /*
1260 * If queue.status != -EINTR we are woken up by another process 1274 * If queue.status != -EINTR we are woken up by another process
1261 */ 1275 */
1262 error = queue.status; 1276 error = queue.status;
1263 if (error != -EINTR) { 1277 if (error != -EINTR) {
1264 goto out_unlock_free; 1278 goto out_unlock_free;
1265 } 1279 }
1266 1280
1267 /* 1281 /*
1268 * If an interrupt occurred we have to clean up the queue 1282 * If an interrupt occurred we have to clean up the queue
1269 */ 1283 */
1270 if (timeout && jiffies_left == 0) 1284 if (timeout && jiffies_left == 0)
1271 error = -EAGAIN; 1285 error = -EAGAIN;
1272 remove_from_queue(sma,&queue); 1286 remove_from_queue(sma,&queue);
1273 goto out_unlock_free; 1287 goto out_unlock_free;
1274 1288
1275 out_unlock_free: 1289 out_unlock_free:
1276 sem_unlock(sma); 1290 sem_unlock(sma);
1277 out_free: 1291 out_free:
1278 if(sops != fast_sops) 1292 if(sops != fast_sops)
1279 kfree(sops); 1293 kfree(sops);
1280 return error; 1294 return error;
1281 } 1295 }
1282 1296
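The timespec copied in above must be normalized (both fields non-negative, tv_nsec under one second) or the call fails with -EINVAL, and a sleep that runs out is reported as -EAGAIN after the queue entry is removed. As a minimal userspace sketch (illustrative only, not part of this commit), a caller exercising that timeout path:

/*
 * Editor's sketch, not part of this diff: decrementing a semaphore that
 * is already zero blocks; with a normalized timespec the call returns -1
 * with errno == EAGAIN once the timeout expires.
 */
#define _GNU_SOURCE
#include <sys/ipc.h>
#include <sys/sem.h>
#include <time.h>
#include <stdio.h>

int main(void)
{
	/* a private set of one semaphore, initialized to 0 */
	int id = semget(IPC_PRIVATE, 1, IPC_CREAT | 0600);
	struct sembuf op = { .sem_num = 0, .sem_op = -1, .sem_flg = 0 };
	struct timespec ts = { .tv_sec = 1, .tv_nsec = 0 };

	if (semtimedop(id, &op, 1, &ts) == -1)
		perror("semtimedop");	/* expect EAGAIN after ~1s */
	semctl(id, 0, IPC_RMID);
	return 0;
}
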
1283 asmlinkage long sys_semop (int semid, struct sembuf __user *tsops, unsigned nsops) 1297 asmlinkage long sys_semop (int semid, struct sembuf __user *tsops, unsigned nsops)
1284 { 1298 {
1285 return sys_semtimedop(semid, tsops, nsops, NULL); 1299 return sys_semtimedop(semid, tsops, nsops, NULL);
1286 } 1300 }
1287 1301
1288 /* If CLONE_SYSVSEM is set, establish sharing of SEM_UNDO state between 1302 /* If CLONE_SYSVSEM is set, establish sharing of SEM_UNDO state between
1289 * parent and child tasks. 1303 * parent and child tasks.
1290 * 1304 *
1291 * See the notes above unlock_semundo() regarding the spin_lock_init() 1305 * See the notes above unlock_semundo() regarding the spin_lock_init()
1292 * in this code. Initialize the undo_list->lock here instead of get_undo_list() 1306 * in this code. Initialize the undo_list->lock here instead of get_undo_list()
1293 * because of the reasoning in the comment above unlock_semundo. 1307 * because of the reasoning in the comment above unlock_semundo.
1294 */ 1308 */
1295 1309
1296 int copy_semundo(unsigned long clone_flags, struct task_struct *tsk) 1310 int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
1297 { 1311 {
1298 struct sem_undo_list *undo_list; 1312 struct sem_undo_list *undo_list;
1299 int error; 1313 int error;
1300 1314
1301 if (clone_flags & CLONE_SYSVSEM) { 1315 if (clone_flags & CLONE_SYSVSEM) {
1302 error = get_undo_list(&undo_list); 1316 error = get_undo_list(&undo_list);
1303 if (error) 1317 if (error)
1304 return error; 1318 return error;
1305 atomic_inc(&undo_list->refcnt); 1319 atomic_inc(&undo_list->refcnt);
1306 tsk->sysvsem.undo_list = undo_list; 1320 tsk->sysvsem.undo_list = undo_list;
1307 } else 1321 } else
1308 tsk->sysvsem.undo_list = NULL; 1322 tsk->sysvsem.undo_list = NULL;
1309 1323
1310 return 0; 1324 return 0;
1311 } 1325 }
1312 1326
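copy_semundo() only shares the undo list when CLONE_SYSVSEM is passed; otherwise the child starts with none. A minimal sketch (illustrative only, not part of this commit) of a caller taking that branch:

/*
 * Editor's sketch, not part of this diff: with CLONE_SYSVSEM the child
 * shares the parent's sem_undo_list, so SEM_UNDO adjustments made by
 * either task accumulate in the same list.
 */
#define _GNU_SOURCE
#include <sched.h>
#include <signal.h>
#include <stdlib.h>

#define STACK_SIZE (64 * 1024)

static int worker(void *arg)
{
	(void)arg;
	/* semop() calls with SEM_UNDO here adjust the shared undo list */
	return 0;
}

int main(void)
{
	char *stack = malloc(STACK_SIZE);

	if (!stack)
		return 1;
	/* the stack grows down on most arches, so pass its top */
	if (clone(worker, stack + STACK_SIZE, CLONE_SYSVSEM | SIGCHLD, NULL) == -1)
		return 1;
	return 0;
}
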
1313 /* 1327 /*
1314 * add semadj values to semaphores, free undo structures. 1328 * add semadj values to semaphores, free undo structures.
1315 * undo structures are not freed when semaphore arrays are destroyed 1329 * undo structures are not freed when semaphore arrays are destroyed
1316 * so some of them may be out of date. 1330 * so some of them may be out of date.
1317 * IMPLEMENTATION NOTE: There is some confusion over whether the 1331 * IMPLEMENTATION NOTE: There is some confusion over whether the
1318 * set of adjustments that needs to be done should be done in an atomic 1332 * set of adjustments that needs to be done should be done in an atomic
1319 * manner or not. That is, if we are attempting to decrement the semval 1333 * manner or not. That is, if we are attempting to decrement the semval
1320 * should we queue up and wait until we can do so legally? 1334 * should we queue up and wait until we can do so legally?
1321 * The original implementation attempted to do this (queue and wait). 1335 * The original implementation attempted to do this (queue and wait).
1322 * The current implementation does not do so. The POSIX standard 1336 * The current implementation does not do so. The POSIX standard
1323 * and SVID should be consulted to determine what behavior is mandated. 1337 * and SVID should be consulted to determine what behavior is mandated.
1324 */ 1338 */
1325 void exit_sem(struct task_struct *tsk) 1339 void exit_sem(struct task_struct *tsk)
1326 { 1340 {
1327 struct sem_undo_list *undo_list; 1341 struct sem_undo_list *undo_list;
1328 struct sem_undo *u, **up; 1342 struct sem_undo *u, **up;
1329 struct ipc_namespace *ns; 1343 struct ipc_namespace *ns;
1330 1344
1331 undo_list = tsk->sysvsem.undo_list; 1345 undo_list = tsk->sysvsem.undo_list;
1332 if (!undo_list) 1346 if (!undo_list)
1333 return; 1347 return;
1334 1348
1335 if (!atomic_dec_and_test(&undo_list->refcnt)) 1349 if (!atomic_dec_and_test(&undo_list->refcnt))
1336 return; 1350 return;
1337 1351
1338 ns = tsk->nsproxy->ipc_ns; 1352 ns = tsk->nsproxy->ipc_ns;
1339 /* There's no need to hold the semundo list lock, as current 1353 /* There's no need to hold the semundo list lock, as current
1340 * is the last task exiting for this undo list. 1354 * is the last task exiting for this undo list.
1341 */ 1355 */
1342 for (up = &undo_list->proc_list; (u = *up); *up = u->proc_next, kfree(u)) { 1356 for (up = &undo_list->proc_list; (u = *up); *up = u->proc_next, kfree(u)) {
1343 struct sem_array *sma; 1357 struct sem_array *sma;
1344 int nsems, i; 1358 int nsems, i;
1345 struct sem_undo *un, **unp; 1359 struct sem_undo *un, **unp;
1346 int semid; 1360 int semid;
1347 1361
1348 semid = u->semid; 1362 semid = u->semid;
1349 1363
1350 if(semid == -1) 1364 if(semid == -1)
1351 continue; 1365 continue;
1352 sma = sem_lock(ns, semid); 1366 sma = sem_lock(ns, semid);
1353 if (IS_ERR(sma)) 1367 if (IS_ERR(sma))
1354 continue; 1368 continue;
1355 1369
1356 if (u->semid == -1) 1370 if (u->semid == -1)
1357 goto next_entry; 1371 goto next_entry;
1358 1372
1359 BUG_ON(sem_checkid(ns,sma,u->semid)); 1373 BUG_ON(sem_checkid(ns,sma,u->semid));
1360 1374
1361 /* remove u from the sma->undo list */ 1375 /* remove u from the sma->undo list */
1362 for (unp = &sma->undo; (un = *unp); unp = &un->id_next) { 1376 for (unp = &sma->undo; (un = *unp); unp = &un->id_next) {
1363 if (u == un) 1377 if (u == un)
1364 goto found; 1378 goto found;
1365 } 1379 }
1366 printk ("exit_sem undo list error id=%d\n", u->semid); 1380 printk ("exit_sem undo list error id=%d\n", u->semid);
1367 goto next_entry; 1381 goto next_entry;
1368 found: 1382 found:
1369 *unp = un->id_next; 1383 *unp = un->id_next;
1370 /* perform adjustments registered in u */ 1384 /* perform adjustments registered in u */
1371 nsems = sma->sem_nsems; 1385 nsems = sma->sem_nsems;
1372 for (i = 0; i < nsems; i++) { 1386 for (i = 0; i < nsems; i++) {
1373 struct sem * semaphore = &sma->sem_base[i]; 1387 struct sem * semaphore = &sma->sem_base[i];
1374 if (u->semadj[i]) { 1388 if (u->semadj[i]) {
1375 semaphore->semval += u->semadj[i]; 1389 semaphore->semval += u->semadj[i];
1376 /* 1390 /*
1377 * Range checks of the new semaphore value, 1391 * Range checks of the new semaphore value,
1378 * not defined by sus: 1392 * not defined by sus:
1379 * - Some unices ignore the undo entirely 1393 * - Some unices ignore the undo entirely
1380 * (e.g. HP UX 11i 11.22, Tru64 V5.1) 1394 * (e.g. HP UX 11i 11.22, Tru64 V5.1)
1381 * - some cap the value (e.g. FreeBSD caps 1395 * - some cap the value (e.g. FreeBSD caps
1382 * at 0, but doesn't enforce SEMVMX) 1396 * at 0, but doesn't enforce SEMVMX)
1383 * 1397 *
1384 * Linux caps the semaphore value, both at 0 1398 * Linux caps the semaphore value, both at 0
1385 * and at SEMVMX. 1399 * and at SEMVMX.
1386 * 1400 *
1387 * Manfred <manfred@colorfullife.com> 1401 * Manfred <manfred@colorfullife.com>
1388 */ 1402 */
1389 if (semaphore->semval < 0) 1403 if (semaphore->semval < 0)
1390 semaphore->semval = 0; 1404 semaphore->semval = 0;
1391 if (semaphore->semval > SEMVMX) 1405 if (semaphore->semval > SEMVMX)
1392 semaphore->semval = SEMVMX; 1406 semaphore->semval = SEMVMX;
1393 semaphore->sempid = task_tgid_vnr(current); 1407 semaphore->sempid = task_tgid_vnr(current);
1394 } 1408 }
1395 } 1409 }
1396 sma->sem_otime = get_seconds(); 1410 sma->sem_otime = get_seconds();
1397 /* maybe some queued-up processes were waiting for this */ 1411 /* maybe some queued-up processes were waiting for this */
1398 update_queue(sma); 1412 update_queue(sma);
1399 next_entry: 1413 next_entry:
1400 sem_unlock(sma); 1414 sem_unlock(sma);
1401 } 1415 }
1402 kfree(undo_list); 1416 kfree(undo_list);
1403 } 1417 }
1404 1418
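The rollback exit_sem() performs is visible from userspace: a SEM_UNDO operation made by a task is reversed when that task exits, subject to the [0, SEMVMX] clamping noted above. A minimal sketch (illustrative only, not part of this commit):

/*
 * Editor's sketch, not part of this diff: the child's -1 adjustment is
 * rolled back by exit_sem() when it exits, so the parent reads the
 * original value again.
 */
#include <sys/ipc.h>
#include <sys/sem.h>
#include <sys/wait.h>
#include <unistd.h>
#include <stdio.h>

union semun { int val; };	/* callers must define this themselves */

int main(void)
{
	int id = semget(IPC_PRIVATE, 1, IPC_CREAT | 0600);
	union semun arg = { .val = 1 };
	struct sembuf dec = { .sem_num = 0, .sem_op = -1, .sem_flg = SEM_UNDO };

	semctl(id, 0, SETVAL, arg);
	if (fork() == 0) {
		semop(id, &dec, 1);	/* records semadj[0] = +1 */
		_exit(0);		/* exit_sem() adds it back */
	}
	wait(NULL);
	printf("semval = %d\n", semctl(id, 0, GETVAL));	/* prints 1 */
	semctl(id, 0, IPC_RMID);
	return 0;
}
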
1405 #ifdef CONFIG_PROC_FS 1419 #ifdef CONFIG_PROC_FS
1406 static int sysvipc_sem_proc_show(struct seq_file *s, void *it) 1420 static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
1407 { 1421 {
1408 struct sem_array *sma = it; 1422 struct sem_array *sma = it;
1409 1423
1410 return seq_printf(s, 1424 return seq_printf(s,
1411 "%10d %10d %4o %10lu %5u %5u %5u %5u %10lu %10lu\n", 1425 "%10d %10d %4o %10lu %5u %5u %5u %5u %10lu %10lu\n",
1412 sma->sem_perm.key, 1426 sma->sem_perm.key,
1413 sma->sem_perm.id, 1427 sma->sem_perm.id,
1414 sma->sem_perm.mode, 1428 sma->sem_perm.mode,
1415 sma->sem_nsems, 1429 sma->sem_nsems,
1416 sma->sem_perm.uid, 1430 sma->sem_perm.uid,
1417 sma->sem_perm.gid, 1431 sma->sem_perm.gid,
1418 sma->sem_perm.cuid, 1432 sma->sem_perm.cuid,
1419 sma->sem_perm.cgid, 1433 sma->sem_perm.cgid,
1420 sma->sem_otime, 1434 sma->sem_otime,
1421 sma->sem_ctime); 1435 sma->sem_ctime);
1422 } 1436 }
1423 #endif 1437 #endif
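sysvipc_sem_proc_show() renders one row per semaphore set into /proc/sysvipc/sem, in the field order of the seq_printf() format above: key, semid, perms, nsems, uid, gid, cuid, cgid, otime, ctime. A trivial reader sketch (illustrative only, not part of this commit):

/*
 * Editor's sketch, not part of this diff: dump the table produced by
 * sysvipc_sem_proc_show() above, one line per semaphore set.
 */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sysvipc/sem", "r");
	char line[256];

	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}
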
1424 1438
1 /* 1 /*
2 * linux/ipc/shm.c 2 * linux/ipc/shm.c
3 * Copyright (C) 1992, 1993 Krishna Balasubramanian 3 * Copyright (C) 1992, 1993 Krishna Balasubramanian
4 * Many improvements/fixes by Bruno Haible. 4 * Many improvements/fixes by Bruno Haible.
5 * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994. 5 * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994.
6 * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli. 6 * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli.
7 * 7 *
8 * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com> 8 * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
9 * BIGMEM support, Andrea Arcangeli <andrea@suse.de> 9 * BIGMEM support, Andrea Arcangeli <andrea@suse.de>
10 * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr> 10 * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr>
11 * HIGHMEM support, Ingo Molnar <mingo@redhat.com> 11 * HIGHMEM support, Ingo Molnar <mingo@redhat.com>
12 * Make shmmax, shmall, shmmni sysctl'able, Christoph Rohland <cr@sap.com> 12 * Make shmmax, shmall, shmmni sysctl'able, Christoph Rohland <cr@sap.com>
13 * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com> 13 * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com>
14 * Move the mm functionality over to mm/shmem.c, Christoph Rohland <cr@sap.com> 14 * Move the mm functionality over to mm/shmem.c, Christoph Rohland <cr@sap.com>
15 * 15 *
16 * support for audit of ipc object properties and permission changes 16 * support for audit of ipc object properties and permission changes
17 * Dustin Kirkland <dustin.kirkland@us.ibm.com> 17 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
18 * 18 *
19 * namespaces support 19 * namespaces support
20 * OpenVZ, SWsoft Inc. 20 * OpenVZ, SWsoft Inc.
21 * Pavel Emelianov <xemul@openvz.org> 21 * Pavel Emelianov <xemul@openvz.org>
22 */ 22 */
23 23
24 #include <linux/slab.h> 24 #include <linux/slab.h>
25 #include <linux/mm.h> 25 #include <linux/mm.h>
26 #include <linux/hugetlb.h> 26 #include <linux/hugetlb.h>
27 #include <linux/shm.h> 27 #include <linux/shm.h>
28 #include <linux/init.h> 28 #include <linux/init.h>
29 #include <linux/file.h> 29 #include <linux/file.h>
30 #include <linux/mman.h> 30 #include <linux/mman.h>
31 #include <linux/shmem_fs.h> 31 #include <linux/shmem_fs.h>
32 #include <linux/security.h> 32 #include <linux/security.h>
33 #include <linux/syscalls.h> 33 #include <linux/syscalls.h>
34 #include <linux/audit.h> 34 #include <linux/audit.h>
35 #include <linux/capability.h> 35 #include <linux/capability.h>
36 #include <linux/ptrace.h> 36 #include <linux/ptrace.h>
37 #include <linux/seq_file.h> 37 #include <linux/seq_file.h>
38 #include <linux/mutex.h> 38 #include <linux/mutex.h>
39 #include <linux/nsproxy.h> 39 #include <linux/nsproxy.h>
40 #include <linux/mount.h> 40 #include <linux/mount.h>
41 41
42 #include <asm/uaccess.h> 42 #include <asm/uaccess.h>
43 43
44 #include "util.h" 44 #include "util.h"
45 45
46 struct shm_file_data { 46 struct shm_file_data {
47 int id; 47 int id;
48 struct ipc_namespace *ns; 48 struct ipc_namespace *ns;
49 struct file *file; 49 struct file *file;
50 const struct vm_operations_struct *vm_ops; 50 const struct vm_operations_struct *vm_ops;
51 }; 51 };
52 52
53 #define shm_file_data(file) (*((struct shm_file_data **)&(file)->private_data)) 53 #define shm_file_data(file) (*((struct shm_file_data **)&(file)->private_data))
54 54
55 static const struct file_operations shm_file_operations; 55 static const struct file_operations shm_file_operations;
56 static struct vm_operations_struct shm_vm_ops; 56 static struct vm_operations_struct shm_vm_ops;
57 57
58 static struct ipc_ids init_shm_ids; 58 static struct ipc_ids init_shm_ids;
59 59
60 #define shm_ids(ns) (*((ns)->ids[IPC_SHM_IDS])) 60 #define shm_ids(ns) (*((ns)->ids[IPC_SHM_IDS]))
61 61
62 #define shm_unlock(shp) \ 62 #define shm_unlock(shp) \
63 ipc_unlock(&(shp)->shm_perm) 63 ipc_unlock(&(shp)->shm_perm)
64 #define shm_buildid(ns, id, seq) \ 64 #define shm_buildid(ns, id, seq) \
65 ipc_buildid(&shm_ids(ns), id, seq) 65 ipc_buildid(&shm_ids(ns), id, seq)
66 66
67 static int newseg(struct ipc_namespace *, struct ipc_params *); 67 static int newseg(struct ipc_namespace *, struct ipc_params *);
68 static void shm_open(struct vm_area_struct *vma); 68 static void shm_open(struct vm_area_struct *vma);
69 static void shm_close(struct vm_area_struct *vma); 69 static void shm_close(struct vm_area_struct *vma);
70 static void shm_destroy (struct ipc_namespace *ns, struct shmid_kernel *shp); 70 static void shm_destroy (struct ipc_namespace *ns, struct shmid_kernel *shp);
71 #ifdef CONFIG_PROC_FS 71 #ifdef CONFIG_PROC_FS
72 static int sysvipc_shm_proc_show(struct seq_file *s, void *it); 72 static int sysvipc_shm_proc_show(struct seq_file *s, void *it);
73 #endif 73 #endif
74 74
75 static void __shm_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids) 75 static void __shm_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids)
76 { 76 {
77 ns->ids[IPC_SHM_IDS] = ids; 77 ns->ids[IPC_SHM_IDS] = ids;
78 ns->shm_ctlmax = SHMMAX; 78 ns->shm_ctlmax = SHMMAX;
79 ns->shm_ctlall = SHMALL; 79 ns->shm_ctlall = SHMALL;
80 ns->shm_ctlmni = SHMMNI; 80 ns->shm_ctlmni = SHMMNI;
81 ns->shm_tot = 0; 81 ns->shm_tot = 0;
82 ipc_init_ids(ids); 82 ipc_init_ids(ids);
83 } 83 }
84 84
85 /*
86 * Called with shm_ids.mutex and the shp structure locked.
87 * Only shm_ids.mutex remains locked on exit.
88 */
85 static void do_shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *shp) 89 static void do_shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *shp)
86 { 90 {
87 if (shp->shm_nattch){ 91 if (shp->shm_nattch){
88 shp->shm_perm.mode |= SHM_DEST; 92 shp->shm_perm.mode |= SHM_DEST;
89 /* Do not find it any more */ 93 /* Do not find it any more */
90 shp->shm_perm.key = IPC_PRIVATE; 94 shp->shm_perm.key = IPC_PRIVATE;
91 shm_unlock(shp); 95 shm_unlock(shp);
92 } else 96 } else
93 shm_destroy(ns, shp); 97 shm_destroy(ns, shp);
94 } 98 }
95 99
96 int shm_init_ns(struct ipc_namespace *ns) 100 int shm_init_ns(struct ipc_namespace *ns)
97 { 101 {
98 struct ipc_ids *ids; 102 struct ipc_ids *ids;
99 103
100 ids = kmalloc(sizeof(struct ipc_ids), GFP_KERNEL); 104 ids = kmalloc(sizeof(struct ipc_ids), GFP_KERNEL);
101 if (ids == NULL) 105 if (ids == NULL)
102 return -ENOMEM; 106 return -ENOMEM;
103 107
104 __shm_init_ns(ns, ids); 108 __shm_init_ns(ns, ids);
105 return 0; 109 return 0;
106 } 110 }
107 111
108 void shm_exit_ns(struct ipc_namespace *ns) 112 void shm_exit_ns(struct ipc_namespace *ns)
109 { 113 {
110 struct shmid_kernel *shp; 114 struct shmid_kernel *shp;
111 int next_id; 115 int next_id;
112 int total, in_use; 116 int total, in_use;
113 117
114 mutex_lock(&shm_ids(ns).mutex); 118 mutex_lock(&shm_ids(ns).mutex);
115 119
116 in_use = shm_ids(ns).in_use; 120 in_use = shm_ids(ns).in_use;
117 121
118 for (total = 0, next_id = 0; total < in_use; next_id++) { 122 for (total = 0, next_id = 0; total < in_use; next_id++) {
119 shp = idr_find(&shm_ids(ns).ipcs_idr, next_id); 123 shp = idr_find(&shm_ids(ns).ipcs_idr, next_id);
120 if (shp == NULL) 124 if (shp == NULL)
121 continue; 125 continue;
122 ipc_lock_by_ptr(&shp->shm_perm); 126 ipc_lock_by_ptr(&shp->shm_perm);
123 do_shm_rmid(ns, shp); 127 do_shm_rmid(ns, shp);
124 total++; 128 total++;
125 } 129 }
126 mutex_unlock(&shm_ids(ns).mutex); 130 mutex_unlock(&shm_ids(ns).mutex);
127 131
128 kfree(ns->ids[IPC_SHM_IDS]); 132 kfree(ns->ids[IPC_SHM_IDS]);
129 ns->ids[IPC_SHM_IDS] = NULL; 133 ns->ids[IPC_SHM_IDS] = NULL;
130 } 134 }
131 135
132 void __init shm_init (void) 136 void __init shm_init (void)
133 { 137 {
134 __shm_init_ns(&init_ipc_ns, &init_shm_ids); 138 __shm_init_ns(&init_ipc_ns, &init_shm_ids);
135 ipc_init_proc_interface("sysvipc/shm", 139 ipc_init_proc_interface("sysvipc/shm",
136 " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime\n", 140 " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime\n",
137 IPC_SHM_IDS, sysvipc_shm_proc_show); 141 IPC_SHM_IDS, sysvipc_shm_proc_show);
138 } 142 }
139 143
140 static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id) 144 static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
141 { 145 {
142 struct kern_ipc_perm *ipcp = ipc_lock(&shm_ids(ns), id); 146 struct kern_ipc_perm *ipcp = ipc_lock(&shm_ids(ns), id);
143 147
144 return container_of(ipcp, struct shmid_kernel, shm_perm); 148 return container_of(ipcp, struct shmid_kernel, shm_perm);
145 } 149 }
146 150
147 static inline struct shmid_kernel *shm_lock_check(struct ipc_namespace *ns, 151 static inline struct shmid_kernel *shm_lock_check(struct ipc_namespace *ns,
148 int id) 152 int id)
149 { 153 {
150 struct kern_ipc_perm *ipcp = ipc_lock_check(&shm_ids(ns), id); 154 struct kern_ipc_perm *ipcp = ipc_lock_check(&shm_ids(ns), id);
151 155
152 return container_of(ipcp, struct shmid_kernel, shm_perm); 156 return container_of(ipcp, struct shmid_kernel, shm_perm);
153 } 157 }
154 158
155 static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s) 159 static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
156 { 160 {
157 ipc_rmid(&shm_ids(ns), &s->shm_perm); 161 ipc_rmid(&shm_ids(ns), &s->shm_perm);
158 } 162 }
159 163
160 static inline int shm_addid(struct ipc_namespace *ns, struct shmid_kernel *shp) 164 static inline int shm_addid(struct ipc_namespace *ns, struct shmid_kernel *shp)
161 { 165 {
162 return ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni); 166 return ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
163 } 167 }
164 168
165 169
166 170
167 /* This is called by fork, once for every shm attach. */ 171 /* This is called by fork, once for every shm attach. */
168 static void shm_open(struct vm_area_struct *vma) 172 static void shm_open(struct vm_area_struct *vma)
169 { 173 {
170 struct file *file = vma->vm_file; 174 struct file *file = vma->vm_file;
171 struct shm_file_data *sfd = shm_file_data(file); 175 struct shm_file_data *sfd = shm_file_data(file);
172 struct shmid_kernel *shp; 176 struct shmid_kernel *shp;
173 177
174 shp = shm_lock(sfd->ns, sfd->id); 178 shp = shm_lock(sfd->ns, sfd->id);
175 BUG_ON(IS_ERR(shp)); 179 BUG_ON(IS_ERR(shp));
176 shp->shm_atim = get_seconds(); 180 shp->shm_atim = get_seconds();
177 shp->shm_lprid = task_tgid_vnr(current); 181 shp->shm_lprid = task_tgid_vnr(current);
178 shp->shm_nattch++; 182 shp->shm_nattch++;
179 shm_unlock(shp); 183 shm_unlock(shp);
180 } 184 }
181 185
182 /* 186 /*
183 * shm_destroy - free the struct shmid_kernel 187 * shm_destroy - free the struct shmid_kernel
184 * 188 *
189 * @ns: namespace
185 * @shp: struct to free 190 * @shp: struct to free
186 * 191 *
187 * It has to be called with shp and shm_ids.mutex locked, 192 * It has to be called with shp and shm_ids.mutex locked,
188 * but returns with shp unlocked and freed. 193 * but returns with shp unlocked and freed.
189 */ 194 */
190 static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) 195 static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
191 { 196 {
192 ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT; 197 ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
193 shm_rmid(ns, shp); 198 shm_rmid(ns, shp);
194 shm_unlock(shp); 199 shm_unlock(shp);
195 if (!is_file_hugepages(shp->shm_file)) 200 if (!is_file_hugepages(shp->shm_file))
196 shmem_lock(shp->shm_file, 0, shp->mlock_user); 201 shmem_lock(shp->shm_file, 0, shp->mlock_user);
197 else 202 else
198 user_shm_unlock(shp->shm_file->f_path.dentry->d_inode->i_size, 203 user_shm_unlock(shp->shm_file->f_path.dentry->d_inode->i_size,
199 shp->mlock_user); 204 shp->mlock_user);
200 fput (shp->shm_file); 205 fput (shp->shm_file);
201 security_shm_free(shp); 206 security_shm_free(shp);
202 ipc_rcu_putref(shp); 207 ipc_rcu_putref(shp);
203 } 208 }
204 209
205 /* 210 /*
206 * remove the attach descriptor vma. 211 * remove the attach descriptor vma.
207 * free memory for segment if it is marked destroyed. 212 * free memory for segment if it is marked destroyed.
208 * The descriptor has already been removed from the current->mm->mmap list 213 * The descriptor has already been removed from the current->mm->mmap list
209 * and will later be kfree()d. 214 * and will later be kfree()d.
210 */ 215 */
211 static void shm_close(struct vm_area_struct *vma) 216 static void shm_close(struct vm_area_struct *vma)
212 { 217 {
213 struct file * file = vma->vm_file; 218 struct file * file = vma->vm_file;
214 struct shm_file_data *sfd = shm_file_data(file); 219 struct shm_file_data *sfd = shm_file_data(file);
215 struct shmid_kernel *shp; 220 struct shmid_kernel *shp;
216 struct ipc_namespace *ns = sfd->ns; 221 struct ipc_namespace *ns = sfd->ns;
217 222
218 mutex_lock(&shm_ids(ns).mutex); 223 mutex_lock(&shm_ids(ns).mutex);
219 /* remove from the list of attaches of the shm segment */ 224 /* remove from the list of attaches of the shm segment */
220 shp = shm_lock(ns, sfd->id); 225 shp = shm_lock(ns, sfd->id);
221 BUG_ON(IS_ERR(shp)); 226 BUG_ON(IS_ERR(shp));
222 shp->shm_lprid = task_tgid_vnr(current); 227 shp->shm_lprid = task_tgid_vnr(current);
223 shp->shm_dtim = get_seconds(); 228 shp->shm_dtim = get_seconds();
224 shp->shm_nattch--; 229 shp->shm_nattch--;
225 if(shp->shm_nattch == 0 && 230 if(shp->shm_nattch == 0 &&
226 shp->shm_perm.mode & SHM_DEST) 231 shp->shm_perm.mode & SHM_DEST)
227 shm_destroy(ns, shp); 232 shm_destroy(ns, shp);
228 else 233 else
229 shm_unlock(shp); 234 shm_unlock(shp);
230 mutex_unlock(&shm_ids(ns).mutex); 235 mutex_unlock(&shm_ids(ns).mutex);
231 } 236 }
232 237
233 static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 238 static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
234 { 239 {
235 struct file *file = vma->vm_file; 240 struct file *file = vma->vm_file;
236 struct shm_file_data *sfd = shm_file_data(file); 241 struct shm_file_data *sfd = shm_file_data(file);
237 242
238 return sfd->vm_ops->fault(vma, vmf); 243 return sfd->vm_ops->fault(vma, vmf);
239 } 244 }
240 245
241 #ifdef CONFIG_NUMA 246 #ifdef CONFIG_NUMA
242 static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new) 247 static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
243 { 248 {
244 struct file *file = vma->vm_file; 249 struct file *file = vma->vm_file;
245 struct shm_file_data *sfd = shm_file_data(file); 250 struct shm_file_data *sfd = shm_file_data(file);
246 int err = 0; 251 int err = 0;
247 if (sfd->vm_ops->set_policy) 252 if (sfd->vm_ops->set_policy)
248 err = sfd->vm_ops->set_policy(vma, new); 253 err = sfd->vm_ops->set_policy(vma, new);
249 return err; 254 return err;
250 } 255 }
251 256
252 static struct mempolicy *shm_get_policy(struct vm_area_struct *vma, 257 static struct mempolicy *shm_get_policy(struct vm_area_struct *vma,
253 unsigned long addr) 258 unsigned long addr)
254 { 259 {
255 struct file *file = vma->vm_file; 260 struct file *file = vma->vm_file;
256 struct shm_file_data *sfd = shm_file_data(file); 261 struct shm_file_data *sfd = shm_file_data(file);
257 struct mempolicy *pol = NULL; 262 struct mempolicy *pol = NULL;
258 263
259 if (sfd->vm_ops->get_policy) 264 if (sfd->vm_ops->get_policy)
260 pol = sfd->vm_ops->get_policy(vma, addr); 265 pol = sfd->vm_ops->get_policy(vma, addr);
261 else if (vma->vm_policy) 266 else if (vma->vm_policy)
262 pol = vma->vm_policy; 267 pol = vma->vm_policy;
263 else 268 else
264 pol = current->mempolicy; 269 pol = current->mempolicy;
265 return pol; 270 return pol;
266 } 271 }
267 #endif 272 #endif
268 273
269 static int shm_mmap(struct file * file, struct vm_area_struct * vma) 274 static int shm_mmap(struct file * file, struct vm_area_struct * vma)
270 { 275 {
271 struct shm_file_data *sfd = shm_file_data(file); 276 struct shm_file_data *sfd = shm_file_data(file);
272 int ret; 277 int ret;
273 278
274 ret = sfd->file->f_op->mmap(sfd->file, vma); 279 ret = sfd->file->f_op->mmap(sfd->file, vma);
275 if (ret != 0) 280 if (ret != 0)
276 return ret; 281 return ret;
277 sfd->vm_ops = vma->vm_ops; 282 sfd->vm_ops = vma->vm_ops;
278 #ifdef CONFIG_MMU 283 #ifdef CONFIG_MMU
279 BUG_ON(!sfd->vm_ops->fault); 284 BUG_ON(!sfd->vm_ops->fault);
280 #endif 285 #endif
281 vma->vm_ops = &shm_vm_ops; 286 vma->vm_ops = &shm_vm_ops;
282 shm_open(vma); 287 shm_open(vma);
283 288
284 return ret; 289 return ret;
285 } 290 }
286 291
287 static int shm_release(struct inode *ino, struct file *file) 292 static int shm_release(struct inode *ino, struct file *file)
288 { 293 {
289 struct shm_file_data *sfd = shm_file_data(file); 294 struct shm_file_data *sfd = shm_file_data(file);
290 295
291 put_ipc_ns(sfd->ns); 296 put_ipc_ns(sfd->ns);
292 shm_file_data(file) = NULL; 297 shm_file_data(file) = NULL;
293 kfree(sfd); 298 kfree(sfd);
294 return 0; 299 return 0;
295 } 300 }
296 301
297 static int shm_fsync(struct file *file, struct dentry *dentry, int datasync) 302 static int shm_fsync(struct file *file, struct dentry *dentry, int datasync)
298 { 303 {
299 int (*fsync) (struct file *, struct dentry *, int datasync); 304 int (*fsync) (struct file *, struct dentry *, int datasync);
300 struct shm_file_data *sfd = shm_file_data(file); 305 struct shm_file_data *sfd = shm_file_data(file);
301 int ret = -EINVAL; 306 int ret = -EINVAL;
302 307
303 fsync = sfd->file->f_op->fsync; 308 fsync = sfd->file->f_op->fsync;
304 if (fsync) 309 if (fsync)
305 ret = fsync(sfd->file, sfd->file->f_path.dentry, datasync); 310 ret = fsync(sfd->file, sfd->file->f_path.dentry, datasync);
306 return ret; 311 return ret;
307 } 312 }
308 313
309 static unsigned long shm_get_unmapped_area(struct file *file, 314 static unsigned long shm_get_unmapped_area(struct file *file,
310 unsigned long addr, unsigned long len, unsigned long pgoff, 315 unsigned long addr, unsigned long len, unsigned long pgoff,
311 unsigned long flags) 316 unsigned long flags)
312 { 317 {
313 struct shm_file_data *sfd = shm_file_data(file); 318 struct shm_file_data *sfd = shm_file_data(file);
314 return get_unmapped_area(sfd->file, addr, len, pgoff, flags); 319 return get_unmapped_area(sfd->file, addr, len, pgoff, flags);
315 } 320 }
316 321
317 int is_file_shm_hugepages(struct file *file) 322 int is_file_shm_hugepages(struct file *file)
318 { 323 {
319 int ret = 0; 324 int ret = 0;
320 325
321 if (file->f_op == &shm_file_operations) { 326 if (file->f_op == &shm_file_operations) {
322 struct shm_file_data *sfd; 327 struct shm_file_data *sfd;
323 sfd = shm_file_data(file); 328 sfd = shm_file_data(file);
324 ret = is_file_hugepages(sfd->file); 329 ret = is_file_hugepages(sfd->file);
325 } 330 }
326 return ret; 331 return ret;
327 } 332 }
328 333
329 static const struct file_operations shm_file_operations = { 334 static const struct file_operations shm_file_operations = {
330 .mmap = shm_mmap, 335 .mmap = shm_mmap,
331 .fsync = shm_fsync, 336 .fsync = shm_fsync,
332 .release = shm_release, 337 .release = shm_release,
333 .get_unmapped_area = shm_get_unmapped_area, 338 .get_unmapped_area = shm_get_unmapped_area,
334 }; 339 };
335 340
336 static struct vm_operations_struct shm_vm_ops = { 341 static struct vm_operations_struct shm_vm_ops = {
337 .open = shm_open, /* callback for a new vm-area open */ 342 .open = shm_open, /* callback for a new vm-area open */
338 .close = shm_close, /* callback for when the vm-area is released */ 343 .close = shm_close, /* callback for when the vm-area is released */
339 .fault = shm_fault, 344 .fault = shm_fault,
340 #if defined(CONFIG_NUMA) 345 #if defined(CONFIG_NUMA)
341 .set_policy = shm_set_policy, 346 .set_policy = shm_set_policy,
342 .get_policy = shm_get_policy, 347 .get_policy = shm_get_policy,
343 #endif 348 #endif
344 }; 349 };
345 350
351 /**
352 * newseg - Create a new shared memory segment
353 * @ns: namespace
354 * @params: ptr to the structure that contains key, size and shmflg
355 *
356 * Called with shm_ids.mutex held
357 */
358
346 static int newseg(struct ipc_namespace *ns, struct ipc_params *params) 359 static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
347 { 360 {
348 key_t key = params->key; 361 key_t key = params->key;
349 int shmflg = params->flg; 362 int shmflg = params->flg;
350 size_t size = params->u.size; 363 size_t size = params->u.size;
351 int error; 364 int error;
352 struct shmid_kernel *shp; 365 struct shmid_kernel *shp;
353 int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT; 366 int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT;
354 struct file * file; 367 struct file * file;
355 char name[13]; 368 char name[13];
356 int id; 369 int id;
357 370
358 if (size < SHMMIN || size > ns->shm_ctlmax) 371 if (size < SHMMIN || size > ns->shm_ctlmax)
359 return -EINVAL; 372 return -EINVAL;
360 373
361 if (ns->shm_tot + numpages > ns->shm_ctlall) 374 if (ns->shm_tot + numpages > ns->shm_ctlall)
362 return -ENOSPC; 375 return -ENOSPC;
363 376
364 shp = ipc_rcu_alloc(sizeof(*shp)); 377 shp = ipc_rcu_alloc(sizeof(*shp));
365 if (!shp) 378 if (!shp)
366 return -ENOMEM; 379 return -ENOMEM;
367 380
368 shp->shm_perm.key = key; 381 shp->shm_perm.key = key;
369 shp->shm_perm.mode = (shmflg & S_IRWXUGO); 382 shp->shm_perm.mode = (shmflg & S_IRWXUGO);
370 shp->mlock_user = NULL; 383 shp->mlock_user = NULL;
371 384
372 shp->shm_perm.security = NULL; 385 shp->shm_perm.security = NULL;
373 error = security_shm_alloc(shp); 386 error = security_shm_alloc(shp);
374 if (error) { 387 if (error) {
375 ipc_rcu_putref(shp); 388 ipc_rcu_putref(shp);
376 return error; 389 return error;
377 } 390 }
378 391
379 sprintf (name, "SYSV%08x", key); 392 sprintf (name, "SYSV%08x", key);
380 if (shmflg & SHM_HUGETLB) { 393 if (shmflg & SHM_HUGETLB) {
381 /* hugetlb_file_setup takes care of mlock user accounting */ 394 /* hugetlb_file_setup takes care of mlock user accounting */
382 file = hugetlb_file_setup(name, size); 395 file = hugetlb_file_setup(name, size);
383 shp->mlock_user = current->user; 396 shp->mlock_user = current->user;
384 } else { 397 } else {
385 int acctflag = VM_ACCOUNT; 398 int acctflag = VM_ACCOUNT;
386 /* 399 /*
387 * Do not allow no accounting for OVERCOMMIT_NEVER, even 400 * Do not allow no accounting for OVERCOMMIT_NEVER, even
388 * if it's asked for. 401 * if it's asked for.
389 */ 402 */
390 if ((shmflg & SHM_NORESERVE) && 403 if ((shmflg & SHM_NORESERVE) &&
391 sysctl_overcommit_memory != OVERCOMMIT_NEVER) 404 sysctl_overcommit_memory != OVERCOMMIT_NEVER)
392 acctflag = 0; 405 acctflag = 0;
393 file = shmem_file_setup(name, size, acctflag); 406 file = shmem_file_setup(name, size, acctflag);
394 } 407 }
395 error = PTR_ERR(file); 408 error = PTR_ERR(file);
396 if (IS_ERR(file)) 409 if (IS_ERR(file))
397 goto no_file; 410 goto no_file;
398 411
399 error = -ENOSPC; 412 error = -ENOSPC;
400 id = shm_addid(ns, shp); 413 id = shm_addid(ns, shp);
401 if(id == -1) 414 if(id == -1)
402 goto no_id; 415 goto no_id;
403 416
404 shp->shm_cprid = task_tgid_vnr(current); 417 shp->shm_cprid = task_tgid_vnr(current);
405 shp->shm_lprid = 0; 418 shp->shm_lprid = 0;
406 shp->shm_atim = shp->shm_dtim = 0; 419 shp->shm_atim = shp->shm_dtim = 0;
407 shp->shm_ctim = get_seconds(); 420 shp->shm_ctim = get_seconds();
408 shp->shm_segsz = size; 421 shp->shm_segsz = size;
409 shp->shm_nattch = 0; 422 shp->shm_nattch = 0;
410 shp->shm_perm.id = shm_buildid(ns, id, shp->shm_perm.seq); 423 shp->shm_perm.id = shm_buildid(ns, id, shp->shm_perm.seq);
411 shp->shm_file = file; 424 shp->shm_file = file;
412 /* 425 /*
413 * shmid gets reported as "inode#" in /proc/pid/maps. 426 * shmid gets reported as "inode#" in /proc/pid/maps.
414 * proc-ps tools use this. Changing this will break them. 427 * proc-ps tools use this. Changing this will break them.
415 */ 428 */
416 file->f_dentry->d_inode->i_ino = shp->shm_perm.id; 429 file->f_dentry->d_inode->i_ino = shp->shm_perm.id;
417 430
418 ns->shm_tot += numpages; 431 ns->shm_tot += numpages;
419 error = shp->shm_perm.id; 432 error = shp->shm_perm.id;
420 shm_unlock(shp); 433 shm_unlock(shp);
421 return error; 434 return error;
422 435
423 no_id: 436 no_id:
424 fput(file); 437 fput(file);
425 no_file: 438 no_file:
426 security_shm_free(shp); 439 security_shm_free(shp);
427 ipc_rcu_putref(shp); 440 ipc_rcu_putref(shp);
428 return error; 441 return error;
429 } 442 }
430 443
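From userspace, newseg() is reached through shmget() (sys_shmget() below routes creation here via ipcget()); the size is charged to ns->shm_tot in whole pages, with EINVAL outside SHMMIN..shm_ctlmax and ENOSPC once the shm_ctlall quota is exhausted. A minimal sketch (illustrative only, not part of this commit):

/*
 * Editor's sketch, not part of this diff: the userspace entry into the
 * newseg() path above.
 */
#include <sys/ipc.h>
#include <sys/shm.h>
#include <stdio.h>

int main(void)
{
	int id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);

	if (id < 0) {
		perror("shmget");
		return 1;
	}
	printf("new segment id %d\n", id);
	shmctl(id, IPC_RMID, NULL);
	return 0;
}
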
444 /*
445 * Called with shm_ids.mutex and ipcp locked.
446 */
431 static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg) 447 static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg)
432 { 448 {
433 struct shmid_kernel *shp; 449 struct shmid_kernel *shp;
434 450
435 shp = container_of(ipcp, struct shmid_kernel, shm_perm); 451 shp = container_of(ipcp, struct shmid_kernel, shm_perm);
436 return security_shm_associate(shp, shmflg); 452 return security_shm_associate(shp, shmflg);
437 } 453 }
438 454
455 /*
456 * Called with shm_ids.mutex and ipcp locked.
457 */
439 static inline int shm_more_checks(struct kern_ipc_perm *ipcp, 458 static inline int shm_more_checks(struct kern_ipc_perm *ipcp,
440 struct ipc_params *params) 459 struct ipc_params *params)
441 { 460 {
442 struct shmid_kernel *shp; 461 struct shmid_kernel *shp;
443 462
444 shp = container_of(ipcp, struct shmid_kernel, shm_perm); 463 shp = container_of(ipcp, struct shmid_kernel, shm_perm);
445 if (shp->shm_segsz < params->u.size) 464 if (shp->shm_segsz < params->u.size)
446 return -EINVAL; 465 return -EINVAL;
447 466
448 return 0; 467 return 0;
449 } 468 }
450 469
451 asmlinkage long sys_shmget (key_t key, size_t size, int shmflg) 470 asmlinkage long sys_shmget (key_t key, size_t size, int shmflg)
452 { 471 {
453 struct ipc_namespace *ns; 472 struct ipc_namespace *ns;
454 struct ipc_ops shm_ops; 473 struct ipc_ops shm_ops;
455 struct ipc_params shm_params; 474 struct ipc_params shm_params;
456 475
457 ns = current->nsproxy->ipc_ns; 476 ns = current->nsproxy->ipc_ns;
458 477
459 shm_ops.getnew = newseg; 478 shm_ops.getnew = newseg;
460 shm_ops.associate = shm_security; 479 shm_ops.associate = shm_security;
461 shm_ops.more_checks = shm_more_checks; 480 shm_ops.more_checks = shm_more_checks;
462 481
463 shm_params.key = key; 482 shm_params.key = key;
464 shm_params.flg = shmflg; 483 shm_params.flg = shmflg;
465 shm_params.u.size = size; 484 shm_params.u.size = size;
466 485
467 return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params); 486 return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params);
468 } 487 }
469 488
470 static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version) 489 static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version)
471 { 490 {
472 switch(version) { 491 switch(version) {
473 case IPC_64: 492 case IPC_64:
474 return copy_to_user(buf, in, sizeof(*in)); 493 return copy_to_user(buf, in, sizeof(*in));
475 case IPC_OLD: 494 case IPC_OLD:
476 { 495 {
477 struct shmid_ds out; 496 struct shmid_ds out;
478 497
479 ipc64_perm_to_ipc_perm(&in->shm_perm, &out.shm_perm); 498 ipc64_perm_to_ipc_perm(&in->shm_perm, &out.shm_perm);
480 out.shm_segsz = in->shm_segsz; 499 out.shm_segsz = in->shm_segsz;
481 out.shm_atime = in->shm_atime; 500 out.shm_atime = in->shm_atime;
482 out.shm_dtime = in->shm_dtime; 501 out.shm_dtime = in->shm_dtime;
483 out.shm_ctime = in->shm_ctime; 502 out.shm_ctime = in->shm_ctime;
484 out.shm_cpid = in->shm_cpid; 503 out.shm_cpid = in->shm_cpid;
485 out.shm_lpid = in->shm_lpid; 504 out.shm_lpid = in->shm_lpid;
486 out.shm_nattch = in->shm_nattch; 505 out.shm_nattch = in->shm_nattch;
487 506
488 return copy_to_user(buf, &out, sizeof(out)); 507 return copy_to_user(buf, &out, sizeof(out));
489 } 508 }
490 default: 509 default:
491 return -EINVAL; 510 return -EINVAL;
492 } 511 }
493 } 512 }
494 513
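copy_shmid_to_user() picks the IPC_64 or IPC_OLD layout according to the version parsed out of the shmctl() command. A minimal IPC_STAT sketch (illustrative only, not part of this commit):

/*
 * Editor's sketch, not part of this diff: an IPC_STAT call lands in
 * copy_shmid_to_user() above after sys_shmctl() fills a shmid64_ds.
 */
#include <sys/ipc.h>
#include <sys/shm.h>
#include <stdio.h>

int main(void)
{
	int id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);
	struct shmid_ds ds;

	if (shmctl(id, IPC_STAT, &ds) == 0)
		printf("segsz=%zu nattch=%lu\n",
		       (size_t)ds.shm_segsz, (unsigned long)ds.shm_nattch);
	shmctl(id, IPC_RMID, NULL);
	return 0;
}
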
495 struct shm_setbuf { 514 struct shm_setbuf {
496 uid_t uid; 515 uid_t uid;
497 gid_t gid; 516 gid_t gid;
498 mode_t mode; 517 mode_t mode;
499 }; 518 };
500 519
501 static inline unsigned long copy_shmid_from_user(struct shm_setbuf *out, void __user *buf, int version) 520 static inline unsigned long copy_shmid_from_user(struct shm_setbuf *out, void __user *buf, int version)
502 { 521 {
503 switch(version) { 522 switch(version) {
504 case IPC_64: 523 case IPC_64:
505 { 524 {
506 struct shmid64_ds tbuf; 525 struct shmid64_ds tbuf;
507 526
508 if (copy_from_user(&tbuf, buf, sizeof(tbuf))) 527 if (copy_from_user(&tbuf, buf, sizeof(tbuf)))
509 return -EFAULT; 528 return -EFAULT;
510 529
511 out->uid = tbuf.shm_perm.uid; 530 out->uid = tbuf.shm_perm.uid;
512 out->gid = tbuf.shm_perm.gid; 531 out->gid = tbuf.shm_perm.gid;
513 out->mode = tbuf.shm_perm.mode; 532 out->mode = tbuf.shm_perm.mode;
514 533
515 return 0; 534 return 0;
516 } 535 }
517 case IPC_OLD: 536 case IPC_OLD:
518 { 537 {
519 struct shmid_ds tbuf_old; 538 struct shmid_ds tbuf_old;
520 539
521 if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old))) 540 if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
522 return -EFAULT; 541 return -EFAULT;
523 542
524 out->uid = tbuf_old.shm_perm.uid; 543 out->uid = tbuf_old.shm_perm.uid;
525 out->gid = tbuf_old.shm_perm.gid; 544 out->gid = tbuf_old.shm_perm.gid;
526 out->mode = tbuf_old.shm_perm.mode; 545 out->mode = tbuf_old.shm_perm.mode;
527 546
528 return 0; 547 return 0;
529 } 548 }
530 default: 549 default:
531 return -EINVAL; 550 return -EINVAL;
532 } 551 }
533 } 552 }
534 553
535 static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminfo64 *in, int version) 554 static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminfo64 *in, int version)
536 { 555 {
537 switch(version) { 556 switch(version) {
538 case IPC_64: 557 case IPC_64:
539 return copy_to_user(buf, in, sizeof(*in)); 558 return copy_to_user(buf, in, sizeof(*in));
540 case IPC_OLD: 559 case IPC_OLD:
541 { 560 {
542 struct shminfo out; 561 struct shminfo out;
543 562
544 if(in->shmmax > INT_MAX) 563 if(in->shmmax > INT_MAX)
545 out.shmmax = INT_MAX; 564 out.shmmax = INT_MAX;
546 else 565 else
547 out.shmmax = (int)in->shmmax; 566 out.shmmax = (int)in->shmmax;
548 567
549 out.shmmin = in->shmmin; 568 out.shmmin = in->shmmin;
550 out.shmmni = in->shmmni; 569 out.shmmni = in->shmmni;
551 out.shmseg = in->shmseg; 570 out.shmseg = in->shmseg;
552 out.shmall = in->shmall; 571 out.shmall = in->shmall;
553 572
554 return copy_to_user(buf, &out, sizeof(out)); 573 return copy_to_user(buf, &out, sizeof(out));
555 } 574 }
556 default: 575 default:
557 return -EINVAL; 576 return -EINVAL;
558 } 577 }
559 } 578 }
560 579
580 /*
581 * Called with shm_ids.mutex held
582 */
561 static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss, 583 static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
562 unsigned long *swp) 584 unsigned long *swp)
563 { 585 {
564 int next_id; 586 int next_id;
565 int total, in_use; 587 int total, in_use;
566 588
567 *rss = 0; 589 *rss = 0;
568 *swp = 0; 590 *swp = 0;
569 591
570 in_use = shm_ids(ns).in_use; 592 in_use = shm_ids(ns).in_use;
571 593
572 for (total = 0, next_id = 0; total < in_use; next_id++) { 594 for (total = 0, next_id = 0; total < in_use; next_id++) {
573 struct shmid_kernel *shp; 595 struct shmid_kernel *shp;
574 struct inode *inode; 596 struct inode *inode;
575 597
576 /*
577 * idr_find() is called via shm_get(), so with shm_ids.mutex
578 * locked. Since ipc_addid() is also called with
579 * shm_ids.mutex down, there is no need to add read barriers
580 * here to guarantee the writes in ipc_addid() are seen in
581 * order here (for Alpha).
582 * However idr_find() itself does not necessarily require
583 * ipc_ids.mutex down. So if idr_find() is used by other
584 * places without ipc_ids.mutex down, then it needs
585 * read memory barriers as ipc_lock() does.
586 */
587
588 shp = idr_find(&shm_ids(ns).ipcs_idr, next_id); 598 shp = idr_find(&shm_ids(ns).ipcs_idr, next_id);
589 if (shp == NULL) 599 if (shp == NULL)
590 continue; 600 continue;
591 601
592 inode = shp->shm_file->f_path.dentry->d_inode; 602 inode = shp->shm_file->f_path.dentry->d_inode;
593 603
594 if (is_file_hugepages(shp->shm_file)) { 604 if (is_file_hugepages(shp->shm_file)) {
595 struct address_space *mapping = inode->i_mapping; 605 struct address_space *mapping = inode->i_mapping;
596 *rss += (HPAGE_SIZE/PAGE_SIZE)*mapping->nrpages; 606 *rss += (HPAGE_SIZE/PAGE_SIZE)*mapping->nrpages;
597 } else { 607 } else {
598 struct shmem_inode_info *info = SHMEM_I(inode); 608 struct shmem_inode_info *info = SHMEM_I(inode);
599 spin_lock(&info->lock); 609 spin_lock(&info->lock);
600 *rss += inode->i_mapping->nrpages; 610 *rss += inode->i_mapping->nrpages;
601 *swp += info->swapped; 611 *swp += info->swapped;
602 spin_unlock(&info->lock); 612 spin_unlock(&info->lock);
603 } 613 }
604 614
605 total++; 615 total++;
606 } 616 }
607 } 617 }
608 618
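The totals shm_get_stat() computes are exposed through the SHM_INFO command in sys_shmctl() below, together with the highest in-use index as the non-negative return value. A minimal sketch (illustrative only, not part of this commit):

/*
 * Editor's sketch, not part of this diff: SHM_INFO returns the rss/swap
 * sums from shm_get_stat() plus the highest slot index.
 */
#define _GNU_SOURCE
#include <sys/ipc.h>
#include <sys/shm.h>
#include <stdio.h>

int main(void)
{
	struct shm_info info;
	int maxid = shmctl(0, SHM_INFO, (struct shmid_ds *)&info);

	if (maxid < 0) {
		perror("shmctl");
		return 1;
	}
	printf("used_ids=%d rss=%lu swp=%lu\n",
	       info.used_ids, info.shm_rss, info.shm_swp);
	return 0;
}
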
609 asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf) 619 asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
610 { 620 {
611 struct shm_setbuf setbuf; 621 struct shm_setbuf setbuf;
612 struct shmid_kernel *shp; 622 struct shmid_kernel *shp;
613 int err, version; 623 int err, version;
614 struct ipc_namespace *ns; 624 struct ipc_namespace *ns;
615 625
616 if (cmd < 0 || shmid < 0) { 626 if (cmd < 0 || shmid < 0) {
617 err = -EINVAL; 627 err = -EINVAL;
618 goto out; 628 goto out;
619 } 629 }
620 630
621 version = ipc_parse_version(&cmd); 631 version = ipc_parse_version(&cmd);
622 ns = current->nsproxy->ipc_ns; 632 ns = current->nsproxy->ipc_ns;
623 633
624 switch (cmd) { /* replace with proc interface ? */ 634 switch (cmd) { /* replace with proc interface ? */
625 case IPC_INFO: 635 case IPC_INFO:
626 { 636 {
627 struct shminfo64 shminfo; 637 struct shminfo64 shminfo;
628 638
629 err = security_shm_shmctl(NULL, cmd); 639 err = security_shm_shmctl(NULL, cmd);
630 if (err) 640 if (err)
631 return err; 641 return err;
632 642
633 memset(&shminfo,0,sizeof(shminfo)); 643 memset(&shminfo,0,sizeof(shminfo));
634 shminfo.shmmni = shminfo.shmseg = ns->shm_ctlmni; 644 shminfo.shmmni = shminfo.shmseg = ns->shm_ctlmni;
635 shminfo.shmmax = ns->shm_ctlmax; 645 shminfo.shmmax = ns->shm_ctlmax;
636 shminfo.shmall = ns->shm_ctlall; 646 shminfo.shmall = ns->shm_ctlall;
637 647
638 shminfo.shmmin = SHMMIN; 648 shminfo.shmmin = SHMMIN;
639 if(copy_shminfo_to_user (buf, &shminfo, version)) 649 if(copy_shminfo_to_user (buf, &shminfo, version))
640 return -EFAULT; 650 return -EFAULT;
641 /* reading an integer is always atomic */ 651
652 mutex_lock(&shm_ids(ns).mutex);
642 err = ipc_get_maxid(&shm_ids(ns)); 653 err = ipc_get_maxid(&shm_ids(ns));
654 mutex_unlock(&shm_ids(ns).mutex);
655
643 if(err<0) 656 if(err<0)
644 err = 0; 657 err = 0;
645 goto out; 658 goto out;
646 } 659 }
647 case SHM_INFO: 660 case SHM_INFO:
648 { 661 {
649 struct shm_info shm_info; 662 struct shm_info shm_info;
650 663
651 err = security_shm_shmctl(NULL, cmd); 664 err = security_shm_shmctl(NULL, cmd);
652 if (err) 665 if (err)
653 return err; 666 return err;
654 667
655 memset(&shm_info,0,sizeof(shm_info)); 668 memset(&shm_info,0,sizeof(shm_info));
656 mutex_lock(&shm_ids(ns).mutex); 669 mutex_lock(&shm_ids(ns).mutex);
657 shm_info.used_ids = shm_ids(ns).in_use; 670 shm_info.used_ids = shm_ids(ns).in_use;
658 shm_get_stat (ns, &shm_info.shm_rss, &shm_info.shm_swp); 671 shm_get_stat (ns, &shm_info.shm_rss, &shm_info.shm_swp);
659 shm_info.shm_tot = ns->shm_tot; 672 shm_info.shm_tot = ns->shm_tot;
660 shm_info.swap_attempts = 0; 673 shm_info.swap_attempts = 0;
661 shm_info.swap_successes = 0; 674 shm_info.swap_successes = 0;
662 err = ipc_get_maxid(&shm_ids(ns)); 675 err = ipc_get_maxid(&shm_ids(ns));
663 mutex_unlock(&shm_ids(ns).mutex); 676 mutex_unlock(&shm_ids(ns).mutex);
664 if(copy_to_user (buf, &shm_info, sizeof(shm_info))) { 677 if(copy_to_user (buf, &shm_info, sizeof(shm_info))) {
665 err = -EFAULT; 678 err = -EFAULT;
666 goto out; 679 goto out;
667 } 680 }
668 681
669 err = err < 0 ? 0 : err; 682 err = err < 0 ? 0 : err;
670 goto out; 683 goto out;
671 } 684 }
672 case SHM_STAT: 685 case SHM_STAT:
673 case IPC_STAT: 686 case IPC_STAT:
674 { 687 {
675 struct shmid64_ds tbuf; 688 struct shmid64_ds tbuf;
676 int result; 689 int result;
677 690
678 if (!buf) { 691 if (!buf) {
679 err = -EFAULT; 692 err = -EFAULT;
680 goto out; 693 goto out;
681 } 694 }
682 695
683 if (cmd == SHM_STAT) { 696 if (cmd == SHM_STAT) {
684 shp = shm_lock(ns, shmid); 697 shp = shm_lock(ns, shmid);
685 if (IS_ERR(shp)) { 698 if (IS_ERR(shp)) {
686 err = PTR_ERR(shp); 699 err = PTR_ERR(shp);
687 goto out; 700 goto out;
688 } 701 }
689 result = shp->shm_perm.id; 702 result = shp->shm_perm.id;
690 } else { 703 } else {
691 shp = shm_lock_check(ns, shmid); 704 shp = shm_lock_check(ns, shmid);
692 if (IS_ERR(shp)) { 705 if (IS_ERR(shp)) {
693 err = PTR_ERR(shp); 706 err = PTR_ERR(shp);
694 goto out; 707 goto out;
695 } 708 }
696 result = 0; 709 result = 0;
697 } 710 }
698 err=-EACCES; 711 err=-EACCES;
699 if (ipcperms (&shp->shm_perm, S_IRUGO)) 712 if (ipcperms (&shp->shm_perm, S_IRUGO))
700 goto out_unlock; 713 goto out_unlock;
701 err = security_shm_shmctl(shp, cmd); 714 err = security_shm_shmctl(shp, cmd);
702 if (err) 715 if (err)
703 goto out_unlock; 716 goto out_unlock;
704 memset(&tbuf, 0, sizeof(tbuf)); 717 memset(&tbuf, 0, sizeof(tbuf));
705 kernel_to_ipc64_perm(&shp->shm_perm, &tbuf.shm_perm); 718 kernel_to_ipc64_perm(&shp->shm_perm, &tbuf.shm_perm);
706 tbuf.shm_segsz = shp->shm_segsz; 719 tbuf.shm_segsz = shp->shm_segsz;
707 tbuf.shm_atime = shp->shm_atim; 720 tbuf.shm_atime = shp->shm_atim;
708 tbuf.shm_dtime = shp->shm_dtim; 721 tbuf.shm_dtime = shp->shm_dtim;
709 tbuf.shm_ctime = shp->shm_ctim; 722 tbuf.shm_ctime = shp->shm_ctim;
710 tbuf.shm_cpid = shp->shm_cprid; 723 tbuf.shm_cpid = shp->shm_cprid;
711 tbuf.shm_lpid = shp->shm_lprid; 724 tbuf.shm_lpid = shp->shm_lprid;
712 tbuf.shm_nattch = shp->shm_nattch; 725 tbuf.shm_nattch = shp->shm_nattch;
713 shm_unlock(shp); 726 shm_unlock(shp);
714 if(copy_shmid_to_user (buf, &tbuf, version)) 727 if(copy_shmid_to_user (buf, &tbuf, version))
715 err = -EFAULT; 728 err = -EFAULT;
716 else 729 else
717 err = result; 730 err = result;
718 goto out; 731 goto out;
719 } 732 }
720 case SHM_LOCK: 733 case SHM_LOCK:
721 case SHM_UNLOCK: 734 case SHM_UNLOCK:
722 { 735 {
723 shp = shm_lock_check(ns, shmid); 736 shp = shm_lock_check(ns, shmid);
724 if (IS_ERR(shp)) { 737 if (IS_ERR(shp)) {
725 err = PTR_ERR(shp); 738 err = PTR_ERR(shp);
726 goto out; 739 goto out;
727 } 740 }
728 741
729 err = audit_ipc_obj(&(shp->shm_perm)); 742 err = audit_ipc_obj(&(shp->shm_perm));
730 if (err) 743 if (err)
731 goto out_unlock; 744 goto out_unlock;
732 745
733 if (!capable(CAP_IPC_LOCK)) { 746 if (!capable(CAP_IPC_LOCK)) {
734 err = -EPERM; 747 err = -EPERM;
735 if (current->euid != shp->shm_perm.uid && 748 if (current->euid != shp->shm_perm.uid &&
736 current->euid != shp->shm_perm.cuid) 749 current->euid != shp->shm_perm.cuid)
737 goto out_unlock; 750 goto out_unlock;
738 if (cmd == SHM_LOCK && 751 if (cmd == SHM_LOCK &&
739 !current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur) 752 !current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur)
740 goto out_unlock; 753 goto out_unlock;
741 } 754 }
742 755
743 err = security_shm_shmctl(shp, cmd); 756 err = security_shm_shmctl(shp, cmd);
744 if (err) 757 if (err)
745 goto out_unlock; 758 goto out_unlock;
746 759
747 if(cmd==SHM_LOCK) { 760 if(cmd==SHM_LOCK) {
748 struct user_struct * user = current->user; 761 struct user_struct * user = current->user;
749 if (!is_file_hugepages(shp->shm_file)) { 762 if (!is_file_hugepages(shp->shm_file)) {
750 err = shmem_lock(shp->shm_file, 1, user); 763 err = shmem_lock(shp->shm_file, 1, user);
751 if (!err && !(shp->shm_perm.mode & SHM_LOCKED)){ 764 if (!err && !(shp->shm_perm.mode & SHM_LOCKED)){
752 shp->shm_perm.mode |= SHM_LOCKED; 765 shp->shm_perm.mode |= SHM_LOCKED;
753 shp->mlock_user = user; 766 shp->mlock_user = user;
754 } 767 }
755 } 768 }
756 } else if (!is_file_hugepages(shp->shm_file)) { 769 } else if (!is_file_hugepages(shp->shm_file)) {
757 shmem_lock(shp->shm_file, 0, shp->mlock_user); 770 shmem_lock(shp->shm_file, 0, shp->mlock_user);
758 shp->shm_perm.mode &= ~SHM_LOCKED; 771 shp->shm_perm.mode &= ~SHM_LOCKED;
759 shp->mlock_user = NULL; 772 shp->mlock_user = NULL;
760 } 773 }
761 shm_unlock(shp); 774 shm_unlock(shp);
762 goto out; 775 goto out;
763 } 776 }
764 case IPC_RMID: 777 case IPC_RMID:
765 { 778 {
766 /* 779 /*
767 * We cannot simply remove the file. The SVID states 780 * We cannot simply remove the file. The SVID states
768 * that the block remains until the last person 781 * that the block remains until the last person
769 * detaches from it, then is deleted. A shmat() on 782 * detaches from it, then is deleted. A shmat() on
770 * an RMID segment is legal in older Linux and if 783 * an RMID segment is legal in older Linux and if
771 * we change it apps break... 784 * we change it apps break...
772 * 785 *
773 * Instead we set a destroyed flag, and then blow 786 * Instead we set a destroyed flag, and then blow
774 * the name away when the usage hits zero. 787 * the name away when the usage hits zero.
775 */ 788 */
776 mutex_lock(&shm_ids(ns).mutex); 789 mutex_lock(&shm_ids(ns).mutex);
777 shp = shm_lock_check(ns, shmid); 790 shp = shm_lock_check(ns, shmid);
778 if (IS_ERR(shp)) { 791 if (IS_ERR(shp)) {
779 err = PTR_ERR(shp); 792 err = PTR_ERR(shp);
780 goto out_up; 793 goto out_up;
781 } 794 }
782 795
783 err = audit_ipc_obj(&(shp->shm_perm)); 796 err = audit_ipc_obj(&(shp->shm_perm));
784 if (err) 797 if (err)
785 goto out_unlock_up; 798 goto out_unlock_up;
786 799
787 if (current->euid != shp->shm_perm.uid && 800 if (current->euid != shp->shm_perm.uid &&
788 current->euid != shp->shm_perm.cuid && 801 current->euid != shp->shm_perm.cuid &&
789 !capable(CAP_SYS_ADMIN)) { 802 !capable(CAP_SYS_ADMIN)) {
790 err=-EPERM; 803 err=-EPERM;
791 goto out_unlock_up; 804 goto out_unlock_up;
792 } 805 }
793 806
794 err = security_shm_shmctl(shp, cmd); 807 err = security_shm_shmctl(shp, cmd);
795 if (err) 808 if (err)
796 goto out_unlock_up; 809 goto out_unlock_up;
797 810
798 do_shm_rmid(ns, shp); 811 do_shm_rmid(ns, shp);
799 mutex_unlock(&shm_ids(ns).mutex); 812 mutex_unlock(&shm_ids(ns).mutex);
800 goto out; 813 goto out;
801 } 814 }
802 815
803 case IPC_SET: 816 case IPC_SET:
804 { 817 {
805 if (!buf) { 818 if (!buf) {
806 err = -EFAULT; 819 err = -EFAULT;
807 goto out; 820 goto out;
808 } 821 }
809 822
810 if (copy_shmid_from_user (&setbuf, buf, version)) { 823 if (copy_shmid_from_user (&setbuf, buf, version)) {
811 err = -EFAULT; 824 err = -EFAULT;
812 goto out; 825 goto out;
813 } 826 }
814 mutex_lock(&shm_ids(ns).mutex); 827 mutex_lock(&shm_ids(ns).mutex);
815 shp = shm_lock_check(ns, shmid); 828 shp = shm_lock_check(ns, shmid);
816 if (IS_ERR(shp)) { 829 if (IS_ERR(shp)) {
817 err = PTR_ERR(shp); 830 err = PTR_ERR(shp);
818 goto out_up; 831 goto out_up;
819 } 832 }
820 err = audit_ipc_obj(&(shp->shm_perm)); 833 err = audit_ipc_obj(&(shp->shm_perm));
821 if (err) 834 if (err)
822 goto out_unlock_up; 835 goto out_unlock_up;
823 err = audit_ipc_set_perm(0, setbuf.uid, setbuf.gid, setbuf.mode); 836 err = audit_ipc_set_perm(0, setbuf.uid, setbuf.gid, setbuf.mode);
824 if (err) 837 if (err)
825 goto out_unlock_up; 838 goto out_unlock_up;
826 err=-EPERM; 839 err=-EPERM;
827 if (current->euid != shp->shm_perm.uid && 840 if (current->euid != shp->shm_perm.uid &&
828 current->euid != shp->shm_perm.cuid && 841 current->euid != shp->shm_perm.cuid &&
829 !capable(CAP_SYS_ADMIN)) { 842 !capable(CAP_SYS_ADMIN)) {
830 goto out_unlock_up; 843 goto out_unlock_up;
831 } 844 }
832 845
833 err = security_shm_shmctl(shp, cmd); 846 err = security_shm_shmctl(shp, cmd);
834 if (err) 847 if (err)
835 goto out_unlock_up; 848 goto out_unlock_up;
836 849
837 shp->shm_perm.uid = setbuf.uid; 850 shp->shm_perm.uid = setbuf.uid;
838 shp->shm_perm.gid = setbuf.gid; 851 shp->shm_perm.gid = setbuf.gid;
839 shp->shm_perm.mode = (shp->shm_perm.mode & ~S_IRWXUGO) 852 shp->shm_perm.mode = (shp->shm_perm.mode & ~S_IRWXUGO)
840 | (setbuf.mode & S_IRWXUGO); 853 | (setbuf.mode & S_IRWXUGO);
841 shp->shm_ctim = get_seconds(); 854 shp->shm_ctim = get_seconds();
842 break; 855 break;
843 } 856 }
844 857
845 default: 858 default:
846 err = -EINVAL; 859 err = -EINVAL;
847 goto out; 860 goto out;
848 } 861 }
849 862
850 err = 0; 863 err = 0;
851 out_unlock_up: 864 out_unlock_up:
852 shm_unlock(shp); 865 shm_unlock(shp);
853 out_up: 866 out_up:
854 mutex_unlock(&shm_ids(ns).mutex); 867 mutex_unlock(&shm_ids(ns).mutex);
855 goto out; 868 goto out;
856 out_unlock: 869 out_unlock:
857 shm_unlock(shp); 870 shm_unlock(shp);
858 out: 871 out:
859 return err; 872 return err;
860 } 873 }
861 874
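The IPC_RMID case above defers destruction while attachments remain: do_shm_rmid() sets SHM_DEST and hides the key, and shm_destroy() finally runs from shm_close() when shm_nattch reaches zero. A minimal sketch (illustrative only, not part of this commit):

/*
 * Editor's sketch, not part of this diff: IPC_RMID on an attached
 * segment. The existing mapping keeps working; the segment is freed
 * only at the last detach.
 */
#include <sys/ipc.h>
#include <sys/shm.h>
#include <string.h>

int main(void)
{
	int id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);
	char *p = shmat(id, NULL, 0);

	if (p == (void *)-1)
		return 1;
	shmctl(id, IPC_RMID, NULL);	/* marked SHM_DEST, not yet freed */
	strcpy(p, "still mapped");	/* existing attach stays valid */
	shmdt(p);			/* nattch drops to 0 -> destroyed */
	return 0;
}
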
862 /* 875 /*
863 * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists. 876 * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists.
864 * 877 *
865 * NOTE! Despite the name, this is NOT a direct system call entrypoint. The 878 * NOTE! Despite the name, this is NOT a direct system call entrypoint. The
866 * "raddr" thing points to kernel space, and there has to be a wrapper around 879 * "raddr" thing points to kernel space, and there has to be a wrapper around
867 * this. 880 * this.
868 */ 881 */
869 long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr) 882 long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
870 { 883 {
871 struct shmid_kernel *shp; 884 struct shmid_kernel *shp;
872 unsigned long addr; 885 unsigned long addr;
873 unsigned long size; 886 unsigned long size;
874 struct file * file; 887 struct file * file;
875 int err; 888 int err;
876 unsigned long flags; 889 unsigned long flags;
877 unsigned long prot; 890 unsigned long prot;
878 int acc_mode; 891 int acc_mode;
879 unsigned long user_addr; 892 unsigned long user_addr;
880 struct ipc_namespace *ns; 893 struct ipc_namespace *ns;
881 struct shm_file_data *sfd; 894 struct shm_file_data *sfd;
882 struct path path; 895 struct path path;
883 mode_t f_mode; 896 mode_t f_mode;
884 897
885 err = -EINVAL; 898 err = -EINVAL;
886 if (shmid < 0) 899 if (shmid < 0)
887 goto out; 900 goto out;
888 else if ((addr = (ulong)shmaddr)) { 901 else if ((addr = (ulong)shmaddr)) {
889 if (addr & (SHMLBA-1)) { 902 if (addr & (SHMLBA-1)) {
890 if (shmflg & SHM_RND) 903 if (shmflg & SHM_RND)
891 addr &= ~(SHMLBA-1); /* round down */ 904 addr &= ~(SHMLBA-1); /* round down */
892 else 905 else
893 #ifndef __ARCH_FORCE_SHMLBA 906 #ifndef __ARCH_FORCE_SHMLBA
894 if (addr & ~PAGE_MASK) 907 if (addr & ~PAGE_MASK)
895 #endif 908 #endif
896 goto out; 909 goto out;
897 } 910 }
898 flags = MAP_SHARED | MAP_FIXED; 911 flags = MAP_SHARED | MAP_FIXED;
899 } else { 912 } else {
900 if ((shmflg & SHM_REMAP)) 913 if ((shmflg & SHM_REMAP))
901 goto out; 914 goto out;
902 915
903 flags = MAP_SHARED; 916 flags = MAP_SHARED;
904 } 917 }
905 918
906 if (shmflg & SHM_RDONLY) { 919 if (shmflg & SHM_RDONLY) {
907 prot = PROT_READ; 920 prot = PROT_READ;
908 acc_mode = S_IRUGO; 921 acc_mode = S_IRUGO;
909 f_mode = FMODE_READ; 922 f_mode = FMODE_READ;
910 } else { 923 } else {
911 prot = PROT_READ | PROT_WRITE; 924 prot = PROT_READ | PROT_WRITE;
912 acc_mode = S_IRUGO | S_IWUGO; 925 acc_mode = S_IRUGO | S_IWUGO;
913 f_mode = FMODE_READ | FMODE_WRITE; 926 f_mode = FMODE_READ | FMODE_WRITE;
914 } 927 }
915 if (shmflg & SHM_EXEC) { 928 if (shmflg & SHM_EXEC) {
916 prot |= PROT_EXEC; 929 prot |= PROT_EXEC;
917 acc_mode |= S_IXUGO; 930 acc_mode |= S_IXUGO;
918 } 931 }
919 932
920 /* 933 /*
921 * We cannot rely on the fs check since SYSV IPC does have an 934 * We cannot rely on the fs check since SYSV IPC does have an
922 * additional creator id... 935 * additional creator id...
923 */ 936 */
924 ns = current->nsproxy->ipc_ns; 937 ns = current->nsproxy->ipc_ns;
925 shp = shm_lock_check(ns, shmid); 938 shp = shm_lock_check(ns, shmid);
926 if (IS_ERR(shp)) { 939 if (IS_ERR(shp)) {
927 err = PTR_ERR(shp); 940 err = PTR_ERR(shp);
928 goto out; 941 goto out;
929 } 942 }
930 943
931 err = -EACCES; 944 err = -EACCES;
932 if (ipcperms(&shp->shm_perm, acc_mode)) 945 if (ipcperms(&shp->shm_perm, acc_mode))
933 goto out_unlock; 946 goto out_unlock;
934 947
935 err = security_shm_shmat(shp, shmaddr, shmflg); 948 err = security_shm_shmat(shp, shmaddr, shmflg);
936 if (err) 949 if (err)
937 goto out_unlock; 950 goto out_unlock;
938 951
939 path.dentry = dget(shp->shm_file->f_path.dentry); 952 path.dentry = dget(shp->shm_file->f_path.dentry);
940 path.mnt = shp->shm_file->f_path.mnt; 953 path.mnt = shp->shm_file->f_path.mnt;
941 shp->shm_nattch++; 954 shp->shm_nattch++;
942 size = i_size_read(path.dentry->d_inode); 955 size = i_size_read(path.dentry->d_inode);
943 shm_unlock(shp); 956 shm_unlock(shp);
944 957
945 err = -ENOMEM; 958 err = -ENOMEM;
946 sfd = kzalloc(sizeof(*sfd), GFP_KERNEL); 959 sfd = kzalloc(sizeof(*sfd), GFP_KERNEL);
947 if (!sfd) 960 if (!sfd)
948 goto out_put_dentry; 961 goto out_put_dentry;
949 962
950 err = -ENOMEM; 963 err = -ENOMEM;
951 964
952 file = alloc_file(path.mnt, path.dentry, f_mode, &shm_file_operations); 965 file = alloc_file(path.mnt, path.dentry, f_mode, &shm_file_operations);
953 if (!file) 966 if (!file)
954 goto out_free; 967 goto out_free;
955 968
956 file->private_data = sfd; 969 file->private_data = sfd;
957 file->f_mapping = shp->shm_file->f_mapping; 970 file->f_mapping = shp->shm_file->f_mapping;
958 sfd->id = shp->shm_perm.id; 971 sfd->id = shp->shm_perm.id;
959 sfd->ns = get_ipc_ns(ns); 972 sfd->ns = get_ipc_ns(ns);
960 sfd->file = shp->shm_file; 973 sfd->file = shp->shm_file;
961 sfd->vm_ops = NULL; 974 sfd->vm_ops = NULL;
962 975
963 down_write(&current->mm->mmap_sem); 976 down_write(&current->mm->mmap_sem);
964 if (addr && !(shmflg & SHM_REMAP)) { 977 if (addr && !(shmflg & SHM_REMAP)) {
965 err = -EINVAL; 978 err = -EINVAL;
966 if (find_vma_intersection(current->mm, addr, addr + size)) 979 if (find_vma_intersection(current->mm, addr, addr + size))
967 goto invalid; 980 goto invalid;
968 /* 981 /*
969 * If shm segment goes below stack, make sure there is some 982 * If shm segment goes below stack, make sure there is some
970 * space left for the stack to grow (at least 4 pages). 983 * space left for the stack to grow (at least 4 pages).
971 */ 984 */
972 if (addr < current->mm->start_stack && 985 if (addr < current->mm->start_stack &&
973 addr > current->mm->start_stack - size - PAGE_SIZE * 5) 986 addr > current->mm->start_stack - size - PAGE_SIZE * 5)
974 goto invalid; 987 goto invalid;
975 } 988 }
976 989
977 user_addr = do_mmap (file, addr, size, prot, flags, 0); 990 user_addr = do_mmap (file, addr, size, prot, flags, 0);
978 *raddr = user_addr; 991 *raddr = user_addr;
979 err = 0; 992 err = 0;
980 if (IS_ERR_VALUE(user_addr)) 993 if (IS_ERR_VALUE(user_addr))
981 err = (long)user_addr; 994 err = (long)user_addr;
982 invalid: 995 invalid:
983 up_write(&current->mm->mmap_sem); 996 up_write(&current->mm->mmap_sem);
984 997
985 fput(file); 998 fput(file);
986 999
987 out_nattch: 1000 out_nattch:
988 mutex_lock(&shm_ids(ns).mutex); 1001 mutex_lock(&shm_ids(ns).mutex);
989 shp = shm_lock(ns, shmid); 1002 shp = shm_lock(ns, shmid);
990 BUG_ON(IS_ERR(shp)); 1003 BUG_ON(IS_ERR(shp));
991 shp->shm_nattch--; 1004 shp->shm_nattch--;
992 if(shp->shm_nattch == 0 && 1005 if(shp->shm_nattch == 0 &&
993 shp->shm_perm.mode & SHM_DEST) 1006 shp->shm_perm.mode & SHM_DEST)
994 shm_destroy(ns, shp); 1007 shm_destroy(ns, shp);
995 else 1008 else
996 shm_unlock(shp); 1009 shm_unlock(shp);
997 mutex_unlock(&shm_ids(ns).mutex); 1010 mutex_unlock(&shm_ids(ns).mutex);
998 1011
999 out: 1012 out:
1000 return err; 1013 return err;
1001 1014
1002 out_unlock: 1015 out_unlock:
1003 shm_unlock(shp); 1016 shm_unlock(shp);
1004 goto out; 1017 goto out;
1005 1018
1006 out_free: 1019 out_free:
1007 kfree(sfd); 1020 kfree(sfd);
1008 out_put_dentry: 1021 out_put_dentry:
1009 dput(path.dentry); 1022 dput(path.dentry);
1010 goto out_nattch; 1023 goto out_nattch;
1011 } 1024 }
1012 1025
1013 asmlinkage long sys_shmat(int shmid, char __user *shmaddr, int shmflg) 1026 asmlinkage long sys_shmat(int shmid, char __user *shmaddr, int shmflg)
1014 { 1027 {
1015 unsigned long ret; 1028 unsigned long ret;
1016 long err; 1029 long err;
1017 1030
1018 err = do_shmat(shmid, shmaddr, shmflg, &ret); 1031 err = do_shmat(shmid, shmaddr, shmflg, &ret);
1019 if (err) 1032 if (err)
1020 return err; 1033 return err;
1021 force_successful_syscall_return(); 1034 force_successful_syscall_return();
1022 return (long)ret; 1035 return (long)ret;
1023 } 1036 }
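
The *raddr value that do_shmat() fills in becomes the return value of shmat(2). A user-space sketch of the common case, assuming only the standard API (shmaddr == NULL sidesteps the SHMLBA alignment rules above):

    #include <string.h>
    #include <sys/ipc.h>
    #include <sys/shm.h>

    /* Sketch: attach a segment read-write at a kernel-chosen address. */
    void *attach_segment(int shmid)
    {
        void *p = shmat(shmid, NULL, 0);

        if (p == (void *)-1)
            return NULL;    /* errno carries the error from do_shmat() */
        memset(p, 0, 1);    /* the mapping is live */
        return p;
    }
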
1024 1037
1025 /* 1038 /*
1026 * detach and kill segment if marked destroyed. 1039 * detach and kill segment if marked destroyed.
1027 * The work is done in shm_close. 1040 * The work is done in shm_close.
1028 */ 1041 */
1029 asmlinkage long sys_shmdt(char __user *shmaddr) 1042 asmlinkage long sys_shmdt(char __user *shmaddr)
1030 { 1043 {
1031 struct mm_struct *mm = current->mm; 1044 struct mm_struct *mm = current->mm;
1032 struct vm_area_struct *vma, *next; 1045 struct vm_area_struct *vma, *next;
1033 unsigned long addr = (unsigned long)shmaddr; 1046 unsigned long addr = (unsigned long)shmaddr;
1034 loff_t size = 0; 1047 loff_t size = 0;
1035 int retval = -EINVAL; 1048 int retval = -EINVAL;
1036 1049
1037 if (addr & ~PAGE_MASK) 1050 if (addr & ~PAGE_MASK)
1038 return retval; 1051 return retval;
1039 1052
1040 down_write(&mm->mmap_sem); 1053 down_write(&mm->mmap_sem);
1041 1054
1042 /* 1055 /*
1043 * This function tries to be smart and unmap shm segments that 1056 * This function tries to be smart and unmap shm segments that
1044 * were modified by partial mlock or munmap calls: 1057 * were modified by partial mlock or munmap calls:
1045 * - It first determines the size of the shm segment that should be 1058 * - It first determines the size of the shm segment that should be
1046 * unmapped: It searches for a vma that is backed by shm and that 1059 * unmapped: It searches for a vma that is backed by shm and that
1047 * started at address shmaddr. It records its size and then unmaps 1060 * started at address shmaddr. It records its size and then unmaps
1048 * it. 1061 * it.
1049 * - Then it unmaps all shm vmas that started at shmaddr and that 1062 * - Then it unmaps all shm vmas that started at shmaddr and that
1050 * are within the initially determined size. 1063 * are within the initially determined size.
1051 * Errors from do_munmap are ignored: the function only fails if 1064 * Errors from do_munmap are ignored: the function only fails if
1052 * it's called with invalid parameters or if it's called to unmap 1065 * it's called with invalid parameters or if it's called to unmap
1053 * a part of a vma. Both calls in this function are for full vmas, 1066 * a part of a vma. Both calls in this function are for full vmas,
1054 * the parameters are directly copied from the vma itself and always 1067 * the parameters are directly copied from the vma itself and always
1055 * valid - therefore do_munmap cannot fail. (famous last words?) 1068 * valid - therefore do_munmap cannot fail. (famous last words?)
1056 */ 1069 */
1057 /* 1070 /*
1058 * If it had been mremap()'d, the starting address would not 1071 * If it had been mremap()'d, the starting address would not
1059 * match the usual checks anyway. So assume all vma's are 1072 * match the usual checks anyway. So assume all vma's are
1060 * above the starting address given. 1073 * above the starting address given.
1061 */ 1074 */
1062 vma = find_vma(mm, addr); 1075 vma = find_vma(mm, addr);
1063 1076
1064 while (vma) { 1077 while (vma) {
1065 next = vma->vm_next; 1078 next = vma->vm_next;
1066 1079
1067 /* 1080 /*
1068 * Check if the starting address would match, i.e. it's 1081 * Check if the starting address would match, i.e. it's
1069 * a fragment created by mprotect() and/or munmap(), or it 1082 * a fragment created by mprotect() and/or munmap(), or it
1070 * otherwise starts at this address with no hassles. 1083 * otherwise starts at this address with no hassles.
1071 */ 1084 */
1072 if ((vma->vm_ops == &shm_vm_ops) && 1085 if ((vma->vm_ops == &shm_vm_ops) &&
1073 (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) { 1086 (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) {
1074 1087
1075 1088
1076 size = vma->vm_file->f_path.dentry->d_inode->i_size; 1089 size = vma->vm_file->f_path.dentry->d_inode->i_size;
1077 do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start); 1090 do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
1078 /* 1091 /*
1079 * We discovered the size of the shm segment, so 1092 * We discovered the size of the shm segment, so
1080 * break out of here and fall through to the next 1093 * break out of here and fall through to the next
1081 * loop that uses the size information to stop 1094 * loop that uses the size information to stop
1082 * searching for matching vma's. 1095 * searching for matching vma's.
1083 */ 1096 */
1084 retval = 0; 1097 retval = 0;
1085 vma = next; 1098 vma = next;
1086 break; 1099 break;
1087 } 1100 }
1088 vma = next; 1101 vma = next;
1089 } 1102 }
1090 1103
1091 /* 1104 /*
1092 * We need look no further than the maximum address a fragment 1105 * We need look no further than the maximum address a fragment
1093 * could possibly have landed at. Also cast things to loff_t to 1106 * could possibly have landed at. Also cast things to loff_t to
1094 * prevent overflows and make comparisons vs. equal-width types. 1107 * prevent overflows and make comparisons vs. equal-width types.
1095 */ 1108 */
1096 size = PAGE_ALIGN(size); 1109 size = PAGE_ALIGN(size);
1097 while (vma && (loff_t)(vma->vm_end - addr) <= size) { 1110 while (vma && (loff_t)(vma->vm_end - addr) <= size) {
1098 next = vma->vm_next; 1111 next = vma->vm_next;
1099 1112
1100 /* finding a matching vma now does not alter retval */ 1113 /* finding a matching vma now does not alter retval */
1101 if ((vma->vm_ops == &shm_vm_ops) && 1114 if ((vma->vm_ops == &shm_vm_ops) &&
1102 (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) 1115 (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff)
1103 1116
1104 do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start); 1117 do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
1105 vma = next; 1118 vma = next;
1106 } 1119 }
1107 1120
1108 up_write(&mm->mmap_sem); 1121 up_write(&mm->mmap_sem);
1109 return retval; 1122 return retval;
1110 } 1123 }
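
All of the vma walking above is triggered by a single call from user space. A minimal sketch, assuming the standard API:

    #include <sys/shm.h>

    /* Sketch: detach whatever was attached at addr.  addr must be page
     * aligned, exactly as the (addr & ~PAGE_MASK) check above demands. */
    int detach_segment(void *addr)
    {
        return shmdt(addr);    /* 0 on success, -1/EINVAL otherwise */
    }
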
1111 1124
1112 #ifdef CONFIG_PROC_FS 1125 #ifdef CONFIG_PROC_FS
1113 static int sysvipc_shm_proc_show(struct seq_file *s, void *it) 1126 static int sysvipc_shm_proc_show(struct seq_file *s, void *it)
1114 { 1127 {
1115 struct shmid_kernel *shp = it; 1128 struct shmid_kernel *shp = it;
1116 char *format; 1129 char *format;
1117 1130
1118 #define SMALL_STRING "%10d %10d %4o %10u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n" 1131 #define SMALL_STRING "%10d %10d %4o %10u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n"
1119 #define BIG_STRING "%10d %10d %4o %21u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n" 1132 #define BIG_STRING "%10d %10d %4o %21u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n"
1120 1133
1121 if (sizeof(size_t) <= sizeof(int)) 1134 if (sizeof(size_t) <= sizeof(int))
1122 format = SMALL_STRING; 1135 format = SMALL_STRING;
1123 else 1136 else
1124 format = BIG_STRING; 1137 format = BIG_STRING;
1125 return seq_printf(s, format, 1138 return seq_printf(s, format,
1126 shp->shm_perm.key, 1139 shp->shm_perm.key,
1127 shp->shm_perm.id, 1140 shp->shm_perm.id,
1128 shp->shm_perm.mode, 1141 shp->shm_perm.mode,
1129 shp->shm_segsz, 1142 shp->shm_segsz,
1130 shp->shm_cprid, 1143 shp->shm_cprid,
1 /* 1 /*
2 * linux/ipc/util.c 2 * linux/ipc/util.c
3 * Copyright (C) 1992 Krishna Balasubramanian 3 * Copyright (C) 1992 Krishna Balasubramanian
4 * 4 *
5 * Sep 1997 - Call suser() last after "normal" permission checks so we 5 * Sep 1997 - Call suser() last after "normal" permission checks so we
6 * get BSD style process accounting right. 6 * get BSD style process accounting right.
7 * Occurs in several places in the IPC code. 7 * Occurs in several places in the IPC code.
8 * Chris Evans, <chris@ferret.lmh.ox.ac.uk> 8 * Chris Evans, <chris@ferret.lmh.ox.ac.uk>
9 * Nov 1999 - ipc helper functions, unified SMP locking 9 * Nov 1999 - ipc helper functions, unified SMP locking
10 * Manfred Spraul <manfred@colorfullife.com> 10 * Manfred Spraul <manfred@colorfullife.com>
11 * Oct 2002 - One lock per IPC id. RCU ipc_free for lock-free grow_ary(). 11 * Oct 2002 - One lock per IPC id. RCU ipc_free for lock-free grow_ary().
12 * Mingming Cao <cmm@us.ibm.com> 12 * Mingming Cao <cmm@us.ibm.com>
13 * Mar 2006 - support for audit of ipc object properties 13 * Mar 2006 - support for audit of ipc object properties
14 * Dustin Kirkland <dustin.kirkland@us.ibm.com> 14 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
15 * Jun 2006 - namespaces support 15 * Jun 2006 - namespaces support
16 * OpenVZ, SWsoft Inc. 16 * OpenVZ, SWsoft Inc.
17 * Pavel Emelianov <xemul@openvz.org> 17 * Pavel Emelianov <xemul@openvz.org>
18 */ 18 */
19 19
20 #include <linux/mm.h> 20 #include <linux/mm.h>
21 #include <linux/shm.h> 21 #include <linux/shm.h>
22 #include <linux/init.h> 22 #include <linux/init.h>
23 #include <linux/msg.h> 23 #include <linux/msg.h>
24 #include <linux/vmalloc.h> 24 #include <linux/vmalloc.h>
25 #include <linux/slab.h> 25 #include <linux/slab.h>
26 #include <linux/capability.h> 26 #include <linux/capability.h>
27 #include <linux/highuid.h> 27 #include <linux/highuid.h>
28 #include <linux/security.h> 28 #include <linux/security.h>
29 #include <linux/rcupdate.h> 29 #include <linux/rcupdate.h>
30 #include <linux/workqueue.h> 30 #include <linux/workqueue.h>
31 #include <linux/seq_file.h> 31 #include <linux/seq_file.h>
32 #include <linux/proc_fs.h> 32 #include <linux/proc_fs.h>
33 #include <linux/audit.h> 33 #include <linux/audit.h>
34 #include <linux/nsproxy.h> 34 #include <linux/nsproxy.h>
35 35
36 #include <asm/unistd.h> 36 #include <asm/unistd.h>
37 37
38 #include "util.h" 38 #include "util.h"
39 39
40 struct ipc_proc_iface { 40 struct ipc_proc_iface {
41 const char *path; 41 const char *path;
42 const char *header; 42 const char *header;
43 int ids; 43 int ids;
44 int (*show)(struct seq_file *, void *); 44 int (*show)(struct seq_file *, void *);
45 }; 45 };
46 46
47 struct ipc_namespace init_ipc_ns = { 47 struct ipc_namespace init_ipc_ns = {
48 .kref = { 48 .kref = {
49 .refcount = ATOMIC_INIT(2), 49 .refcount = ATOMIC_INIT(2),
50 }, 50 },
51 }; 51 };
52 52
53 static struct ipc_namespace *clone_ipc_ns(struct ipc_namespace *old_ns) 53 static struct ipc_namespace *clone_ipc_ns(struct ipc_namespace *old_ns)
54 { 54 {
55 int err; 55 int err;
56 struct ipc_namespace *ns; 56 struct ipc_namespace *ns;
57 57
58 err = -ENOMEM; 58 err = -ENOMEM;
59 ns = kmalloc(sizeof(struct ipc_namespace), GFP_KERNEL); 59 ns = kmalloc(sizeof(struct ipc_namespace), GFP_KERNEL);
60 if (ns == NULL) 60 if (ns == NULL)
61 goto err_mem; 61 goto err_mem;
62 62
63 err = sem_init_ns(ns); 63 err = sem_init_ns(ns);
64 if (err) 64 if (err)
65 goto err_sem; 65 goto err_sem;
66 err = msg_init_ns(ns); 66 err = msg_init_ns(ns);
67 if (err) 67 if (err)
68 goto err_msg; 68 goto err_msg;
69 err = shm_init_ns(ns); 69 err = shm_init_ns(ns);
70 if (err) 70 if (err)
71 goto err_shm; 71 goto err_shm;
72 72
73 kref_init(&ns->kref); 73 kref_init(&ns->kref);
74 return ns; 74 return ns;
75 75
76 err_shm: 76 err_shm:
77 msg_exit_ns(ns); 77 msg_exit_ns(ns);
78 err_msg: 78 err_msg:
79 sem_exit_ns(ns); 79 sem_exit_ns(ns);
80 err_sem: 80 err_sem:
81 kfree(ns); 81 kfree(ns);
82 err_mem: 82 err_mem:
83 return ERR_PTR(err); 83 return ERR_PTR(err);
84 } 84 }
85 85
86 struct ipc_namespace *copy_ipcs(unsigned long flags, struct ipc_namespace *ns) 86 struct ipc_namespace *copy_ipcs(unsigned long flags, struct ipc_namespace *ns)
87 { 87 {
88 struct ipc_namespace *new_ns; 88 struct ipc_namespace *new_ns;
89 89
90 BUG_ON(!ns); 90 BUG_ON(!ns);
91 get_ipc_ns(ns); 91 get_ipc_ns(ns);
92 92
93 if (!(flags & CLONE_NEWIPC)) 93 if (!(flags & CLONE_NEWIPC))
94 return ns; 94 return ns;
95 95
96 new_ns = clone_ipc_ns(ns); 96 new_ns = clone_ipc_ns(ns);
97 97
98 put_ipc_ns(ns); 98 put_ipc_ns(ns);
99 return new_ns; 99 return new_ns;
100 } 100 }
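
copy_ipcs() is what gives CLONE_NEWIPC its meaning. A hedged user-space sketch of reaching the clone_ipc_ns() path via unshare(2):

    #define _GNU_SOURCE
    #include <sched.h>
    #include <stdio.h>

    /* Sketch: move the caller into a fresh IPC namespace.  With
     * CLONE_NEWIPC set, copy_ipcs() clones a new namespace instead of
     * taking another reference on the current one. */
    int main(void)
    {
        if (unshare(CLONE_NEWIPC) != 0) {
            perror("unshare");
            return 1;
        }
        /* SysV ids created from here on live in the new namespace */
        return 0;
    }
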
101 101
102 void free_ipc_ns(struct kref *kref) 102 void free_ipc_ns(struct kref *kref)
103 { 103 {
104 struct ipc_namespace *ns; 104 struct ipc_namespace *ns;
105 105
106 ns = container_of(kref, struct ipc_namespace, kref); 106 ns = container_of(kref, struct ipc_namespace, kref);
107 sem_exit_ns(ns); 107 sem_exit_ns(ns);
108 msg_exit_ns(ns); 108 msg_exit_ns(ns);
109 shm_exit_ns(ns); 109 shm_exit_ns(ns);
110 kfree(ns); 110 kfree(ns);
111 } 111 }
112 112
113 /** 113 /**
114 * ipc_init - initialise IPC subsystem 114 * ipc_init - initialise IPC subsystem
115 * 115 *
116 * The various System V IPC resources (semaphores, messages and shared 116 * The various System V IPC resources (semaphores, messages and shared
117 * memory) are initialised 117 * memory) are initialised
118 */ 118 */
119 119
120 static int __init ipc_init(void) 120 static int __init ipc_init(void)
121 { 121 {
122 sem_init(); 122 sem_init();
123 msg_init(); 123 msg_init();
124 shm_init(); 124 shm_init();
125 return 0; 125 return 0;
126 } 126 }
127 __initcall(ipc_init); 127 __initcall(ipc_init);
128 128
129 /** 129 /**
130 * ipc_init_ids - initialise IPC identifiers 130 * ipc_init_ids - initialise IPC identifiers
131 * @ids: Identifier set 131 * @ids: Identifier set
132 * 132 *
133 * Set up the sequence range to use for the ipc identifier range (limited 133 * Set up the sequence range to use for the ipc identifier range (limited
134 * below IPCMNI) then initialise the ids idr. 134 * below IPCMNI) then initialise the ids idr.
135 */ 135 */
136 136
137 void ipc_init_ids(struct ipc_ids *ids) 137 void ipc_init_ids(struct ipc_ids *ids)
138 { 138 {
139 mutex_init(&ids->mutex); 139 mutex_init(&ids->mutex);
140 140
141 ids->in_use = 0; 141 ids->in_use = 0;
142 ids->seq = 0; 142 ids->seq = 0;
143 { 143 {
144 int seq_limit = INT_MAX/SEQ_MULTIPLIER; 144 int seq_limit = INT_MAX/SEQ_MULTIPLIER;
145 if(seq_limit > USHRT_MAX) 145 if(seq_limit > USHRT_MAX)
146 ids->seq_max = USHRT_MAX; 146 ids->seq_max = USHRT_MAX;
147 else 147 else
148 ids->seq_max = seq_limit; 148 ids->seq_max = seq_limit;
149 } 149 }
150 150
151 idr_init(&ids->ipcs_idr); 151 idr_init(&ids->ipcs_idr);
152 } 152 }
153 153
154 #ifdef CONFIG_PROC_FS 154 #ifdef CONFIG_PROC_FS
155 static const struct file_operations sysvipc_proc_fops; 155 static const struct file_operations sysvipc_proc_fops;
156 /** 156 /**
157 * ipc_init_proc_interface - Create a proc interface for sysvipc types using a seq_file interface. 157 * ipc_init_proc_interface - Create a proc interface for sysvipc types using a seq_file interface.
158 * @path: Path in procfs 158 * @path: Path in procfs
159 * @header: Banner to be printed at the beginning of the file. 159 * @header: Banner to be printed at the beginning of the file.
160 * @ids: ipc id table to iterate. 160 * @ids: ipc id table to iterate.
161 * @show: show routine. 161 * @show: show routine.
162 */ 162 */
163 void __init ipc_init_proc_interface(const char *path, const char *header, 163 void __init ipc_init_proc_interface(const char *path, const char *header,
164 int ids, int (*show)(struct seq_file *, void *)) 164 int ids, int (*show)(struct seq_file *, void *))
165 { 165 {
166 struct proc_dir_entry *pde; 166 struct proc_dir_entry *pde;
167 struct ipc_proc_iface *iface; 167 struct ipc_proc_iface *iface;
168 168
169 iface = kmalloc(sizeof(*iface), GFP_KERNEL); 169 iface = kmalloc(sizeof(*iface), GFP_KERNEL);
170 if (!iface) 170 if (!iface)
171 return; 171 return;
172 iface->path = path; 172 iface->path = path;
173 iface->header = header; 173 iface->header = header;
174 iface->ids = ids; 174 iface->ids = ids;
175 iface->show = show; 175 iface->show = show;
176 176
177 pde = create_proc_entry(path, 177 pde = create_proc_entry(path,
178 S_IRUGO, /* world readable */ 178 S_IRUGO, /* world readable */
179 NULL /* parent dir */); 179 NULL /* parent dir */);
180 if (pde) { 180 if (pde) {
181 pde->data = iface; 181 pde->data = iface;
182 pde->proc_fops = &sysvipc_proc_fops; 182 pde->proc_fops = &sysvipc_proc_fops;
183 } else { 183 } else {
184 kfree(iface); 184 kfree(iface);
185 } 185 }
186 } 186 }
187 #endif 187 #endif
188 188
189 /** 189 /**
190 * ipc_findkey - find a key in an ipc identifier set 190 * ipc_findkey - find a key in an ipc identifier set
191 * @ids: Identifier set 191 * @ids: Identifier set
192 * @key: The key to find 192 * @key: The key to find
193 * 193 *
194 * Requires ipc_ids.mutex locked. 194 * Requires ipc_ids.mutex locked.
195 * Returns the LOCKED pointer to the ipc structure if found or NULL 195 * Returns the LOCKED pointer to the ipc structure if found or NULL
196 * if not. 196 * if not.
197 * If key is found ipc contains its ipc structure 197 * If key is found ipc points to the owning ipc structure
198 */ 198 */
199 199
200 static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key) 200 static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key)
201 { 201 {
202 struct kern_ipc_perm *ipc; 202 struct kern_ipc_perm *ipc;
203 int next_id; 203 int next_id;
204 int total; 204 int total;
205 205
206 for (total = 0, next_id = 0; total < ids->in_use; next_id++) { 206 for (total = 0, next_id = 0; total < ids->in_use; next_id++) {
207 ipc = idr_find(&ids->ipcs_idr, next_id); 207 ipc = idr_find(&ids->ipcs_idr, next_id);
208 208
209 if (ipc == NULL) 209 if (ipc == NULL)
210 continue; 210 continue;
211 211
212 if (ipc->key != key) { 212 if (ipc->key != key) {
213 total++; 213 total++;
214 continue; 214 continue;
215 } 215 }
216 216
217 ipc_lock_by_ptr(ipc); 217 ipc_lock_by_ptr(ipc);
218 return ipc; 218 return ipc;
219 } 219 }
220 220
221 return NULL; 221 return NULL;
222 } 222 }
223 223
224 /** 224 /**
225 * ipc_get_maxid - get the last assigned id 225 * ipc_get_maxid - get the last assigned id
226 * @ids: IPC identifier set 226 * @ids: IPC identifier set
227 * 227 *
228 * Called with ipc_ids.mutex held. 228 * Called with ipc_ids.mutex held.
229 */ 229 */
230 230
231 int ipc_get_maxid(struct ipc_ids *ids) 231 int ipc_get_maxid(struct ipc_ids *ids)
232 { 232 {
233 struct kern_ipc_perm *ipc; 233 struct kern_ipc_perm *ipc;
234 int max_id = -1; 234 int max_id = -1;
235 int total, id; 235 int total, id;
236 236
237 if (ids->in_use == 0) 237 if (ids->in_use == 0)
238 return -1; 238 return -1;
239 239
240 if (ids->in_use == IPCMNI) 240 if (ids->in_use == IPCMNI)
241 return IPCMNI - 1; 241 return IPCMNI - 1;
242 242
243 /* Look for the last assigned id */ 243 /* Look for the last assigned id */
244 total = 0; 244 total = 0;
245 for (id = 0; id < IPCMNI && total < ids->in_use; id++) { 245 for (id = 0; id < IPCMNI && total < ids->in_use; id++) {
246 ipc = idr_find(&ids->ipcs_idr, id); 246 ipc = idr_find(&ids->ipcs_idr, id);
247 if (ipc != NULL) { 247 if (ipc != NULL) {
248 max_id = id; 248 max_id = id;
249 total++; 249 total++;
250 } 250 }
251 } 251 }
252 return max_id; 252 return max_id;
253 } 253 }
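
Since ipc_get_maxid() walks the idr with no locking of its own, callers must honour the "Called with ipc_ids.mutex held" rule above. A sketch of the expected calling pattern (the wrapper name is illustrative, not a function in this file):

    /* Sketch: the scan is only coherent while ipc_ids.mutex keeps
     * in_use and the idr stable. */
    static int example_get_maxid(struct ipc_ids *ids)
    {
        int max_id;

        mutex_lock(&ids->mutex);
        max_id = ipc_get_maxid(ids);
        mutex_unlock(&ids->mutex);
        return max_id;
    }
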
254 254
255 /** 255 /**
256 * ipc_addid - add an IPC identifier 256 * ipc_addid - add an IPC identifier
257 * @ids: IPC identifier set 257 * @ids: IPC identifier set
258 * @new: new IPC permission set 258 * @new: new IPC permission set
259 * @size: limit for the number of used ids 259 * @size: limit for the number of used ids
260 * 260 *
261 * Add an entry 'new' to the IPC idr. The permissions object is 261 * Add an entry 'new' to the IPC ids idr. The permissions object is
262 * initialised and the first free entry is set up and the id assigned 262 * initialised and the first free entry is set up and the id assigned
263 * is returned. The list is returned in a locked state on success. 263 * is returned. The 'new' entry is returned in a locked state on success.
264 * On failure the list is not locked and -1 is returned. 264 * On failure the entry is not locked and -1 is returned.
265 * 265 *
266 * Called with ipc_ids.mutex held. 266 * Called with ipc_ids.mutex held.
267 */ 267 */
268 268
269 int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size) 269 int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size)
270 { 270 {
271 int id, err; 271 int id, err;
272 272
273 /*
274 * rcu_dereference()() is not needed here since
275 * ipc_ids.mutex is held
276 */
277 if (size > IPCMNI) 273 if (size > IPCMNI)
278 size = IPCMNI; 274 size = IPCMNI;
279 275
280 if (ids->in_use >= size) 276 if (ids->in_use >= size)
281 return -1; 277 return -1;
282 278
283 err = idr_get_new(&ids->ipcs_idr, new, &id); 279 err = idr_get_new(&ids->ipcs_idr, new, &id);
284 if (err) 280 if (err)
285 return -1; 281 return -1;
286 282
287 ids->in_use++; 283 ids->in_use++;
288 284
289 new->cuid = new->uid = current->euid; 285 new->cuid = new->uid = current->euid;
290 new->gid = new->cgid = current->egid; 286 new->gid = new->cgid = current->egid;
291 287
292 new->seq = ids->seq++; 288 new->seq = ids->seq++;
293 if(ids->seq > ids->seq_max) 289 if(ids->seq > ids->seq_max)
294 ids->seq = 0; 290 ids->seq = 0;
295 291
296 spin_lock_init(&new->lock); 292 spin_lock_init(&new->lock);
297 new->deleted = 0; 293 new->deleted = 0;
298 rcu_read_lock(); 294 rcu_read_lock();
299 spin_lock(&new->lock); 295 spin_lock(&new->lock);
300 return id; 296 return id;
301 } 297 }
302 298
303 /** 299 /**
304 * ipcget_new - create a new ipc object 300 * ipcget_new - create a new ipc object
305 * @ns: namespace 301 * @ns: namespace
306 * @ids: identifer set 302 * @ids: IPC identifier set
307 * @ops: the actual creation routine to call 303 * @ops: the actual creation routine to call
308 * @params: its parameters 304 * @params: its parameters
309 * 305 *
310 * This routine is called sys_msgget, sys_semget() and sys_shmget() when 306 * This routine is called by sys_msgget(), sys_semget() and sys_shmget()
311 * the key is IPC_PRIVATE 307 * when the key is IPC_PRIVATE.
312 */ 308 */
313 int ipcget_new(struct ipc_namespace *ns, struct ipc_ids *ids, 309 int ipcget_new(struct ipc_namespace *ns, struct ipc_ids *ids,
314 struct ipc_ops *ops, struct ipc_params *params) 310 struct ipc_ops *ops, struct ipc_params *params)
315 { 311 {
316 int err; 312 int err;
317 313
318 err = idr_pre_get(&ids->ipcs_idr, GFP_KERNEL); 314 err = idr_pre_get(&ids->ipcs_idr, GFP_KERNEL);
319 315
320 if (!err) 316 if (!err)
321 return -ENOMEM; 317 return -ENOMEM;
322 318
323 mutex_lock(&ids->mutex); 319 mutex_lock(&ids->mutex);
324 err = ops->getnew(ns, params); 320 err = ops->getnew(ns, params);
325 mutex_unlock(&ids->mutex); 321 mutex_unlock(&ids->mutex);
326 322
327 return err; 323 return err;
328 } 324 }
329 325
330 /** 326 /**
331 * ipc_check_perms - check security and permissions for an IPC 327 * ipc_check_perms - check security and permissions for an IPC
332 * @ipcp: ipc permission set 328 * @ipcp: ipc permission set
333 * @ids: identifer set
334 * @ops: the actual security routine to call 329 * @ops: the actual security routine to call
335 * @params: its parameters 330 * @params: its parameters
331 *
332 * This routine is called by sys_msgget(), sys_semget() and sys_shmget()
333 * when the key is not IPC_PRIVATE and that key already exists in the
334 * ids IDR.
335 *
336 * On success, the IPC id is returned.
337 *
338 * It is called with ipc_ids.mutex and ipcp->lock held.
336 */ 339 */
337 static int ipc_check_perms(struct kern_ipc_perm *ipcp, struct ipc_ops *ops, 340 static int ipc_check_perms(struct kern_ipc_perm *ipcp, struct ipc_ops *ops,
338 struct ipc_params *params) 341 struct ipc_params *params)
339 { 342 {
340 int err; 343 int err;
341 344
342 if (ipcperms(ipcp, params->flg)) 345 if (ipcperms(ipcp, params->flg))
343 err = -EACCES; 346 err = -EACCES;
344 else { 347 else {
345 err = ops->associate(ipcp, params->flg); 348 err = ops->associate(ipcp, params->flg);
346 if (!err) 349 if (!err)
347 err = ipcp->id; 350 err = ipcp->id;
348 } 351 }
349 352
350 return err; 353 return err;
351 } 354 }
352 355
353 /** 356 /**
354 * ipcget_public - get an ipc object or create a new one 357 * ipcget_public - get an ipc object or create a new one
355 * @ns: namespace 358 * @ns: namespace
356 * @ids: identifer set 359 * @ids: IPC identifier set
357 * @ops: the actual creation routine to call 360 * @ops: the actual creation routine to call
358 * @params: its parameters 361 * @params: its parameters
359 * 362 *
360 * This routine is called sys_msgget, sys_semget() and sys_shmget() when 363 * This routine is called by sys_msgget(), sys_semget() and sys_shmget()
361 * the key is not IPC_PRIVATE 364 * when the key is not IPC_PRIVATE.
365 * It adds a new entry if the key is not found and does some permission
366 * / security checks if the key is found.
367 *
368 * On success, the ipc id is returned.
362 */ 369 */
363 int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids, 370 int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids,
364 struct ipc_ops *ops, struct ipc_params *params) 371 struct ipc_ops *ops, struct ipc_params *params)
365 { 372 {
366 struct kern_ipc_perm *ipcp; 373 struct kern_ipc_perm *ipcp;
367 int flg = params->flg; 374 int flg = params->flg;
368 int err; 375 int err;
369 376
370 err = idr_pre_get(&ids->ipcs_idr, GFP_KERNEL); 377 err = idr_pre_get(&ids->ipcs_idr, GFP_KERNEL);
371 378
372 mutex_lock(&ids->mutex); 379 mutex_lock(&ids->mutex);
373 ipcp = ipc_findkey(ids, params->key); 380 ipcp = ipc_findkey(ids, params->key);
374 if (ipcp == NULL) { 381 if (ipcp == NULL) {
375 /* key not used */ 382 /* key not used */
376 if (!(flg & IPC_CREAT)) 383 if (!(flg & IPC_CREAT))
377 err = -ENOENT; 384 err = -ENOENT;
378 else if (!err) 385 else if (!err)
379 err = -ENOMEM; 386 err = -ENOMEM;
380 else 387 else
381 err = ops->getnew(ns, params); 388 err = ops->getnew(ns, params);
382 } else { 389 } else {
383 /* ipc object has been locked by ipc_findkey() */ 390 /* ipc object has been locked by ipc_findkey() */
384 391
385 if (flg & IPC_CREAT && flg & IPC_EXCL) 392 if (flg & IPC_CREAT && flg & IPC_EXCL)
386 err = -EEXIST; 393 err = -EEXIST;
387 else { 394 else {
388 err = 0; 395 err = 0;
389 if (ops->more_checks) 396 if (ops->more_checks)
390 err = ops->more_checks(ipcp, params); 397 err = ops->more_checks(ipcp, params);
391 if (!err) 398 if (!err)
399 /*
400 * ipc_check_perms returns the IPC id on
401 * success
402 */
392 err = ipc_check_perms(ipcp, ops, params); 403 err = ipc_check_perms(ipcp, ops, params);
393 } 404 }
394 ipc_unlock(ipcp); 405 ipc_unlock(ipcp);
395 } 406 }
396 mutex_unlock(&ids->mutex); 407 mutex_unlock(&ids->mutex);
397 408
398 return err; 409 return err;
399 } 410 }
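
The -EEXIST branch above is what a racing creator sees when it insists on exclusivity. A user-space sketch, assuming the standard API (the key value is an arbitrary illustration):

    #include <errno.h>
    #include <sys/ipc.h>
    #include <sys/shm.h>

    #define EXAMPLE_KEY 0x1234    /* illustrative application key */

    /* Sketch: create-or-fail semantics backed by ipcget_public(). */
    int create_exclusive(void)
    {
        int id = shmget(EXAMPLE_KEY, 4096, IPC_CREAT | IPC_EXCL | 0600);

        if (id < 0 && errno == EEXIST)
            return -1;    /* ipc_findkey() matched and IPC_EXCL was set */
        return id;
    }
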
400 411
401 412
402 /** 413 /**
403 * ipc_rmid - remove an IPC identifier 414 * ipc_rmid - remove an IPC identifier
404 * @ids: identifier set 415 * @ids: IPC identifier set
405 * @id: ipc perm structure containing the identifier to remove 416 * @ipcp: ipc perm structure containing the identifier to remove
406 * 417 *
407 * The identifier must be valid, and in use. The kernel will panic if
408 * fed an invalid identifier. The entry is removed and internal
409 * variables recomputed.
410 * ipc_ids.mutex and the spinlock for this ID are held before this 418 * ipc_ids.mutex and the spinlock for this ID are held before this
411 * function is called, and remain locked on the exit. 419 * function is called, and remain locked on the exit.
412 */ 420 */
413 421
414 void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp) 422 void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
415 { 423 {
416 int lid = ipcid_to_idx(ipcp->id); 424 int lid = ipcid_to_idx(ipcp->id);
417 425
418 idr_remove(&ids->ipcs_idr, lid); 426 idr_remove(&ids->ipcs_idr, lid);
419 427
420 ids->in_use--; 428 ids->in_use--;
421 429
422 ipcp->deleted = 1; 430 ipcp->deleted = 1;
423 431
424 return; 432 return;
425 } 433 }
426 434
427 /** 435 /**
428 * ipc_alloc - allocate ipc space 436 * ipc_alloc - allocate ipc space
429 * @size: size desired 437 * @size: size desired
430 * 438 *
431 * Allocate memory from the appropriate pools and return a pointer to it. 439 * Allocate memory from the appropriate pools and return a pointer to it.
432 * NULL is returned if the allocation fails 440 * NULL is returned if the allocation fails
433 */ 441 */
434 442
435 void* ipc_alloc(int size) 443 void* ipc_alloc(int size)
436 { 444 {
437 void* out; 445 void* out;
438 if(size > PAGE_SIZE) 446 if(size > PAGE_SIZE)
439 out = vmalloc(size); 447 out = vmalloc(size);
440 else 448 else
441 out = kmalloc(size, GFP_KERNEL); 449 out = kmalloc(size, GFP_KERNEL);
442 return out; 450 return out;
443 } 451 }
444 452
445 /** 453 /**
446 * ipc_free - free ipc space 454 * ipc_free - free ipc space
447 * @ptr: pointer returned by ipc_alloc 455 * @ptr: pointer returned by ipc_alloc
448 * @size: size of block 456 * @size: size of block
449 * 457 *
450 * Free a block created with ipc_alloc(). The caller must know the size 458 * Free a block created with ipc_alloc(). The caller must know the size
451 * used in the allocation call. 459 * used in the allocation call.
452 */ 460 */
453 461
454 void ipc_free(void* ptr, int size) 462 void ipc_free(void* ptr, int size)
455 { 463 {
456 if(size > PAGE_SIZE) 464 if(size > PAGE_SIZE)
457 vfree(ptr); 465 vfree(ptr);
458 else 466 else
459 kfree(ptr); 467 kfree(ptr);
460 } 468 }
461 469
462 /* 470 /*
463 * rcu allocations: 471 * rcu allocations:
464 * There are three headers that are prepended to the actual allocation: 472 * There are three headers that are prepended to the actual allocation:
465 * - during use: ipc_rcu_hdr. 473 * - during use: ipc_rcu_hdr.
466 * - during the rcu grace period: ipc_rcu_grace. 474 * - during the rcu grace period: ipc_rcu_grace.
467 * - [only if vmalloc]: ipc_rcu_sched. 475 * - [only if vmalloc]: ipc_rcu_sched.
468 * Their lifetimes don't overlap, thus the headers share the same memory. 476 * Their lifetimes don't overlap, thus the headers share the same memory.
469 * Unlike a normal union, they are right-aligned, thus some container_of 477 * Unlike a normal union, they are right-aligned, thus some container_of
470 * forward/backward casting is necessary: 478 * forward/backward casting is necessary:
471 */ 479 */
472 struct ipc_rcu_hdr 480 struct ipc_rcu_hdr
473 { 481 {
474 int refcount; 482 int refcount;
475 int is_vmalloc; 483 int is_vmalloc;
476 void *data[0]; 484 void *data[0];
477 }; 485 };
478 486
479 487
480 struct ipc_rcu_grace 488 struct ipc_rcu_grace
481 { 489 {
482 struct rcu_head rcu; 490 struct rcu_head rcu;
483 /* "void *" makes sure alignment of following data is sane. */ 491 /* "void *" makes sure alignment of following data is sane. */
484 void *data[0]; 492 void *data[0];
485 }; 493 };
486 494
487 struct ipc_rcu_sched 495 struct ipc_rcu_sched
488 { 496 {
489 struct work_struct work; 497 struct work_struct work;
490 /* "void *" makes sure alignment of following data is sane. */ 498 /* "void *" makes sure alignment of following data is sane. */
491 void *data[0]; 499 void *data[0];
492 }; 500 };
493 501
494 #define HDRLEN_KMALLOC (sizeof(struct ipc_rcu_grace) > sizeof(struct ipc_rcu_hdr) ? \ 502 #define HDRLEN_KMALLOC (sizeof(struct ipc_rcu_grace) > sizeof(struct ipc_rcu_hdr) ? \
495 sizeof(struct ipc_rcu_grace) : sizeof(struct ipc_rcu_hdr)) 503 sizeof(struct ipc_rcu_grace) : sizeof(struct ipc_rcu_hdr))
496 #define HDRLEN_VMALLOC (sizeof(struct ipc_rcu_sched) > HDRLEN_KMALLOC ? \ 504 #define HDRLEN_VMALLOC (sizeof(struct ipc_rcu_sched) > HDRLEN_KMALLOC ? \
497 sizeof(struct ipc_rcu_sched) : HDRLEN_KMALLOC) 505 sizeof(struct ipc_rcu_sched) : HDRLEN_KMALLOC)
498 506
499 static inline int rcu_use_vmalloc(int size) 507 static inline int rcu_use_vmalloc(int size)
500 { 508 {
501 /* Too big for a single page? */ 509 /* Too big for a single page? */
502 if (HDRLEN_KMALLOC + size > PAGE_SIZE) 510 if (HDRLEN_KMALLOC + size > PAGE_SIZE)
503 return 1; 511 return 1;
504 return 0; 512 return 0;
505 } 513 }
506 514
507 /** 515 /**
508 * ipc_rcu_alloc - allocate ipc and rcu space 516 * ipc_rcu_alloc - allocate ipc and rcu space
509 * @size: size desired 517 * @size: size desired
510 * 518 *
511 * Allocate memory for the rcu header structure + the object. 519 * Allocate memory for the rcu header structure + the object.
512 * Returns the pointer to the object. 520 * Returns the pointer to the object.
513 * NULL is returned if the allocation fails. 521 * NULL is returned if the allocation fails.
514 */ 522 */
515 523
516 void* ipc_rcu_alloc(int size) 524 void* ipc_rcu_alloc(int size)
517 { 525 {
518 void* out; 526 void* out;
519 /* 527 /*
520 * We prepend the allocation with the rcu struct, and 528 * We prepend the allocation with the rcu struct, and
521 * workqueue if necessary (for vmalloc). 529 * workqueue if necessary (for vmalloc).
522 */ 530 */
523 if (rcu_use_vmalloc(size)) { 531 if (rcu_use_vmalloc(size)) {
524 out = vmalloc(HDRLEN_VMALLOC + size); 532 out = vmalloc(HDRLEN_VMALLOC + size);
525 if (out) { 533 if (out) {
526 out += HDRLEN_VMALLOC; 534 out += HDRLEN_VMALLOC;
527 container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 1; 535 container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 1;
528 container_of(out, struct ipc_rcu_hdr, data)->refcount = 1; 536 container_of(out, struct ipc_rcu_hdr, data)->refcount = 1;
529 } 537 }
530 } else { 538 } else {
531 out = kmalloc(HDRLEN_KMALLOC + size, GFP_KERNEL); 539 out = kmalloc(HDRLEN_KMALLOC + size, GFP_KERNEL);
532 if (out) { 540 if (out) {
533 out += HDRLEN_KMALLOC; 541 out += HDRLEN_KMALLOC;
534 container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 0; 542 container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 0;
535 container_of(out, struct ipc_rcu_hdr, data)->refcount = 1; 543 container_of(out, struct ipc_rcu_hdr, data)->refcount = 1;
536 } 544 }
537 } 545 }
538 546
539 return out; 547 return out;
540 } 548 }
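
The right-aligned header layout is easiest to see with the pointer arithmetic written out. A standalone user-space sketch of the same trick (container_of expanded by hand; compiles with GCC's zero-length array extension, mirroring the structs above):

    #include <stddef.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Same shape as ipc_rcu_hdr: data[] marks where the caller's
     * object starts, right after the header. */
    struct hdr {
        int refcount;
        int is_vmalloc;
        void *data[0];
    };

    #define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

    int main(void)
    {
        struct hdr *h = malloc(sizeof(*h) + 64);
        void *obj;

        if (!h)
            return 1;
        obj = h->data;    /* what ipc_rcu_alloc() hands back */
        h->refcount = 1;
        /* later: recover the header from the object pointer alone */
        container_of(obj, struct hdr, data)->refcount++;
        printf("%d\n", container_of(obj, struct hdr, data)->refcount);
        free(h);
        return 0;
    }
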
541 549
542 void ipc_rcu_getref(void *ptr) 550 void ipc_rcu_getref(void *ptr)
543 { 551 {
544 container_of(ptr, struct ipc_rcu_hdr, data)->refcount++; 552 container_of(ptr, struct ipc_rcu_hdr, data)->refcount++;
545 } 553 }
546 554
547 static void ipc_do_vfree(struct work_struct *work) 555 static void ipc_do_vfree(struct work_struct *work)
548 { 556 {
549 vfree(container_of(work, struct ipc_rcu_sched, work)); 557 vfree(container_of(work, struct ipc_rcu_sched, work));
550 } 558 }
551 559
552 /** 560 /**
553 * ipc_schedule_free - free ipc + rcu space 561 * ipc_schedule_free - free ipc + rcu space
554 * @head: RCU callback structure for queued work 562 * @head: RCU callback structure for queued work
555 * 563 *
556 * Since the RCU callback function is called in bh context, 564 * Since the RCU callback function is called in bh context,
557 * we need to defer the vfree to schedule_work(). 565 * we need to defer the vfree to schedule_work().
558 */ 566 */
559 static void ipc_schedule_free(struct rcu_head *head) 567 static void ipc_schedule_free(struct rcu_head *head)
560 { 568 {
561 struct ipc_rcu_grace *grace = 569 struct ipc_rcu_grace *grace;
562 container_of(head, struct ipc_rcu_grace, rcu); 570 struct ipc_rcu_sched *sched;
563 struct ipc_rcu_sched *sched =
564 container_of(&(grace->data[0]), struct ipc_rcu_sched, data[0]);
565 571
572 grace = container_of(head, struct ipc_rcu_grace, rcu);
573 sched = container_of(&(grace->data[0]), struct ipc_rcu_sched,
574 data[0]);
575
566 INIT_WORK(&sched->work, ipc_do_vfree); 576 INIT_WORK(&sched->work, ipc_do_vfree);
567 schedule_work(&sched->work); 577 schedule_work(&sched->work);
568 } 578 }
569 579
570 /** 580 /**
571 * ipc_immediate_free - free ipc + rcu space 581 * ipc_immediate_free - free ipc + rcu space
572 * @head: RCU callback structure that contains pointer to be freed 582 * @head: RCU callback structure that contains pointer to be freed
573 * 583 *
574 * Free from the RCU callback context. 584 * Free from the RCU callback context.
575 */ 585 */
576 static void ipc_immediate_free(struct rcu_head *head) 586 static void ipc_immediate_free(struct rcu_head *head)
577 { 587 {
578 struct ipc_rcu_grace *free = 588 struct ipc_rcu_grace *free =
579 container_of(head, struct ipc_rcu_grace, rcu); 589 container_of(head, struct ipc_rcu_grace, rcu);
580 kfree(free); 590 kfree(free);
581 } 591 }
582 592
583 void ipc_rcu_putref(void *ptr) 593 void ipc_rcu_putref(void *ptr)
584 { 594 {
585 if (--container_of(ptr, struct ipc_rcu_hdr, data)->refcount > 0) 595 if (--container_of(ptr, struct ipc_rcu_hdr, data)->refcount > 0)
586 return; 596 return;
587 597
588 if (container_of(ptr, struct ipc_rcu_hdr, data)->is_vmalloc) { 598 if (container_of(ptr, struct ipc_rcu_hdr, data)->is_vmalloc) {
589 call_rcu(&container_of(ptr, struct ipc_rcu_grace, data)->rcu, 599 call_rcu(&container_of(ptr, struct ipc_rcu_grace, data)->rcu,
590 ipc_schedule_free); 600 ipc_schedule_free);
591 } else { 601 } else {
592 call_rcu(&container_of(ptr, struct ipc_rcu_grace, data)->rcu, 602 call_rcu(&container_of(ptr, struct ipc_rcu_grace, data)->rcu,
593 ipc_immediate_free); 603 ipc_immediate_free);
594 } 604 }
595 } 605 }
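
ipc_rcu_getref() and ipc_rcu_putref() form a small pinning protocol around objects from ipc_rcu_alloc(). A sketch of the intended usage (the function name is illustrative):

    /* Sketch: pin an ipc_rcu_alloc()'d object across a sleeping
     * operation, so a concurrent ipc_rcu_putref() elsewhere cannot
     * free it underneath us. */
    static void example_pin(void *obj)
    {
        ipc_rcu_getref(obj);
        /* ...drop the object's lock, possibly sleep... */
        ipc_rcu_putref(obj);    /* queues an RCU free once refcount hits 0 */
    }
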
596 606
597 /** 607 /**
598 * ipcperms - check IPC permissions 608 * ipcperms - check IPC permissions
599 * @ipcp: IPC permission set 609 * @ipcp: IPC permission set
600 * @flag: desired permission set. 610 * @flag: desired permission set.
601 * 611 *
602 * Check user, group, other permissions for access 612 * Check user, group, other permissions for access
603 * to ipc resources. Return 0 if allowed. 613 * to ipc resources. Return 0 if allowed.
604 */ 614 */
605 615
606 int ipcperms (struct kern_ipc_perm *ipcp, short flag) 616 int ipcperms (struct kern_ipc_perm *ipcp, short flag)
607 { /* flag will most probably be 0 or S_...UGO from <linux/stat.h> */ 617 { /* flag will most probably be 0 or S_...UGO from <linux/stat.h> */
608 int requested_mode, granted_mode, err; 618 int requested_mode, granted_mode, err;
609 619
610 if (unlikely((err = audit_ipc_obj(ipcp)))) 620 if (unlikely((err = audit_ipc_obj(ipcp))))
611 return err; 621 return err;
612 requested_mode = (flag >> 6) | (flag >> 3) | flag; 622 requested_mode = (flag >> 6) | (flag >> 3) | flag;
613 granted_mode = ipcp->mode; 623 granted_mode = ipcp->mode;
614 if (current->euid == ipcp->cuid || current->euid == ipcp->uid) 624 if (current->euid == ipcp->cuid || current->euid == ipcp->uid)
615 granted_mode >>= 6; 625 granted_mode >>= 6;
616 else if (in_group_p(ipcp->cgid) || in_group_p(ipcp->gid)) 626 else if (in_group_p(ipcp->cgid) || in_group_p(ipcp->gid))
617 granted_mode >>= 3; 627 granted_mode >>= 3;
618 /* is there some bit set in requested_mode but not in granted_mode? */ 628 /* is there some bit set in requested_mode but not in granted_mode? */
619 if ((requested_mode & ~granted_mode & 0007) && 629 if ((requested_mode & ~granted_mode & 0007) &&
620 !capable(CAP_IPC_OWNER)) 630 !capable(CAP_IPC_OWNER))
621 return -1; 631 return -1;
622 632
623 return security_ipc_permission(ipcp, flag); 633 return security_ipc_permission(ipcp, flag);
624 } 634 }
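
The requested/granted mode folding above is dense; a standalone sketch of the same arithmetic with concrete values:

    #include <stdio.h>

    /* Sketch: ipcperms()'s mask math in isolation.  flag is the desired
     * access (S_...UGO style), granted_mode the object's mode bits,
     * shift selects the class: 6 = owner, 3 = group, 0 = other. */
    static int would_deny(short flag, int granted_mode, int shift)
    {
        int requested_mode = (flag >> 6) | (flag >> 3) | flag;

        granted_mode >>= shift;
        return (requested_mode & ~granted_mode & 0007) != 0;
    }

    int main(void)
    {
        printf("%d\n", would_deny(0600, 0400, 6));    /* rw on 0400: 1 (deny) */
        printf("%d\n", would_deny(0400, 0400, 6));    /* r  on 0400: 0 (allow) */
        return 0;
    }
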
625 635
626 /* 636 /*
627 * Functions to convert between the kern_ipc_perm structure and the 637 * Functions to convert between the kern_ipc_perm structure and the
628 * old/new ipc_perm structures 638 * old/new ipc_perm structures
629 */ 639 */
630 640
631 /** 641 /**
632 * kernel_to_ipc64_perm - convert kernel ipc permissions to user 642 * kernel_to_ipc64_perm - convert kernel ipc permissions to user
633 * @in: kernel permissions 643 * @in: kernel permissions
634 * @out: new style IPC permissions 644 * @out: new style IPC permissions
635 * 645 *
636 * Turn the kernel object @in into a set of permissions descriptions 646 * Turn the kernel object @in into a set of permissions descriptions
637 * for returning to userspace (@out). 647 * for returning to userspace (@out).
638 */ 648 */
639 649
640 650
641 void kernel_to_ipc64_perm (struct kern_ipc_perm *in, struct ipc64_perm *out) 651 void kernel_to_ipc64_perm (struct kern_ipc_perm *in, struct ipc64_perm *out)
642 { 652 {
643 out->key = in->key; 653 out->key = in->key;
644 out->uid = in->uid; 654 out->uid = in->uid;
645 out->gid = in->gid; 655 out->gid = in->gid;
646 out->cuid = in->cuid; 656 out->cuid = in->cuid;
647 out->cgid = in->cgid; 657 out->cgid = in->cgid;
648 out->mode = in->mode; 658 out->mode = in->mode;
649 out->seq = in->seq; 659 out->seq = in->seq;
650 } 660 }
651 661
652 /** 662 /**
653 * ipc64_perm_to_ipc_perm - convert old ipc permissions to new 663 * ipc64_perm_to_ipc_perm - convert new ipc permissions to old
654 * @in: new style IPC permissions 664 * @in: new style IPC permissions
655 * @out: old style IPC permissions 665 * @out: old style IPC permissions
656 * 666 *
657 * Turn the new style permissions object @in into a compatibility 667 * Turn the new style permissions object @in into a compatibility
658 * object and store it into the @out pointer. 668 * object and store it into the @out pointer.
659 */ 669 */
660 670
661 void ipc64_perm_to_ipc_perm (struct ipc64_perm *in, struct ipc_perm *out) 671 void ipc64_perm_to_ipc_perm (struct ipc64_perm *in, struct ipc_perm *out)
662 { 672 {
663 out->key = in->key; 673 out->key = in->key;
664 SET_UID(out->uid, in->uid); 674 SET_UID(out->uid, in->uid);
665 SET_GID(out->gid, in->gid); 675 SET_GID(out->gid, in->gid);
666 SET_UID(out->cuid, in->cuid); 676 SET_UID(out->cuid, in->cuid);
667 SET_GID(out->cgid, in->cgid); 677 SET_GID(out->cgid, in->cgid);
668 out->mode = in->mode; 678 out->mode = in->mode;
669 out->seq = in->seq; 679 out->seq = in->seq;
670 } 680 }
671 681
682 /**
683 * ipc_lock - Lock an ipc structure
684 * @ids: IPC identifier set
685 * @id: ipc id to look for
686 *
687 * Look for an id in the ipc ids idr and lock the associated ipc object.
688 *
689 * ipc_ids.mutex is not necessarily held before this function is called,
690 * that's why we enter an RCU read section.
691 * The ipc object is locked on exit.
692 */
693
672 struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id) 694 struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id)
673 { 695 {
674 struct kern_ipc_perm *out; 696 struct kern_ipc_perm *out;
675 int lid = ipcid_to_idx(id); 697 int lid = ipcid_to_idx(id);
676 698
677 rcu_read_lock(); 699 rcu_read_lock();
678 out = idr_find(&ids->ipcs_idr, lid); 700 out = idr_find(&ids->ipcs_idr, lid);
679 if (out == NULL) { 701 if (out == NULL) {
680 rcu_read_unlock(); 702 rcu_read_unlock();
681 return ERR_PTR(-EINVAL); 703 return ERR_PTR(-EINVAL);
682 } 704 }
683 705
684 spin_lock(&out->lock); 706 spin_lock(&out->lock);
685 707
686 /* ipc_rmid() may have already freed the ID while ipc_lock 708 /* ipc_rmid() may have already freed the ID while ipc_lock
687 * was spinning: here verify that the structure is still valid 709 * was spinning: here verify that the structure is still valid
688 */ 710 */
689 if (out->deleted) { 711 if (out->deleted) {
690 spin_unlock(&out->lock); 712 spin_unlock(&out->lock);
691 rcu_read_unlock(); 713 rcu_read_unlock();
692 return ERR_PTR(-EINVAL); 714 return ERR_PTR(-EINVAL);
693 } 715 }
694 716
695 return out; 717 return out;
696 } 718 }
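
A sketch of the discipline ipc_lock() imposes on callers (the wrapper is illustrative; ipc_unlock() is the release helper used throughout this file):

    /* Sketch: look up and lock an ipc object by id, then release it.
     * ipc_unlock() drops the per-object lock and the RCU read section. */
    static int example_touch(struct ipc_ids *ids, int id)
    {
        struct kern_ipc_perm *perm = ipc_lock(ids, id);

        if (IS_ERR(perm))
            return PTR_ERR(perm);    /* -EINVAL: deleted or never existed */
        /* ...operate on the locked object... */
        ipc_unlock(perm);
        return 0;
    }
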
697 719
698 #ifdef __ARCH_WANT_IPC_PARSE_VERSION 720 #ifdef __ARCH_WANT_IPC_PARSE_VERSION
699 721
700 722
701 /** 723 /**
702 * ipc_parse_version - IPC call version 724 * ipc_parse_version - IPC call version
703 * @cmd: pointer to command 725 * @cmd: pointer to command
704 * 726 *
705 * Return IPC_64 for new style IPC and IPC_OLD for old style IPC. 727 * Return IPC_64 for new style IPC and IPC_OLD for old style IPC.
706 * The @cmd value is turned from an encoded command and version into 728 * The @cmd value is turned from an encoded command and version into
707 * just the command code. 729 * just the command code.
708 */ 730 */
709 731
710 int ipc_parse_version (int *cmd) 732 int ipc_parse_version (int *cmd)
711 { 733 {
712 if (*cmd & IPC_64) { 734 if (*cmd & IPC_64) {
713 *cmd ^= IPC_64; 735 *cmd ^= IPC_64;
714 return IPC_64; 736 return IPC_64;
715 } else { 737 } else {
716 return IPC_OLD; 738 return IPC_OLD;
717 } 739 }
718 } 740 }
719 741
720 #endif /* __ARCH_WANT_IPC_PARSE_VERSION */ 742 #endif /* __ARCH_WANT_IPC_PARSE_VERSION */
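
The IPC_64 stripping is clearest with a concrete value. A standalone sketch (the constants mirror linux/ipc.h and are copied here only for illustration):

    #include <stdio.h>

    #define IPC_OLD  0        /* illustrative copies of linux/ipc.h values */
    #define IPC_64   0x0100
    #define IPC_STAT 2

    /* Sketch of ipc_parse_version(): strip IPC_64 from the command and
     * report which ABI the caller picked. */
    static int parse_version(int *cmd)
    {
        if (*cmd & IPC_64) {
            *cmd ^= IPC_64;
            return IPC_64;
        }
        return IPC_OLD;
    }

    int main(void)
    {
        int cmd = IPC_STAT | IPC_64;

        printf("version=%#x cmd=%d\n", parse_version(&cmd), cmd);
        /* prints: version=0x100 cmd=2 */
        return 0;
    }
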
721 743
722 #ifdef CONFIG_PROC_FS 744 #ifdef CONFIG_PROC_FS
723 struct ipc_proc_iter { 745 struct ipc_proc_iter {
724 struct ipc_namespace *ns; 746 struct ipc_namespace *ns;
725 struct ipc_proc_iface *iface; 747 struct ipc_proc_iface *iface;
726 }; 748 };
727 749
728 /* 750 /*
729 * This routine locks the first ipc structure found at or after position pos. 751 * This routine locks the first ipc structure found at or after position pos.
730 */ 752 */
731 struct kern_ipc_perm *sysvipc_find_ipc(struct ipc_ids *ids, loff_t pos, 753 struct kern_ipc_perm *sysvipc_find_ipc(struct ipc_ids *ids, loff_t pos,
732 loff_t *new_pos) 754 loff_t *new_pos)
733 { 755 {
734 struct kern_ipc_perm *ipc; 756 struct kern_ipc_perm *ipc;
735 int total, id; 757 int total, id;
736 758
737 total = 0; 759 total = 0;
738 for (id = 0; id < pos && total < ids->in_use; id++) { 760 for (id = 0; id < pos && total < ids->in_use; id++) {
739 ipc = idr_find(&ids->ipcs_idr, id); 761 ipc = idr_find(&ids->ipcs_idr, id);
740 if (ipc != NULL) 762 if (ipc != NULL)
741 total++; 763 total++;
742 } 764 }
743 765
744 if (total >= ids->in_use) 766 if (total >= ids->in_use)
745 return NULL; 767 return NULL;
746 768
747 for ( ; pos < IPCMNI; pos++) { 769 for ( ; pos < IPCMNI; pos++) {
748 ipc = idr_find(&ids->ipcs_idr, pos); 770 ipc = idr_find(&ids->ipcs_idr, pos);
749 if (ipc != NULL) { 771 if (ipc != NULL) {
750 *new_pos = pos + 1; 772 *new_pos = pos + 1;
751 ipc_lock_by_ptr(ipc); 773 ipc_lock_by_ptr(ipc);
752 return ipc; 774 return ipc;
753 } 775 }
754 } 776 }
755 777
756 /* Out of range - return NULL to terminate iteration */ 778 /* Out of range - return NULL to terminate iteration */
757 return NULL; 779 return NULL;
758 } 780 }
759 781
760 static void *sysvipc_proc_next(struct seq_file *s, void *it, loff_t *pos) 782 static void *sysvipc_proc_next(struct seq_file *s, void *it, loff_t *pos)
761 { 783 {
762 struct ipc_proc_iter *iter = s->private; 784 struct ipc_proc_iter *iter = s->private;
763 struct ipc_proc_iface *iface = iter->iface; 785 struct ipc_proc_iface *iface = iter->iface;
764 struct kern_ipc_perm *ipc = it; 786 struct kern_ipc_perm *ipc = it;
765 787
766 /* If we had an ipc id locked before, unlock it */ 788 /* If we had an ipc id locked before, unlock it */
767 if (ipc && ipc != SEQ_START_TOKEN) 789 if (ipc && ipc != SEQ_START_TOKEN)
768 ipc_unlock(ipc); 790 ipc_unlock(ipc);
769 791
770 return sysvipc_find_ipc(iter->ns->ids[iface->ids], *pos, pos); 792 return sysvipc_find_ipc(iter->ns->ids[iface->ids], *pos, pos);
771 } 793 }
772 794
773 /* 795 /*
774 * File positions: pos 0 -> header, pos n -> ipc id + 1. 796 * File positions: pos 0 -> header, pos n -> ipc id = n - 1.
776 * SeqFile iterator: iterator value locked shp or SEQ_TOKEN_START. 798 * SeqFile iterator: iterator value locked ipc pointer or SEQ_START_TOKEN.
776 */ 798 */
777 static void *sysvipc_proc_start(struct seq_file *s, loff_t *pos) 799 static void *sysvipc_proc_start(struct seq_file *s, loff_t *pos)
778 { 800 {
779 struct ipc_proc_iter *iter = s->private; 801 struct ipc_proc_iter *iter = s->private;
780 struct ipc_proc_iface *iface = iter->iface; 802 struct ipc_proc_iface *iface = iter->iface;
781 struct ipc_ids *ids; 803 struct ipc_ids *ids;
782 804
783 ids = iter->ns->ids[iface->ids]; 805 ids = iter->ns->ids[iface->ids];
784 806
785 /* 807 /*
786 * Take the lock - this will be released by the corresponding 808 * Take the lock - this will be released by the corresponding
787 * call to stop(). 809 * call to stop().
788 */ 810 */
789 mutex_lock(&ids->mutex); 811 mutex_lock(&ids->mutex);
790 812
791 /* pos < 0 is invalid */ 813 /* pos < 0 is invalid */
792 if (*pos < 0) 814 if (*pos < 0)
793 return NULL; 815 return NULL;
794 816
795 /* pos == 0 means header */ 817 /* pos == 0 means header */
796 if (*pos == 0) 818 if (*pos == 0)
797 return SEQ_START_TOKEN; 819 return SEQ_START_TOKEN;
798 820
799 /* Find the (pos-1)th ipc */ 821 /* Find the (pos-1)th ipc */
800 return sysvipc_find_ipc(ids, *pos - 1, pos); 822 return sysvipc_find_ipc(ids, *pos - 1, pos);
801 } 823 }
802 824
803 static void sysvipc_proc_stop(struct seq_file *s, void *it) 825 static void sysvipc_proc_stop(struct seq_file *s, void *it)
804 { 826 {
805 struct kern_ipc_perm *ipc = it; 827 struct kern_ipc_perm *ipc = it;
806 struct ipc_proc_iter *iter = s->private; 828 struct ipc_proc_iter *iter = s->private;
807 struct ipc_proc_iface *iface = iter->iface; 829 struct ipc_proc_iface *iface = iter->iface;
808 struct ipc_ids *ids; 830 struct ipc_ids *ids;
809 831
810 /* If we had a locked segment, release it */ 832 /* If we had a locked structure, release it */
811 if (ipc && ipc != SEQ_START_TOKEN) 833 if (ipc && ipc != SEQ_START_TOKEN)
812 ipc_unlock(ipc); 834 ipc_unlock(ipc);
813 835
814 ids = iter->ns->ids[iface->ids]; 836 ids = iter->ns->ids[iface->ids];
815 /* Release the lock we took in start() */ 837 /* Release the lock we took in start() */
816 mutex_unlock(&ids->mutex); 838 mutex_unlock(&ids->mutex);
817 } 839 }
818 840
819 static int sysvipc_proc_show(struct seq_file *s, void *it) 841 static int sysvipc_proc_show(struct seq_file *s, void *it)
820 { 842 {
821 struct ipc_proc_iter *iter = s->private; 843 struct ipc_proc_iter *iter = s->private;
822 struct ipc_proc_iface *iface = iter->iface; 844 struct ipc_proc_iface *iface = iter->iface;
823 845
824 if (it == SEQ_START_TOKEN) 846 if (it == SEQ_START_TOKEN)
825 return seq_puts(s, iface->header); 847 return seq_puts(s, iface->header);
826 848
827 return iface->show(s, it); 849 return iface->show(s, it);
828 } 850 }
829 851
830 static struct seq_operations sysvipc_proc_seqops = { 852 static struct seq_operations sysvipc_proc_seqops = {
831 .start = sysvipc_proc_start, 853 .start = sysvipc_proc_start,
832 .stop = sysvipc_proc_stop, 854 .stop = sysvipc_proc_stop,
833 .next = sysvipc_proc_next, 855 .next = sysvipc_proc_next,
834 .show = sysvipc_proc_show, 856 .show = sysvipc_proc_show,
835 }; 857 };
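
The table above plugs the iterator into the generic seq_file engine: one read pass calls start() once (taking ids->mutex), then alternates show() and next() for each position, and ends with stop() (dropping the mutex). As a runnable userspace-side sketch of what these hooks produce, assuming procfs is mounted at /proc:

    #include <stdio.h>

    int main(void)
    {
            char line[256];
            FILE *f = fopen("/proc/sysvipc/shm", "r");

            if (!f) {
                    perror("fopen");
                    return 1;
            }
            /* First line: iface->header, printed when show() receives
             * SEQ_START_TOKEN; each later line is one ipc object,
             * formatted while that object's lock is held. */
            while (fgets(line, sizeof(line), f))
                    fputs(line, stdout);
            fclose(f);
            return 0;
    }
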
836 858
837 static int sysvipc_proc_open(struct inode *inode, struct file *file) 859 static int sysvipc_proc_open(struct inode *inode, struct file *file)
838 { 860 {
839 int ret; 861 int ret;
840 struct seq_file *seq; 862 struct seq_file *seq;
841 struct ipc_proc_iter *iter; 863 struct ipc_proc_iter *iter;
842 864
843 ret = -ENOMEM; 865 ret = -ENOMEM;
844 iter = kmalloc(sizeof(*iter), GFP_KERNEL); 866 iter = kmalloc(sizeof(*iter), GFP_KERNEL);
845 if (!iter) 867 if (!iter)
846 goto out; 868 goto out;
847 869
848 ret = seq_open(file, &sysvipc_proc_seqops); 870 ret = seq_open(file, &sysvipc_proc_seqops);
849 if (ret) 871 if (ret)
850 goto out_kfree; 872 goto out_kfree;
851 873
852 seq = file->private_data; 874 seq = file->private_data;
853 seq->private = iter; 875 seq->private = iter;
854 876
855 iter->iface = PDE(inode)->data; 877 iter->iface = PDE(inode)->data;
856 iter->ns = get_ipc_ns(current->nsproxy->ipc_ns); 878 iter->ns = get_ipc_ns(current->nsproxy->ipc_ns);
857 out: 879 out:
858 return ret; 880 return ret;
859 out_kfree: 881 out_kfree:
860 kfree(iter); 882 kfree(iter);
861 goto out; 883 goto out;
862 } 884 }
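
sysvipc_proc_open() finds its iface through PDE(inode)->data, which the registration path must have set up beforehand. A hedged sketch of that side, modelled on ipc_init_proc_interface() with allocation and error handling trimmed (create_proc_entry() is the procfs API of this kernel generation):

    struct proc_dir_entry *pde;

    pde = create_proc_entry(path, S_IRUGO, NULL); /* e.g. "sysvipc/msg" */
    if (pde) {
            pde->data = iface;                   /* read back via PDE() */
            pde->proc_fops = &sysvipc_proc_fops; /* uses the open above */
    }
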
863 885
864 static int sysvipc_proc_release(struct inode *inode, struct file *file) 886 static int sysvipc_proc_release(struct inode *inode, struct file *file)
865 { 887 {
866 struct seq_file *seq = file->private_data; 888 struct seq_file *seq = file->private_data;
867 struct ipc_proc_iter *iter = seq->private; 889 struct ipc_proc_iter *iter = seq->private;
868 put_ipc_ns(iter->ns); 890 put_ipc_ns(iter->ns);
869 return seq_release_private(inode, file); 891 return seq_release_private(inode, file);
1 /* 1 /*
2 * linux/ipc/util.h 2 * linux/ipc/util.h
3 * Copyright (C) 1999 Christoph Rohland 3 * Copyright (C) 1999 Christoph Rohland
4 * 4 *
5 * ipc helper functions (c) 1999 Manfred Spraul <manfred@colorfullife.com> 5 * ipc helper functions (c) 1999 Manfred Spraul <manfred@colorfullife.com>
6 * namespaces support. 2006 OpenVZ, SWsoft Inc. 6 * namespaces support. 2006 OpenVZ, SWsoft Inc.
7 * Pavel Emelianov <xemul@openvz.org> 7 * Pavel Emelianov <xemul@openvz.org>
8 */ 8 */
9 9
10 #ifndef _IPC_UTIL_H 10 #ifndef _IPC_UTIL_H
11 #define _IPC_UTIL_H 11 #define _IPC_UTIL_H
12 12
13 #include <linux/idr.h> 13 #include <linux/idr.h>
14 #include <linux/err.h> 14 #include <linux/err.h>
15 15
16 #define USHRT_MAX 0xffff 16 #define USHRT_MAX 0xffff
17 #define SEQ_MULTIPLIER (IPCMNI) 17 #define SEQ_MULTIPLIER (IPCMNI)
18 18
19 void sem_init (void); 19 void sem_init (void);
20 void msg_init (void); 20 void msg_init (void);
21 void shm_init (void); 21 void shm_init (void);
22 22
23 int sem_init_ns(struct ipc_namespace *ns); 23 int sem_init_ns(struct ipc_namespace *ns);
24 int msg_init_ns(struct ipc_namespace *ns); 24 int msg_init_ns(struct ipc_namespace *ns);
25 int shm_init_ns(struct ipc_namespace *ns); 25 int shm_init_ns(struct ipc_namespace *ns);
26 26
27 void sem_exit_ns(struct ipc_namespace *ns); 27 void sem_exit_ns(struct ipc_namespace *ns);
28 void msg_exit_ns(struct ipc_namespace *ns); 28 void msg_exit_ns(struct ipc_namespace *ns);
29 void shm_exit_ns(struct ipc_namespace *ns); 29 void shm_exit_ns(struct ipc_namespace *ns);
30 30
31 struct ipc_ids { 31 struct ipc_ids {
32 int in_use; 32 int in_use;
33 unsigned short seq; 33 unsigned short seq;
34 unsigned short seq_max; 34 unsigned short seq_max;
35 struct mutex mutex; 35 struct mutex mutex;
36 struct idr ipcs_idr; 36 struct idr ipcs_idr;
37 }; 37 };
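
Here in_use counts live objects, seq/seq_max drive the per-object sequence number, and ipcs_idr maps an internal index to its kern_ipc_perm. A hedged sketch, not this patch's code, of how ipc_addid() can claim a slot with the two-step idr API of this era:

    int idx, err;

    if (!idr_pre_get(&ids->ipcs_idr, GFP_KERNEL)) /* preload tree nodes */
            return -ENOMEM;
    err = idr_get_new(&ids->ipcs_idr, new, &idx); /* new: kern_ipc_perm */
    if (err)
            return err;
    ids->in_use++;
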
38 38
39 /* 39 /*
40 * Structure that holds the parameters needed by the ipc operations 40 * Structure that holds the parameters needed by the ipc operations
41 * (see below) 41 * (see below)
42 */ 42 */
43 struct ipc_params { 43 struct ipc_params {
44 key_t key; 44 key_t key;
45 int flg; 45 int flg;
46 union { 46 union {
47 size_t size; /* for shared memories */ 47 size_t size; /* for shared memories */
48 int nsems; /* for semaphores */ 48 int nsems; /* for semaphores */
49 } u; /* holds the getnew() specific param */ 49 } u; /* holds the getnew() specific param */
50 }; 50 };
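
Only the union member matching the caller is meaningful. A hedged illustration, where key, shmflg, semflg, size and nsems stand for the caller's arguments:

    struct ipc_params shm_params = { .key = key, .flg = shmflg };
    struct ipc_params sem_params = { .key = key, .flg = semflg };

    shm_params.u.size  = size;  /* shmget(): requested segment size */
    sem_params.u.nsems = nsems; /* semget(): number of semaphores   */
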
51 51
52 /* 52 /*
53 * Structure that holds some ipc operations. This structure is used to unify 53 * Structure that holds some ipc operations. This structure is used to unify
54 * the calls to sys_msgget(), sys_semget(), sys_shmget() 54 * the calls to sys_msgget(), sys_semget(), sys_shmget()
55 * . routine to call to create a new ipc object. Can be one of newque, 55 * . routine to call to create a new ipc object. Can be one of newque,
56 * newary, newseg 56 * newary, newseg
57 * . routine to call to call to check permissions for a new ipc object. 57 * . routine to call to check permissions for a new ipc object.
58 * Can be one of security_msg_associate, security_sem_associate, 58 * Can be one of security_msg_associate, security_sem_associate,
59 * security_shm_associate 59 * security_shm_associate
60 * . routine to call for an extra check if needed 60 * . routine to call for an extra check if needed
61 */ 61 */
62 struct ipc_ops { 62 struct ipc_ops {
63 int (*getnew) (struct ipc_namespace *, struct ipc_params *); 63 int (*getnew) (struct ipc_namespace *, struct ipc_params *);
64 int (*associate) (struct kern_ipc_perm *, int); 64 int (*associate) (struct kern_ipc_perm *, int);
65 int (*more_checks) (struct kern_ipc_perm *, struct ipc_params *); 65 int (*more_checks) (struct kern_ipc_perm *, struct ipc_params *);
66 }; 66 };
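
A hedged sketch of how sys_msgget() is expected to fill this table; the names follow the comment above, and the msg_security() wrapper is shown only to adapt the LSM hook's argument type:

    static int msg_security(struct kern_ipc_perm *ipcp, int msgflg)
    {
            struct msg_queue *msq = container_of(ipcp, struct msg_queue,
                                                 q_perm);
            return security_msg_associate(msq, msgflg);
    }

    struct ipc_ops msg_ops = {
            .getnew      = newque,       /* create a new queue         */
            .associate   = msg_security, /* permission check           */
            .more_checks = NULL,         /* queues need no extra check */
    };
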
67 67
68 struct seq_file; 68 struct seq_file;
69 69
70 void ipc_init_ids(struct ipc_ids *); 70 void ipc_init_ids(struct ipc_ids *);
71 #ifdef CONFIG_PROC_FS 71 #ifdef CONFIG_PROC_FS
72 void __init ipc_init_proc_interface(const char *path, const char *header, 72 void __init ipc_init_proc_interface(const char *path, const char *header,
73 int ids, int (*show)(struct seq_file *, void *)); 73 int ids, int (*show)(struct seq_file *, void *));
74 #else 74 #else
75 #define ipc_init_proc_interface(path, header, ids, show) do {} while (0) 75 #define ipc_init_proc_interface(path, header, ids, show) do {} while (0)
76 #endif 76 #endif
77 77
78 #define IPC_SEM_IDS 0 78 #define IPC_SEM_IDS 0
79 #define IPC_MSG_IDS 1 79 #define IPC_MSG_IDS 1
80 #define IPC_SHM_IDS 2 80 #define IPC_SHM_IDS 2
81 81
82 #define ipcid_to_idx(id) ((id) % SEQ_MULTIPLIER) 82 #define ipcid_to_idx(id) ((id) % SEQ_MULTIPLIER)
83 83
84 /* must be called with ids->mutex acquired.*/ 84 /* must be called with ids->mutex acquired.*/
85 int ipc_addid(struct ipc_ids *, struct kern_ipc_perm *, int); 85 int ipc_addid(struct ipc_ids *, struct kern_ipc_perm *, int);
86 int ipc_get_maxid(struct ipc_ids *); 86 int ipc_get_maxid(struct ipc_ids *);
87 87
88 /* must be called with both locks (ids->mutex and the ipcp spinlock) acquired. */ 88 /* must be called with both locks (ids->mutex and the ipcp spinlock) acquired. */
89 void ipc_rmid(struct ipc_ids *, struct kern_ipc_perm *); 89 void ipc_rmid(struct ipc_ids *, struct kern_ipc_perm *);
90 90
91 int ipcperms (struct kern_ipc_perm *ipcp, short flg); 91 /* must be called with ipcp locked */
92 int ipcperms(struct kern_ipc_perm *ipcp, short flg);
92 93
93 /* for rare, potentially huge allocations. 94 /* for rare, potentially huge allocations.
94 * both functions can sleep 95 * both functions can sleep
95 */ 96 */
96 void* ipc_alloc(int size); 97 void* ipc_alloc(int size);
97 void ipc_free(void* ptr, int size); 98 void ipc_free(void* ptr, int size);
98 99
99 /* 100 /*
100 * For allocations that need to be freed by RCU. 101 * For allocations that need to be freed by RCU.
101 * Objects are reference counted; they start with reference count 1. 102 * Objects are reference counted; they start with reference count 1.
102 * getref increases the refcount, the putref call that reduces the refcount 103 * getref increases the refcount, the putref call that reduces the refcount
103 * to 0 schedules the rcu destruction. Caller must guarantee locking. 104 * to 0 schedules the rcu destruction. Caller must guarantee locking.
104 */ 105 */
105 void* ipc_rcu_alloc(int size); 106 void* ipc_rcu_alloc(int size);
106 void ipc_rcu_getref(void *ptr); 107 void ipc_rcu_getref(void *ptr);
107 void ipc_rcu_putref(void *ptr); 108 void ipc_rcu_putref(void *ptr);
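
A hedged usage sketch of the refcount rules just described, with sma standing for any ipc_rcu_alloc()ed object:

    struct sem_array *sma = ipc_rcu_alloc(size); /* refcount starts at 1 */

    ipc_rcu_getref(sma); /* extra reference: now 2                       */
    ipc_rcu_putref(sma); /* back to 1                                    */
    ipc_rcu_putref(sma); /* hits 0: freeing deferred to an RCU grace period */
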
108 109
109 struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int); 110 struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int);
110 111
111 void kernel_to_ipc64_perm(struct kern_ipc_perm *in, struct ipc64_perm *out); 112 void kernel_to_ipc64_perm(struct kern_ipc_perm *in, struct ipc64_perm *out);
112 void ipc64_perm_to_ipc_perm(struct ipc64_perm *in, struct ipc_perm *out); 113 void ipc64_perm_to_ipc_perm(struct ipc64_perm *in, struct ipc_perm *out);
113 114
114 #if defined(__ia64__) || defined(__x86_64__) || defined(__hppa__) || defined(__XTENSA__) 115 #if defined(__ia64__) || defined(__x86_64__) || defined(__hppa__) || defined(__XTENSA__)
115 /* On IA-64, we always use the "64-bit version" of the IPC structures. */ 116 /* On IA-64, we always use the "64-bit version" of the IPC structures. */
116 # define ipc_parse_version(cmd) IPC_64 117 # define ipc_parse_version(cmd) IPC_64
117 #else 118 #else
118 int ipc_parse_version (int *cmd); 119 int ipc_parse_version (int *cmd);
119 #endif 120 #endif
120 121
121 extern void free_msg(struct msg_msg *msg); 122 extern void free_msg(struct msg_msg *msg);
122 extern struct msg_msg *load_msg(const void __user *src, int len); 123 extern struct msg_msg *load_msg(const void __user *src, int len);
123 extern int store_msg(void __user *dest, struct msg_msg *msg, int len); 124 extern int store_msg(void __user *dest, struct msg_msg *msg, int len);
124 extern int ipcget_new(struct ipc_namespace *, struct ipc_ids *, 125 extern int ipcget_new(struct ipc_namespace *, struct ipc_ids *,
125 struct ipc_ops *, struct ipc_params *); 126 struct ipc_ops *, struct ipc_params *);
126 extern int ipcget_public(struct ipc_namespace *, struct ipc_ids *, 127 extern int ipcget_public(struct ipc_namespace *, struct ipc_ids *,
127 struct ipc_ops *, struct ipc_params *); 128 struct ipc_ops *, struct ipc_params *);
128 129
129 static inline int ipc_buildid(struct ipc_ids *ids, int id, int seq) 130 static inline int ipc_buildid(struct ipc_ids *ids, int id, int seq)
130 { 131 {
131 return SEQ_MULTIPLIER * seq + id; 132 return SEQ_MULTIPLIER * seq + id;
132 } 133 }
133 134
135 /*
136 * Must be called with ipcp locked
137 */
134 static inline int ipc_checkid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp, 138 static inline int ipc_checkid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp,
135 int uid) 139 int uid)
136 { 140 {
137 if (uid / SEQ_MULTIPLIER != ipcp->seq) 141 if (uid / SEQ_MULTIPLIER != ipcp->seq)
138 return 1; 142 return 1;
139 return 0; 143 return 0;
140 } 144 }
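
The id layout that ties ipc_buildid(), ipcid_to_idx() and ipc_checkid() together can be verified standalone; a runnable sketch assuming IPCMNI has its usual value of 32768:

    #include <assert.h>
    #include <stdio.h>

    #define IPCMNI           32768 /* assumed value */
    #define SEQ_MULTIPLIER   (IPCMNI)
    #define ipcid_to_idx(id) ((id) % SEQ_MULTIPLIER)

    int main(void)
    {
            int seq = 2, idx = 5;
            int id = SEQ_MULTIPLIER * seq + idx; /* ipc_buildid()      */

            printf("id = %d\n", id);             /* prints: id = 65541 */
            assert(ipcid_to_idx(id) == idx);     /* index recovered    */
            assert(id / SEQ_MULTIPLIER == seq);  /* ipc_checkid() test */
            return 0;
    }
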
141 145
142 static inline void ipc_lock_by_ptr(struct kern_ipc_perm *perm) 146 static inline void ipc_lock_by_ptr(struct kern_ipc_perm *perm)
143 { 147 {
144 rcu_read_lock(); 148 rcu_read_lock();
145 spin_lock(&perm->lock); 149 spin_lock(&perm->lock);
146 } 150 }
147 151
148 static inline void ipc_unlock(struct kern_ipc_perm *perm) 152 static inline void ipc_unlock(struct kern_ipc_perm *perm)
149 { 153 {
150 spin_unlock(&perm->lock); 154 spin_unlock(&perm->lock);
151 rcu_read_unlock(); 155 rcu_read_unlock();
152 } 156 }
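
The two helpers must pair up, so the RCU read section opened by ipc_lock_by_ptr() covers the whole critical section. A hedged fragment, with shp standing for an illustrative shm segment:

    ipc_lock_by_ptr(&shp->shm_perm); /* rcu_read_lock() + spin_lock()     */
    /* ... the object cannot be freed while we look at it ... */
    ipc_unlock(&shp->shm_perm);      /* spin_unlock() + rcu_read_unlock() */
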
153 157
154 static inline struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, 158 static inline struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids,
155 int id) 159 int id)
156 { 160 {
157 struct kern_ipc_perm *out; 161 struct kern_ipc_perm *out;
158 162
159 out = ipc_lock(ids, id); 163 out = ipc_lock(ids, id);
160 if (IS_ERR(out)) 164 if (IS_ERR(out))
161 return out; 165 return out;
162 166
163 if (ipc_checkid(ids, out, id)) { 167 if (ipc_checkid(ids, out, id)) {
164 ipc_unlock(out); 168 ipc_unlock(out);
165 return ERR_PTR(-EIDRM); 169 return ERR_PTR(-EIDRM);
166 } 170 }
167 171
168 return out; 172 return out;
169 } 173 }
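
Callers of ipc_lock_check() get back either a locked object or an ERR_PTR-encoded error, so the result is tested with IS_ERR(). A hedged caller-side fragment, where msg_ids() and msqid are illustrative:

    struct kern_ipc_perm *perm;

    perm = ipc_lock_check(&msg_ids(ns), msqid);
    if (IS_ERR(perm))
            return PTR_ERR(perm); /* -EINVAL (no object) or -EIDRM */
    /* ... perm is locked and its sequence number has been verified ... */
    ipc_unlock(perm);
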
170 174
175 /**
176 * ipcget - Common sys_*get() code
177 * @ns : namespace
178 * @ids : IPC identifier set
179 * @ops : operations to be called on ipc object creation, permission checks
180 * and further checks
181 * @params : the parameters needed by the previous operations.
182 *
183 * Common routine called by sys_msgget(), sys_semget() and sys_shmget().
184 */
171 static inline int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids, 185 static inline int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids,
172 struct ipc_ops *ops, struct ipc_params *params) 186 struct ipc_ops *ops, struct ipc_params *params)
173 { 187 {
174 if (params->key == IPC_PRIVATE) 188 if (params->key == IPC_PRIVATE)
175 return ipcget_new(ns, ids, ops, params); 189 return ipcget_new(ns, ids, ops, params);
176 else 190 else
177 return ipcget_public(ns, ids, ops, params); 191 return ipcget_public(ns, ids, ops, params);
178 } 192 }
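
From userspace the two branches show up as the usual SysV key semantics; a runnable demonstration using message queues (0x1234 is an arbitrary demo key):

    #include <stdio.h>
    #include <sys/ipc.h>
    #include <sys/msg.h>

    int main(void)
    {
            /* IPC_PRIVATE: the kernel takes the ipcget_new() branch and
             * always creates a fresh queue, with no key lookup. */
            int priv = msgget(IPC_PRIVATE, IPC_CREAT | 0600);

            /* Any other key: ipcget_public() looks the key up first and
             * creates only if it is absent and IPC_CREAT was passed. */
            int pub = msgget((key_t)0x1234, IPC_CREAT | 0600);

            printf("private id=%d, keyed id=%d\n", priv, pub);
            return 0;
    }
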
179 193
180 #endif 194 #endif
181 195