Commit f4566f04854d78acfc74b9acb029744acde9d033
Committed by
Linus Torvalds
1 parent
2802831313
Exists in
master
and in
39 other branches
ipc: fix wrong comments
This patch fixes wrong/obsolete comments in the ipc code. It also adds a missing lock around ipc_get_maxid() in shm_get_stat(). Signed-off-by: Nadia Derbey <Nadia.Derbey@bull.net> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 5 changed files with 118 additions and 45 deletions Inline Diff
ipc/msg.c
1 | /* | 1 | /* |
2 | * linux/ipc/msg.c | 2 | * linux/ipc/msg.c |
3 | * Copyright (C) 1992 Krishna Balasubramanian | 3 | * Copyright (C) 1992 Krishna Balasubramanian |
4 | * | 4 | * |
5 | * Removed all the remaining kerneld mess | 5 | * Removed all the remaining kerneld mess |
6 | * Catch the -EFAULT stuff properly | 6 | * Catch the -EFAULT stuff properly |
7 | * Use GFP_KERNEL for messages as in 1.2 | 7 | * Use GFP_KERNEL for messages as in 1.2 |
8 | * Fixed up the unchecked user space derefs | 8 | * Fixed up the unchecked user space derefs |
9 | * Copyright (C) 1998 Alan Cox & Andi Kleen | 9 | * Copyright (C) 1998 Alan Cox & Andi Kleen |
10 | * | 10 | * |
11 | * /proc/sysvipc/msg support (c) 1999 Dragos Acostachioaie <dragos@iname.com> | 11 | * /proc/sysvipc/msg support (c) 1999 Dragos Acostachioaie <dragos@iname.com> |
12 | * | 12 | * |
13 | * mostly rewritten, threaded and wake-one semantics added | 13 | * mostly rewritten, threaded and wake-one semantics added |
14 | * MSGMAX limit removed, sysctl's added | 14 | * MSGMAX limit removed, sysctl's added |
15 | * (c) 1999 Manfred Spraul <manfred@colorfullife.com> | 15 | * (c) 1999 Manfred Spraul <manfred@colorfullife.com> |
16 | * | 16 | * |
17 | * support for audit of ipc object properties and permission changes | 17 | * support for audit of ipc object properties and permission changes |
18 | * Dustin Kirkland <dustin.kirkland@us.ibm.com> | 18 | * Dustin Kirkland <dustin.kirkland@us.ibm.com> |
19 | * | 19 | * |
20 | * namespaces support | 20 | * namespaces support |
21 | * OpenVZ, SWsoft Inc. | 21 | * OpenVZ, SWsoft Inc. |
22 | * Pavel Emelianov <xemul@openvz.org> | 22 | * Pavel Emelianov <xemul@openvz.org> |
23 | */ | 23 | */ |
24 | 24 | ||
25 | #include <linux/capability.h> | 25 | #include <linux/capability.h> |
26 | #include <linux/slab.h> | 26 | #include <linux/slab.h> |
27 | #include <linux/msg.h> | 27 | #include <linux/msg.h> |
28 | #include <linux/spinlock.h> | 28 | #include <linux/spinlock.h> |
29 | #include <linux/init.h> | 29 | #include <linux/init.h> |
30 | #include <linux/proc_fs.h> | 30 | #include <linux/proc_fs.h> |
31 | #include <linux/list.h> | 31 | #include <linux/list.h> |
32 | #include <linux/security.h> | 32 | #include <linux/security.h> |
33 | #include <linux/sched.h> | 33 | #include <linux/sched.h> |
34 | #include <linux/syscalls.h> | 34 | #include <linux/syscalls.h> |
35 | #include <linux/audit.h> | 35 | #include <linux/audit.h> |
36 | #include <linux/seq_file.h> | 36 | #include <linux/seq_file.h> |
37 | #include <linux/mutex.h> | 37 | #include <linux/mutex.h> |
38 | #include <linux/nsproxy.h> | 38 | #include <linux/nsproxy.h> |
39 | 39 | ||
40 | #include <asm/current.h> | 40 | #include <asm/current.h> |
41 | #include <asm/uaccess.h> | 41 | #include <asm/uaccess.h> |
42 | #include "util.h" | 42 | #include "util.h" |
43 | 43 | ||
44 | /* | 44 | /* |
45 | * one msg_receiver structure for each sleeping receiver: | 45 | * one msg_receiver structure for each sleeping receiver: |
46 | */ | 46 | */ |
47 | struct msg_receiver { | 47 | struct msg_receiver { |
48 | struct list_head r_list; | 48 | struct list_head r_list; |
49 | struct task_struct *r_tsk; | 49 | struct task_struct *r_tsk; |
50 | 50 | ||
51 | int r_mode; | 51 | int r_mode; |
52 | long r_msgtype; | 52 | long r_msgtype; |
53 | long r_maxsize; | 53 | long r_maxsize; |
54 | 54 | ||
55 | struct msg_msg *volatile r_msg; | 55 | struct msg_msg *volatile r_msg; |
56 | }; | 56 | }; |
57 | 57 | ||
58 | /* one msg_sender for each sleeping sender */ | 58 | /* one msg_sender for each sleeping sender */ |
59 | struct msg_sender { | 59 | struct msg_sender { |
60 | struct list_head list; | 60 | struct list_head list; |
61 | struct task_struct *tsk; | 61 | struct task_struct *tsk; |
62 | }; | 62 | }; |
63 | 63 | ||
64 | #define SEARCH_ANY 1 | 64 | #define SEARCH_ANY 1 |
65 | #define SEARCH_EQUAL 2 | 65 | #define SEARCH_EQUAL 2 |
66 | #define SEARCH_NOTEQUAL 3 | 66 | #define SEARCH_NOTEQUAL 3 |
67 | #define SEARCH_LESSEQUAL 4 | 67 | #define SEARCH_LESSEQUAL 4 |
68 | 68 | ||
69 | static atomic_t msg_bytes = ATOMIC_INIT(0); | 69 | static atomic_t msg_bytes = ATOMIC_INIT(0); |
70 | static atomic_t msg_hdrs = ATOMIC_INIT(0); | 70 | static atomic_t msg_hdrs = ATOMIC_INIT(0); |
71 | 71 | ||
72 | static struct ipc_ids init_msg_ids; | 72 | static struct ipc_ids init_msg_ids; |
73 | 73 | ||
74 | #define msg_ids(ns) (*((ns)->ids[IPC_MSG_IDS])) | 74 | #define msg_ids(ns) (*((ns)->ids[IPC_MSG_IDS])) |
75 | 75 | ||
76 | #define msg_unlock(msq) ipc_unlock(&(msq)->q_perm) | 76 | #define msg_unlock(msq) ipc_unlock(&(msq)->q_perm) |
77 | #define msg_buildid(ns, id, seq) \ | 77 | #define msg_buildid(ns, id, seq) \ |
78 | ipc_buildid(&msg_ids(ns), id, seq) | 78 | ipc_buildid(&msg_ids(ns), id, seq) |
79 | 79 | ||
80 | static void freeque(struct ipc_namespace *, struct msg_queue *); | 80 | static void freeque(struct ipc_namespace *, struct msg_queue *); |
81 | static int newque(struct ipc_namespace *, struct ipc_params *); | 81 | static int newque(struct ipc_namespace *, struct ipc_params *); |
82 | #ifdef CONFIG_PROC_FS | 82 | #ifdef CONFIG_PROC_FS |
83 | static int sysvipc_msg_proc_show(struct seq_file *s, void *it); | 83 | static int sysvipc_msg_proc_show(struct seq_file *s, void *it); |
84 | #endif | 84 | #endif |
85 | 85 | ||
86 | static void __msg_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids) | 86 | static void __msg_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids) |
87 | { | 87 | { |
88 | ns->ids[IPC_MSG_IDS] = ids; | 88 | ns->ids[IPC_MSG_IDS] = ids; |
89 | ns->msg_ctlmax = MSGMAX; | 89 | ns->msg_ctlmax = MSGMAX; |
90 | ns->msg_ctlmnb = MSGMNB; | 90 | ns->msg_ctlmnb = MSGMNB; |
91 | ns->msg_ctlmni = MSGMNI; | 91 | ns->msg_ctlmni = MSGMNI; |
92 | ipc_init_ids(ids); | 92 | ipc_init_ids(ids); |
93 | } | 93 | } |
94 | 94 | ||
95 | int msg_init_ns(struct ipc_namespace *ns) | 95 | int msg_init_ns(struct ipc_namespace *ns) |
96 | { | 96 | { |
97 | struct ipc_ids *ids; | 97 | struct ipc_ids *ids; |
98 | 98 | ||
99 | ids = kmalloc(sizeof(struct ipc_ids), GFP_KERNEL); | 99 | ids = kmalloc(sizeof(struct ipc_ids), GFP_KERNEL); |
100 | if (ids == NULL) | 100 | if (ids == NULL) |
101 | return -ENOMEM; | 101 | return -ENOMEM; |
102 | 102 | ||
103 | __msg_init_ns(ns, ids); | 103 | __msg_init_ns(ns, ids); |
104 | return 0; | 104 | return 0; |
105 | } | 105 | } |
106 | 106 | ||
107 | void msg_exit_ns(struct ipc_namespace *ns) | 107 | void msg_exit_ns(struct ipc_namespace *ns) |
108 | { | 108 | { |
109 | struct msg_queue *msq; | 109 | struct msg_queue *msq; |
110 | int next_id; | 110 | int next_id; |
111 | int total, in_use; | 111 | int total, in_use; |
112 | 112 | ||
113 | mutex_lock(&msg_ids(ns).mutex); | 113 | mutex_lock(&msg_ids(ns).mutex); |
114 | 114 | ||
115 | in_use = msg_ids(ns).in_use; | 115 | in_use = msg_ids(ns).in_use; |
116 | 116 | ||
117 | for (total = 0, next_id = 0; total < in_use; next_id++) { | 117 | for (total = 0, next_id = 0; total < in_use; next_id++) { |
118 | msq = idr_find(&msg_ids(ns).ipcs_idr, next_id); | 118 | msq = idr_find(&msg_ids(ns).ipcs_idr, next_id); |
119 | if (msq == NULL) | 119 | if (msq == NULL) |
120 | continue; | 120 | continue; |
121 | ipc_lock_by_ptr(&msq->q_perm); | 121 | ipc_lock_by_ptr(&msq->q_perm); |
122 | freeque(ns, msq); | 122 | freeque(ns, msq); |
123 | total++; | 123 | total++; |
124 | } | 124 | } |
125 | mutex_unlock(&msg_ids(ns).mutex); | 125 | mutex_unlock(&msg_ids(ns).mutex); |
126 | 126 | ||
127 | kfree(ns->ids[IPC_MSG_IDS]); | 127 | kfree(ns->ids[IPC_MSG_IDS]); |
128 | ns->ids[IPC_MSG_IDS] = NULL; | 128 | ns->ids[IPC_MSG_IDS] = NULL; |
129 | } | 129 | } |
130 | 130 | ||
131 | void __init msg_init(void) | 131 | void __init msg_init(void) |
132 | { | 132 | { |
133 | __msg_init_ns(&init_ipc_ns, &init_msg_ids); | 133 | __msg_init_ns(&init_ipc_ns, &init_msg_ids); |
134 | ipc_init_proc_interface("sysvipc/msg", | 134 | ipc_init_proc_interface("sysvipc/msg", |
135 | " key msqid perms cbytes qnum lspid lrpid uid gid cuid cgid stime rtime ctime\n", | 135 | " key msqid perms cbytes qnum lspid lrpid uid gid cuid cgid stime rtime ctime\n", |
136 | IPC_MSG_IDS, sysvipc_msg_proc_show); | 136 | IPC_MSG_IDS, sysvipc_msg_proc_show); |
137 | } | 137 | } |
138 | 138 | ||
139 | static inline struct msg_queue *msg_lock(struct ipc_namespace *ns, int id) | 139 | static inline struct msg_queue *msg_lock(struct ipc_namespace *ns, int id) |
140 | { | 140 | { |
141 | struct kern_ipc_perm *ipcp = ipc_lock(&msg_ids(ns), id); | 141 | struct kern_ipc_perm *ipcp = ipc_lock(&msg_ids(ns), id); |
142 | 142 | ||
143 | return container_of(ipcp, struct msg_queue, q_perm); | 143 | return container_of(ipcp, struct msg_queue, q_perm); |
144 | } | 144 | } |
145 | 145 | ||
146 | static inline struct msg_queue *msg_lock_check(struct ipc_namespace *ns, | 146 | static inline struct msg_queue *msg_lock_check(struct ipc_namespace *ns, |
147 | int id) | 147 | int id) |
148 | { | 148 | { |
149 | struct kern_ipc_perm *ipcp = ipc_lock_check(&msg_ids(ns), id); | 149 | struct kern_ipc_perm *ipcp = ipc_lock_check(&msg_ids(ns), id); |
150 | 150 | ||
151 | return container_of(ipcp, struct msg_queue, q_perm); | 151 | return container_of(ipcp, struct msg_queue, q_perm); |
152 | } | 152 | } |
153 | 153 | ||
154 | static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s) | 154 | static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s) |
155 | { | 155 | { |
156 | ipc_rmid(&msg_ids(ns), &s->q_perm); | 156 | ipc_rmid(&msg_ids(ns), &s->q_perm); |
157 | } | 157 | } |
158 | 158 | ||
159 | /** | ||
160 | * newque - Create a new msg queue | ||
161 | * @ns: namespace | ||
162 | * @params: ptr to the structure that contains the key and msgflg | ||
163 | * | ||
164 | * Called with msg_ids.mutex held | ||
165 | */ | ||
159 | static int newque(struct ipc_namespace *ns, struct ipc_params *params) | 166 | static int newque(struct ipc_namespace *ns, struct ipc_params *params) |
160 | { | 167 | { |
161 | struct msg_queue *msq; | 168 | struct msg_queue *msq; |
162 | int id, retval; | 169 | int id, retval; |
163 | key_t key = params->key; | 170 | key_t key = params->key; |
164 | int msgflg = params->flg; | 171 | int msgflg = params->flg; |
165 | 172 | ||
166 | msq = ipc_rcu_alloc(sizeof(*msq)); | 173 | msq = ipc_rcu_alloc(sizeof(*msq)); |
167 | if (!msq) | 174 | if (!msq) |
168 | return -ENOMEM; | 175 | return -ENOMEM; |
169 | 176 | ||
170 | msq->q_perm.mode = msgflg & S_IRWXUGO; | 177 | msq->q_perm.mode = msgflg & S_IRWXUGO; |
171 | msq->q_perm.key = key; | 178 | msq->q_perm.key = key; |
172 | 179 | ||
173 | msq->q_perm.security = NULL; | 180 | msq->q_perm.security = NULL; |
174 | retval = security_msg_queue_alloc(msq); | 181 | retval = security_msg_queue_alloc(msq); |
175 | if (retval) { | 182 | if (retval) { |
176 | ipc_rcu_putref(msq); | 183 | ipc_rcu_putref(msq); |
177 | return retval; | 184 | return retval; |
178 | } | 185 | } |
179 | 186 | ||
180 | /* | 187 | /* |
181 | * ipc_addid() locks msq | 188 | * ipc_addid() locks msq |
182 | */ | 189 | */ |
183 | id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni); | 190 | id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni); |
184 | if (id == -1) { | 191 | if (id == -1) { |
185 | security_msg_queue_free(msq); | 192 | security_msg_queue_free(msq); |
186 | ipc_rcu_putref(msq); | 193 | ipc_rcu_putref(msq); |
187 | return -ENOSPC; | 194 | return -ENOSPC; |
188 | } | 195 | } |
189 | 196 | ||
190 | msq->q_perm.id = msg_buildid(ns, id, msq->q_perm.seq); | 197 | msq->q_perm.id = msg_buildid(ns, id, msq->q_perm.seq); |
191 | msq->q_stime = msq->q_rtime = 0; | 198 | msq->q_stime = msq->q_rtime = 0; |
192 | msq->q_ctime = get_seconds(); | 199 | msq->q_ctime = get_seconds(); |
193 | msq->q_cbytes = msq->q_qnum = 0; | 200 | msq->q_cbytes = msq->q_qnum = 0; |
194 | msq->q_qbytes = ns->msg_ctlmnb; | 201 | msq->q_qbytes = ns->msg_ctlmnb; |
195 | msq->q_lspid = msq->q_lrpid = 0; | 202 | msq->q_lspid = msq->q_lrpid = 0; |
196 | INIT_LIST_HEAD(&msq->q_messages); | 203 | INIT_LIST_HEAD(&msq->q_messages); |
197 | INIT_LIST_HEAD(&msq->q_receivers); | 204 | INIT_LIST_HEAD(&msq->q_receivers); |
198 | INIT_LIST_HEAD(&msq->q_senders); | 205 | INIT_LIST_HEAD(&msq->q_senders); |
199 | 206 | ||
200 | msg_unlock(msq); | 207 | msg_unlock(msq); |
201 | 208 | ||
202 | return msq->q_perm.id; | 209 | return msq->q_perm.id; |
203 | } | 210 | } |
204 | 211 | ||
205 | static inline void ss_add(struct msg_queue *msq, struct msg_sender *mss) | 212 | static inline void ss_add(struct msg_queue *msq, struct msg_sender *mss) |
206 | { | 213 | { |
207 | mss->tsk = current; | 214 | mss->tsk = current; |
208 | current->state = TASK_INTERRUPTIBLE; | 215 | current->state = TASK_INTERRUPTIBLE; |
209 | list_add_tail(&mss->list, &msq->q_senders); | 216 | list_add_tail(&mss->list, &msq->q_senders); |
210 | } | 217 | } |
211 | 218 | ||
212 | static inline void ss_del(struct msg_sender *mss) | 219 | static inline void ss_del(struct msg_sender *mss) |
213 | { | 220 | { |
214 | if (mss->list.next != NULL) | 221 | if (mss->list.next != NULL) |
215 | list_del(&mss->list); | 222 | list_del(&mss->list); |
216 | } | 223 | } |
217 | 224 | ||
218 | static void ss_wakeup(struct list_head *h, int kill) | 225 | static void ss_wakeup(struct list_head *h, int kill) |
219 | { | 226 | { |
220 | struct list_head *tmp; | 227 | struct list_head *tmp; |
221 | 228 | ||
222 | tmp = h->next; | 229 | tmp = h->next; |
223 | while (tmp != h) { | 230 | while (tmp != h) { |
224 | struct msg_sender *mss; | 231 | struct msg_sender *mss; |
225 | 232 | ||
226 | mss = list_entry(tmp, struct msg_sender, list); | 233 | mss = list_entry(tmp, struct msg_sender, list); |
227 | tmp = tmp->next; | 234 | tmp = tmp->next; |
228 | if (kill) | 235 | if (kill) |
229 | mss->list.next = NULL; | 236 | mss->list.next = NULL; |
230 | wake_up_process(mss->tsk); | 237 | wake_up_process(mss->tsk); |
231 | } | 238 | } |
232 | } | 239 | } |
233 | 240 | ||
234 | static void expunge_all(struct msg_queue *msq, int res) | 241 | static void expunge_all(struct msg_queue *msq, int res) |
235 | { | 242 | { |
236 | struct list_head *tmp; | 243 | struct list_head *tmp; |
237 | 244 | ||
238 | tmp = msq->q_receivers.next; | 245 | tmp = msq->q_receivers.next; |
239 | while (tmp != &msq->q_receivers) { | 246 | while (tmp != &msq->q_receivers) { |
240 | struct msg_receiver *msr; | 247 | struct msg_receiver *msr; |
241 | 248 | ||
242 | msr = list_entry(tmp, struct msg_receiver, r_list); | 249 | msr = list_entry(tmp, struct msg_receiver, r_list); |
243 | tmp = tmp->next; | 250 | tmp = tmp->next; |
244 | msr->r_msg = NULL; | 251 | msr->r_msg = NULL; |
245 | wake_up_process(msr->r_tsk); | 252 | wake_up_process(msr->r_tsk); |
246 | smp_mb(); | 253 | smp_mb(); |
247 | msr->r_msg = ERR_PTR(res); | 254 | msr->r_msg = ERR_PTR(res); |
248 | } | 255 | } |
249 | } | 256 | } |
250 | 257 | ||
251 | /* | 258 | /* |
252 | * freeque() wakes up waiters on the sender and receiver waiting queue, | 259 | * freeque() wakes up waiters on the sender and receiver waiting queue, |
253 | * removes the message queue from message queue ID | 260 | * removes the message queue from message queue ID IDR, and cleans up all the |
254 | * IDR, and cleans up all the messages associated with this queue. | 261 | * messages associated with this queue. |
255 | * | 262 | * |
256 | * msg_ids.mutex and the spinlock for this message queue are held | 263 | * msg_ids.mutex and the spinlock for this message queue are held |
257 | * before freeque() is called. msg_ids.mutex remains locked on exit. | 264 | * before freeque() is called. msg_ids.mutex remains locked on exit. |
258 | */ | 265 | */ |
259 | static void freeque(struct ipc_namespace *ns, struct msg_queue *msq) | 266 | static void freeque(struct ipc_namespace *ns, struct msg_queue *msq) |
260 | { | 267 | { |
261 | struct list_head *tmp; | 268 | struct list_head *tmp; |
262 | 269 | ||
263 | expunge_all(msq, -EIDRM); | 270 | expunge_all(msq, -EIDRM); |
264 | ss_wakeup(&msq->q_senders, 1); | 271 | ss_wakeup(&msq->q_senders, 1); |
265 | msg_rmid(ns, msq); | 272 | msg_rmid(ns, msq); |
266 | msg_unlock(msq); | 273 | msg_unlock(msq); |
267 | 274 | ||
268 | tmp = msq->q_messages.next; | 275 | tmp = msq->q_messages.next; |
269 | while (tmp != &msq->q_messages) { | 276 | while (tmp != &msq->q_messages) { |
270 | struct msg_msg *msg = list_entry(tmp, struct msg_msg, m_list); | 277 | struct msg_msg *msg = list_entry(tmp, struct msg_msg, m_list); |
271 | 278 | ||
272 | tmp = tmp->next; | 279 | tmp = tmp->next; |
273 | atomic_dec(&msg_hdrs); | 280 | atomic_dec(&msg_hdrs); |
274 | free_msg(msg); | 281 | free_msg(msg); |
275 | } | 282 | } |
276 | atomic_sub(msq->q_cbytes, &msg_bytes); | 283 | atomic_sub(msq->q_cbytes, &msg_bytes); |
277 | security_msg_queue_free(msq); | 284 | security_msg_queue_free(msq); |
278 | ipc_rcu_putref(msq); | 285 | ipc_rcu_putref(msq); |
279 | } | 286 | } |
280 | 287 | ||
288 | /* | ||
289 | * Called with msg_ids.mutex and ipcp locked. | ||
290 | */ | ||
281 | static inline int msg_security(struct kern_ipc_perm *ipcp, int msgflg) | 291 | static inline int msg_security(struct kern_ipc_perm *ipcp, int msgflg) |
282 | { | 292 | { |
283 | struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm); | 293 | struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm); |
284 | 294 | ||
285 | return security_msg_queue_associate(msq, msgflg); | 295 | return security_msg_queue_associate(msq, msgflg); |
286 | } | 296 | } |
287 | 297 | ||
288 | asmlinkage long sys_msgget(key_t key, int msgflg) | 298 | asmlinkage long sys_msgget(key_t key, int msgflg) |
289 | { | 299 | { |
290 | struct ipc_namespace *ns; | 300 | struct ipc_namespace *ns; |
291 | struct ipc_ops msg_ops; | 301 | struct ipc_ops msg_ops; |
292 | struct ipc_params msg_params; | 302 | struct ipc_params msg_params; |
293 | 303 | ||
294 | ns = current->nsproxy->ipc_ns; | 304 | ns = current->nsproxy->ipc_ns; |
295 | 305 | ||
296 | msg_ops.getnew = newque; | 306 | msg_ops.getnew = newque; |
297 | msg_ops.associate = msg_security; | 307 | msg_ops.associate = msg_security; |
298 | msg_ops.more_checks = NULL; | 308 | msg_ops.more_checks = NULL; |
299 | 309 | ||
300 | msg_params.key = key; | 310 | msg_params.key = key; |
301 | msg_params.flg = msgflg; | 311 | msg_params.flg = msgflg; |
302 | 312 | ||
303 | return ipcget(ns, &msg_ids(ns), &msg_ops, &msg_params); | 313 | return ipcget(ns, &msg_ids(ns), &msg_ops, &msg_params); |
304 | } | 314 | } |
305 | 315 | ||
306 | static inline unsigned long | 316 | static inline unsigned long |
307 | copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version) | 317 | copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version) |
308 | { | 318 | { |
309 | switch(version) { | 319 | switch(version) { |
310 | case IPC_64: | 320 | case IPC_64: |
311 | return copy_to_user(buf, in, sizeof(*in)); | 321 | return copy_to_user(buf, in, sizeof(*in)); |
312 | case IPC_OLD: | 322 | case IPC_OLD: |
313 | { | 323 | { |
314 | struct msqid_ds out; | 324 | struct msqid_ds out; |
315 | 325 | ||
316 | memset(&out, 0, sizeof(out)); | 326 | memset(&out, 0, sizeof(out)); |
317 | 327 | ||
318 | ipc64_perm_to_ipc_perm(&in->msg_perm, &out.msg_perm); | 328 | ipc64_perm_to_ipc_perm(&in->msg_perm, &out.msg_perm); |
319 | 329 | ||
320 | out.msg_stime = in->msg_stime; | 330 | out.msg_stime = in->msg_stime; |
321 | out.msg_rtime = in->msg_rtime; | 331 | out.msg_rtime = in->msg_rtime; |
322 | out.msg_ctime = in->msg_ctime; | 332 | out.msg_ctime = in->msg_ctime; |
323 | 333 | ||
324 | if (in->msg_cbytes > USHRT_MAX) | 334 | if (in->msg_cbytes > USHRT_MAX) |
325 | out.msg_cbytes = USHRT_MAX; | 335 | out.msg_cbytes = USHRT_MAX; |
326 | else | 336 | else |
327 | out.msg_cbytes = in->msg_cbytes; | 337 | out.msg_cbytes = in->msg_cbytes; |
328 | out.msg_lcbytes = in->msg_cbytes; | 338 | out.msg_lcbytes = in->msg_cbytes; |
329 | 339 | ||
330 | if (in->msg_qnum > USHRT_MAX) | 340 | if (in->msg_qnum > USHRT_MAX) |
331 | out.msg_qnum = USHRT_MAX; | 341 | out.msg_qnum = USHRT_MAX; |
332 | else | 342 | else |
333 | out.msg_qnum = in->msg_qnum; | 343 | out.msg_qnum = in->msg_qnum; |
334 | 344 | ||
335 | if (in->msg_qbytes > USHRT_MAX) | 345 | if (in->msg_qbytes > USHRT_MAX) |
336 | out.msg_qbytes = USHRT_MAX; | 346 | out.msg_qbytes = USHRT_MAX; |
337 | else | 347 | else |
338 | out.msg_qbytes = in->msg_qbytes; | 348 | out.msg_qbytes = in->msg_qbytes; |
339 | out.msg_lqbytes = in->msg_qbytes; | 349 | out.msg_lqbytes = in->msg_qbytes; |
340 | 350 | ||
341 | out.msg_lspid = in->msg_lspid; | 351 | out.msg_lspid = in->msg_lspid; |
342 | out.msg_lrpid = in->msg_lrpid; | 352 | out.msg_lrpid = in->msg_lrpid; |
343 | 353 | ||
344 | return copy_to_user(buf, &out, sizeof(out)); | 354 | return copy_to_user(buf, &out, sizeof(out)); |
345 | } | 355 | } |
346 | default: | 356 | default: |
347 | return -EINVAL; | 357 | return -EINVAL; |
348 | } | 358 | } |
349 | } | 359 | } |
350 | 360 | ||
351 | struct msq_setbuf { | 361 | struct msq_setbuf { |
352 | unsigned long qbytes; | 362 | unsigned long qbytes; |
353 | uid_t uid; | 363 | uid_t uid; |
354 | gid_t gid; | 364 | gid_t gid; |
355 | mode_t mode; | 365 | mode_t mode; |
356 | }; | 366 | }; |
357 | 367 | ||
358 | static inline unsigned long | 368 | static inline unsigned long |
359 | copy_msqid_from_user(struct msq_setbuf *out, void __user *buf, int version) | 369 | copy_msqid_from_user(struct msq_setbuf *out, void __user *buf, int version) |
360 | { | 370 | { |
361 | switch(version) { | 371 | switch(version) { |
362 | case IPC_64: | 372 | case IPC_64: |
363 | { | 373 | { |
364 | struct msqid64_ds tbuf; | 374 | struct msqid64_ds tbuf; |
365 | 375 | ||
366 | if (copy_from_user(&tbuf, buf, sizeof(tbuf))) | 376 | if (copy_from_user(&tbuf, buf, sizeof(tbuf))) |
367 | return -EFAULT; | 377 | return -EFAULT; |
368 | 378 | ||
369 | out->qbytes = tbuf.msg_qbytes; | 379 | out->qbytes = tbuf.msg_qbytes; |
370 | out->uid = tbuf.msg_perm.uid; | 380 | out->uid = tbuf.msg_perm.uid; |
371 | out->gid = tbuf.msg_perm.gid; | 381 | out->gid = tbuf.msg_perm.gid; |
372 | out->mode = tbuf.msg_perm.mode; | 382 | out->mode = tbuf.msg_perm.mode; |
373 | 383 | ||
374 | return 0; | 384 | return 0; |
375 | } | 385 | } |
376 | case IPC_OLD: | 386 | case IPC_OLD: |
377 | { | 387 | { |
378 | struct msqid_ds tbuf_old; | 388 | struct msqid_ds tbuf_old; |
379 | 389 | ||
380 | if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old))) | 390 | if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old))) |
381 | return -EFAULT; | 391 | return -EFAULT; |
382 | 392 | ||
383 | out->uid = tbuf_old.msg_perm.uid; | 393 | out->uid = tbuf_old.msg_perm.uid; |
384 | out->gid = tbuf_old.msg_perm.gid; | 394 | out->gid = tbuf_old.msg_perm.gid; |
385 | out->mode = tbuf_old.msg_perm.mode; | 395 | out->mode = tbuf_old.msg_perm.mode; |
386 | 396 | ||
387 | if (tbuf_old.msg_qbytes == 0) | 397 | if (tbuf_old.msg_qbytes == 0) |
388 | out->qbytes = tbuf_old.msg_lqbytes; | 398 | out->qbytes = tbuf_old.msg_lqbytes; |
389 | else | 399 | else |
390 | out->qbytes = tbuf_old.msg_qbytes; | 400 | out->qbytes = tbuf_old.msg_qbytes; |
391 | 401 | ||
392 | return 0; | 402 | return 0; |
393 | } | 403 | } |
394 | default: | 404 | default: |
395 | return -EINVAL; | 405 | return -EINVAL; |
396 | } | 406 | } |
397 | } | 407 | } |
398 | 408 | ||
399 | asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf) | 409 | asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf) |
400 | { | 410 | { |
401 | struct kern_ipc_perm *ipcp; | 411 | struct kern_ipc_perm *ipcp; |
402 | struct msq_setbuf uninitialized_var(setbuf); | 412 | struct msq_setbuf uninitialized_var(setbuf); |
403 | struct msg_queue *msq; | 413 | struct msg_queue *msq; |
404 | int err, version; | 414 | int err, version; |
405 | struct ipc_namespace *ns; | 415 | struct ipc_namespace *ns; |
406 | 416 | ||
407 | if (msqid < 0 || cmd < 0) | 417 | if (msqid < 0 || cmd < 0) |
408 | return -EINVAL; | 418 | return -EINVAL; |
409 | 419 | ||
410 | version = ipc_parse_version(&cmd); | 420 | version = ipc_parse_version(&cmd); |
411 | ns = current->nsproxy->ipc_ns; | 421 | ns = current->nsproxy->ipc_ns; |
412 | 422 | ||
413 | switch (cmd) { | 423 | switch (cmd) { |
414 | case IPC_INFO: | 424 | case IPC_INFO: |
415 | case MSG_INFO: | 425 | case MSG_INFO: |
416 | { | 426 | { |
417 | struct msginfo msginfo; | 427 | struct msginfo msginfo; |
418 | int max_id; | 428 | int max_id; |
419 | 429 | ||
420 | if (!buf) | 430 | if (!buf) |
421 | return -EFAULT; | 431 | return -EFAULT; |
422 | /* | 432 | /* |
423 | * We must not return kernel stack data. | 433 | * We must not return kernel stack data. |
424 | * due to padding, it's not enough | 434 | * due to padding, it's not enough |
425 | * to set all member fields. | 435 | * to set all member fields. |
426 | */ | 436 | */ |
427 | err = security_msg_queue_msgctl(NULL, cmd); | 437 | err = security_msg_queue_msgctl(NULL, cmd); |
428 | if (err) | 438 | if (err) |
429 | return err; | 439 | return err; |
430 | 440 | ||
431 | memset(&msginfo, 0, sizeof(msginfo)); | 441 | memset(&msginfo, 0, sizeof(msginfo)); |
432 | msginfo.msgmni = ns->msg_ctlmni; | 442 | msginfo.msgmni = ns->msg_ctlmni; |
433 | msginfo.msgmax = ns->msg_ctlmax; | 443 | msginfo.msgmax = ns->msg_ctlmax; |
434 | msginfo.msgmnb = ns->msg_ctlmnb; | 444 | msginfo.msgmnb = ns->msg_ctlmnb; |
435 | msginfo.msgssz = MSGSSZ; | 445 | msginfo.msgssz = MSGSSZ; |
436 | msginfo.msgseg = MSGSEG; | 446 | msginfo.msgseg = MSGSEG; |
437 | mutex_lock(&msg_ids(ns).mutex); | 447 | mutex_lock(&msg_ids(ns).mutex); |
438 | if (cmd == MSG_INFO) { | 448 | if (cmd == MSG_INFO) { |
439 | msginfo.msgpool = msg_ids(ns).in_use; | 449 | msginfo.msgpool = msg_ids(ns).in_use; |
440 | msginfo.msgmap = atomic_read(&msg_hdrs); | 450 | msginfo.msgmap = atomic_read(&msg_hdrs); |
441 | msginfo.msgtql = atomic_read(&msg_bytes); | 451 | msginfo.msgtql = atomic_read(&msg_bytes); |
442 | } else { | 452 | } else { |
443 | msginfo.msgmap = MSGMAP; | 453 | msginfo.msgmap = MSGMAP; |
444 | msginfo.msgpool = MSGPOOL; | 454 | msginfo.msgpool = MSGPOOL; |
445 | msginfo.msgtql = MSGTQL; | 455 | msginfo.msgtql = MSGTQL; |
446 | } | 456 | } |
447 | max_id = ipc_get_maxid(&msg_ids(ns)); | 457 | max_id = ipc_get_maxid(&msg_ids(ns)); |
448 | mutex_unlock(&msg_ids(ns).mutex); | 458 | mutex_unlock(&msg_ids(ns).mutex); |
449 | if (copy_to_user(buf, &msginfo, sizeof(struct msginfo))) | 459 | if (copy_to_user(buf, &msginfo, sizeof(struct msginfo))) |
450 | return -EFAULT; | 460 | return -EFAULT; |
451 | return (max_id < 0) ? 0 : max_id; | 461 | return (max_id < 0) ? 0 : max_id; |
452 | } | 462 | } |
453 | case MSG_STAT: /* msqid is an index rather than a msg queue id */ | 463 | case MSG_STAT: /* msqid is an index rather than a msg queue id */ |
454 | case IPC_STAT: | 464 | case IPC_STAT: |
455 | { | 465 | { |
456 | struct msqid64_ds tbuf; | 466 | struct msqid64_ds tbuf; |
457 | int success_return; | 467 | int success_return; |
458 | 468 | ||
459 | if (!buf) | 469 | if (!buf) |
460 | return -EFAULT; | 470 | return -EFAULT; |
461 | 471 | ||
462 | if (cmd == MSG_STAT) { | 472 | if (cmd == MSG_STAT) { |
463 | msq = msg_lock(ns, msqid); | 473 | msq = msg_lock(ns, msqid); |
464 | if (IS_ERR(msq)) | 474 | if (IS_ERR(msq)) |
465 | return PTR_ERR(msq); | 475 | return PTR_ERR(msq); |
466 | success_return = msq->q_perm.id; | 476 | success_return = msq->q_perm.id; |
467 | } else { | 477 | } else { |
468 | msq = msg_lock_check(ns, msqid); | 478 | msq = msg_lock_check(ns, msqid); |
469 | if (IS_ERR(msq)) | 479 | if (IS_ERR(msq)) |
470 | return PTR_ERR(msq); | 480 | return PTR_ERR(msq); |
471 | success_return = 0; | 481 | success_return = 0; |
472 | } | 482 | } |
473 | err = -EACCES; | 483 | err = -EACCES; |
474 | if (ipcperms(&msq->q_perm, S_IRUGO)) | 484 | if (ipcperms(&msq->q_perm, S_IRUGO)) |
475 | goto out_unlock; | 485 | goto out_unlock; |
476 | 486 | ||
477 | err = security_msg_queue_msgctl(msq, cmd); | 487 | err = security_msg_queue_msgctl(msq, cmd); |
478 | if (err) | 488 | if (err) |
479 | goto out_unlock; | 489 | goto out_unlock; |
480 | 490 | ||
481 | memset(&tbuf, 0, sizeof(tbuf)); | 491 | memset(&tbuf, 0, sizeof(tbuf)); |
482 | 492 | ||
483 | kernel_to_ipc64_perm(&msq->q_perm, &tbuf.msg_perm); | 493 | kernel_to_ipc64_perm(&msq->q_perm, &tbuf.msg_perm); |
484 | tbuf.msg_stime = msq->q_stime; | 494 | tbuf.msg_stime = msq->q_stime; |
485 | tbuf.msg_rtime = msq->q_rtime; | 495 | tbuf.msg_rtime = msq->q_rtime; |
486 | tbuf.msg_ctime = msq->q_ctime; | 496 | tbuf.msg_ctime = msq->q_ctime; |
487 | tbuf.msg_cbytes = msq->q_cbytes; | 497 | tbuf.msg_cbytes = msq->q_cbytes; |
488 | tbuf.msg_qnum = msq->q_qnum; | 498 | tbuf.msg_qnum = msq->q_qnum; |
489 | tbuf.msg_qbytes = msq->q_qbytes; | 499 | tbuf.msg_qbytes = msq->q_qbytes; |
490 | tbuf.msg_lspid = msq->q_lspid; | 500 | tbuf.msg_lspid = msq->q_lspid; |
491 | tbuf.msg_lrpid = msq->q_lrpid; | 501 | tbuf.msg_lrpid = msq->q_lrpid; |
492 | msg_unlock(msq); | 502 | msg_unlock(msq); |
493 | if (copy_msqid_to_user(buf, &tbuf, version)) | 503 | if (copy_msqid_to_user(buf, &tbuf, version)) |
494 | return -EFAULT; | 504 | return -EFAULT; |
495 | return success_return; | 505 | return success_return; |
496 | } | 506 | } |
497 | case IPC_SET: | 507 | case IPC_SET: |
498 | if (!buf) | 508 | if (!buf) |
499 | return -EFAULT; | 509 | return -EFAULT; |
500 | if (copy_msqid_from_user(&setbuf, buf, version)) | 510 | if (copy_msqid_from_user(&setbuf, buf, version)) |
501 | return -EFAULT; | 511 | return -EFAULT; |
502 | break; | 512 | break; |
503 | case IPC_RMID: | 513 | case IPC_RMID: |
504 | break; | 514 | break; |
505 | default: | 515 | default: |
506 | return -EINVAL; | 516 | return -EINVAL; |
507 | } | 517 | } |
508 | 518 | ||
509 | mutex_lock(&msg_ids(ns).mutex); | 519 | mutex_lock(&msg_ids(ns).mutex); |
510 | msq = msg_lock_check(ns, msqid); | 520 | msq = msg_lock_check(ns, msqid); |
511 | if (IS_ERR(msq)) { | 521 | if (IS_ERR(msq)) { |
512 | err = PTR_ERR(msq); | 522 | err = PTR_ERR(msq); |
513 | goto out_up; | 523 | goto out_up; |
514 | } | 524 | } |
515 | 525 | ||
516 | ipcp = &msq->q_perm; | 526 | ipcp = &msq->q_perm; |
517 | 527 | ||
518 | err = audit_ipc_obj(ipcp); | 528 | err = audit_ipc_obj(ipcp); |
519 | if (err) | 529 | if (err) |
520 | goto out_unlock_up; | 530 | goto out_unlock_up; |
521 | if (cmd == IPC_SET) { | 531 | if (cmd == IPC_SET) { |
522 | err = audit_ipc_set_perm(setbuf.qbytes, setbuf.uid, setbuf.gid, | 532 | err = audit_ipc_set_perm(setbuf.qbytes, setbuf.uid, setbuf.gid, |
523 | setbuf.mode); | 533 | setbuf.mode); |
524 | if (err) | 534 | if (err) |
525 | goto out_unlock_up; | 535 | goto out_unlock_up; |
526 | } | 536 | } |
527 | 537 | ||
528 | err = -EPERM; | 538 | err = -EPERM; |
529 | if (current->euid != ipcp->cuid && | 539 | if (current->euid != ipcp->cuid && |
530 | current->euid != ipcp->uid && !capable(CAP_SYS_ADMIN)) | 540 | current->euid != ipcp->uid && !capable(CAP_SYS_ADMIN)) |
531 | /* We _could_ check for CAP_CHOWN above, but we don't */ | 541 | /* We _could_ check for CAP_CHOWN above, but we don't */ |
532 | goto out_unlock_up; | 542 | goto out_unlock_up; |
533 | 543 | ||
534 | err = security_msg_queue_msgctl(msq, cmd); | 544 | err = security_msg_queue_msgctl(msq, cmd); |
535 | if (err) | 545 | if (err) |
536 | goto out_unlock_up; | 546 | goto out_unlock_up; |
537 | 547 | ||
538 | switch (cmd) { | 548 | switch (cmd) { |
539 | case IPC_SET: | 549 | case IPC_SET: |
540 | { | 550 | { |
541 | err = -EPERM; | 551 | err = -EPERM; |
542 | if (setbuf.qbytes > ns->msg_ctlmnb && !capable(CAP_SYS_RESOURCE)) | 552 | if (setbuf.qbytes > ns->msg_ctlmnb && !capable(CAP_SYS_RESOURCE)) |
543 | goto out_unlock_up; | 553 | goto out_unlock_up; |
544 | 554 | ||
545 | msq->q_qbytes = setbuf.qbytes; | 555 | msq->q_qbytes = setbuf.qbytes; |
546 | 556 | ||
547 | ipcp->uid = setbuf.uid; | 557 | ipcp->uid = setbuf.uid; |
548 | ipcp->gid = setbuf.gid; | 558 | ipcp->gid = setbuf.gid; |
549 | ipcp->mode = (ipcp->mode & ~S_IRWXUGO) | | 559 | ipcp->mode = (ipcp->mode & ~S_IRWXUGO) | |
550 | (S_IRWXUGO & setbuf.mode); | 560 | (S_IRWXUGO & setbuf.mode); |
551 | msq->q_ctime = get_seconds(); | 561 | msq->q_ctime = get_seconds(); |
552 | /* sleeping receivers might be excluded by | 562 | /* sleeping receivers might be excluded by |
553 | * stricter permissions. | 563 | * stricter permissions. |
554 | */ | 564 | */ |
555 | expunge_all(msq, -EAGAIN); | 565 | expunge_all(msq, -EAGAIN); |
556 | /* sleeping senders might be able to send | 566 | /* sleeping senders might be able to send |
557 | * due to a larger queue size. | 567 | * due to a larger queue size. |
558 | */ | 568 | */ |
559 | ss_wakeup(&msq->q_senders, 0); | 569 | ss_wakeup(&msq->q_senders, 0); |
560 | msg_unlock(msq); | 570 | msg_unlock(msq); |
561 | break; | 571 | break; |
562 | } | 572 | } |
563 | case IPC_RMID: | 573 | case IPC_RMID: |
564 | freeque(ns, msq); | 574 | freeque(ns, msq); |
565 | break; | 575 | break; |
566 | } | 576 | } |
567 | err = 0; | 577 | err = 0; |
568 | out_up: | 578 | out_up: |
569 | mutex_unlock(&msg_ids(ns).mutex); | 579 | mutex_unlock(&msg_ids(ns).mutex); |
570 | return err; | 580 | return err; |
571 | out_unlock_up: | 581 | out_unlock_up: |
572 | msg_unlock(msq); | 582 | msg_unlock(msq); |
573 | goto out_up; | 583 | goto out_up; |
574 | out_unlock: | 584 | out_unlock: |
575 | msg_unlock(msq); | 585 | msg_unlock(msq); |
576 | return err; | 586 | return err; |
577 | } | 587 | } |
578 | 588 | ||
579 | static int testmsg(struct msg_msg *msg, long type, int mode) | 589 | static int testmsg(struct msg_msg *msg, long type, int mode) |
580 | { | 590 | { |
581 | switch(mode) | 591 | switch(mode) |
582 | { | 592 | { |
583 | case SEARCH_ANY: | 593 | case SEARCH_ANY: |
584 | return 1; | 594 | return 1; |
585 | case SEARCH_LESSEQUAL: | 595 | case SEARCH_LESSEQUAL: |
586 | if (msg->m_type <=type) | 596 | if (msg->m_type <=type) |
587 | return 1; | 597 | return 1; |
588 | break; | 598 | break; |
589 | case SEARCH_EQUAL: | 599 | case SEARCH_EQUAL: |
590 | if (msg->m_type == type) | 600 | if (msg->m_type == type) |
591 | return 1; | 601 | return 1; |
592 | break; | 602 | break; |
593 | case SEARCH_NOTEQUAL: | 603 | case SEARCH_NOTEQUAL: |
594 | if (msg->m_type != type) | 604 | if (msg->m_type != type) |
595 | return 1; | 605 | return 1; |
596 | break; | 606 | break; |
597 | } | 607 | } |
598 | return 0; | 608 | return 0; |
599 | } | 609 | } |
600 | 610 | ||
/*
 * Try to hand @msg directly to one of the receivers sleeping on @msq,
 * bypassing the message list entirely.
 *
 * Returns 1 if a receiver consumed the message, 0 if it still has to be
 * enqueued.  Called with the queue lock held.
 *
 * A matching receiver whose buffer is too small (r_maxsize < m_ts) is
 * woken with -E2BIG, removed from the list, and the scan continues.
 */
static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg)
{
	struct list_head *tmp;

	tmp = msq->q_receivers.next;
	while (tmp != &msq->q_receivers) {
		struct msg_receiver *msr;

		/* advance before any list_del() below invalidates the node */
		msr = list_entry(tmp, struct msg_receiver, r_list);
		tmp = tmp->next;
		if (testmsg(msg, msr->r_msgtype, msr->r_mode) &&
		    !security_msg_queue_msgrcv(msq, msg, msr->r_tsk,
					       msr->r_msgtype, msr->r_mode)) {

			list_del(&msr->r_list);
			if (msr->r_maxsize < msg->m_ts) {
				/*
				 * Receiver's buffer is too small: wake it
				 * with -E2BIG.  r_msg is first set to NULL
				 * and only after wake_up_process() + barrier
				 * set to its final value - the lockless
				 * reader in do_msgrcv() spins while r_msg
				 * is NULL, so it never observes a torn
				 * hand-off.
				 */
				msr->r_msg = NULL;
				wake_up_process(msr->r_tsk);
				smp_mb();
				msr->r_msg = ERR_PTR(-E2BIG);
			} else {
				/*
				 * Hand the message over; same NULL/barrier
				 * protocol as above.
				 */
				msr->r_msg = NULL;
				msq->q_lrpid = task_pid_vnr(msr->r_tsk);
				msq->q_rtime = get_seconds();
				wake_up_process(msr->r_tsk);
				smp_mb();
				msr->r_msg = msg;

				return 1;
			}
		}
	}
	return 0;
}
635 | 645 | ||
/*
 * do_msgsnd - send a message to a System V message queue
 * @msqid:  queue id in the current task's ipc namespace
 * @mtype:  message type, must be >= 1
 * @mtext:  user-space pointer to the message body
 * @msgsz:  body size in bytes, bounded by ns->msg_ctlmax
 * @msgflg: IPC_NOWAIT to fail with -EAGAIN rather than sleep when the
 *          queue is full
 *
 * Returns 0 on success or a negative error code.
 */
long do_msgsnd(int msqid, long mtype, void __user *mtext,
		size_t msgsz, int msgflg)
{
	struct msg_queue *msq;
	struct msg_msg *msg;
	int err;
	struct ipc_namespace *ns;

	ns = current->nsproxy->ipc_ns;

	if (msgsz > ns->msg_ctlmax || (long) msgsz < 0 || msqid < 0)
		return -EINVAL;
	if (mtype < 1)
		return -EINVAL;

	/* Copy the body from user space before taking the queue lock. */
	msg = load_msg(mtext, msgsz);
	if (IS_ERR(msg))
		return PTR_ERR(msg);

	msg->m_type = mtype;
	msg->m_ts = msgsz;

	msq = msg_lock_check(ns, msqid);
	if (IS_ERR(msq)) {
		err = PTR_ERR(msq);
		goto out_free;
	}

	for (;;) {
		struct msg_sender s;

		err = -EACCES;
		if (ipcperms(&msq->q_perm, S_IWUGO))
			goto out_unlock_free;

		err = security_msg_queue_msgsnd(msq, msg, msgflg);
		if (err)
			goto out_unlock_free;

		/*
		 * There is room if both the byte total and the message
		 * count stay within q_qbytes (the message count, too, is
		 * bounded by q_qbytes here).
		 */
		if (msgsz + msq->q_cbytes <= msq->q_qbytes &&
				1 + msq->q_qnum <= msq->q_qbytes) {
			break;
		}

		/* queue full, wait: */
		if (msgflg & IPC_NOWAIT) {
			err = -EAGAIN;
			goto out_unlock_free;
		}
		/*
		 * Sleep until a receiver makes room.  Take a reference so
		 * the queue structure stays alive while the lock is
		 * dropped; q_perm.deleted is rechecked after relocking in
		 * case the queue was removed while we slept.
		 */
		ss_add(msq, &s);
		ipc_rcu_getref(msq);
		msg_unlock(msq);
		schedule();

		ipc_lock_by_ptr(&msq->q_perm);
		ipc_rcu_putref(msq);
		if (msq->q_perm.deleted) {
			err = -EIDRM;
			goto out_unlock_free;
		}
		ss_del(&s);

		if (signal_pending(current)) {
			err = -ERESTARTNOHAND;
			goto out_unlock_free;
		}
	}

	msq->q_lspid = task_tgid_vnr(current);
	msq->q_stime = get_seconds();

	if (!pipelined_send(msq, msg)) {
		/* no one is waiting for this message, enqueue it */
		list_add_tail(&msg->m_list, &msq->q_messages);
		msq->q_cbytes += msgsz;
		msq->q_qnum++;
		atomic_add(msgsz, &msg_bytes);
		atomic_inc(&msg_hdrs);
	}

	err = 0;
	/* ownership of msg passed to the queue or a pipelined receiver */
	msg = NULL;

out_unlock_free:
	msg_unlock(msq);
out_free:
	if (msg != NULL)
		free_msg(msg);
	return err;
}
726 | 736 | ||
727 | asmlinkage long | 737 | asmlinkage long |
728 | sys_msgsnd(int msqid, struct msgbuf __user *msgp, size_t msgsz, int msgflg) | 738 | sys_msgsnd(int msqid, struct msgbuf __user *msgp, size_t msgsz, int msgflg) |
729 | { | 739 | { |
730 | long mtype; | 740 | long mtype; |
731 | 741 | ||
732 | if (get_user(mtype, &msgp->mtype)) | 742 | if (get_user(mtype, &msgp->mtype)) |
733 | return -EFAULT; | 743 | return -EFAULT; |
734 | return do_msgsnd(msqid, mtype, msgp->mtext, msgsz, msgflg); | 744 | return do_msgsnd(msqid, mtype, msgp->mtext, msgsz, msgflg); |
735 | } | 745 | } |
736 | 746 | ||
737 | static inline int convert_mode(long *msgtyp, int msgflg) | 747 | static inline int convert_mode(long *msgtyp, int msgflg) |
738 | { | 748 | { |
739 | /* | 749 | /* |
740 | * find message of correct type. | 750 | * find message of correct type. |
741 | * msgtyp = 0 => get first. | 751 | * msgtyp = 0 => get first. |
742 | * msgtyp > 0 => get first message of matching type. | 752 | * msgtyp > 0 => get first message of matching type. |
743 | * msgtyp < 0 => get message with least type must be < abs(msgtype). | 753 | * msgtyp < 0 => get message with least type must be < abs(msgtype). |
744 | */ | 754 | */ |
745 | if (*msgtyp == 0) | 755 | if (*msgtyp == 0) |
746 | return SEARCH_ANY; | 756 | return SEARCH_ANY; |
747 | if (*msgtyp < 0) { | 757 | if (*msgtyp < 0) { |
748 | *msgtyp = -*msgtyp; | 758 | *msgtyp = -*msgtyp; |
749 | return SEARCH_LESSEQUAL; | 759 | return SEARCH_LESSEQUAL; |
750 | } | 760 | } |
751 | if (msgflg & MSG_EXCEPT) | 761 | if (msgflg & MSG_EXCEPT) |
752 | return SEARCH_NOTEQUAL; | 762 | return SEARCH_NOTEQUAL; |
753 | return SEARCH_EQUAL; | 763 | return SEARCH_EQUAL; |
754 | } | 764 | } |
755 | 765 | ||
756 | long do_msgrcv(int msqid, long *pmtype, void __user *mtext, | 766 | long do_msgrcv(int msqid, long *pmtype, void __user *mtext, |
757 | size_t msgsz, long msgtyp, int msgflg) | 767 | size_t msgsz, long msgtyp, int msgflg) |
758 | { | 768 | { |
759 | struct msg_queue *msq; | 769 | struct msg_queue *msq; |
760 | struct msg_msg *msg; | 770 | struct msg_msg *msg; |
761 | int mode; | 771 | int mode; |
762 | struct ipc_namespace *ns; | 772 | struct ipc_namespace *ns; |
763 | 773 | ||
764 | if (msqid < 0 || (long) msgsz < 0) | 774 | if (msqid < 0 || (long) msgsz < 0) |
765 | return -EINVAL; | 775 | return -EINVAL; |
766 | mode = convert_mode(&msgtyp, msgflg); | 776 | mode = convert_mode(&msgtyp, msgflg); |
767 | ns = current->nsproxy->ipc_ns; | 777 | ns = current->nsproxy->ipc_ns; |
768 | 778 | ||
769 | msq = msg_lock_check(ns, msqid); | 779 | msq = msg_lock_check(ns, msqid); |
770 | if (IS_ERR(msq)) | 780 | if (IS_ERR(msq)) |
771 | return PTR_ERR(msq); | 781 | return PTR_ERR(msq); |
772 | 782 | ||
773 | for (;;) { | 783 | for (;;) { |
774 | struct msg_receiver msr_d; | 784 | struct msg_receiver msr_d; |
775 | struct list_head *tmp; | 785 | struct list_head *tmp; |
776 | 786 | ||
777 | msg = ERR_PTR(-EACCES); | 787 | msg = ERR_PTR(-EACCES); |
778 | if (ipcperms(&msq->q_perm, S_IRUGO)) | 788 | if (ipcperms(&msq->q_perm, S_IRUGO)) |
779 | goto out_unlock; | 789 | goto out_unlock; |
780 | 790 | ||
781 | msg = ERR_PTR(-EAGAIN); | 791 | msg = ERR_PTR(-EAGAIN); |
782 | tmp = msq->q_messages.next; | 792 | tmp = msq->q_messages.next; |
783 | while (tmp != &msq->q_messages) { | 793 | while (tmp != &msq->q_messages) { |
784 | struct msg_msg *walk_msg; | 794 | struct msg_msg *walk_msg; |
785 | 795 | ||
786 | walk_msg = list_entry(tmp, struct msg_msg, m_list); | 796 | walk_msg = list_entry(tmp, struct msg_msg, m_list); |
787 | if (testmsg(walk_msg, msgtyp, mode) && | 797 | if (testmsg(walk_msg, msgtyp, mode) && |
788 | !security_msg_queue_msgrcv(msq, walk_msg, current, | 798 | !security_msg_queue_msgrcv(msq, walk_msg, current, |
789 | msgtyp, mode)) { | 799 | msgtyp, mode)) { |
790 | 800 | ||
791 | msg = walk_msg; | 801 | msg = walk_msg; |
792 | if (mode == SEARCH_LESSEQUAL && | 802 | if (mode == SEARCH_LESSEQUAL && |
793 | walk_msg->m_type != 1) { | 803 | walk_msg->m_type != 1) { |
794 | msg = walk_msg; | 804 | msg = walk_msg; |
795 | msgtyp = walk_msg->m_type - 1; | 805 | msgtyp = walk_msg->m_type - 1; |
796 | } else { | 806 | } else { |
797 | msg = walk_msg; | 807 | msg = walk_msg; |
798 | break; | 808 | break; |
799 | } | 809 | } |
800 | } | 810 | } |
801 | tmp = tmp->next; | 811 | tmp = tmp->next; |
802 | } | 812 | } |
803 | if (!IS_ERR(msg)) { | 813 | if (!IS_ERR(msg)) { |
804 | /* | 814 | /* |
805 | * Found a suitable message. | 815 | * Found a suitable message. |
806 | * Unlink it from the queue. | 816 | * Unlink it from the queue. |
807 | */ | 817 | */ |
808 | if ((msgsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) { | 818 | if ((msgsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) { |
809 | msg = ERR_PTR(-E2BIG); | 819 | msg = ERR_PTR(-E2BIG); |
810 | goto out_unlock; | 820 | goto out_unlock; |
811 | } | 821 | } |
812 | list_del(&msg->m_list); | 822 | list_del(&msg->m_list); |
813 | msq->q_qnum--; | 823 | msq->q_qnum--; |
814 | msq->q_rtime = get_seconds(); | 824 | msq->q_rtime = get_seconds(); |
815 | msq->q_lrpid = task_tgid_vnr(current); | 825 | msq->q_lrpid = task_tgid_vnr(current); |
816 | msq->q_cbytes -= msg->m_ts; | 826 | msq->q_cbytes -= msg->m_ts; |
817 | atomic_sub(msg->m_ts, &msg_bytes); | 827 | atomic_sub(msg->m_ts, &msg_bytes); |
818 | atomic_dec(&msg_hdrs); | 828 | atomic_dec(&msg_hdrs); |
819 | ss_wakeup(&msq->q_senders, 0); | 829 | ss_wakeup(&msq->q_senders, 0); |
820 | msg_unlock(msq); | 830 | msg_unlock(msq); |
821 | break; | 831 | break; |
822 | } | 832 | } |
823 | /* No message waiting. Wait for a message */ | 833 | /* No message waiting. Wait for a message */ |
824 | if (msgflg & IPC_NOWAIT) { | 834 | if (msgflg & IPC_NOWAIT) { |
825 | msg = ERR_PTR(-ENOMSG); | 835 | msg = ERR_PTR(-ENOMSG); |
826 | goto out_unlock; | 836 | goto out_unlock; |
827 | } | 837 | } |
828 | list_add_tail(&msr_d.r_list, &msq->q_receivers); | 838 | list_add_tail(&msr_d.r_list, &msq->q_receivers); |
829 | msr_d.r_tsk = current; | 839 | msr_d.r_tsk = current; |
830 | msr_d.r_msgtype = msgtyp; | 840 | msr_d.r_msgtype = msgtyp; |
831 | msr_d.r_mode = mode; | 841 | msr_d.r_mode = mode; |
832 | if (msgflg & MSG_NOERROR) | 842 | if (msgflg & MSG_NOERROR) |
833 | msr_d.r_maxsize = INT_MAX; | 843 | msr_d.r_maxsize = INT_MAX; |
834 | else | 844 | else |
835 | msr_d.r_maxsize = msgsz; | 845 | msr_d.r_maxsize = msgsz; |
836 | msr_d.r_msg = ERR_PTR(-EAGAIN); | 846 | msr_d.r_msg = ERR_PTR(-EAGAIN); |
837 | current->state = TASK_INTERRUPTIBLE; | 847 | current->state = TASK_INTERRUPTIBLE; |
838 | msg_unlock(msq); | 848 | msg_unlock(msq); |
839 | 849 | ||
840 | schedule(); | 850 | schedule(); |
841 | 851 | ||
842 | /* Lockless receive, part 1: | 852 | /* Lockless receive, part 1: |
843 | * Disable preemption. We don't hold a reference to the queue | 853 | * Disable preemption. We don't hold a reference to the queue |
844 | * and getting a reference would defeat the idea of a lockless | 854 | * and getting a reference would defeat the idea of a lockless |
845 | * operation, thus the code relies on rcu to guarantee the | 855 | * operation, thus the code relies on rcu to guarantee the |
846 | * existance of msq: | 856 | * existance of msq: |
847 | * Prior to destruction, expunge_all(-EIRDM) changes r_msg. | 857 | * Prior to destruction, expunge_all(-EIRDM) changes r_msg. |
848 | * Thus if r_msg is -EAGAIN, then the queue not yet destroyed. | 858 | * Thus if r_msg is -EAGAIN, then the queue not yet destroyed. |
849 | * rcu_read_lock() prevents preemption between reading r_msg | 859 | * rcu_read_lock() prevents preemption between reading r_msg |
850 | * and the spin_lock() inside ipc_lock_by_ptr(). | 860 | * and the spin_lock() inside ipc_lock_by_ptr(). |
851 | */ | 861 | */ |
852 | rcu_read_lock(); | 862 | rcu_read_lock(); |
853 | 863 | ||
854 | /* Lockless receive, part 2: | 864 | /* Lockless receive, part 2: |
855 | * Wait until pipelined_send or expunge_all are outside of | 865 | * Wait until pipelined_send or expunge_all are outside of |
856 | * wake_up_process(). There is a race with exit(), see | 866 | * wake_up_process(). There is a race with exit(), see |
857 | * ipc/mqueue.c for the details. | 867 | * ipc/mqueue.c for the details. |
858 | */ | 868 | */ |
859 | msg = (struct msg_msg*)msr_d.r_msg; | 869 | msg = (struct msg_msg*)msr_d.r_msg; |
860 | while (msg == NULL) { | 870 | while (msg == NULL) { |
861 | cpu_relax(); | 871 | cpu_relax(); |
862 | msg = (struct msg_msg *)msr_d.r_msg; | 872 | msg = (struct msg_msg *)msr_d.r_msg; |
863 | } | 873 | } |
864 | 874 | ||
865 | /* Lockless receive, part 3: | 875 | /* Lockless receive, part 3: |
866 | * If there is a message or an error then accept it without | 876 | * If there is a message or an error then accept it without |
867 | * locking. | 877 | * locking. |
868 | */ | 878 | */ |
869 | if (msg != ERR_PTR(-EAGAIN)) { | 879 | if (msg != ERR_PTR(-EAGAIN)) { |
870 | rcu_read_unlock(); | 880 | rcu_read_unlock(); |
871 | break; | 881 | break; |
872 | } | 882 | } |
873 | 883 | ||
874 | /* Lockless receive, part 3: | 884 | /* Lockless receive, part 3: |
875 | * Acquire the queue spinlock. | 885 | * Acquire the queue spinlock. |
876 | */ | 886 | */ |
877 | ipc_lock_by_ptr(&msq->q_perm); | 887 | ipc_lock_by_ptr(&msq->q_perm); |
878 | rcu_read_unlock(); | 888 | rcu_read_unlock(); |
879 | 889 | ||
880 | /* Lockless receive, part 4: | 890 | /* Lockless receive, part 4: |
881 | * Repeat test after acquiring the spinlock. | 891 | * Repeat test after acquiring the spinlock. |
882 | */ | 892 | */ |
883 | msg = (struct msg_msg*)msr_d.r_msg; | 893 | msg = (struct msg_msg*)msr_d.r_msg; |
884 | if (msg != ERR_PTR(-EAGAIN)) | 894 | if (msg != ERR_PTR(-EAGAIN)) |
885 | goto out_unlock; | 895 | goto out_unlock; |
886 | 896 | ||
887 | list_del(&msr_d.r_list); | 897 | list_del(&msr_d.r_list); |
888 | if (signal_pending(current)) { | 898 | if (signal_pending(current)) { |
889 | msg = ERR_PTR(-ERESTARTNOHAND); | 899 | msg = ERR_PTR(-ERESTARTNOHAND); |
890 | out_unlock: | 900 | out_unlock: |
891 | msg_unlock(msq); | 901 | msg_unlock(msq); |
892 | break; | 902 | break; |
893 | } | 903 | } |
894 | } | 904 | } |
895 | if (IS_ERR(msg)) | 905 | if (IS_ERR(msg)) |
896 | return PTR_ERR(msg); | 906 | return PTR_ERR(msg); |
897 | 907 | ||
898 | msgsz = (msgsz > msg->m_ts) ? msg->m_ts : msgsz; | 908 | msgsz = (msgsz > msg->m_ts) ? msg->m_ts : msgsz; |
899 | *pmtype = msg->m_type; | 909 | *pmtype = msg->m_type; |
900 | if (store_msg(mtext, msg, msgsz)) | 910 | if (store_msg(mtext, msg, msgsz)) |
901 | msgsz = -EFAULT; | 911 | msgsz = -EFAULT; |
902 | 912 | ||
903 | free_msg(msg); | 913 | free_msg(msg); |
904 | 914 | ||
905 | return msgsz; | 915 | return msgsz; |
906 | } | 916 | } |
907 | 917 | ||
908 | asmlinkage long sys_msgrcv(int msqid, struct msgbuf __user *msgp, size_t msgsz, | 918 | asmlinkage long sys_msgrcv(int msqid, struct msgbuf __user *msgp, size_t msgsz, |
909 | long msgtyp, int msgflg) | 919 | long msgtyp, int msgflg) |
910 | { | 920 | { |
911 | long err, mtype; | 921 | long err, mtype; |
912 | 922 | ||
913 | err = do_msgrcv(msqid, &mtype, msgp->mtext, msgsz, msgtyp, msgflg); | 923 | err = do_msgrcv(msqid, &mtype, msgp->mtext, msgsz, msgtyp, msgflg); |
914 | if (err < 0) | 924 | if (err < 0) |
915 | goto out; | 925 | goto out; |
916 | 926 | ||
917 | if (put_user(mtype, &msgp->mtype)) | 927 | if (put_user(mtype, &msgp->mtype)) |
918 | err = -EFAULT; | 928 | err = -EFAULT; |
919 | out: | 929 | out: |
920 | return err; | 930 | return err; |
921 | } | 931 | } |
922 | 932 | ||
923 | #ifdef CONFIG_PROC_FS | 933 | #ifdef CONFIG_PROC_FS |
/*
 * Format one line of /proc/sysvipc/msg for the queue passed in @it:
 * key, id, mode, current byte and message counts, last sender and
 * receiver pids, uid/gid/cuid/cgid and the stime/rtime/ctime stamps.
 */
static int sysvipc_msg_proc_show(struct seq_file *s, void *it)
{
	struct msg_queue *msq = it;

	return seq_printf(s,
			"%10d %10d %4o %10lu %10lu %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu\n",
			msq->q_perm.key,
			msq->q_perm.id,
			msq->q_perm.mode,
			msq->q_cbytes,
			msq->q_qnum,
			msq->q_lspid,
			msq->q_lrpid,
			msq->q_perm.uid,
			msq->q_perm.gid,
			msq->q_perm.cuid,
			msq->q_perm.cgid,
			msq->q_stime,
			msq->q_rtime,
			msq->q_ctime);
}
945 | #endif | 955 | #endif |
946 | 956 |
ipc/sem.c
1 | /* | 1 | /* |
2 | * linux/ipc/sem.c | 2 | * linux/ipc/sem.c |
3 | * Copyright (C) 1992 Krishna Balasubramanian | 3 | * Copyright (C) 1992 Krishna Balasubramanian |
4 | * Copyright (C) 1995 Eric Schenk, Bruno Haible | 4 | * Copyright (C) 1995 Eric Schenk, Bruno Haible |
5 | * | 5 | * |
6 | * IMPLEMENTATION NOTES ON CODE REWRITE (Eric Schenk, January 1995): | 6 | * IMPLEMENTATION NOTES ON CODE REWRITE (Eric Schenk, January 1995): |
7 | * This code underwent a massive rewrite in order to solve some problems | 7 | * This code underwent a massive rewrite in order to solve some problems |
8 | * with the original code. In particular the original code failed to | 8 | * with the original code. In particular the original code failed to |
9 | * wake up processes that were waiting for semval to go to 0 if the | 9 | * wake up processes that were waiting for semval to go to 0 if the |
10 | * value went to 0 and was then incremented rapidly enough. In solving | 10 | * value went to 0 and was then incremented rapidly enough. In solving |
11 | * this problem I have also modified the implementation so that it | 11 | * this problem I have also modified the implementation so that it |
12 | * processes pending operations in a FIFO manner, thus giving a guarantee | 12 | * processes pending operations in a FIFO manner, thus giving a guarantee |
13 | * that processes waiting for a lock on the semaphore won't starve | 13 | * that processes waiting for a lock on the semaphore won't starve |
14 | * unless another locking process fails to unlock. | 14 | * unless another locking process fails to unlock. |
15 | * In addition the following two changes in behavior have been introduced: | 15 | * In addition the following two changes in behavior have been introduced: |
16 | * - The original implementation of semop returned the value | 16 | * - The original implementation of semop returned the value |
17 | * last semaphore element examined on success. This does not | 17 | * last semaphore element examined on success. This does not |
18 | * match the manual page specifications, and effectively | 18 | * match the manual page specifications, and effectively |
19 | * allows the user to read the semaphore even if they do not | 19 | * allows the user to read the semaphore even if they do not |
20 | * have read permissions. The implementation now returns 0 | 20 | * have read permissions. The implementation now returns 0 |
21 | * on success as stated in the manual page. | 21 | * on success as stated in the manual page. |
22 | * - There is some confusion over whether the set of undo adjustments | 22 | * - There is some confusion over whether the set of undo adjustments |
23 | * to be performed at exit should be done in an atomic manner. | 23 | * to be performed at exit should be done in an atomic manner. |
24 | * That is, if we are attempting to decrement the semval should we queue | 24 | * That is, if we are attempting to decrement the semval should we queue |
25 | * up and wait until we can do so legally? | 25 | * up and wait until we can do so legally? |
26 | * The original implementation attempted to do this. | 26 | * The original implementation attempted to do this. |
27 | * The current implementation does not do so. This is because I don't | 27 | * The current implementation does not do so. This is because I don't |
28 | * think it is the right thing (TM) to do, and because I couldn't | 28 | * think it is the right thing (TM) to do, and because I couldn't |
29 | * see a clean way to get the old behavior with the new design. | 29 | * see a clean way to get the old behavior with the new design. |
30 | * The POSIX standard and SVID should be consulted to determine | 30 | * The POSIX standard and SVID should be consulted to determine |
31 | * what behavior is mandated. | 31 | * what behavior is mandated. |
32 | * | 32 | * |
33 | * Further notes on refinement (Christoph Rohland, December 1998): | 33 | * Further notes on refinement (Christoph Rohland, December 1998): |
34 | * - The POSIX standard says, that the undo adjustments simply should | 34 | * - The POSIX standard says, that the undo adjustments simply should |
35 | * redo. So the current implementation is o.K. | 35 | * redo. So the current implementation is o.K. |
36 | * - The previous code had two flaws: | 36 | * - The previous code had two flaws: |
37 | * 1) It actively gave the semaphore to the next waiting process | 37 | * 1) It actively gave the semaphore to the next waiting process |
38 | * sleeping on the semaphore. Since this process did not have the | 38 | * sleeping on the semaphore. Since this process did not have the |
39 | * cpu this led to many unnecessary context switches and bad | 39 | * cpu this led to many unnecessary context switches and bad |
40 | * performance. Now we only check which process should be able to | 40 | * performance. Now we only check which process should be able to |
41 | * get the semaphore and if this process wants to reduce some | 41 | * get the semaphore and if this process wants to reduce some |
42 | * semaphore value we simply wake it up without doing the | 42 | * semaphore value we simply wake it up without doing the |
43 | * operation. So it has to try to get it later. Thus e.g. the | 43 | * operation. So it has to try to get it later. Thus e.g. the |
44 | * running process may reacquire the semaphore during the current | 44 | * running process may reacquire the semaphore during the current |
45 | * time slice. If it only waits for zero or increases the semaphore, | 45 | * time slice. If it only waits for zero or increases the semaphore, |
46 | * we do the operation in advance and wake it up. | 46 | * we do the operation in advance and wake it up. |
47 | * 2) It did not wake up all zero waiting processes. We try to do | 47 | * 2) It did not wake up all zero waiting processes. We try to do |
48 | * better but only get the semops right which only wait for zero or | 48 | * better but only get the semops right which only wait for zero or |
49 | * increase. If there are decrement operations in the operations | 49 | * increase. If there are decrement operations in the operations |
50 | * array we do the same as before. | 50 | * array we do the same as before. |
51 | * | 51 | * |
52 | * With the incarnation of O(1) scheduler, it becomes unnecessary to perform | 52 | * With the incarnation of O(1) scheduler, it becomes unnecessary to perform |
53 | * check/retry algorithm for waking up blocked processes as the new scheduler | 53 | * check/retry algorithm for waking up blocked processes as the new scheduler |
54 | * is better at handling thread switch than the old one. | 54 | * is better at handling thread switch than the old one. |
55 | * | 55 | * |
56 | * /proc/sysvipc/sem support (c) 1999 Dragos Acostachioaie <dragos@iname.com> | 56 | * /proc/sysvipc/sem support (c) 1999 Dragos Acostachioaie <dragos@iname.com> |
57 | * | 57 | * |
58 | * SMP-threaded, sysctl's added | 58 | * SMP-threaded, sysctl's added |
59 | * (c) 1999 Manfred Spraul <manfred@colorfullife.com> | 59 | * (c) 1999 Manfred Spraul <manfred@colorfullife.com> |
60 | * Enforced range limit on SEM_UNDO | 60 | * Enforced range limit on SEM_UNDO |
61 | * (c) 2001 Red Hat Inc <alan@redhat.com> | 61 | * (c) 2001 Red Hat Inc <alan@redhat.com> |
62 | * Lockless wakeup | 62 | * Lockless wakeup |
63 | * (c) 2003 Manfred Spraul <manfred@colorfullife.com> | 63 | * (c) 2003 Manfred Spraul <manfred@colorfullife.com> |
64 | * | 64 | * |
65 | * support for audit of ipc object properties and permission changes | 65 | * support for audit of ipc object properties and permission changes |
66 | * Dustin Kirkland <dustin.kirkland@us.ibm.com> | 66 | * Dustin Kirkland <dustin.kirkland@us.ibm.com> |
67 | * | 67 | * |
68 | * namespaces support | 68 | * namespaces support |
69 | * OpenVZ, SWsoft Inc. | 69 | * OpenVZ, SWsoft Inc. |
70 | * Pavel Emelianov <xemul@openvz.org> | 70 | * Pavel Emelianov <xemul@openvz.org> |
71 | */ | 71 | */ |
72 | 72 | ||
73 | #include <linux/slab.h> | 73 | #include <linux/slab.h> |
74 | #include <linux/spinlock.h> | 74 | #include <linux/spinlock.h> |
75 | #include <linux/init.h> | 75 | #include <linux/init.h> |
76 | #include <linux/proc_fs.h> | 76 | #include <linux/proc_fs.h> |
77 | #include <linux/time.h> | 77 | #include <linux/time.h> |
78 | #include <linux/security.h> | 78 | #include <linux/security.h> |
79 | #include <linux/syscalls.h> | 79 | #include <linux/syscalls.h> |
80 | #include <linux/audit.h> | 80 | #include <linux/audit.h> |
81 | #include <linux/capability.h> | 81 | #include <linux/capability.h> |
82 | #include <linux/seq_file.h> | 82 | #include <linux/seq_file.h> |
83 | #include <linux/mutex.h> | 83 | #include <linux/mutex.h> |
84 | #include <linux/nsproxy.h> | 84 | #include <linux/nsproxy.h> |
85 | 85 | ||
86 | #include <asm/uaccess.h> | 86 | #include <asm/uaccess.h> |
87 | #include "util.h" | 87 | #include "util.h" |
88 | 88 | ||
89 | #define sem_ids(ns) (*((ns)->ids[IPC_SEM_IDS])) | 89 | #define sem_ids(ns) (*((ns)->ids[IPC_SEM_IDS])) |
90 | 90 | ||
91 | #define sem_unlock(sma) ipc_unlock(&(sma)->sem_perm) | 91 | #define sem_unlock(sma) ipc_unlock(&(sma)->sem_perm) |
92 | #define sem_checkid(ns, sma, semid) \ | 92 | #define sem_checkid(ns, sma, semid) \ |
93 | ipc_checkid(&sem_ids(ns),&sma->sem_perm,semid) | 93 | ipc_checkid(&sem_ids(ns),&sma->sem_perm,semid) |
94 | #define sem_buildid(ns, id, seq) \ | 94 | #define sem_buildid(ns, id, seq) \ |
95 | ipc_buildid(&sem_ids(ns), id, seq) | 95 | ipc_buildid(&sem_ids(ns), id, seq) |
96 | 96 | ||
97 | static struct ipc_ids init_sem_ids; | 97 | static struct ipc_ids init_sem_ids; |
98 | 98 | ||
99 | static int newary(struct ipc_namespace *, struct ipc_params *); | 99 | static int newary(struct ipc_namespace *, struct ipc_params *); |
100 | static void freeary(struct ipc_namespace *, struct sem_array *); | 100 | static void freeary(struct ipc_namespace *, struct sem_array *); |
101 | #ifdef CONFIG_PROC_FS | 101 | #ifdef CONFIG_PROC_FS |
102 | static int sysvipc_sem_proc_show(struct seq_file *s, void *it); | 102 | static int sysvipc_sem_proc_show(struct seq_file *s, void *it); |
103 | #endif | 103 | #endif |
104 | 104 | ||
105 | #define SEMMSL_FAST 256 /* 512 bytes on stack */ | 105 | #define SEMMSL_FAST 256 /* 512 bytes on stack */ |
106 | #define SEMOPM_FAST 64 /* ~ 372 bytes on stack */ | 106 | #define SEMOPM_FAST 64 /* ~ 372 bytes on stack */ |
107 | 107 | ||
108 | /* | 108 | /* |
109 | * linked list protection: | 109 | * linked list protection: |
110 | * sem_undo.id_next, | 110 | * sem_undo.id_next, |
111 | * sem_array.sem_pending{,last}, | 111 | * sem_array.sem_pending{,last}, |
112 | * sem_array.sem_undo: sem_lock() for read/write | 112 | * sem_array.sem_undo: sem_lock() for read/write |
113 | * sem_undo.proc_next: only "current" is allowed to read/write that field. | 113 | * sem_undo.proc_next: only "current" is allowed to read/write that field. |
114 | * | 114 | * |
115 | */ | 115 | */ |
116 | 116 | ||
117 | #define sc_semmsl sem_ctls[0] | 117 | #define sc_semmsl sem_ctls[0] |
118 | #define sc_semmns sem_ctls[1] | 118 | #define sc_semmns sem_ctls[1] |
119 | #define sc_semopm sem_ctls[2] | 119 | #define sc_semopm sem_ctls[2] |
120 | #define sc_semmni sem_ctls[3] | 120 | #define sc_semmni sem_ctls[3] |
121 | 121 | ||
122 | static void __sem_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids) | 122 | static void __sem_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids) |
123 | { | 123 | { |
124 | ns->ids[IPC_SEM_IDS] = ids; | 124 | ns->ids[IPC_SEM_IDS] = ids; |
125 | ns->sc_semmsl = SEMMSL; | 125 | ns->sc_semmsl = SEMMSL; |
126 | ns->sc_semmns = SEMMNS; | 126 | ns->sc_semmns = SEMMNS; |
127 | ns->sc_semopm = SEMOPM; | 127 | ns->sc_semopm = SEMOPM; |
128 | ns->sc_semmni = SEMMNI; | 128 | ns->sc_semmni = SEMMNI; |
129 | ns->used_sems = 0; | 129 | ns->used_sems = 0; |
130 | ipc_init_ids(ids); | 130 | ipc_init_ids(ids); |
131 | } | 131 | } |
132 | 132 | ||
133 | int sem_init_ns(struct ipc_namespace *ns) | 133 | int sem_init_ns(struct ipc_namespace *ns) |
134 | { | 134 | { |
135 | struct ipc_ids *ids; | 135 | struct ipc_ids *ids; |
136 | 136 | ||
137 | ids = kmalloc(sizeof(struct ipc_ids), GFP_KERNEL); | 137 | ids = kmalloc(sizeof(struct ipc_ids), GFP_KERNEL); |
138 | if (ids == NULL) | 138 | if (ids == NULL) |
139 | return -ENOMEM; | 139 | return -ENOMEM; |
140 | 140 | ||
141 | __sem_init_ns(ns, ids); | 141 | __sem_init_ns(ns, ids); |
142 | return 0; | 142 | return 0; |
143 | } | 143 | } |
144 | 144 | ||
145 | void sem_exit_ns(struct ipc_namespace *ns) | 145 | void sem_exit_ns(struct ipc_namespace *ns) |
146 | { | 146 | { |
147 | struct sem_array *sma; | 147 | struct sem_array *sma; |
148 | int next_id; | 148 | int next_id; |
149 | int total, in_use; | 149 | int total, in_use; |
150 | 150 | ||
151 | mutex_lock(&sem_ids(ns).mutex); | 151 | mutex_lock(&sem_ids(ns).mutex); |
152 | 152 | ||
153 | in_use = sem_ids(ns).in_use; | 153 | in_use = sem_ids(ns).in_use; |
154 | 154 | ||
155 | for (total = 0, next_id = 0; total < in_use; next_id++) { | 155 | for (total = 0, next_id = 0; total < in_use; next_id++) { |
156 | sma = idr_find(&sem_ids(ns).ipcs_idr, next_id); | 156 | sma = idr_find(&sem_ids(ns).ipcs_idr, next_id); |
157 | if (sma == NULL) | 157 | if (sma == NULL) |
158 | continue; | 158 | continue; |
159 | ipc_lock_by_ptr(&sma->sem_perm); | 159 | ipc_lock_by_ptr(&sma->sem_perm); |
160 | freeary(ns, sma); | 160 | freeary(ns, sma); |
161 | total++; | 161 | total++; |
162 | } | 162 | } |
163 | mutex_unlock(&sem_ids(ns).mutex); | 163 | mutex_unlock(&sem_ids(ns).mutex); |
164 | 164 | ||
165 | kfree(ns->ids[IPC_SEM_IDS]); | 165 | kfree(ns->ids[IPC_SEM_IDS]); |
166 | ns->ids[IPC_SEM_IDS] = NULL; | 166 | ns->ids[IPC_SEM_IDS] = NULL; |
167 | } | 167 | } |
168 | 168 | ||
169 | void __init sem_init (void) | 169 | void __init sem_init (void) |
170 | { | 170 | { |
171 | __sem_init_ns(&init_ipc_ns, &init_sem_ids); | 171 | __sem_init_ns(&init_ipc_ns, &init_sem_ids); |
172 | ipc_init_proc_interface("sysvipc/sem", | 172 | ipc_init_proc_interface("sysvipc/sem", |
173 | " key semid perms nsems uid gid cuid cgid otime ctime\n", | 173 | " key semid perms nsems uid gid cuid cgid otime ctime\n", |
174 | IPC_SEM_IDS, sysvipc_sem_proc_show); | 174 | IPC_SEM_IDS, sysvipc_sem_proc_show); |
175 | } | 175 | } |
176 | 176 | ||
177 | static inline struct sem_array *sem_lock(struct ipc_namespace *ns, int id) | 177 | static inline struct sem_array *sem_lock(struct ipc_namespace *ns, int id) |
178 | { | 178 | { |
179 | struct kern_ipc_perm *ipcp = ipc_lock(&sem_ids(ns), id); | 179 | struct kern_ipc_perm *ipcp = ipc_lock(&sem_ids(ns), id); |
180 | 180 | ||
181 | return container_of(ipcp, struct sem_array, sem_perm); | 181 | return container_of(ipcp, struct sem_array, sem_perm); |
182 | } | 182 | } |
183 | 183 | ||
184 | static inline struct sem_array *sem_lock_check(struct ipc_namespace *ns, | 184 | static inline struct sem_array *sem_lock_check(struct ipc_namespace *ns, |
185 | int id) | 185 | int id) |
186 | { | 186 | { |
187 | struct kern_ipc_perm *ipcp = ipc_lock_check(&sem_ids(ns), id); | 187 | struct kern_ipc_perm *ipcp = ipc_lock_check(&sem_ids(ns), id); |
188 | 188 | ||
189 | return container_of(ipcp, struct sem_array, sem_perm); | 189 | return container_of(ipcp, struct sem_array, sem_perm); |
190 | } | 190 | } |
191 | 191 | ||
192 | static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s) | 192 | static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s) |
193 | { | 193 | { |
194 | ipc_rmid(&sem_ids(ns), &s->sem_perm); | 194 | ipc_rmid(&sem_ids(ns), &s->sem_perm); |
195 | } | 195 | } |
196 | 196 | ||
197 | /* | 197 | /* |
198 | * Lockless wakeup algorithm: | 198 | * Lockless wakeup algorithm: |
199 | * Without the check/retry algorithm a lockless wakeup is possible: | 199 | * Without the check/retry algorithm a lockless wakeup is possible: |
200 | * - queue.status is initialized to -EINTR before blocking. | 200 | * - queue.status is initialized to -EINTR before blocking. |
201 | * - wakeup is performed by | 201 | * - wakeup is performed by |
202 | * * unlinking the queue entry from sma->sem_pending | 202 | * * unlinking the queue entry from sma->sem_pending |
203 | * * setting queue.status to IN_WAKEUP | 203 | * * setting queue.status to IN_WAKEUP |
204 | * This is the notification for the blocked thread that a | 204 | * This is the notification for the blocked thread that a |
205 | * result value is imminent. | 205 | * result value is imminent. |
206 | * * call wake_up_process | 206 | * * call wake_up_process |
207 | * * set queue.status to the final value. | 207 | * * set queue.status to the final value. |
208 | * - the previously blocked thread checks queue.status: | 208 | * - the previously blocked thread checks queue.status: |
209 | * * if it's IN_WAKEUP, then it must wait until the value changes | 209 | * * if it's IN_WAKEUP, then it must wait until the value changes |
210 | * * if it's not -EINTR, then the operation was completed by | 210 | * * if it's not -EINTR, then the operation was completed by |
211 | * update_queue. semtimedop can return queue.status without | 211 | * update_queue. semtimedop can return queue.status without |
212 | * performing any operation on the sem array. | 212 | * performing any operation on the sem array. |
213 | * * otherwise it must acquire the spinlock and check what's up. | 213 | * * otherwise it must acquire the spinlock and check what's up. |
214 | * | 214 | * |
215 | * The two-stage algorithm is necessary to protect against the following | 215 | * The two-stage algorithm is necessary to protect against the following |
216 | * races: | 216 | * races: |
217 | * - if queue.status is set after wake_up_process, then the woken up idle | 217 | * - if queue.status is set after wake_up_process, then the woken up idle |
218 | * thread could race forward and try (and fail) to acquire sma->lock | 218 | * thread could race forward and try (and fail) to acquire sma->lock |
219 | * before update_queue had a chance to set queue.status | 219 | * before update_queue had a chance to set queue.status |
220 | * - if queue.status is written before wake_up_process and if the | 220 | * - if queue.status is written before wake_up_process and if the |
221 | * blocked process is woken up by a signal between writing | 221 | * blocked process is woken up by a signal between writing |
222 | * queue.status and the wake_up_process, then the woken up | 222 | * queue.status and the wake_up_process, then the woken up |
223 | * process could return from semtimedop and die by calling | 223 | * process could return from semtimedop and die by calling |
224 | * sys_exit before wake_up_process is called. Then wake_up_process | 224 | * sys_exit before wake_up_process is called. Then wake_up_process |
225 | * will oops, because the task structure is already invalid. | 225 | * will oops, because the task structure is already invalid. |
226 | * (yes, this happened on s390 with sysv msg). | 226 | * (yes, this happened on s390 with sysv msg). |
227 | * | 227 | * |
228 | */ | 228 | */ |
229 | #define IN_WAKEUP 1 | 229 | #define IN_WAKEUP 1 |
230 | 230 | ||
231 | /** | ||
232 | * newary - Create a new semaphore set | ||
233 | * @ns: namespace | ||
234 | * @params: ptr to the structure that contains key, semflg and nsems | ||
235 | * | ||
236 | * Called with sem_ids.mutex held | ||
237 | */ | ||
238 | |||
231 | static int newary(struct ipc_namespace *ns, struct ipc_params *params) | 239 | static int newary(struct ipc_namespace *ns, struct ipc_params *params) |
232 | { | 240 | { |
233 | int id; | 241 | int id; |
234 | int retval; | 242 | int retval; |
235 | struct sem_array *sma; | 243 | struct sem_array *sma; |
236 | int size; | 244 | int size; |
237 | key_t key = params->key; | 245 | key_t key = params->key; |
238 | int nsems = params->u.nsems; | 246 | int nsems = params->u.nsems; |
239 | int semflg = params->flg; | 247 | int semflg = params->flg; |
240 | 248 | ||
241 | if (!nsems) | 249 | if (!nsems) |
242 | return -EINVAL; | 250 | return -EINVAL; |
243 | if (ns->used_sems + nsems > ns->sc_semmns) | 251 | if (ns->used_sems + nsems > ns->sc_semmns) |
244 | return -ENOSPC; | 252 | return -ENOSPC; |
245 | 253 | ||
246 | size = sizeof (*sma) + nsems * sizeof (struct sem); | 254 | size = sizeof (*sma) + nsems * sizeof (struct sem); |
247 | sma = ipc_rcu_alloc(size); | 255 | sma = ipc_rcu_alloc(size); |
248 | if (!sma) { | 256 | if (!sma) { |
249 | return -ENOMEM; | 257 | return -ENOMEM; |
250 | } | 258 | } |
251 | memset (sma, 0, size); | 259 | memset (sma, 0, size); |
252 | 260 | ||
253 | sma->sem_perm.mode = (semflg & S_IRWXUGO); | 261 | sma->sem_perm.mode = (semflg & S_IRWXUGO); |
254 | sma->sem_perm.key = key; | 262 | sma->sem_perm.key = key; |
255 | 263 | ||
256 | sma->sem_perm.security = NULL; | 264 | sma->sem_perm.security = NULL; |
257 | retval = security_sem_alloc(sma); | 265 | retval = security_sem_alloc(sma); |
258 | if (retval) { | 266 | if (retval) { |
259 | ipc_rcu_putref(sma); | 267 | ipc_rcu_putref(sma); |
260 | return retval; | 268 | return retval; |
261 | } | 269 | } |
262 | 270 | ||
263 | id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni); | 271 | id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni); |
264 | if(id == -1) { | 272 | if(id == -1) { |
265 | security_sem_free(sma); | 273 | security_sem_free(sma); |
266 | ipc_rcu_putref(sma); | 274 | ipc_rcu_putref(sma); |
267 | return -ENOSPC; | 275 | return -ENOSPC; |
268 | } | 276 | } |
269 | ns->used_sems += nsems; | 277 | ns->used_sems += nsems; |
270 | 278 | ||
271 | sma->sem_perm.id = sem_buildid(ns, id, sma->sem_perm.seq); | 279 | sma->sem_perm.id = sem_buildid(ns, id, sma->sem_perm.seq); |
272 | sma->sem_base = (struct sem *) &sma[1]; | 280 | sma->sem_base = (struct sem *) &sma[1]; |
273 | /* sma->sem_pending = NULL; */ | 281 | /* sma->sem_pending = NULL; */ |
274 | sma->sem_pending_last = &sma->sem_pending; | 282 | sma->sem_pending_last = &sma->sem_pending; |
275 | /* sma->undo = NULL; */ | 283 | /* sma->undo = NULL; */ |
276 | sma->sem_nsems = nsems; | 284 | sma->sem_nsems = nsems; |
277 | sma->sem_ctime = get_seconds(); | 285 | sma->sem_ctime = get_seconds(); |
278 | sem_unlock(sma); | 286 | sem_unlock(sma); |
279 | 287 | ||
280 | return sma->sem_perm.id; | 288 | return sma->sem_perm.id; |
281 | } | 289 | } |
282 | 290 | ||
283 | 291 | ||
292 | /* | ||
293 | * Called with sem_ids.mutex and ipcp locked. | ||
294 | */ | ||
284 | static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg) | 295 | static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg) |
285 | { | 296 | { |
286 | struct sem_array *sma; | 297 | struct sem_array *sma; |
287 | 298 | ||
288 | sma = container_of(ipcp, struct sem_array, sem_perm); | 299 | sma = container_of(ipcp, struct sem_array, sem_perm); |
289 | return security_sem_associate(sma, semflg); | 300 | return security_sem_associate(sma, semflg); |
290 | } | 301 | } |
291 | 302 | ||
303 | /* | ||
304 | * Called with sem_ids.mutex and ipcp locked. | ||
305 | */ | ||
292 | static inline int sem_more_checks(struct kern_ipc_perm *ipcp, | 306 | static inline int sem_more_checks(struct kern_ipc_perm *ipcp, |
293 | struct ipc_params *params) | 307 | struct ipc_params *params) |
294 | { | 308 | { |
295 | struct sem_array *sma; | 309 | struct sem_array *sma; |
296 | 310 | ||
297 | sma = container_of(ipcp, struct sem_array, sem_perm); | 311 | sma = container_of(ipcp, struct sem_array, sem_perm); |
298 | if (params->u.nsems > sma->sem_nsems) | 312 | if (params->u.nsems > sma->sem_nsems) |
299 | return -EINVAL; | 313 | return -EINVAL; |
300 | 314 | ||
301 | return 0; | 315 | return 0; |
302 | } | 316 | } |
303 | 317 | ||
304 | asmlinkage long sys_semget(key_t key, int nsems, int semflg) | 318 | asmlinkage long sys_semget(key_t key, int nsems, int semflg) |
305 | { | 319 | { |
306 | struct ipc_namespace *ns; | 320 | struct ipc_namespace *ns; |
307 | struct ipc_ops sem_ops; | 321 | struct ipc_ops sem_ops; |
308 | struct ipc_params sem_params; | 322 | struct ipc_params sem_params; |
309 | 323 | ||
310 | ns = current->nsproxy->ipc_ns; | 324 | ns = current->nsproxy->ipc_ns; |
311 | 325 | ||
312 | if (nsems < 0 || nsems > ns->sc_semmsl) | 326 | if (nsems < 0 || nsems > ns->sc_semmsl) |
313 | return -EINVAL; | 327 | return -EINVAL; |
314 | 328 | ||
315 | sem_ops.getnew = newary; | 329 | sem_ops.getnew = newary; |
316 | sem_ops.associate = sem_security; | 330 | sem_ops.associate = sem_security; |
317 | sem_ops.more_checks = sem_more_checks; | 331 | sem_ops.more_checks = sem_more_checks; |
318 | 332 | ||
319 | sem_params.key = key; | 333 | sem_params.key = key; |
320 | sem_params.flg = semflg; | 334 | sem_params.flg = semflg; |
321 | sem_params.u.nsems = nsems; | 335 | sem_params.u.nsems = nsems; |
322 | 336 | ||
323 | return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params); | 337 | return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params); |
324 | } | 338 | } |
325 | 339 | ||
326 | /* Manage the doubly linked list sma->sem_pending as a FIFO: | 340 | /* Manage the doubly linked list sma->sem_pending as a FIFO: |
327 | * insert new queue elements at the tail sma->sem_pending_last. | 341 | * insert new queue elements at the tail sma->sem_pending_last. |
328 | */ | 342 | */ |
329 | static inline void append_to_queue (struct sem_array * sma, | 343 | static inline void append_to_queue (struct sem_array * sma, |
330 | struct sem_queue * q) | 344 | struct sem_queue * q) |
331 | { | 345 | { |
332 | *(q->prev = sma->sem_pending_last) = q; | 346 | *(q->prev = sma->sem_pending_last) = q; |
333 | *(sma->sem_pending_last = &q->next) = NULL; | 347 | *(sma->sem_pending_last = &q->next) = NULL; |
334 | } | 348 | } |
335 | 349 | ||
336 | static inline void prepend_to_queue (struct sem_array * sma, | 350 | static inline void prepend_to_queue (struct sem_array * sma, |
337 | struct sem_queue * q) | 351 | struct sem_queue * q) |
338 | { | 352 | { |
339 | q->next = sma->sem_pending; | 353 | q->next = sma->sem_pending; |
340 | *(q->prev = &sma->sem_pending) = q; | 354 | *(q->prev = &sma->sem_pending) = q; |
341 | if (q->next) | 355 | if (q->next) |
342 | q->next->prev = &q->next; | 356 | q->next->prev = &q->next; |
343 | else /* sma->sem_pending_last == &sma->sem_pending */ | 357 | else /* sma->sem_pending_last == &sma->sem_pending */ |
344 | sma->sem_pending_last = &q->next; | 358 | sma->sem_pending_last = &q->next; |
345 | } | 359 | } |
346 | 360 | ||
347 | static inline void remove_from_queue (struct sem_array * sma, | 361 | static inline void remove_from_queue (struct sem_array * sma, |
348 | struct sem_queue * q) | 362 | struct sem_queue * q) |
349 | { | 363 | { |
350 | *(q->prev) = q->next; | 364 | *(q->prev) = q->next; |
351 | if (q->next) | 365 | if (q->next) |
352 | q->next->prev = q->prev; | 366 | q->next->prev = q->prev; |
353 | else /* sma->sem_pending_last == &q->next */ | 367 | else /* sma->sem_pending_last == &q->next */ |
354 | sma->sem_pending_last = q->prev; | 368 | sma->sem_pending_last = q->prev; |
355 | q->prev = NULL; /* mark as removed */ | 369 | q->prev = NULL; /* mark as removed */ |
356 | } | 370 | } |
357 | 371 | ||
358 | /* | 372 | /* |
359 | * Determine whether a sequence of semaphore operations would succeed | 373 | * Determine whether a sequence of semaphore operations would succeed |
360 | * all at once. Return 0 if yes, 1 if need to sleep, else return error code. | 374 | * all at once. Return 0 if yes, 1 if need to sleep, else return error code. |
361 | */ | 375 | */ |
362 | 376 | ||
363 | static int try_atomic_semop (struct sem_array * sma, struct sembuf * sops, | 377 | static int try_atomic_semop (struct sem_array * sma, struct sembuf * sops, |
364 | int nsops, struct sem_undo *un, int pid) | 378 | int nsops, struct sem_undo *un, int pid) |
365 | { | 379 | { |
366 | int result, sem_op; | 380 | int result, sem_op; |
367 | struct sembuf *sop; | 381 | struct sembuf *sop; |
368 | struct sem * curr; | 382 | struct sem * curr; |
369 | 383 | ||
370 | for (sop = sops; sop < sops + nsops; sop++) { | 384 | for (sop = sops; sop < sops + nsops; sop++) { |
371 | curr = sma->sem_base + sop->sem_num; | 385 | curr = sma->sem_base + sop->sem_num; |
372 | sem_op = sop->sem_op; | 386 | sem_op = sop->sem_op; |
373 | result = curr->semval; | 387 | result = curr->semval; |
374 | 388 | ||
375 | if (!sem_op && result) | 389 | if (!sem_op && result) |
376 | goto would_block; | 390 | goto would_block; |
377 | 391 | ||
378 | result += sem_op; | 392 | result += sem_op; |
379 | if (result < 0) | 393 | if (result < 0) |
380 | goto would_block; | 394 | goto would_block; |
381 | if (result > SEMVMX) | 395 | if (result > SEMVMX) |
382 | goto out_of_range; | 396 | goto out_of_range; |
383 | if (sop->sem_flg & SEM_UNDO) { | 397 | if (sop->sem_flg & SEM_UNDO) { |
384 | int undo = un->semadj[sop->sem_num] - sem_op; | 398 | int undo = un->semadj[sop->sem_num] - sem_op; |
385 | /* | 399 | /* |
386 | * Exceeding the undo range is an error. | 400 | * Exceeding the undo range is an error. |
387 | */ | 401 | */ |
388 | if (undo < (-SEMAEM - 1) || undo > SEMAEM) | 402 | if (undo < (-SEMAEM - 1) || undo > SEMAEM) |
389 | goto out_of_range; | 403 | goto out_of_range; |
390 | } | 404 | } |
391 | curr->semval = result; | 405 | curr->semval = result; |
392 | } | 406 | } |
393 | 407 | ||
394 | sop--; | 408 | sop--; |
395 | while (sop >= sops) { | 409 | while (sop >= sops) { |
396 | sma->sem_base[sop->sem_num].sempid = pid; | 410 | sma->sem_base[sop->sem_num].sempid = pid; |
397 | if (sop->sem_flg & SEM_UNDO) | 411 | if (sop->sem_flg & SEM_UNDO) |
398 | un->semadj[sop->sem_num] -= sop->sem_op; | 412 | un->semadj[sop->sem_num] -= sop->sem_op; |
399 | sop--; | 413 | sop--; |
400 | } | 414 | } |
401 | 415 | ||
402 | sma->sem_otime = get_seconds(); | 416 | sma->sem_otime = get_seconds(); |
403 | return 0; | 417 | return 0; |
404 | 418 | ||
405 | out_of_range: | 419 | out_of_range: |
406 | result = -ERANGE; | 420 | result = -ERANGE; |
407 | goto undo; | 421 | goto undo; |
408 | 422 | ||
409 | would_block: | 423 | would_block: |
410 | if (sop->sem_flg & IPC_NOWAIT) | 424 | if (sop->sem_flg & IPC_NOWAIT) |
411 | result = -EAGAIN; | 425 | result = -EAGAIN; |
412 | else | 426 | else |
413 | result = 1; | 427 | result = 1; |
414 | 428 | ||
415 | undo: | 429 | undo: |
416 | sop--; | 430 | sop--; |
417 | while (sop >= sops) { | 431 | while (sop >= sops) { |
418 | sma->sem_base[sop->sem_num].semval -= sop->sem_op; | 432 | sma->sem_base[sop->sem_num].semval -= sop->sem_op; |
419 | sop--; | 433 | sop--; |
420 | } | 434 | } |
421 | 435 | ||
422 | return result; | 436 | return result; |
423 | } | 437 | } |
424 | 438 | ||
425 | /* Go through the pending queue for the indicated semaphore | 439 | /* Go through the pending queue for the indicated semaphore |
426 | * looking for tasks that can be completed. | 440 | * looking for tasks that can be completed. |
427 | */ | 441 | */ |
428 | static void update_queue (struct sem_array * sma) | 442 | static void update_queue (struct sem_array * sma) |
429 | { | 443 | { |
430 | int error; | 444 | int error; |
431 | struct sem_queue * q; | 445 | struct sem_queue * q; |
432 | 446 | ||
433 | q = sma->sem_pending; | 447 | q = sma->sem_pending; |
434 | while(q) { | 448 | while(q) { |
435 | error = try_atomic_semop(sma, q->sops, q->nsops, | 449 | error = try_atomic_semop(sma, q->sops, q->nsops, |
436 | q->undo, q->pid); | 450 | q->undo, q->pid); |
437 | 451 | ||
438 | /* Does q->sleeper still need to sleep? */ | 452 | /* Does q->sleeper still need to sleep? */ |
439 | if (error <= 0) { | 453 | if (error <= 0) { |
440 | struct sem_queue *n; | 454 | struct sem_queue *n; |
441 | remove_from_queue(sma,q); | 455 | remove_from_queue(sma,q); |
442 | q->status = IN_WAKEUP; | 456 | q->status = IN_WAKEUP; |
443 | /* | 457 | /* |
444 | * Continue scanning. The next operation | 458 | * Continue scanning. The next operation |
445 | * that must be checked depends on the type of the | 459 | * that must be checked depends on the type of the |
446 | * completed operation: | 460 | * completed operation: |
447 | * - if the operation modified the array, then | 461 | * - if the operation modified the array, then |
448 | * restart from the head of the queue and | 462 | * restart from the head of the queue and |
449 | * check for threads that might be waiting | 463 | * check for threads that might be waiting |
450 | * for semaphore values to become 0. | 464 | * for semaphore values to become 0. |
451 | * - if the operation didn't modify the array, | 465 | * - if the operation didn't modify the array, |
452 | * then just continue. | 466 | * then just continue. |
453 | */ | 467 | */ |
454 | if (q->alter) | 468 | if (q->alter) |
455 | n = sma->sem_pending; | 469 | n = sma->sem_pending; |
456 | else | 470 | else |
457 | n = q->next; | 471 | n = q->next; |
458 | wake_up_process(q->sleeper); | 472 | wake_up_process(q->sleeper); |
459 | /* hands-off: q will disappear immediately after | 473 | /* hands-off: q will disappear immediately after |
460 | * writing q->status. | 474 | * writing q->status. |
461 | */ | 475 | */ |
462 | smp_wmb(); | 476 | smp_wmb(); |
463 | q->status = error; | 477 | q->status = error; |
464 | q = n; | 478 | q = n; |
465 | } else { | 479 | } else { |
466 | q = q->next; | 480 | q = q->next; |
467 | } | 481 | } |
468 | } | 482 | } |
469 | } | 483 | } |
470 | 484 | ||
471 | /* The following counts are associated to each semaphore: | 485 | /* The following counts are associated to each semaphore: |
472 | * semncnt number of tasks waiting on semval being nonzero | 486 | * semncnt number of tasks waiting on semval being nonzero |
473 | * semzcnt number of tasks waiting on semval being zero | 487 | * semzcnt number of tasks waiting on semval being zero |
474 | * This model assumes that a task waits on exactly one semaphore. | 488 | * This model assumes that a task waits on exactly one semaphore. |
475 | * Since semaphore operations are to be performed atomically, tasks actually | 489 | * Since semaphore operations are to be performed atomically, tasks actually |
476 | * wait on a whole sequence of semaphores simultaneously. | 490 | * wait on a whole sequence of semaphores simultaneously. |
477 | * The counts we return here are a rough approximation, but still | 491 | * The counts we return here are a rough approximation, but still |
478 | * warrant that semncnt+semzcnt>0 if the task is on the pending queue. | 492 | * warrant that semncnt+semzcnt>0 if the task is on the pending queue. |
479 | */ | 493 | */ |
480 | static int count_semncnt (struct sem_array * sma, ushort semnum) | 494 | static int count_semncnt (struct sem_array * sma, ushort semnum) |
481 | { | 495 | { |
482 | int semncnt; | 496 | int semncnt; |
483 | struct sem_queue * q; | 497 | struct sem_queue * q; |
484 | 498 | ||
485 | semncnt = 0; | 499 | semncnt = 0; |
486 | for (q = sma->sem_pending; q; q = q->next) { | 500 | for (q = sma->sem_pending; q; q = q->next) { |
487 | struct sembuf * sops = q->sops; | 501 | struct sembuf * sops = q->sops; |
488 | int nsops = q->nsops; | 502 | int nsops = q->nsops; |
489 | int i; | 503 | int i; |
490 | for (i = 0; i < nsops; i++) | 504 | for (i = 0; i < nsops; i++) |
491 | if (sops[i].sem_num == semnum | 505 | if (sops[i].sem_num == semnum |
492 | && (sops[i].sem_op < 0) | 506 | && (sops[i].sem_op < 0) |
493 | && !(sops[i].sem_flg & IPC_NOWAIT)) | 507 | && !(sops[i].sem_flg & IPC_NOWAIT)) |
494 | semncnt++; | 508 | semncnt++; |
495 | } | 509 | } |
496 | return semncnt; | 510 | return semncnt; |
497 | } | 511 | } |
498 | static int count_semzcnt (struct sem_array * sma, ushort semnum) | 512 | static int count_semzcnt (struct sem_array * sma, ushort semnum) |
499 | { | 513 | { |
500 | int semzcnt; | 514 | int semzcnt; |
501 | struct sem_queue * q; | 515 | struct sem_queue * q; |
502 | 516 | ||
503 | semzcnt = 0; | 517 | semzcnt = 0; |
504 | for (q = sma->sem_pending; q; q = q->next) { | 518 | for (q = sma->sem_pending; q; q = q->next) { |
505 | struct sembuf * sops = q->sops; | 519 | struct sembuf * sops = q->sops; |
506 | int nsops = q->nsops; | 520 | int nsops = q->nsops; |
507 | int i; | 521 | int i; |
508 | for (i = 0; i < nsops; i++) | 522 | for (i = 0; i < nsops; i++) |
509 | if (sops[i].sem_num == semnum | 523 | if (sops[i].sem_num == semnum |
510 | && (sops[i].sem_op == 0) | 524 | && (sops[i].sem_op == 0) |
511 | && !(sops[i].sem_flg & IPC_NOWAIT)) | 525 | && !(sops[i].sem_flg & IPC_NOWAIT)) |
512 | semzcnt++; | 526 | semzcnt++; |
513 | } | 527 | } |
514 | return semzcnt; | 528 | return semzcnt; |
515 | } | 529 | } |
516 | 530 | ||
517 | /* Free a semaphore set. freeary() is called with sem_ids.mutex locked and | 531 | /* Free a semaphore set. freeary() is called with sem_ids.mutex locked and |
518 | * the spinlock for this semaphore set hold. sem_ids.mutex remains locked | 532 | * the spinlock for this semaphore set hold. sem_ids.mutex remains locked |
519 | * on exit. | 533 | * on exit. |
520 | */ | 534 | */ |
521 | static void freeary(struct ipc_namespace *ns, struct sem_array *sma) | 535 | static void freeary(struct ipc_namespace *ns, struct sem_array *sma) |
522 | { | 536 | { |
523 | struct sem_undo *un; | 537 | struct sem_undo *un; |
524 | struct sem_queue *q; | 538 | struct sem_queue *q; |
525 | 539 | ||
526 | /* Invalidate the existing undo structures for this semaphore set. | 540 | /* Invalidate the existing undo structures for this semaphore set. |
527 | * (They will be freed without any further action in exit_sem() | 541 | * (They will be freed without any further action in exit_sem() |
528 | * or during the next semop.) | 542 | * or during the next semop.) |
529 | */ | 543 | */ |
530 | for (un = sma->undo; un; un = un->id_next) | 544 | for (un = sma->undo; un; un = un->id_next) |
531 | un->semid = -1; | 545 | un->semid = -1; |
532 | 546 | ||
533 | /* Wake up all pending processes and let them fail with EIDRM. */ | 547 | /* Wake up all pending processes and let them fail with EIDRM. */ |
534 | q = sma->sem_pending; | 548 | q = sma->sem_pending; |
535 | while(q) { | 549 | while(q) { |
536 | struct sem_queue *n; | 550 | struct sem_queue *n; |
537 | /* lazy remove_from_queue: we are killing the whole queue */ | 551 | /* lazy remove_from_queue: we are killing the whole queue */ |
538 | q->prev = NULL; | 552 | q->prev = NULL; |
539 | n = q->next; | 553 | n = q->next; |
540 | q->status = IN_WAKEUP; | 554 | q->status = IN_WAKEUP; |
541 | wake_up_process(q->sleeper); /* doesn't sleep */ | 555 | wake_up_process(q->sleeper); /* doesn't sleep */ |
542 | smp_wmb(); | 556 | smp_wmb(); |
543 | q->status = -EIDRM; /* hands-off q */ | 557 | q->status = -EIDRM; /* hands-off q */ |
544 | q = n; | 558 | q = n; |
545 | } | 559 | } |
546 | 560 | ||
547 | /* Remove the semaphore set from the IDR */ | 561 | /* Remove the semaphore set from the IDR */ |
548 | sem_rmid(ns, sma); | 562 | sem_rmid(ns, sma); |
549 | sem_unlock(sma); | 563 | sem_unlock(sma); |
550 | 564 | ||
551 | ns->used_sems -= sma->sem_nsems; | 565 | ns->used_sems -= sma->sem_nsems; |
552 | security_sem_free(sma); | 566 | security_sem_free(sma); |
553 | ipc_rcu_putref(sma); | 567 | ipc_rcu_putref(sma); |
554 | } | 568 | } |
555 | 569 | ||
556 | static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version) | 570 | static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version) |
557 | { | 571 | { |
558 | switch(version) { | 572 | switch(version) { |
559 | case IPC_64: | 573 | case IPC_64: |
560 | return copy_to_user(buf, in, sizeof(*in)); | 574 | return copy_to_user(buf, in, sizeof(*in)); |
561 | case IPC_OLD: | 575 | case IPC_OLD: |
562 | { | 576 | { |
563 | struct semid_ds out; | 577 | struct semid_ds out; |
564 | 578 | ||
565 | ipc64_perm_to_ipc_perm(&in->sem_perm, &out.sem_perm); | 579 | ipc64_perm_to_ipc_perm(&in->sem_perm, &out.sem_perm); |
566 | 580 | ||
567 | out.sem_otime = in->sem_otime; | 581 | out.sem_otime = in->sem_otime; |
568 | out.sem_ctime = in->sem_ctime; | 582 | out.sem_ctime = in->sem_ctime; |
569 | out.sem_nsems = in->sem_nsems; | 583 | out.sem_nsems = in->sem_nsems; |
570 | 584 | ||
571 | return copy_to_user(buf, &out, sizeof(out)); | 585 | return copy_to_user(buf, &out, sizeof(out)); |
572 | } | 586 | } |
573 | default: | 587 | default: |
574 | return -EINVAL; | 588 | return -EINVAL; |
575 | } | 589 | } |
576 | } | 590 | } |
577 | 591 | ||
/*
 * semctl_nolock() serves the semctl commands that do not operate on one
 * particular semaphore set under a held lock for the whole call:
 *  - IPC_INFO / SEM_INFO: copy namespace-wide limits (and, for SEM_INFO,
 *    current usage counters) to user space; returns the highest in-use
 *    index on success.
 *  - SEM_STAT: copy the status of one array to user space; returns the
 *    array's ipc id on success.
 */
static int semctl_nolock(struct ipc_namespace *ns, int semid, int semnum,
		int cmd, int version, union semun arg)
{
	int err = -EINVAL;
	struct sem_array *sma;

	switch(cmd) {
	case IPC_INFO:
	case SEM_INFO:
	{
		struct seminfo seminfo;
		int max_id;

		err = security_sem_semctl(NULL, cmd);
		if (err)
			return err;

		memset(&seminfo,0,sizeof(seminfo));
		/* Tunable, per-namespace limits... */
		seminfo.semmni = ns->sc_semmni;
		seminfo.semmns = ns->sc_semmns;
		seminfo.semmsl = ns->sc_semmsl;
		seminfo.semopm = ns->sc_semopm;
		/* ...and compile-time constants for the rest. */
		seminfo.semvmx = SEMVMX;
		seminfo.semmnu = SEMMNU;
		seminfo.semmap = SEMMAP;
		seminfo.semume = SEMUME;
		/*
		 * Hold the ids mutex so the usage counters and max_id are
		 * read consistently with respect to create/remove.
		 */
		mutex_lock(&sem_ids(ns).mutex);
		if (cmd == SEM_INFO) {
			seminfo.semusz = sem_ids(ns).in_use;
			seminfo.semaem = ns->used_sems;
		} else {
			seminfo.semusz = SEMUSZ;
			seminfo.semaem = SEMAEM;
		}
		max_id = ipc_get_maxid(&sem_ids(ns));
		mutex_unlock(&sem_ids(ns).mutex);
		if (copy_to_user (arg.__buf, &seminfo, sizeof(struct seminfo)))
			return -EFAULT;
		return (max_id < 0) ? 0: max_id;
	}
	case SEM_STAT:
	{
		struct semid64_ds tbuf;
		int id;

		sma = sem_lock(ns, semid);
		if (IS_ERR(sma))
			return PTR_ERR(sma);

		err = -EACCES;
		if (ipcperms (&sma->sem_perm, S_IRUGO))
			goto out_unlock;

		err = security_sem_semctl(sma, cmd);
		if (err)
			goto out_unlock;

		/* Remember the id before dropping the lock. */
		id = sma->sem_perm.id;

		memset(&tbuf, 0, sizeof(tbuf));

		kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm);
		tbuf.sem_otime = sma->sem_otime;
		tbuf.sem_ctime = sma->sem_ctime;
		tbuf.sem_nsems = sma->sem_nsems;
		/* Unlock before the user copy: it may fault and sleep. */
		sem_unlock(sma);
		if (copy_semid_to_user (arg.buf, &tbuf, version))
			return -EFAULT;
		/* SEM_STAT returns the ipc id of the array on success. */
		return id;
	}
	default:
		return -EINVAL;
	}
	return err;
out_unlock:
	sem_unlock(sma);
	return err;
}
656 | 670 | ||
/*
 * semctl_main() serves the semctl commands that operate on a locked
 * semaphore array: GETALL/SETALL, IPC_STAT, and the single-semaphore
 * commands GETVAL/GETPID/GETNCNT/GETZCNT/SETVAL.
 *
 * For GETALL/SETALL with large arrays the buffer allocation may block,
 * so the array is temporarily unlocked while pinned with a reference
 * (ipc_rcu_getref), then relocked and rechecked for removal.
 */
static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
		int cmd, int version, union semun arg)
{
	struct sem_array *sma;
	struct sem* curr;
	int err;
	/* Small arrays are served from this on-stack buffer. */
	ushort fast_sem_io[SEMMSL_FAST];
	ushort* sem_io = fast_sem_io;
	int nsems;

	sma = sem_lock_check(ns, semid);
	if (IS_ERR(sma))
		return PTR_ERR(sma);

	nsems = sma->sem_nsems;

	/* Writing commands need write permission, the rest read. */
	err = -EACCES;
	if (ipcperms (&sma->sem_perm, (cmd==SETVAL||cmd==SETALL)?S_IWUGO:S_IRUGO))
		goto out_unlock;

	err = security_sem_semctl(sma, cmd);
	if (err)
		goto out_unlock;

	err = -EACCES;
	switch (cmd) {
	case GETALL:
	{
		ushort __user *array = arg.array;
		int i;

		if(nsems > SEMMSL_FAST) {
			/*
			 * Pin the array and drop the lock: ipc_alloc()
			 * may sleep.
			 */
			ipc_rcu_getref(sma);
			sem_unlock(sma);

			sem_io = ipc_alloc(sizeof(ushort)*nsems);
			if(sem_io == NULL) {
				ipc_lock_by_ptr(&sma->sem_perm);
				ipc_rcu_putref(sma);
				sem_unlock(sma);
				return -ENOMEM;
			}

			ipc_lock_by_ptr(&sma->sem_perm);
			ipc_rcu_putref(sma);
			/* The array may have been removed while unlocked. */
			if (sma->sem_perm.deleted) {
				sem_unlock(sma);
				err = -EIDRM;
				goto out_free;
			}
		}

		for (i = 0; i < sma->sem_nsems; i++)
			sem_io[i] = sma->sem_base[i].semval;
		/* Unlock before the user copy: it may fault and sleep. */
		sem_unlock(sma);
		err = 0;
		if(copy_to_user(array, sem_io, nsems*sizeof(ushort)))
			err = -EFAULT;
		goto out_free;
	}
	case SETALL:
	{
		int i;
		struct sem_undo *un;

		/* Pin the array across the unlocked alloc/copy/validate. */
		ipc_rcu_getref(sma);
		sem_unlock(sma);

		if(nsems > SEMMSL_FAST) {
			sem_io = ipc_alloc(sizeof(ushort)*nsems);
			if(sem_io == NULL) {
				ipc_lock_by_ptr(&sma->sem_perm);
				ipc_rcu_putref(sma);
				sem_unlock(sma);
				return -ENOMEM;
			}
		}

		if (copy_from_user (sem_io, arg.array, nsems*sizeof(ushort))) {
			ipc_lock_by_ptr(&sma->sem_perm);
			ipc_rcu_putref(sma);
			sem_unlock(sma);
			err = -EFAULT;
			goto out_free;
		}

		/* Validate all values before touching the array. */
		for (i = 0; i < nsems; i++) {
			if (sem_io[i] > SEMVMX) {
				ipc_lock_by_ptr(&sma->sem_perm);
				ipc_rcu_putref(sma);
				sem_unlock(sma);
				err = -ERANGE;
				goto out_free;
			}
		}
		ipc_lock_by_ptr(&sma->sem_perm);
		ipc_rcu_putref(sma);
		/* The array may have been removed while unlocked. */
		if (sma->sem_perm.deleted) {
			sem_unlock(sma);
			err = -EIDRM;
			goto out_free;
		}

		for (i = 0; i < nsems; i++)
			sma->sem_base[i].semval = sem_io[i];
		/* Setting values explicitly cancels pending undo adjustments. */
		for (un = sma->undo; un; un = un->id_next)
			for (i = 0; i < nsems; i++)
				un->semadj[i] = 0;
		sma->sem_ctime = get_seconds();
		/* maybe some queued-up processes were waiting for this */
		update_queue(sma);
		err = 0;
		goto out_unlock;
	}
	case IPC_STAT:
	{
		struct semid64_ds tbuf;
		memset(&tbuf,0,sizeof(tbuf));
		kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm);
		tbuf.sem_otime = sma->sem_otime;
		tbuf.sem_ctime = sma->sem_ctime;
		tbuf.sem_nsems = sma->sem_nsems;
		sem_unlock(sma);
		if (copy_semid_to_user (arg.buf, &tbuf, version))
			return -EFAULT;
		return 0;
	}
	/* GETVAL, GETPID, GETNCNT, GETZCNT, SETVAL: fall-through */
	}
	/* The remaining commands address a single semaphore of the set. */
	err = -EINVAL;
	if(semnum < 0 || semnum >= nsems)
		goto out_unlock;

	curr = &sma->sem_base[semnum];

	switch (cmd) {
	case GETVAL:
		err = curr->semval;
		goto out_unlock;
	case GETPID:
		err = curr->sempid;
		goto out_unlock;
	case GETNCNT:
		err = count_semncnt(sma,semnum);
		goto out_unlock;
	case GETZCNT:
		err = count_semzcnt(sma,semnum);
		goto out_unlock;
	case SETVAL:
	{
		int val = arg.val;
		struct sem_undo *un;
		err = -ERANGE;
		if (val > SEMVMX || val < 0)
			goto out_unlock;

		/* Setting the value explicitly cancels pending undos. */
		for (un = sma->undo; un; un = un->id_next)
			un->semadj[semnum] = 0;
		curr->semval = val;
		curr->sempid = task_tgid_vnr(current);
		sma->sem_ctime = get_seconds();
		/* maybe some queued-up processes were waiting for this */
		update_queue(sma);
		err = 0;
		goto out_unlock;
	}
	}
out_unlock:
	sem_unlock(sma);
out_free:
	if(sem_io != fast_sem_io)
		ipc_free(sem_io, sizeof(ushort)*nsems);
	return err;
}
831 | 845 | ||
/*
 * The user-settable fields of a semaphore set, as extracted from the
 * user's semid_ds/semid64_ds by copy_semid_from_user() for IPC_SET.
 */
struct sem_setbuf {
	uid_t uid;
	gid_t gid;
	mode_t mode;
};
837 | 851 | ||
838 | static inline unsigned long copy_semid_from_user(struct sem_setbuf *out, void __user *buf, int version) | 852 | static inline unsigned long copy_semid_from_user(struct sem_setbuf *out, void __user *buf, int version) |
839 | { | 853 | { |
840 | switch(version) { | 854 | switch(version) { |
841 | case IPC_64: | 855 | case IPC_64: |
842 | { | 856 | { |
843 | struct semid64_ds tbuf; | 857 | struct semid64_ds tbuf; |
844 | 858 | ||
845 | if(copy_from_user(&tbuf, buf, sizeof(tbuf))) | 859 | if(copy_from_user(&tbuf, buf, sizeof(tbuf))) |
846 | return -EFAULT; | 860 | return -EFAULT; |
847 | 861 | ||
848 | out->uid = tbuf.sem_perm.uid; | 862 | out->uid = tbuf.sem_perm.uid; |
849 | out->gid = tbuf.sem_perm.gid; | 863 | out->gid = tbuf.sem_perm.gid; |
850 | out->mode = tbuf.sem_perm.mode; | 864 | out->mode = tbuf.sem_perm.mode; |
851 | 865 | ||
852 | return 0; | 866 | return 0; |
853 | } | 867 | } |
854 | case IPC_OLD: | 868 | case IPC_OLD: |
855 | { | 869 | { |
856 | struct semid_ds tbuf_old; | 870 | struct semid_ds tbuf_old; |
857 | 871 | ||
858 | if(copy_from_user(&tbuf_old, buf, sizeof(tbuf_old))) | 872 | if(copy_from_user(&tbuf_old, buf, sizeof(tbuf_old))) |
859 | return -EFAULT; | 873 | return -EFAULT; |
860 | 874 | ||
861 | out->uid = tbuf_old.sem_perm.uid; | 875 | out->uid = tbuf_old.sem_perm.uid; |
862 | out->gid = tbuf_old.sem_perm.gid; | 876 | out->gid = tbuf_old.sem_perm.gid; |
863 | out->mode = tbuf_old.sem_perm.mode; | 877 | out->mode = tbuf_old.sem_perm.mode; |
864 | 878 | ||
865 | return 0; | 879 | return 0; |
866 | } | 880 | } |
867 | default: | 881 | default: |
868 | return -EINVAL; | 882 | return -EINVAL; |
869 | } | 883 | } |
870 | } | 884 | } |
871 | 885 | ||
/*
 * semctl_down() serves IPC_RMID and IPC_SET, the commands that modify
 * or remove the semaphore set itself.  The caller holds the
 * sem_ids(ns).mutex (sys_semctl() takes it around this call).
 */
static int semctl_down(struct ipc_namespace *ns, int semid, int semnum,
		int cmd, int version, union semun arg)
{
	struct sem_array *sma;
	int err;
	struct sem_setbuf uninitialized_var(setbuf);
	struct kern_ipc_perm *ipcp;

	/*
	 * Fetch the new owner/mode before taking the sem lock: the copy
	 * from user space may fault and sleep.
	 */
	if(cmd == IPC_SET) {
		if(copy_semid_from_user (&setbuf, arg.buf, version))
			return -EFAULT;
	}
	sma = sem_lock_check(ns, semid);
	if (IS_ERR(sma))
		return PTR_ERR(sma);

	ipcp = &sma->sem_perm;

	err = audit_ipc_obj(ipcp);
	if (err)
		goto out_unlock;

	if (cmd == IPC_SET) {
		err = audit_ipc_set_perm(0, setbuf.uid, setbuf.gid, setbuf.mode);
		if (err)
			goto out_unlock;
	}
	/* Only the creator, the owner, or a privileged task may proceed. */
	if (current->euid != ipcp->cuid &&
	    current->euid != ipcp->uid && !capable(CAP_SYS_ADMIN)) {
		err=-EPERM;
		goto out_unlock;
	}

	err = security_sem_semctl(sma, cmd);
	if (err)
		goto out_unlock;

	switch(cmd){
	case IPC_RMID:
		/*
		 * NOTE(review): no sem_unlock() on this path, unlike the
		 * other cases — freeary() presumably releases the lock.
		 */
		freeary(ns, sma);
		err = 0;
		break;
	case IPC_SET:
		ipcp->uid = setbuf.uid;
		ipcp->gid = setbuf.gid;
		/* Only the access bits are taken from the new mode. */
		ipcp->mode = (ipcp->mode & ~S_IRWXUGO)
				| (setbuf.mode & S_IRWXUGO);
		sma->sem_ctime = get_seconds();
		sem_unlock(sma);
		err = 0;
		break;
	default:
		sem_unlock(sma);
		err = -EINVAL;
		break;
	}
	return err;

out_unlock:
	sem_unlock(sma);
	return err;
}
934 | 948 | ||
935 | asmlinkage long sys_semctl (int semid, int semnum, int cmd, union semun arg) | 949 | asmlinkage long sys_semctl (int semid, int semnum, int cmd, union semun arg) |
936 | { | 950 | { |
937 | int err = -EINVAL; | 951 | int err = -EINVAL; |
938 | int version; | 952 | int version; |
939 | struct ipc_namespace *ns; | 953 | struct ipc_namespace *ns; |
940 | 954 | ||
941 | if (semid < 0) | 955 | if (semid < 0) |
942 | return -EINVAL; | 956 | return -EINVAL; |
943 | 957 | ||
944 | version = ipc_parse_version(&cmd); | 958 | version = ipc_parse_version(&cmd); |
945 | ns = current->nsproxy->ipc_ns; | 959 | ns = current->nsproxy->ipc_ns; |
946 | 960 | ||
947 | switch(cmd) { | 961 | switch(cmd) { |
948 | case IPC_INFO: | 962 | case IPC_INFO: |
949 | case SEM_INFO: | 963 | case SEM_INFO: |
950 | case SEM_STAT: | 964 | case SEM_STAT: |
951 | err = semctl_nolock(ns,semid,semnum,cmd,version,arg); | 965 | err = semctl_nolock(ns,semid,semnum,cmd,version,arg); |
952 | return err; | 966 | return err; |
953 | case GETALL: | 967 | case GETALL: |
954 | case GETVAL: | 968 | case GETVAL: |
955 | case GETPID: | 969 | case GETPID: |
956 | case GETNCNT: | 970 | case GETNCNT: |
957 | case GETZCNT: | 971 | case GETZCNT: |
958 | case IPC_STAT: | 972 | case IPC_STAT: |
959 | case SETVAL: | 973 | case SETVAL: |
960 | case SETALL: | 974 | case SETALL: |
961 | err = semctl_main(ns,semid,semnum,cmd,version,arg); | 975 | err = semctl_main(ns,semid,semnum,cmd,version,arg); |
962 | return err; | 976 | return err; |
963 | case IPC_RMID: | 977 | case IPC_RMID: |
964 | case IPC_SET: | 978 | case IPC_SET: |
965 | mutex_lock(&sem_ids(ns).mutex); | 979 | mutex_lock(&sem_ids(ns).mutex); |
966 | err = semctl_down(ns,semid,semnum,cmd,version,arg); | 980 | err = semctl_down(ns,semid,semnum,cmd,version,arg); |
967 | mutex_unlock(&sem_ids(ns).mutex); | 981 | mutex_unlock(&sem_ids(ns).mutex); |
968 | return err; | 982 | return err; |
969 | default: | 983 | default: |
970 | return -EINVAL; | 984 | return -EINVAL; |
971 | } | 985 | } |
972 | } | 986 | } |
973 | 987 | ||
974 | static inline void lock_semundo(void) | 988 | static inline void lock_semundo(void) |
975 | { | 989 | { |
976 | struct sem_undo_list *undo_list; | 990 | struct sem_undo_list *undo_list; |
977 | 991 | ||
978 | undo_list = current->sysvsem.undo_list; | 992 | undo_list = current->sysvsem.undo_list; |
979 | if (undo_list) | 993 | if (undo_list) |
980 | spin_lock(&undo_list->lock); | 994 | spin_lock(&undo_list->lock); |
981 | } | 995 | } |
982 | 996 | ||
/* This code has an interaction with copy_semundo().
 * Consider; two tasks are sharing the undo_list. task1
 * acquires the undo_list lock in lock_semundo().  If task2 now
 * exits before task1 releases the lock (by calling
 * unlock_semundo()), then task1 will never call spin_unlock().
 * This leaves the sem_undo_list in a locked state.  If task1 now creates
 * task3 and once again shares the sem_undo_list, the sem_undo_list will
 * still be locked, and future SEM_UNDO operations will deadlock.  This
 * case is dealt with in copy_semundo() by having it reinitialize the
 * spin lock when the refcnt goes from 1 to 2.
 */
static inline void unlock_semundo(void)
{
	struct sem_undo_list *undo_list;

	undo_list = current->sysvsem.undo_list;
	if (undo_list)
		spin_unlock(&undo_list->lock);
}
1002 | 1016 | ||
1003 | 1017 | ||
1004 | /* If the task doesn't already have a undo_list, then allocate one | 1018 | /* If the task doesn't already have a undo_list, then allocate one |
1005 | * here. We guarantee there is only one thread using this undo list, | 1019 | * here. We guarantee there is only one thread using this undo list, |
1006 | * and current is THE ONE | 1020 | * and current is THE ONE |
1007 | * | 1021 | * |
1008 | * If this allocation and assignment succeeds, but later | 1022 | * If this allocation and assignment succeeds, but later |
1009 | * portions of this code fail, there is no need to free the sem_undo_list. | 1023 | * portions of this code fail, there is no need to free the sem_undo_list. |
1010 | * Just let it stay associated with the task, and it'll be freed later | 1024 | * Just let it stay associated with the task, and it'll be freed later |
1011 | * at exit time. | 1025 | * at exit time. |
1012 | * | 1026 | * |
1013 | * This can block, so callers must hold no locks. | 1027 | * This can block, so callers must hold no locks. |
1014 | */ | 1028 | */ |
1015 | static inline int get_undo_list(struct sem_undo_list **undo_listp) | 1029 | static inline int get_undo_list(struct sem_undo_list **undo_listp) |
1016 | { | 1030 | { |
1017 | struct sem_undo_list *undo_list; | 1031 | struct sem_undo_list *undo_list; |
1018 | 1032 | ||
1019 | undo_list = current->sysvsem.undo_list; | 1033 | undo_list = current->sysvsem.undo_list; |
1020 | if (!undo_list) { | 1034 | if (!undo_list) { |
1021 | undo_list = kzalloc(sizeof(*undo_list), GFP_KERNEL); | 1035 | undo_list = kzalloc(sizeof(*undo_list), GFP_KERNEL); |
1022 | if (undo_list == NULL) | 1036 | if (undo_list == NULL) |
1023 | return -ENOMEM; | 1037 | return -ENOMEM; |
1024 | spin_lock_init(&undo_list->lock); | 1038 | spin_lock_init(&undo_list->lock); |
1025 | atomic_set(&undo_list->refcnt, 1); | 1039 | atomic_set(&undo_list->refcnt, 1); |
1026 | current->sysvsem.undo_list = undo_list; | 1040 | current->sysvsem.undo_list = undo_list; |
1027 | } | 1041 | } |
1028 | *undo_listp = undo_list; | 1042 | *undo_listp = undo_list; |
1029 | return 0; | 1043 | return 0; |
1030 | } | 1044 | } |
1031 | 1045 | ||
1032 | static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid) | 1046 | static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid) |
1033 | { | 1047 | { |
1034 | struct sem_undo **last, *un; | 1048 | struct sem_undo **last, *un; |
1035 | 1049 | ||
1036 | last = &ulp->proc_list; | 1050 | last = &ulp->proc_list; |
1037 | un = *last; | 1051 | un = *last; |
1038 | while(un != NULL) { | 1052 | while(un != NULL) { |
1039 | if(un->semid==semid) | 1053 | if(un->semid==semid) |
1040 | break; | 1054 | break; |
1041 | if(un->semid==-1) { | 1055 | if(un->semid==-1) { |
1042 | *last=un->proc_next; | 1056 | *last=un->proc_next; |
1043 | kfree(un); | 1057 | kfree(un); |
1044 | } else { | 1058 | } else { |
1045 | last=&un->proc_next; | 1059 | last=&un->proc_next; |
1046 | } | 1060 | } |
1047 | un=*last; | 1061 | un=*last; |
1048 | } | 1062 | } |
1049 | return un; | 1063 | return un; |
1050 | } | 1064 | } |
1051 | 1065 | ||
/*
 * find_undo - look up (or create) the undo structure for 'semid' in the
 * calling task's undo list.
 *
 * Returns the sem_undo on success or an ERR_PTR on failure.  The result
 * is returned unlocked; callers recheck un->semid under the sem lock,
 * since the array may be removed (and its id reused) in the meantime.
 */
static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid)
{
	struct sem_array *sma;
	struct sem_undo_list *ulp;
	struct sem_undo *un, *new;
	int nsems;
	int error;

	error = get_undo_list(&ulp);
	if (error)
		return ERR_PTR(error);

	lock_semundo();
	un = lookup_undo(ulp, semid);
	unlock_semundo();
	if (likely(un!=NULL))
		goto out;

	/* no undo structure around - allocate one. */
	sma = sem_lock_check(ns, semid);
	if (IS_ERR(sma))
		return ERR_PTR(PTR_ERR(sma));

	nsems = sma->sem_nsems;
	/*
	 * Pin the array with a reference so it survives while we drop
	 * the lock for the blocking allocation below.
	 */
	ipc_rcu_getref(sma);
	sem_unlock(sma);

	/* One allocation covers the struct plus the semadj array. */
	new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
	if (!new) {
		ipc_lock_by_ptr(&sma->sem_perm);
		ipc_rcu_putref(sma);
		sem_unlock(sma);
		return ERR_PTR(-ENOMEM);
	}
	/* The per-semaphore adjustments live directly behind the struct. */
	new->semadj = (short *) &new[1];
	new->semid = semid;

	lock_semundo();
	/* Someone may have created the undo entry while we slept. */
	un = lookup_undo(ulp, semid);
	if (un) {
		unlock_semundo();
		kfree(new);
		ipc_lock_by_ptr(&sma->sem_perm);
		ipc_rcu_putref(sma);
		sem_unlock(sma);
		goto out;
	}
	ipc_lock_by_ptr(&sma->sem_perm);
	ipc_rcu_putref(sma);
	/* The array may have been removed while we were unlocked. */
	if (sma->sem_perm.deleted) {
		sem_unlock(sma);
		unlock_semundo();
		kfree(new);
		un = ERR_PTR(-EIDRM);
		goto out;
	}
	/*
	 * Success: link the new entry into both the per-process list and
	 * the array's own undo list.
	 */
	new->proc_next = ulp->proc_list;
	ulp->proc_list = new;
	new->id_next = sma->undo;
	sma->undo = new;
	sem_unlock(sma);
	un = new;
	unlock_semundo();
out:
	return un;
}
1118 | 1132 | ||
1119 | asmlinkage long sys_semtimedop(int semid, struct sembuf __user *tsops, | 1133 | asmlinkage long sys_semtimedop(int semid, struct sembuf __user *tsops, |
1120 | unsigned nsops, const struct timespec __user *timeout) | 1134 | unsigned nsops, const struct timespec __user *timeout) |
1121 | { | 1135 | { |
1122 | int error = -EINVAL; | 1136 | int error = -EINVAL; |
1123 | struct sem_array *sma; | 1137 | struct sem_array *sma; |
1124 | struct sembuf fast_sops[SEMOPM_FAST]; | 1138 | struct sembuf fast_sops[SEMOPM_FAST]; |
1125 | struct sembuf* sops = fast_sops, *sop; | 1139 | struct sembuf* sops = fast_sops, *sop; |
1126 | struct sem_undo *un; | 1140 | struct sem_undo *un; |
1127 | int undos = 0, alter = 0, max; | 1141 | int undos = 0, alter = 0, max; |
1128 | struct sem_queue queue; | 1142 | struct sem_queue queue; |
1129 | unsigned long jiffies_left = 0; | 1143 | unsigned long jiffies_left = 0; |
1130 | struct ipc_namespace *ns; | 1144 | struct ipc_namespace *ns; |
1131 | 1145 | ||
1132 | ns = current->nsproxy->ipc_ns; | 1146 | ns = current->nsproxy->ipc_ns; |
1133 | 1147 | ||
1134 | if (nsops < 1 || semid < 0) | 1148 | if (nsops < 1 || semid < 0) |
1135 | return -EINVAL; | 1149 | return -EINVAL; |
1136 | if (nsops > ns->sc_semopm) | 1150 | if (nsops > ns->sc_semopm) |
1137 | return -E2BIG; | 1151 | return -E2BIG; |
1138 | if(nsops > SEMOPM_FAST) { | 1152 | if(nsops > SEMOPM_FAST) { |
1139 | sops = kmalloc(sizeof(*sops)*nsops,GFP_KERNEL); | 1153 | sops = kmalloc(sizeof(*sops)*nsops,GFP_KERNEL); |
1140 | if(sops==NULL) | 1154 | if(sops==NULL) |
1141 | return -ENOMEM; | 1155 | return -ENOMEM; |
1142 | } | 1156 | } |
1143 | if (copy_from_user (sops, tsops, nsops * sizeof(*tsops))) { | 1157 | if (copy_from_user (sops, tsops, nsops * sizeof(*tsops))) { |
1144 | error=-EFAULT; | 1158 | error=-EFAULT; |
1145 | goto out_free; | 1159 | goto out_free; |
1146 | } | 1160 | } |
1147 | if (timeout) { | 1161 | if (timeout) { |
1148 | struct timespec _timeout; | 1162 | struct timespec _timeout; |
1149 | if (copy_from_user(&_timeout, timeout, sizeof(*timeout))) { | 1163 | if (copy_from_user(&_timeout, timeout, sizeof(*timeout))) { |
1150 | error = -EFAULT; | 1164 | error = -EFAULT; |
1151 | goto out_free; | 1165 | goto out_free; |
1152 | } | 1166 | } |
1153 | if (_timeout.tv_sec < 0 || _timeout.tv_nsec < 0 || | 1167 | if (_timeout.tv_sec < 0 || _timeout.tv_nsec < 0 || |
1154 | _timeout.tv_nsec >= 1000000000L) { | 1168 | _timeout.tv_nsec >= 1000000000L) { |
1155 | error = -EINVAL; | 1169 | error = -EINVAL; |
1156 | goto out_free; | 1170 | goto out_free; |
1157 | } | 1171 | } |
1158 | jiffies_left = timespec_to_jiffies(&_timeout); | 1172 | jiffies_left = timespec_to_jiffies(&_timeout); |
1159 | } | 1173 | } |
1160 | max = 0; | 1174 | max = 0; |
1161 | for (sop = sops; sop < sops + nsops; sop++) { | 1175 | for (sop = sops; sop < sops + nsops; sop++) { |
1162 | if (sop->sem_num >= max) | 1176 | if (sop->sem_num >= max) |
1163 | max = sop->sem_num; | 1177 | max = sop->sem_num; |
1164 | if (sop->sem_flg & SEM_UNDO) | 1178 | if (sop->sem_flg & SEM_UNDO) |
1165 | undos = 1; | 1179 | undos = 1; |
1166 | if (sop->sem_op != 0) | 1180 | if (sop->sem_op != 0) |
1167 | alter = 1; | 1181 | alter = 1; |
1168 | } | 1182 | } |
1169 | 1183 | ||
1170 | retry_undos: | 1184 | retry_undos: |
1171 | if (undos) { | 1185 | if (undos) { |
1172 | un = find_undo(ns, semid); | 1186 | un = find_undo(ns, semid); |
1173 | if (IS_ERR(un)) { | 1187 | if (IS_ERR(un)) { |
1174 | error = PTR_ERR(un); | 1188 | error = PTR_ERR(un); |
1175 | goto out_free; | 1189 | goto out_free; |
1176 | } | 1190 | } |
1177 | } else | 1191 | } else |
1178 | un = NULL; | 1192 | un = NULL; |
1179 | 1193 | ||
1180 | sma = sem_lock_check(ns, semid); | 1194 | sma = sem_lock_check(ns, semid); |
1181 | if (IS_ERR(sma)) { | 1195 | if (IS_ERR(sma)) { |
1182 | error = PTR_ERR(sma); | 1196 | error = PTR_ERR(sma); |
1183 | goto out_free; | 1197 | goto out_free; |
1184 | } | 1198 | } |
1185 | 1199 | ||
1186 | /* | 1200 | /* |
1187 | * semid identifiers are not unique - find_undo may have | 1201 | * semid identifiers are not unique - find_undo may have |
1188 | * allocated an undo structure, it was invalidated by an RMID | 1202 | * allocated an undo structure, it was invalidated by an RMID |
1189 | * and now a new array with received the same id. Check and retry. | 1203 | * and now a new array with received the same id. Check and retry. |
1190 | */ | 1204 | */ |
1191 | if (un && un->semid == -1) { | 1205 | if (un && un->semid == -1) { |
1192 | sem_unlock(sma); | 1206 | sem_unlock(sma); |
1193 | goto retry_undos; | 1207 | goto retry_undos; |
1194 | } | 1208 | } |
1195 | error = -EFBIG; | 1209 | error = -EFBIG; |
1196 | if (max >= sma->sem_nsems) | 1210 | if (max >= sma->sem_nsems) |
1197 | goto out_unlock_free; | 1211 | goto out_unlock_free; |
1198 | 1212 | ||
1199 | error = -EACCES; | 1213 | error = -EACCES; |
1200 | if (ipcperms(&sma->sem_perm, alter ? S_IWUGO : S_IRUGO)) | 1214 | if (ipcperms(&sma->sem_perm, alter ? S_IWUGO : S_IRUGO)) |
1201 | goto out_unlock_free; | 1215 | goto out_unlock_free; |
1202 | 1216 | ||
1203 | error = security_sem_semop(sma, sops, nsops, alter); | 1217 | error = security_sem_semop(sma, sops, nsops, alter); |
1204 | if (error) | 1218 | if (error) |
1205 | goto out_unlock_free; | 1219 | goto out_unlock_free; |
1206 | 1220 | ||
1207 | error = try_atomic_semop (sma, sops, nsops, un, task_tgid_vnr(current)); | 1221 | error = try_atomic_semop (sma, sops, nsops, un, task_tgid_vnr(current)); |
1208 | if (error <= 0) { | 1222 | if (error <= 0) { |
1209 | if (alter && error == 0) | 1223 | if (alter && error == 0) |
1210 | update_queue (sma); | 1224 | update_queue (sma); |
1211 | goto out_unlock_free; | 1225 | goto out_unlock_free; |
1212 | } | 1226 | } |
1213 | 1227 | ||
1214 | /* We need to sleep on this operation, so we put the current | 1228 | /* We need to sleep on this operation, so we put the current |
1215 | * task into the pending queue and go to sleep. | 1229 | * task into the pending queue and go to sleep. |
1216 | */ | 1230 | */ |
1217 | 1231 | ||
1218 | queue.sma = sma; | 1232 | queue.sma = sma; |
1219 | queue.sops = sops; | 1233 | queue.sops = sops; |
1220 | queue.nsops = nsops; | 1234 | queue.nsops = nsops; |
1221 | queue.undo = un; | 1235 | queue.undo = un; |
1222 | queue.pid = task_tgid_vnr(current); | 1236 | queue.pid = task_tgid_vnr(current); |
1223 | queue.id = semid; | 1237 | queue.id = semid; |
1224 | queue.alter = alter; | 1238 | queue.alter = alter; |
1225 | if (alter) | 1239 | if (alter) |
1226 | append_to_queue(sma ,&queue); | 1240 | append_to_queue(sma ,&queue); |
1227 | else | 1241 | else |
1228 | prepend_to_queue(sma ,&queue); | 1242 | prepend_to_queue(sma ,&queue); |
1229 | 1243 | ||
1230 | queue.status = -EINTR; | 1244 | queue.status = -EINTR; |
1231 | queue.sleeper = current; | 1245 | queue.sleeper = current; |
1232 | current->state = TASK_INTERRUPTIBLE; | 1246 | current->state = TASK_INTERRUPTIBLE; |
1233 | sem_unlock(sma); | 1247 | sem_unlock(sma); |
1234 | 1248 | ||
1235 | if (timeout) | 1249 | if (timeout) |
1236 | jiffies_left = schedule_timeout(jiffies_left); | 1250 | jiffies_left = schedule_timeout(jiffies_left); |
1237 | else | 1251 | else |
1238 | schedule(); | 1252 | schedule(); |
1239 | 1253 | ||
1240 | error = queue.status; | 1254 | error = queue.status; |
1241 | while(unlikely(error == IN_WAKEUP)) { | 1255 | while(unlikely(error == IN_WAKEUP)) { |
1242 | cpu_relax(); | 1256 | cpu_relax(); |
1243 | error = queue.status; | 1257 | error = queue.status; |
1244 | } | 1258 | } |
1245 | 1259 | ||
1246 | if (error != -EINTR) { | 1260 | if (error != -EINTR) { |
1247 | /* fast path: update_queue already obtained all requested | 1261 | /* fast path: update_queue already obtained all requested |
1248 | * resources */ | 1262 | * resources */ |
1249 | goto out_free; | 1263 | goto out_free; |
1250 | } | 1264 | } |
1251 | 1265 | ||
1252 | sma = sem_lock(ns, semid); | 1266 | sma = sem_lock(ns, semid); |
1253 | if (IS_ERR(sma)) { | 1267 | if (IS_ERR(sma)) { |
1254 | BUG_ON(queue.prev != NULL); | 1268 | BUG_ON(queue.prev != NULL); |
1255 | error = -EIDRM; | 1269 | error = -EIDRM; |
1256 | goto out_free; | 1270 | goto out_free; |
1257 | } | 1271 | } |
1258 | 1272 | ||
1259 | /* | 1273 | /* |
1260 | * If queue.status != -EINTR we are woken up by another process | 1274 | * If queue.status != -EINTR we are woken up by another process |
1261 | */ | 1275 | */ |
1262 | error = queue.status; | 1276 | error = queue.status; |
1263 | if (error != -EINTR) { | 1277 | if (error != -EINTR) { |
1264 | goto out_unlock_free; | 1278 | goto out_unlock_free; |
1265 | } | 1279 | } |
1266 | 1280 | ||
1267 | /* | 1281 | /* |
1268 | * If an interrupt occurred we have to clean up the queue | 1282 | * If an interrupt occurred we have to clean up the queue |
1269 | */ | 1283 | */ |
1270 | if (timeout && jiffies_left == 0) | 1284 | if (timeout && jiffies_left == 0) |
1271 | error = -EAGAIN; | 1285 | error = -EAGAIN; |
1272 | remove_from_queue(sma,&queue); | 1286 | remove_from_queue(sma,&queue); |
1273 | goto out_unlock_free; | 1287 | goto out_unlock_free; |
1274 | 1288 | ||
1275 | out_unlock_free: | 1289 | out_unlock_free: |
1276 | sem_unlock(sma); | 1290 | sem_unlock(sma); |
1277 | out_free: | 1291 | out_free: |
1278 | if(sops != fast_sops) | 1292 | if(sops != fast_sops) |
1279 | kfree(sops); | 1293 | kfree(sops); |
1280 | return error; | 1294 | return error; |
1281 | } | 1295 | } |
1282 | 1296 | ||
/*
 * sys_semop - semop(2) entry point.
 *
 * semop() is exactly semtimedop() with no timeout, so this is a thin
 * wrapper: all real work (copying in the sops, permission checks,
 * atomic execution or sleeping) is done by sys_semtimedop().
 */
asmlinkage long sys_semop (int semid, struct sembuf __user *tsops, unsigned nsops)
{
	return sys_semtimedop(semid, tsops, nsops, NULL);
}
1287 | 1301 | ||
/* If CLONE_SYSVSEM is set, establish sharing of SEM_UNDO state between
 * parent and child tasks.
 *
 * See the notes above unlock_semundo() regarding the spin_lock_init()
 * in this code. Initialize the undo_list->lock here instead of get_undo_list()
 * because of the reasoning in the comment above unlock_semundo.
 *
 * NOTE(review): unlock_semundo() is no longer visible in this file; the
 * two paragraphs above look stale -- verify against the current locking
 * code before relying on them.
 */

int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
{
	struct sem_undo_list *undo_list;
	int error;

	if (clone_flags & CLONE_SYSVSEM) {
		/*
		 * Share the parent's undo list: look it up (or create it)
		 * and take an extra reference so it survives until the
		 * last sharing task calls exit_sem().
		 */
		error = get_undo_list(&undo_list);
		if (error)
			return error;
		atomic_inc(&undo_list->refcnt);
		tsk->sysvsem.undo_list = undo_list;
	} else
		/* No sharing requested: the child starts with no undo list. */
		tsk->sysvsem.undo_list = NULL;

	return 0;
}
1312 | 1326 | ||
/*
 * add semadj values to semaphores, free undo structures.
 * undo structures are not freed when semaphore arrays are destroyed
 * so some of them may be out of date.
 * IMPLEMENTATION NOTE: There is some confusion over whether the
 * set of adjustments that needs to be done should be done in an atomic
 * manner or not. That is, if we are attempting to decrement the semval
 * should we queue up and wait until we can do so legally?
 * The original implementation attempted to do this (queue and wait).
 * The current implementation does not do so. The POSIX standard
 * and SVID should be consulted to determine what behavior is mandated.
 */
void exit_sem(struct task_struct *tsk)
{
	struct sem_undo_list *undo_list;
	struct sem_undo *u, **up;
	struct ipc_namespace *ns;

	undo_list = tsk->sysvsem.undo_list;
	if (!undo_list)
		return;

	/* The list is shared with CLONE_SYSVSEM siblings; only the last
	 * task to drop its reference performs the adjustments below. */
	if (!atomic_dec_and_test(&undo_list->refcnt))
		return;

	ns = tsk->nsproxy->ipc_ns;
	/* There's no need to hold the semundo list lock, as current
	 * is the last task exiting for this undo list.
	 */
	for (up = &undo_list->proc_list; (u = *up); *up = u->proc_next, kfree(u)) {
		struct sem_array *sma;
		int nsems, i;
		struct sem_undo *un, **unp;
		int semid;

		semid = u->semid;

		/* semid == -1 marks an undo entry whose array was removed */
		if(semid == -1)
			continue;
		sma = sem_lock(ns, semid);
		if (IS_ERR(sma))
			continue;

		/* re-check under the lock: the array may have been removed
		 * (and the entry invalidated) after the test above */
		if (u->semid == -1)
			goto next_entry;

		BUG_ON(sem_checkid(ns,sma,u->semid));

		/* remove u from the sma->undo list */
		for (unp = &sma->undo; (un = *unp); unp = &un->id_next) {
			if (u == un)
				goto found;
		}
		printk ("exit_sem undo list error id=%d\n", u->semid);
		goto next_entry;
found:
		*unp = un->id_next;
		/* perform adjustments registered in u */
		nsems = sma->sem_nsems;
		for (i = 0; i < nsems; i++) {
			struct sem * semaphore = &sma->sem_base[i];
			if (u->semadj[i]) {
				semaphore->semval += u->semadj[i];
				/*
				 * Range checks of the new semaphore value,
				 * not defined by sus:
				 * - Some unices ignore the undo entirely
				 *   (e.g. HP UX 11i 11.22, Tru64 V5.1)
				 * - some cap the value (e.g. FreeBSD caps
				 *   at 0, but doesn't enforce SEMVMX)
				 *
				 * Linux caps the semaphore value, both at 0
				 * and at SEMVMX.
				 *
				 * 	Manfred <manfred@colorfullife.com>
				 */
				if (semaphore->semval < 0)
					semaphore->semval = 0;
				if (semaphore->semval > SEMVMX)
					semaphore->semval = SEMVMX;
				semaphore->sempid = task_tgid_vnr(current);
			}
		}
		sma->sem_otime = get_seconds();
		/* maybe some queued-up processes were waiting for this */
		update_queue(sma);
next_entry:
		sem_unlock(sma);
	}
	kfree(undo_list);
}
1404 | 1418 | ||
#ifdef CONFIG_PROC_FS
/*
 * Emit one /proc/sysvipc/sem line for the semaphore array @it.
 * Column order must match the header string registered via
 * ipc_init_proc_interface() elsewhere in this file.
 */
static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
{
	struct sem_array *sma = it;

	return seq_printf(s,
			  "%10d %10d  %4o %10lu %5u %5u %5u %5u %10lu %10lu\n",
			  sma->sem_perm.key,
			  sma->sem_perm.id,
			  sma->sem_perm.mode,
			  sma->sem_nsems,
			  sma->sem_perm.uid,
			  sma->sem_perm.gid,
			  sma->sem_perm.cuid,
			  sma->sem_perm.cgid,
			  sma->sem_otime,
			  sma->sem_ctime);
}
#endif
1424 | 1438 |
ipc/shm.c
1 | /* | 1 | /* |
2 | * linux/ipc/shm.c | 2 | * linux/ipc/shm.c |
3 | * Copyright (C) 1992, 1993 Krishna Balasubramanian | 3 | * Copyright (C) 1992, 1993 Krishna Balasubramanian |
4 | * Many improvements/fixes by Bruno Haible. | 4 | * Many improvements/fixes by Bruno Haible. |
5 | * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994. | 5 | * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994. |
6 | * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli. | 6 | * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli. |
7 | * | 7 | * |
8 | * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com> | 8 | * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com> |
9 | * BIGMEM support, Andrea Arcangeli <andrea@suse.de> | 9 | * BIGMEM support, Andrea Arcangeli <andrea@suse.de> |
10 | * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr> | 10 | * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr> |
11 | * HIGHMEM support, Ingo Molnar <mingo@redhat.com> | 11 | * HIGHMEM support, Ingo Molnar <mingo@redhat.com> |
12 | * Make shmmax, shmall, shmmni sysctl'able, Christoph Rohland <cr@sap.com> | 12 | * Make shmmax, shmall, shmmni sysctl'able, Christoph Rohland <cr@sap.com> |
13 | * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com> | 13 | * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com> |
14 | * Move the mm functionality over to mm/shmem.c, Christoph Rohland <cr@sap.com> | 14 | * Move the mm functionality over to mm/shmem.c, Christoph Rohland <cr@sap.com> |
15 | * | 15 | * |
16 | * support for audit of ipc object properties and permission changes | 16 | * support for audit of ipc object properties and permission changes |
17 | * Dustin Kirkland <dustin.kirkland@us.ibm.com> | 17 | * Dustin Kirkland <dustin.kirkland@us.ibm.com> |
18 | * | 18 | * |
19 | * namespaces support | 19 | * namespaces support |
20 | * OpenVZ, SWsoft Inc. | 20 | * OpenVZ, SWsoft Inc. |
21 | * Pavel Emelianov <xemul@openvz.org> | 21 | * Pavel Emelianov <xemul@openvz.org> |
22 | */ | 22 | */ |
23 | 23 | ||
24 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
25 | #include <linux/mm.h> | 25 | #include <linux/mm.h> |
26 | #include <linux/hugetlb.h> | 26 | #include <linux/hugetlb.h> |
27 | #include <linux/shm.h> | 27 | #include <linux/shm.h> |
28 | #include <linux/init.h> | 28 | #include <linux/init.h> |
29 | #include <linux/file.h> | 29 | #include <linux/file.h> |
30 | #include <linux/mman.h> | 30 | #include <linux/mman.h> |
31 | #include <linux/shmem_fs.h> | 31 | #include <linux/shmem_fs.h> |
32 | #include <linux/security.h> | 32 | #include <linux/security.h> |
33 | #include <linux/syscalls.h> | 33 | #include <linux/syscalls.h> |
34 | #include <linux/audit.h> | 34 | #include <linux/audit.h> |
35 | #include <linux/capability.h> | 35 | #include <linux/capability.h> |
36 | #include <linux/ptrace.h> | 36 | #include <linux/ptrace.h> |
37 | #include <linux/seq_file.h> | 37 | #include <linux/seq_file.h> |
38 | #include <linux/mutex.h> | 38 | #include <linux/mutex.h> |
39 | #include <linux/nsproxy.h> | 39 | #include <linux/nsproxy.h> |
40 | #include <linux/mount.h> | 40 | #include <linux/mount.h> |
41 | 41 | ||
42 | #include <asm/uaccess.h> | 42 | #include <asm/uaccess.h> |
43 | 43 | ||
44 | #include "util.h" | 44 | #include "util.h" |
45 | 45 | ||
/*
 * Per-attachment bookkeeping, one instance for each shmat() mapping.
 * It hangs off the private_data of the file installed in the vma
 * (see the shm_file_data() accessor below).
 */
struct shm_file_data {
	int id;				/* ipc id of the attached segment */
	struct ipc_namespace *ns;	/* namespace the segment lives in */
	struct file *file;		/* backing file of the segment */
	const struct vm_operations_struct *vm_ops; /* wrapped vm_ops of the backing mapping */
};

#define shm_file_data(file) (*((struct shm_file_data **)&(file)->private_data))
54 | 54 | ||
/* module-scope state and forward declarations for ipc/shm.c */
static const struct file_operations shm_file_operations;
static struct vm_operations_struct shm_vm_ops;

/* shm id table of the initial ipc namespace */
static struct ipc_ids init_shm_ids;

/* shorthand for the shm id table of namespace @ns */
#define shm_ids(ns)	(*((ns)->ids[IPC_SHM_IDS]))

#define shm_unlock(shp)			\
	ipc_unlock(&(shp)->shm_perm)
#define shm_buildid(ns, id, seq) \
	ipc_buildid(&shm_ids(ns), id, seq)

static int newseg(struct ipc_namespace *, struct ipc_params *);
static void shm_open(struct vm_area_struct *vma);
static void shm_close(struct vm_area_struct *vma);
static void shm_destroy (struct ipc_namespace *ns, struct shmid_kernel *shp);
#ifdef CONFIG_PROC_FS
static int sysvipc_shm_proc_show(struct seq_file *s, void *it);
#endif
74 | 74 | ||
75 | static void __shm_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids) | 75 | static void __shm_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids) |
76 | { | 76 | { |
77 | ns->ids[IPC_SHM_IDS] = ids; | 77 | ns->ids[IPC_SHM_IDS] = ids; |
78 | ns->shm_ctlmax = SHMMAX; | 78 | ns->shm_ctlmax = SHMMAX; |
79 | ns->shm_ctlall = SHMALL; | 79 | ns->shm_ctlall = SHMALL; |
80 | ns->shm_ctlmni = SHMMNI; | 80 | ns->shm_ctlmni = SHMMNI; |
81 | ns->shm_tot = 0; | 81 | ns->shm_tot = 0; |
82 | ipc_init_ids(ids); | 82 | ipc_init_ids(ids); |
83 | } | 83 | } |
84 | 84 | ||
/*
 * Called with shm_ids.mutex and the shp structure locked.
 * Only shm_ids.mutex remains locked on exit.
 */
static void do_shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *shp)
{
	if (shp->shm_nattch){
		/* Still attached somewhere: defer destruction until the
		 * last detach (see shm_close), and mark it SHM_DEST. */
		shp->shm_perm.mode |= SHM_DEST;
		/* Do not find it any more */
		shp->shm_perm.key = IPC_PRIVATE;
		shm_unlock(shp);
	} else
		/* shm_destroy() drops the shp lock and frees the segment */
		shm_destroy(ns, shp);
}
95 | 99 | ||
96 | int shm_init_ns(struct ipc_namespace *ns) | 100 | int shm_init_ns(struct ipc_namespace *ns) |
97 | { | 101 | { |
98 | struct ipc_ids *ids; | 102 | struct ipc_ids *ids; |
99 | 103 | ||
100 | ids = kmalloc(sizeof(struct ipc_ids), GFP_KERNEL); | 104 | ids = kmalloc(sizeof(struct ipc_ids), GFP_KERNEL); |
101 | if (ids == NULL) | 105 | if (ids == NULL) |
102 | return -ENOMEM; | 106 | return -ENOMEM; |
103 | 107 | ||
104 | __shm_init_ns(ns, ids); | 108 | __shm_init_ns(ns, ids); |
105 | return 0; | 109 | return 0; |
106 | } | 110 | } |
107 | 111 | ||
/*
 * shm_exit_ns - tear down the shm state of a dying ipc namespace.
 * Removes (or marks SHM_DEST) every remaining segment and frees the
 * id table.
 */
void shm_exit_ns(struct ipc_namespace *ns)
{
	struct shmid_kernel *shp;
	int next_id;
	int total, in_use;

	mutex_lock(&shm_ids(ns).mutex);

	in_use = shm_ids(ns).in_use;

	/* The idr may be sparse: scan ids upward until we have visited
	 * as many live entries as were in use when we took the mutex. */
	for (total = 0, next_id = 0; total < in_use; next_id++) {
		shp = idr_find(&shm_ids(ns).ipcs_idr, next_id);
		if (shp == NULL)
			continue;
		ipc_lock_by_ptr(&shp->shm_perm);
		/* drops the per-segment lock; may free shp */
		do_shm_rmid(ns, shp);
		total++;
	}
	mutex_unlock(&shm_ids(ns).mutex);

	kfree(ns->ids[IPC_SHM_IDS]);
	ns->ids[IPC_SHM_IDS] = NULL;
}
131 | 135 | ||
/*
 * shm_init - boot-time initialization of SysV shared memory.
 * Sets up the initial namespace's shm state and registers the
 * /proc/sysvipc/shm interface.
 */
void __init shm_init (void)
{
	__shm_init_ns(&init_ipc_ns, &init_shm_ids);
	ipc_init_proc_interface("sysvipc/shm",
				"       key      shmid perms                  size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime\n",
				IPC_SHM_IDS, sysvipc_shm_proc_show);
}
139 | 143 | ||
140 | static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id) | 144 | static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id) |
141 | { | 145 | { |
142 | struct kern_ipc_perm *ipcp = ipc_lock(&shm_ids(ns), id); | 146 | struct kern_ipc_perm *ipcp = ipc_lock(&shm_ids(ns), id); |
143 | 147 | ||
144 | return container_of(ipcp, struct shmid_kernel, shm_perm); | 148 | return container_of(ipcp, struct shmid_kernel, shm_perm); |
145 | } | 149 | } |
146 | 150 | ||
147 | static inline struct shmid_kernel *shm_lock_check(struct ipc_namespace *ns, | 151 | static inline struct shmid_kernel *shm_lock_check(struct ipc_namespace *ns, |
148 | int id) | 152 | int id) |
149 | { | 153 | { |
150 | struct kern_ipc_perm *ipcp = ipc_lock_check(&shm_ids(ns), id); | 154 | struct kern_ipc_perm *ipcp = ipc_lock_check(&shm_ids(ns), id); |
151 | 155 | ||
152 | return container_of(ipcp, struct shmid_kernel, shm_perm); | 156 | return container_of(ipcp, struct shmid_kernel, shm_perm); |
153 | } | 157 | } |
154 | 158 | ||
/* Remove segment @s from the namespace's id table (see shm_destroy for
 * the locking the caller is expected to hold). */
static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
{
	ipc_rmid(&shm_ids(ns), &s->shm_perm);
}
159 | 163 | ||
/* Insert @shp into the namespace's id table, bounded by the
 * shm_ctlmni sysctl limit; returns the result of ipc_addid(). */
static inline int shm_addid(struct ipc_namespace *ns, struct shmid_kernel *shp)
{
	return ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
}
164 | 168 | ||
165 | 169 | ||
166 | 170 | ||
/* This is called by fork, once for every shm attach. */
static void shm_open(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct shm_file_data *sfd = shm_file_data(file);
	struct shmid_kernel *shp;

	shp = shm_lock(sfd->ns, sfd->id);
	/* an existing mapping implies the segment is still live, so the
	 * lookup cannot fail here */
	BUG_ON(IS_ERR(shp));
	shp->shm_atim = get_seconds();
	shp->shm_lprid = task_tgid_vnr(current);
	shp->shm_nattch++;
	shm_unlock(shp);
}
181 | 185 | ||
/*
 * shm_destroy - free the struct shmid_kernel
 *
 * @ns: namespace
 * @shp: struct to free
 *
 * It has to be called with shp and shm_ids.mutex locked,
 * but returns with shp unlocked and freed.
 */
static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
{
	/* give the pages back to the namespace-wide accounting */
	ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
	shm_rmid(ns, shp);
	shm_unlock(shp);
	/* undo any SHM_LOCK memory pinning before dropping the file */
	if (!is_file_hugepages(shp->shm_file))
		shmem_lock(shp->shm_file, 0, shp->mlock_user);
	else
		user_shm_unlock(shp->shm_file->f_path.dentry->d_inode->i_size,
						shp->mlock_user);
	fput (shp->shm_file);
	security_shm_free(shp);
	ipc_rcu_putref(shp);
}
204 | 209 | ||
/*
 * remove the attach descriptor vma.
 * free memory for segment if it is marked destroyed.
 * The descriptor has already been removed from the current->mm->mmap list
 * and will later be kfree()d.
 */
static void shm_close(struct vm_area_struct *vma)
{
	struct file * file = vma->vm_file;
	struct shm_file_data *sfd = shm_file_data(file);
	struct shmid_kernel *shp;
	struct ipc_namespace *ns = sfd->ns;

	/* shm_ids.mutex is taken up front because shm_destroy() requires
	 * it to be held (see its header comment) */
	mutex_lock(&shm_ids(ns).mutex);
	/* remove from the list of attaches of the shm segment */
	shp = shm_lock(ns, sfd->id);
	BUG_ON(IS_ERR(shp));
	shp->shm_lprid = task_tgid_vnr(current);
	shp->shm_dtim = get_seconds();
	shp->shm_nattch--;
	/* last detach of a segment already removed with IPC_RMID:
	 * destroy it now (drops the shp lock) */
	if(shp->shm_nattch == 0 &&
	   shp->shm_perm.mode & SHM_DEST)
		shm_destroy(ns, shp);
	else
		shm_unlock(shp);
	mutex_unlock(&shm_ids(ns).mutex);
}
232 | 237 | ||
233 | static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | 238 | static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) |
234 | { | 239 | { |
235 | struct file *file = vma->vm_file; | 240 | struct file *file = vma->vm_file; |
236 | struct shm_file_data *sfd = shm_file_data(file); | 241 | struct shm_file_data *sfd = shm_file_data(file); |
237 | 242 | ||
238 | return sfd->vm_ops->fault(vma, vmf); | 243 | return sfd->vm_ops->fault(vma, vmf); |
239 | } | 244 | } |
240 | 245 | ||
241 | #ifdef CONFIG_NUMA | 246 | #ifdef CONFIG_NUMA |
242 | static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new) | 247 | static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new) |
243 | { | 248 | { |
244 | struct file *file = vma->vm_file; | 249 | struct file *file = vma->vm_file; |
245 | struct shm_file_data *sfd = shm_file_data(file); | 250 | struct shm_file_data *sfd = shm_file_data(file); |
246 | int err = 0; | 251 | int err = 0; |
247 | if (sfd->vm_ops->set_policy) | 252 | if (sfd->vm_ops->set_policy) |
248 | err = sfd->vm_ops->set_policy(vma, new); | 253 | err = sfd->vm_ops->set_policy(vma, new); |
249 | return err; | 254 | return err; |
250 | } | 255 | } |
251 | 256 | ||
252 | static struct mempolicy *shm_get_policy(struct vm_area_struct *vma, | 257 | static struct mempolicy *shm_get_policy(struct vm_area_struct *vma, |
253 | unsigned long addr) | 258 | unsigned long addr) |
254 | { | 259 | { |
255 | struct file *file = vma->vm_file; | 260 | struct file *file = vma->vm_file; |
256 | struct shm_file_data *sfd = shm_file_data(file); | 261 | struct shm_file_data *sfd = shm_file_data(file); |
257 | struct mempolicy *pol = NULL; | 262 | struct mempolicy *pol = NULL; |
258 | 263 | ||
259 | if (sfd->vm_ops->get_policy) | 264 | if (sfd->vm_ops->get_policy) |
260 | pol = sfd->vm_ops->get_policy(vma, addr); | 265 | pol = sfd->vm_ops->get_policy(vma, addr); |
261 | else if (vma->vm_policy) | 266 | else if (vma->vm_policy) |
262 | pol = vma->vm_policy; | 267 | pol = vma->vm_policy; |
263 | else | 268 | else |
264 | pol = current->mempolicy; | 269 | pol = current->mempolicy; |
265 | return pol; | 270 | return pol; |
266 | } | 271 | } |
267 | #endif | 272 | #endif |
268 | 273 | ||
269 | static int shm_mmap(struct file * file, struct vm_area_struct * vma) | 274 | static int shm_mmap(struct file * file, struct vm_area_struct * vma) |
270 | { | 275 | { |
271 | struct shm_file_data *sfd = shm_file_data(file); | 276 | struct shm_file_data *sfd = shm_file_data(file); |
272 | int ret; | 277 | int ret; |
273 | 278 | ||
274 | ret = sfd->file->f_op->mmap(sfd->file, vma); | 279 | ret = sfd->file->f_op->mmap(sfd->file, vma); |
275 | if (ret != 0) | 280 | if (ret != 0) |
276 | return ret; | 281 | return ret; |
277 | sfd->vm_ops = vma->vm_ops; | 282 | sfd->vm_ops = vma->vm_ops; |
278 | #ifdef CONFIG_MMU | 283 | #ifdef CONFIG_MMU |
279 | BUG_ON(!sfd->vm_ops->fault); | 284 | BUG_ON(!sfd->vm_ops->fault); |
280 | #endif | 285 | #endif |
281 | vma->vm_ops = &shm_vm_ops; | 286 | vma->vm_ops = &shm_vm_ops; |
282 | shm_open(vma); | 287 | shm_open(vma); |
283 | 288 | ||
284 | return ret; | 289 | return ret; |
285 | } | 290 | } |
286 | 291 | ||
287 | static int shm_release(struct inode *ino, struct file *file) | 292 | static int shm_release(struct inode *ino, struct file *file) |
288 | { | 293 | { |
289 | struct shm_file_data *sfd = shm_file_data(file); | 294 | struct shm_file_data *sfd = shm_file_data(file); |
290 | 295 | ||
291 | put_ipc_ns(sfd->ns); | 296 | put_ipc_ns(sfd->ns); |
292 | shm_file_data(file) = NULL; | 297 | shm_file_data(file) = NULL; |
293 | kfree(sfd); | 298 | kfree(sfd); |
294 | return 0; | 299 | return 0; |
295 | } | 300 | } |
296 | 301 | ||
297 | static int shm_fsync(struct file *file, struct dentry *dentry, int datasync) | 302 | static int shm_fsync(struct file *file, struct dentry *dentry, int datasync) |
298 | { | 303 | { |
299 | int (*fsync) (struct file *, struct dentry *, int datasync); | 304 | int (*fsync) (struct file *, struct dentry *, int datasync); |
300 | struct shm_file_data *sfd = shm_file_data(file); | 305 | struct shm_file_data *sfd = shm_file_data(file); |
301 | int ret = -EINVAL; | 306 | int ret = -EINVAL; |
302 | 307 | ||
303 | fsync = sfd->file->f_op->fsync; | 308 | fsync = sfd->file->f_op->fsync; |
304 | if (fsync) | 309 | if (fsync) |
305 | ret = fsync(sfd->file, sfd->file->f_path.dentry, datasync); | 310 | ret = fsync(sfd->file, sfd->file->f_path.dentry, datasync); |
306 | return ret; | 311 | return ret; |
307 | } | 312 | } |
308 | 313 | ||
309 | static unsigned long shm_get_unmapped_area(struct file *file, | 314 | static unsigned long shm_get_unmapped_area(struct file *file, |
310 | unsigned long addr, unsigned long len, unsigned long pgoff, | 315 | unsigned long addr, unsigned long len, unsigned long pgoff, |
311 | unsigned long flags) | 316 | unsigned long flags) |
312 | { | 317 | { |
313 | struct shm_file_data *sfd = shm_file_data(file); | 318 | struct shm_file_data *sfd = shm_file_data(file); |
314 | return get_unmapped_area(sfd->file, addr, len, pgoff, flags); | 319 | return get_unmapped_area(sfd->file, addr, len, pgoff, flags); |
315 | } | 320 | } |
316 | 321 | ||
317 | int is_file_shm_hugepages(struct file *file) | 322 | int is_file_shm_hugepages(struct file *file) |
318 | { | 323 | { |
319 | int ret = 0; | 324 | int ret = 0; |
320 | 325 | ||
321 | if (file->f_op == &shm_file_operations) { | 326 | if (file->f_op == &shm_file_operations) { |
322 | struct shm_file_data *sfd; | 327 | struct shm_file_data *sfd; |
323 | sfd = shm_file_data(file); | 328 | sfd = shm_file_data(file); |
324 | ret = is_file_hugepages(sfd->file); | 329 | ret = is_file_hugepages(sfd->file); |
325 | } | 330 | } |
326 | return ret; | 331 | return ret; |
327 | } | 332 | } |
328 | 333 | ||
329 | static const struct file_operations shm_file_operations = { | 334 | static const struct file_operations shm_file_operations = { |
330 | .mmap = shm_mmap, | 335 | .mmap = shm_mmap, |
331 | .fsync = shm_fsync, | 336 | .fsync = shm_fsync, |
332 | .release = shm_release, | 337 | .release = shm_release, |
333 | .get_unmapped_area = shm_get_unmapped_area, | 338 | .get_unmapped_area = shm_get_unmapped_area, |
334 | }; | 339 | }; |
335 | 340 | ||
336 | static struct vm_operations_struct shm_vm_ops = { | 341 | static struct vm_operations_struct shm_vm_ops = { |
337 | .open = shm_open, /* callback for a new vm-area open */ | 342 | .open = shm_open, /* callback for a new vm-area open */ |
338 | .close = shm_close, /* callback for when the vm-area is released */ | 343 | .close = shm_close, /* callback for when the vm-area is released */ |
339 | .fault = shm_fault, | 344 | .fault = shm_fault, |
340 | #if defined(CONFIG_NUMA) | 345 | #if defined(CONFIG_NUMA) |
341 | .set_policy = shm_set_policy, | 346 | .set_policy = shm_set_policy, |
342 | .get_policy = shm_get_policy, | 347 | .get_policy = shm_get_policy, |
343 | #endif | 348 | #endif |
344 | }; | 349 | }; |
345 | 350 | ||
351 | /** | ||
352 | * newseg - Create a new shared memory segment | ||
353 | * @ns: namespace | ||
354 | * @params: ptr to the structure that contains key, size and shmflg | ||
355 | * | ||
356 | * Called with shm_ids.mutex held | ||
357 | */ | ||
358 | |||
346 | static int newseg(struct ipc_namespace *ns, struct ipc_params *params) | 359 | static int newseg(struct ipc_namespace *ns, struct ipc_params *params) |
347 | { | 360 | { |
348 | key_t key = params->key; | 361 | key_t key = params->key; |
349 | int shmflg = params->flg; | 362 | int shmflg = params->flg; |
350 | size_t size = params->u.size; | 363 | size_t size = params->u.size; |
351 | int error; | 364 | int error; |
352 | struct shmid_kernel *shp; | 365 | struct shmid_kernel *shp; |
353 | int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT; | 366 | int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT; |
354 | struct file * file; | 367 | struct file * file; |
355 | char name[13]; | 368 | char name[13]; |
356 | int id; | 369 | int id; |
357 | 370 | ||
358 | if (size < SHMMIN || size > ns->shm_ctlmax) | 371 | if (size < SHMMIN || size > ns->shm_ctlmax) |
359 | return -EINVAL; | 372 | return -EINVAL; |
360 | 373 | ||
361 | if (ns->shm_tot + numpages > ns->shm_ctlall) | 374 | if (ns->shm_tot + numpages > ns->shm_ctlall) |
362 | return -ENOSPC; | 375 | return -ENOSPC; |
363 | 376 | ||
364 | shp = ipc_rcu_alloc(sizeof(*shp)); | 377 | shp = ipc_rcu_alloc(sizeof(*shp)); |
365 | if (!shp) | 378 | if (!shp) |
366 | return -ENOMEM; | 379 | return -ENOMEM; |
367 | 380 | ||
368 | shp->shm_perm.key = key; | 381 | shp->shm_perm.key = key; |
369 | shp->shm_perm.mode = (shmflg & S_IRWXUGO); | 382 | shp->shm_perm.mode = (shmflg & S_IRWXUGO); |
370 | shp->mlock_user = NULL; | 383 | shp->mlock_user = NULL; |
371 | 384 | ||
372 | shp->shm_perm.security = NULL; | 385 | shp->shm_perm.security = NULL; |
373 | error = security_shm_alloc(shp); | 386 | error = security_shm_alloc(shp); |
374 | if (error) { | 387 | if (error) { |
375 | ipc_rcu_putref(shp); | 388 | ipc_rcu_putref(shp); |
376 | return error; | 389 | return error; |
377 | } | 390 | } |
378 | 391 | ||
379 | sprintf (name, "SYSV%08x", key); | 392 | sprintf (name, "SYSV%08x", key); |
380 | if (shmflg & SHM_HUGETLB) { | 393 | if (shmflg & SHM_HUGETLB) { |
381 | /* hugetlb_file_setup takes care of mlock user accounting */ | 394 | /* hugetlb_file_setup takes care of mlock user accounting */ |
382 | file = hugetlb_file_setup(name, size); | 395 | file = hugetlb_file_setup(name, size); |
383 | shp->mlock_user = current->user; | 396 | shp->mlock_user = current->user; |
384 | } else { | 397 | } else { |
385 | int acctflag = VM_ACCOUNT; | 398 | int acctflag = VM_ACCOUNT; |
386 | /* | 399 | /* |
387 | * Do not allow no accounting for OVERCOMMIT_NEVER, even | 400 | * Do not allow no accounting for OVERCOMMIT_NEVER, even |
388 | * if it's asked for. | 401 | * if it's asked for. |
389 | */ | 402 | */ |
390 | if ((shmflg & SHM_NORESERVE) && | 403 | if ((shmflg & SHM_NORESERVE) && |
391 | sysctl_overcommit_memory != OVERCOMMIT_NEVER) | 404 | sysctl_overcommit_memory != OVERCOMMIT_NEVER) |
392 | acctflag = 0; | 405 | acctflag = 0; |
393 | file = shmem_file_setup(name, size, acctflag); | 406 | file = shmem_file_setup(name, size, acctflag); |
394 | } | 407 | } |
395 | error = PTR_ERR(file); | 408 | error = PTR_ERR(file); |
396 | if (IS_ERR(file)) | 409 | if (IS_ERR(file)) |
397 | goto no_file; | 410 | goto no_file; |
398 | 411 | ||
399 | error = -ENOSPC; | 412 | error = -ENOSPC; |
400 | id = shm_addid(ns, shp); | 413 | id = shm_addid(ns, shp); |
401 | if(id == -1) | 414 | if(id == -1) |
402 | goto no_id; | 415 | goto no_id; |
403 | 416 | ||
404 | shp->shm_cprid = task_tgid_vnr(current); | 417 | shp->shm_cprid = task_tgid_vnr(current); |
405 | shp->shm_lprid = 0; | 418 | shp->shm_lprid = 0; |
406 | shp->shm_atim = shp->shm_dtim = 0; | 419 | shp->shm_atim = shp->shm_dtim = 0; |
407 | shp->shm_ctim = get_seconds(); | 420 | shp->shm_ctim = get_seconds(); |
408 | shp->shm_segsz = size; | 421 | shp->shm_segsz = size; |
409 | shp->shm_nattch = 0; | 422 | shp->shm_nattch = 0; |
410 | shp->shm_perm.id = shm_buildid(ns, id, shp->shm_perm.seq); | 423 | shp->shm_perm.id = shm_buildid(ns, id, shp->shm_perm.seq); |
411 | shp->shm_file = file; | 424 | shp->shm_file = file; |
412 | /* | 425 | /* |
413 | * shmid gets reported as "inode#" in /proc/pid/maps. | 426 | * shmid gets reported as "inode#" in /proc/pid/maps. |
414 | * proc-ps tools use this. Changing this will break them. | 427 | * proc-ps tools use this. Changing this will break them. |
415 | */ | 428 | */ |
416 | file->f_dentry->d_inode->i_ino = shp->shm_perm.id; | 429 | file->f_dentry->d_inode->i_ino = shp->shm_perm.id; |
417 | 430 | ||
418 | ns->shm_tot += numpages; | 431 | ns->shm_tot += numpages; |
419 | error = shp->shm_perm.id; | 432 | error = shp->shm_perm.id; |
420 | shm_unlock(shp); | 433 | shm_unlock(shp); |
421 | return error; | 434 | return error; |
422 | 435 | ||
423 | no_id: | 436 | no_id: |
424 | fput(file); | 437 | fput(file); |
425 | no_file: | 438 | no_file: |
426 | security_shm_free(shp); | 439 | security_shm_free(shp); |
427 | ipc_rcu_putref(shp); | 440 | ipc_rcu_putref(shp); |
428 | return error; | 441 | return error; |
429 | } | 442 | } |
430 | 443 | ||
444 | /* | ||
445 | * Called with shm_ids.mutex and ipcp locked. | ||
446 | */ | ||
431 | static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg) | 447 | static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg) |
432 | { | 448 | { |
433 | struct shmid_kernel *shp; | 449 | struct shmid_kernel *shp; |
434 | 450 | ||
435 | shp = container_of(ipcp, struct shmid_kernel, shm_perm); | 451 | shp = container_of(ipcp, struct shmid_kernel, shm_perm); |
436 | return security_shm_associate(shp, shmflg); | 452 | return security_shm_associate(shp, shmflg); |
437 | } | 453 | } |
438 | 454 | ||
455 | /* | ||
456 | * Called with shm_ids.mutex and ipcp locked. | ||
457 | */ | ||
439 | static inline int shm_more_checks(struct kern_ipc_perm *ipcp, | 458 | static inline int shm_more_checks(struct kern_ipc_perm *ipcp, |
440 | struct ipc_params *params) | 459 | struct ipc_params *params) |
441 | { | 460 | { |
442 | struct shmid_kernel *shp; | 461 | struct shmid_kernel *shp; |
443 | 462 | ||
444 | shp = container_of(ipcp, struct shmid_kernel, shm_perm); | 463 | shp = container_of(ipcp, struct shmid_kernel, shm_perm); |
445 | if (shp->shm_segsz < params->u.size) | 464 | if (shp->shm_segsz < params->u.size) |
446 | return -EINVAL; | 465 | return -EINVAL; |
447 | 466 | ||
448 | return 0; | 467 | return 0; |
449 | } | 468 | } |
450 | 469 | ||
451 | asmlinkage long sys_shmget (key_t key, size_t size, int shmflg) | 470 | asmlinkage long sys_shmget (key_t key, size_t size, int shmflg) |
452 | { | 471 | { |
453 | struct ipc_namespace *ns; | 472 | struct ipc_namespace *ns; |
454 | struct ipc_ops shm_ops; | 473 | struct ipc_ops shm_ops; |
455 | struct ipc_params shm_params; | 474 | struct ipc_params shm_params; |
456 | 475 | ||
457 | ns = current->nsproxy->ipc_ns; | 476 | ns = current->nsproxy->ipc_ns; |
458 | 477 | ||
459 | shm_ops.getnew = newseg; | 478 | shm_ops.getnew = newseg; |
460 | shm_ops.associate = shm_security; | 479 | shm_ops.associate = shm_security; |
461 | shm_ops.more_checks = shm_more_checks; | 480 | shm_ops.more_checks = shm_more_checks; |
462 | 481 | ||
463 | shm_params.key = key; | 482 | shm_params.key = key; |
464 | shm_params.flg = shmflg; | 483 | shm_params.flg = shmflg; |
465 | shm_params.u.size = size; | 484 | shm_params.u.size = size; |
466 | 485 | ||
467 | return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params); | 486 | return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params); |
468 | } | 487 | } |
469 | 488 | ||
470 | static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version) | 489 | static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version) |
471 | { | 490 | { |
472 | switch(version) { | 491 | switch(version) { |
473 | case IPC_64: | 492 | case IPC_64: |
474 | return copy_to_user(buf, in, sizeof(*in)); | 493 | return copy_to_user(buf, in, sizeof(*in)); |
475 | case IPC_OLD: | 494 | case IPC_OLD: |
476 | { | 495 | { |
477 | struct shmid_ds out; | 496 | struct shmid_ds out; |
478 | 497 | ||
479 | ipc64_perm_to_ipc_perm(&in->shm_perm, &out.shm_perm); | 498 | ipc64_perm_to_ipc_perm(&in->shm_perm, &out.shm_perm); |
480 | out.shm_segsz = in->shm_segsz; | 499 | out.shm_segsz = in->shm_segsz; |
481 | out.shm_atime = in->shm_atime; | 500 | out.shm_atime = in->shm_atime; |
482 | out.shm_dtime = in->shm_dtime; | 501 | out.shm_dtime = in->shm_dtime; |
483 | out.shm_ctime = in->shm_ctime; | 502 | out.shm_ctime = in->shm_ctime; |
484 | out.shm_cpid = in->shm_cpid; | 503 | out.shm_cpid = in->shm_cpid; |
485 | out.shm_lpid = in->shm_lpid; | 504 | out.shm_lpid = in->shm_lpid; |
486 | out.shm_nattch = in->shm_nattch; | 505 | out.shm_nattch = in->shm_nattch; |
487 | 506 | ||
488 | return copy_to_user(buf, &out, sizeof(out)); | 507 | return copy_to_user(buf, &out, sizeof(out)); |
489 | } | 508 | } |
490 | default: | 509 | default: |
491 | return -EINVAL; | 510 | return -EINVAL; |
492 | } | 511 | } |
493 | } | 512 | } |
494 | 513 | ||
495 | struct shm_setbuf { | 514 | struct shm_setbuf { |
496 | uid_t uid; | 515 | uid_t uid; |
497 | gid_t gid; | 516 | gid_t gid; |
498 | mode_t mode; | 517 | mode_t mode; |
499 | }; | 518 | }; |
500 | 519 | ||
501 | static inline unsigned long copy_shmid_from_user(struct shm_setbuf *out, void __user *buf, int version) | 520 | static inline unsigned long copy_shmid_from_user(struct shm_setbuf *out, void __user *buf, int version) |
502 | { | 521 | { |
503 | switch(version) { | 522 | switch(version) { |
504 | case IPC_64: | 523 | case IPC_64: |
505 | { | 524 | { |
506 | struct shmid64_ds tbuf; | 525 | struct shmid64_ds tbuf; |
507 | 526 | ||
508 | if (copy_from_user(&tbuf, buf, sizeof(tbuf))) | 527 | if (copy_from_user(&tbuf, buf, sizeof(tbuf))) |
509 | return -EFAULT; | 528 | return -EFAULT; |
510 | 529 | ||
511 | out->uid = tbuf.shm_perm.uid; | 530 | out->uid = tbuf.shm_perm.uid; |
512 | out->gid = tbuf.shm_perm.gid; | 531 | out->gid = tbuf.shm_perm.gid; |
513 | out->mode = tbuf.shm_perm.mode; | 532 | out->mode = tbuf.shm_perm.mode; |
514 | 533 | ||
515 | return 0; | 534 | return 0; |
516 | } | 535 | } |
517 | case IPC_OLD: | 536 | case IPC_OLD: |
518 | { | 537 | { |
519 | struct shmid_ds tbuf_old; | 538 | struct shmid_ds tbuf_old; |
520 | 539 | ||
521 | if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old))) | 540 | if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old))) |
522 | return -EFAULT; | 541 | return -EFAULT; |
523 | 542 | ||
524 | out->uid = tbuf_old.shm_perm.uid; | 543 | out->uid = tbuf_old.shm_perm.uid; |
525 | out->gid = tbuf_old.shm_perm.gid; | 544 | out->gid = tbuf_old.shm_perm.gid; |
526 | out->mode = tbuf_old.shm_perm.mode; | 545 | out->mode = tbuf_old.shm_perm.mode; |
527 | 546 | ||
528 | return 0; | 547 | return 0; |
529 | } | 548 | } |
530 | default: | 549 | default: |
531 | return -EINVAL; | 550 | return -EINVAL; |
532 | } | 551 | } |
533 | } | 552 | } |
534 | 553 | ||
535 | static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminfo64 *in, int version) | 554 | static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminfo64 *in, int version) |
536 | { | 555 | { |
537 | switch(version) { | 556 | switch(version) { |
538 | case IPC_64: | 557 | case IPC_64: |
539 | return copy_to_user(buf, in, sizeof(*in)); | 558 | return copy_to_user(buf, in, sizeof(*in)); |
540 | case IPC_OLD: | 559 | case IPC_OLD: |
541 | { | 560 | { |
542 | struct shminfo out; | 561 | struct shminfo out; |
543 | 562 | ||
544 | if(in->shmmax > INT_MAX) | 563 | if(in->shmmax > INT_MAX) |
545 | out.shmmax = INT_MAX; | 564 | out.shmmax = INT_MAX; |
546 | else | 565 | else |
547 | out.shmmax = (int)in->shmmax; | 566 | out.shmmax = (int)in->shmmax; |
548 | 567 | ||
549 | out.shmmin = in->shmmin; | 568 | out.shmmin = in->shmmin; |
550 | out.shmmni = in->shmmni; | 569 | out.shmmni = in->shmmni; |
551 | out.shmseg = in->shmseg; | 570 | out.shmseg = in->shmseg; |
552 | out.shmall = in->shmall; | 571 | out.shmall = in->shmall; |
553 | 572 | ||
554 | return copy_to_user(buf, &out, sizeof(out)); | 573 | return copy_to_user(buf, &out, sizeof(out)); |
555 | } | 574 | } |
556 | default: | 575 | default: |
557 | return -EINVAL; | 576 | return -EINVAL; |
558 | } | 577 | } |
559 | } | 578 | } |
560 | 579 | ||
580 | /* | ||
581 | * Called with shm_ids.mutex held | ||
582 | */ | ||
561 | static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss, | 583 | static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss, |
562 | unsigned long *swp) | 584 | unsigned long *swp) |
563 | { | 585 | { |
564 | int next_id; | 586 | int next_id; |
565 | int total, in_use; | 587 | int total, in_use; |
566 | 588 | ||
567 | *rss = 0; | 589 | *rss = 0; |
568 | *swp = 0; | 590 | *swp = 0; |
569 | 591 | ||
570 | in_use = shm_ids(ns).in_use; | 592 | in_use = shm_ids(ns).in_use; |
571 | 593 | ||
572 | for (total = 0, next_id = 0; total < in_use; next_id++) { | 594 | for (total = 0, next_id = 0; total < in_use; next_id++) { |
573 | struct shmid_kernel *shp; | 595 | struct shmid_kernel *shp; |
574 | struct inode *inode; | 596 | struct inode *inode; |
575 | 597 | ||
576 | /* | ||
577 | * idr_find() is called via shm_get(), so with shm_ids.mutex | ||
578 | * locked. Since ipc_addid() is also called with | ||
579 | * shm_ids.mutex down, there is no need to add read barriers | ||
580 | * here to gurantee the writes in ipc_addid() are seen in | ||
581 | * order here (for Alpha). | ||
582 | * However idr_find() itself does not necessary require | ||
583 | * ipc_ids.mutex down. So if idr_find() is used by other | ||
584 | * places without ipc_ids.mutex down, then it needs read | ||
585 | * read memory barriers as ipc_lock() does. | ||
586 | */ | ||
587 | |||
588 | shp = idr_find(&shm_ids(ns).ipcs_idr, next_id); | 598 | shp = idr_find(&shm_ids(ns).ipcs_idr, next_id); |
589 | if (shp == NULL) | 599 | if (shp == NULL) |
590 | continue; | 600 | continue; |
591 | 601 | ||
592 | inode = shp->shm_file->f_path.dentry->d_inode; | 602 | inode = shp->shm_file->f_path.dentry->d_inode; |
593 | 603 | ||
594 | if (is_file_hugepages(shp->shm_file)) { | 604 | if (is_file_hugepages(shp->shm_file)) { |
595 | struct address_space *mapping = inode->i_mapping; | 605 | struct address_space *mapping = inode->i_mapping; |
596 | *rss += (HPAGE_SIZE/PAGE_SIZE)*mapping->nrpages; | 606 | *rss += (HPAGE_SIZE/PAGE_SIZE)*mapping->nrpages; |
597 | } else { | 607 | } else { |
598 | struct shmem_inode_info *info = SHMEM_I(inode); | 608 | struct shmem_inode_info *info = SHMEM_I(inode); |
599 | spin_lock(&info->lock); | 609 | spin_lock(&info->lock); |
600 | *rss += inode->i_mapping->nrpages; | 610 | *rss += inode->i_mapping->nrpages; |
601 | *swp += info->swapped; | 611 | *swp += info->swapped; |
602 | spin_unlock(&info->lock); | 612 | spin_unlock(&info->lock); |
603 | } | 613 | } |
604 | 614 | ||
605 | total++; | 615 | total++; |
606 | } | 616 | } |
607 | } | 617 | } |
608 | 618 | ||
609 | asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf) | 619 | asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf) |
610 | { | 620 | { |
611 | struct shm_setbuf setbuf; | 621 | struct shm_setbuf setbuf; |
612 | struct shmid_kernel *shp; | 622 | struct shmid_kernel *shp; |
613 | int err, version; | 623 | int err, version; |
614 | struct ipc_namespace *ns; | 624 | struct ipc_namespace *ns; |
615 | 625 | ||
616 | if (cmd < 0 || shmid < 0) { | 626 | if (cmd < 0 || shmid < 0) { |
617 | err = -EINVAL; | 627 | err = -EINVAL; |
618 | goto out; | 628 | goto out; |
619 | } | 629 | } |
620 | 630 | ||
621 | version = ipc_parse_version(&cmd); | 631 | version = ipc_parse_version(&cmd); |
622 | ns = current->nsproxy->ipc_ns; | 632 | ns = current->nsproxy->ipc_ns; |
623 | 633 | ||
624 | switch (cmd) { /* replace with proc interface ? */ | 634 | switch (cmd) { /* replace with proc interface ? */ |
625 | case IPC_INFO: | 635 | case IPC_INFO: |
626 | { | 636 | { |
627 | struct shminfo64 shminfo; | 637 | struct shminfo64 shminfo; |
628 | 638 | ||
629 | err = security_shm_shmctl(NULL, cmd); | 639 | err = security_shm_shmctl(NULL, cmd); |
630 | if (err) | 640 | if (err) |
631 | return err; | 641 | return err; |
632 | 642 | ||
633 | memset(&shminfo,0,sizeof(shminfo)); | 643 | memset(&shminfo,0,sizeof(shminfo)); |
634 | shminfo.shmmni = shminfo.shmseg = ns->shm_ctlmni; | 644 | shminfo.shmmni = shminfo.shmseg = ns->shm_ctlmni; |
635 | shminfo.shmmax = ns->shm_ctlmax; | 645 | shminfo.shmmax = ns->shm_ctlmax; |
636 | shminfo.shmall = ns->shm_ctlall; | 646 | shminfo.shmall = ns->shm_ctlall; |
637 | 647 | ||
638 | shminfo.shmmin = SHMMIN; | 648 | shminfo.shmmin = SHMMIN; |
639 | if(copy_shminfo_to_user (buf, &shminfo, version)) | 649 | if(copy_shminfo_to_user (buf, &shminfo, version)) |
640 | return -EFAULT; | 650 | return -EFAULT; |
641 | /* reading a integer is always atomic */ | 651 | |
652 | mutex_lock(&shm_ids(ns).mutex); | ||
642 | err = ipc_get_maxid(&shm_ids(ns)); | 653 | err = ipc_get_maxid(&shm_ids(ns)); |
654 | mutex_unlock(&shm_ids(ns).mutex); | ||
655 | |||
643 | if(err<0) | 656 | if(err<0) |
644 | err = 0; | 657 | err = 0; |
645 | goto out; | 658 | goto out; |
646 | } | 659 | } |
647 | case SHM_INFO: | 660 | case SHM_INFO: |
648 | { | 661 | { |
649 | struct shm_info shm_info; | 662 | struct shm_info shm_info; |
650 | 663 | ||
651 | err = security_shm_shmctl(NULL, cmd); | 664 | err = security_shm_shmctl(NULL, cmd); |
652 | if (err) | 665 | if (err) |
653 | return err; | 666 | return err; |
654 | 667 | ||
655 | memset(&shm_info,0,sizeof(shm_info)); | 668 | memset(&shm_info,0,sizeof(shm_info)); |
656 | mutex_lock(&shm_ids(ns).mutex); | 669 | mutex_lock(&shm_ids(ns).mutex); |
657 | shm_info.used_ids = shm_ids(ns).in_use; | 670 | shm_info.used_ids = shm_ids(ns).in_use; |
658 | shm_get_stat (ns, &shm_info.shm_rss, &shm_info.shm_swp); | 671 | shm_get_stat (ns, &shm_info.shm_rss, &shm_info.shm_swp); |
659 | shm_info.shm_tot = ns->shm_tot; | 672 | shm_info.shm_tot = ns->shm_tot; |
660 | shm_info.swap_attempts = 0; | 673 | shm_info.swap_attempts = 0; |
661 | shm_info.swap_successes = 0; | 674 | shm_info.swap_successes = 0; |
662 | err = ipc_get_maxid(&shm_ids(ns)); | 675 | err = ipc_get_maxid(&shm_ids(ns)); |
663 | mutex_unlock(&shm_ids(ns).mutex); | 676 | mutex_unlock(&shm_ids(ns).mutex); |
664 | if(copy_to_user (buf, &shm_info, sizeof(shm_info))) { | 677 | if(copy_to_user (buf, &shm_info, sizeof(shm_info))) { |
665 | err = -EFAULT; | 678 | err = -EFAULT; |
666 | goto out; | 679 | goto out; |
667 | } | 680 | } |
668 | 681 | ||
669 | err = err < 0 ? 0 : err; | 682 | err = err < 0 ? 0 : err; |
670 | goto out; | 683 | goto out; |
671 | } | 684 | } |
672 | case SHM_STAT: | 685 | case SHM_STAT: |
673 | case IPC_STAT: | 686 | case IPC_STAT: |
674 | { | 687 | { |
675 | struct shmid64_ds tbuf; | 688 | struct shmid64_ds tbuf; |
676 | int result; | 689 | int result; |
677 | 690 | ||
678 | if (!buf) { | 691 | if (!buf) { |
679 | err = -EFAULT; | 692 | err = -EFAULT; |
680 | goto out; | 693 | goto out; |
681 | } | 694 | } |
682 | 695 | ||
683 | if (cmd == SHM_STAT) { | 696 | if (cmd == SHM_STAT) { |
684 | shp = shm_lock(ns, shmid); | 697 | shp = shm_lock(ns, shmid); |
685 | if (IS_ERR(shp)) { | 698 | if (IS_ERR(shp)) { |
686 | err = PTR_ERR(shp); | 699 | err = PTR_ERR(shp); |
687 | goto out; | 700 | goto out; |
688 | } | 701 | } |
689 | result = shp->shm_perm.id; | 702 | result = shp->shm_perm.id; |
690 | } else { | 703 | } else { |
691 | shp = shm_lock_check(ns, shmid); | 704 | shp = shm_lock_check(ns, shmid); |
692 | if (IS_ERR(shp)) { | 705 | if (IS_ERR(shp)) { |
693 | err = PTR_ERR(shp); | 706 | err = PTR_ERR(shp); |
694 | goto out; | 707 | goto out; |
695 | } | 708 | } |
696 | result = 0; | 709 | result = 0; |
697 | } | 710 | } |
698 | err=-EACCES; | 711 | err=-EACCES; |
699 | if (ipcperms (&shp->shm_perm, S_IRUGO)) | 712 | if (ipcperms (&shp->shm_perm, S_IRUGO)) |
700 | goto out_unlock; | 713 | goto out_unlock; |
701 | err = security_shm_shmctl(shp, cmd); | 714 | err = security_shm_shmctl(shp, cmd); |
702 | if (err) | 715 | if (err) |
703 | goto out_unlock; | 716 | goto out_unlock; |
704 | memset(&tbuf, 0, sizeof(tbuf)); | 717 | memset(&tbuf, 0, sizeof(tbuf)); |
705 | kernel_to_ipc64_perm(&shp->shm_perm, &tbuf.shm_perm); | 718 | kernel_to_ipc64_perm(&shp->shm_perm, &tbuf.shm_perm); |
706 | tbuf.shm_segsz = shp->shm_segsz; | 719 | tbuf.shm_segsz = shp->shm_segsz; |
707 | tbuf.shm_atime = shp->shm_atim; | 720 | tbuf.shm_atime = shp->shm_atim; |
708 | tbuf.shm_dtime = shp->shm_dtim; | 721 | tbuf.shm_dtime = shp->shm_dtim; |
709 | tbuf.shm_ctime = shp->shm_ctim; | 722 | tbuf.shm_ctime = shp->shm_ctim; |
710 | tbuf.shm_cpid = shp->shm_cprid; | 723 | tbuf.shm_cpid = shp->shm_cprid; |
711 | tbuf.shm_lpid = shp->shm_lprid; | 724 | tbuf.shm_lpid = shp->shm_lprid; |
712 | tbuf.shm_nattch = shp->shm_nattch; | 725 | tbuf.shm_nattch = shp->shm_nattch; |
713 | shm_unlock(shp); | 726 | shm_unlock(shp); |
714 | if(copy_shmid_to_user (buf, &tbuf, version)) | 727 | if(copy_shmid_to_user (buf, &tbuf, version)) |
715 | err = -EFAULT; | 728 | err = -EFAULT; |
716 | else | 729 | else |
717 | err = result; | 730 | err = result; |
718 | goto out; | 731 | goto out; |
719 | } | 732 | } |
720 | case SHM_LOCK: | 733 | case SHM_LOCK: |
721 | case SHM_UNLOCK: | 734 | case SHM_UNLOCK: |
722 | { | 735 | { |
723 | shp = shm_lock_check(ns, shmid); | 736 | shp = shm_lock_check(ns, shmid); |
724 | if (IS_ERR(shp)) { | 737 | if (IS_ERR(shp)) { |
725 | err = PTR_ERR(shp); | 738 | err = PTR_ERR(shp); |
726 | goto out; | 739 | goto out; |
727 | } | 740 | } |
728 | 741 | ||
729 | err = audit_ipc_obj(&(shp->shm_perm)); | 742 | err = audit_ipc_obj(&(shp->shm_perm)); |
730 | if (err) | 743 | if (err) |
731 | goto out_unlock; | 744 | goto out_unlock; |
732 | 745 | ||
733 | if (!capable(CAP_IPC_LOCK)) { | 746 | if (!capable(CAP_IPC_LOCK)) { |
734 | err = -EPERM; | 747 | err = -EPERM; |
735 | if (current->euid != shp->shm_perm.uid && | 748 | if (current->euid != shp->shm_perm.uid && |
736 | current->euid != shp->shm_perm.cuid) | 749 | current->euid != shp->shm_perm.cuid) |
737 | goto out_unlock; | 750 | goto out_unlock; |
738 | if (cmd == SHM_LOCK && | 751 | if (cmd == SHM_LOCK && |
739 | !current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur) | 752 | !current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur) |
740 | goto out_unlock; | 753 | goto out_unlock; |
741 | } | 754 | } |
742 | 755 | ||
743 | err = security_shm_shmctl(shp, cmd); | 756 | err = security_shm_shmctl(shp, cmd); |
744 | if (err) | 757 | if (err) |
745 | goto out_unlock; | 758 | goto out_unlock; |
746 | 759 | ||
747 | if(cmd==SHM_LOCK) { | 760 | if(cmd==SHM_LOCK) { |
748 | struct user_struct * user = current->user; | 761 | struct user_struct * user = current->user; |
749 | if (!is_file_hugepages(shp->shm_file)) { | 762 | if (!is_file_hugepages(shp->shm_file)) { |
750 | err = shmem_lock(shp->shm_file, 1, user); | 763 | err = shmem_lock(shp->shm_file, 1, user); |
751 | if (!err && !(shp->shm_perm.mode & SHM_LOCKED)){ | 764 | if (!err && !(shp->shm_perm.mode & SHM_LOCKED)){ |
752 | shp->shm_perm.mode |= SHM_LOCKED; | 765 | shp->shm_perm.mode |= SHM_LOCKED; |
753 | shp->mlock_user = user; | 766 | shp->mlock_user = user; |
754 | } | 767 | } |
755 | } | 768 | } |
756 | } else if (!is_file_hugepages(shp->shm_file)) { | 769 | } else if (!is_file_hugepages(shp->shm_file)) { |
757 | shmem_lock(shp->shm_file, 0, shp->mlock_user); | 770 | shmem_lock(shp->shm_file, 0, shp->mlock_user); |
758 | shp->shm_perm.mode &= ~SHM_LOCKED; | 771 | shp->shm_perm.mode &= ~SHM_LOCKED; |
759 | shp->mlock_user = NULL; | 772 | shp->mlock_user = NULL; |
760 | } | 773 | } |
761 | shm_unlock(shp); | 774 | shm_unlock(shp); |
762 | goto out; | 775 | goto out; |
763 | } | 776 | } |
764 | case IPC_RMID: | 777 | case IPC_RMID: |
765 | { | 778 | { |
766 | /* | 779 | /* |
767 | * We cannot simply remove the file. The SVID states | 780 | * We cannot simply remove the file. The SVID states |
768 | * that the block remains until the last person | 781 | * that the block remains until the last person |
769 | * detaches from it, then is deleted. A shmat() on | 782 | * detaches from it, then is deleted. A shmat() on |
770 | * an RMID segment is legal in older Linux and if | 783 | * an RMID segment is legal in older Linux and if |
771 | * we change it apps break... | 784 | * we change it apps break... |
772 | * | 785 | * |
773 | * Instead we set a destroyed flag, and then blow | 786 | * Instead we set a destroyed flag, and then blow |
774 | * the name away when the usage hits zero. | 787 | * the name away when the usage hits zero. |
775 | */ | 788 | */ |
776 | mutex_lock(&shm_ids(ns).mutex); | 789 | mutex_lock(&shm_ids(ns).mutex); |
777 | shp = shm_lock_check(ns, shmid); | 790 | shp = shm_lock_check(ns, shmid); |
778 | if (IS_ERR(shp)) { | 791 | if (IS_ERR(shp)) { |
779 | err = PTR_ERR(shp); | 792 | err = PTR_ERR(shp); |
780 | goto out_up; | 793 | goto out_up; |
781 | } | 794 | } |
782 | 795 | ||
783 | err = audit_ipc_obj(&(shp->shm_perm)); | 796 | err = audit_ipc_obj(&(shp->shm_perm)); |
784 | if (err) | 797 | if (err) |
785 | goto out_unlock_up; | 798 | goto out_unlock_up; |
786 | 799 | ||
787 | if (current->euid != shp->shm_perm.uid && | 800 | if (current->euid != shp->shm_perm.uid && |
788 | current->euid != shp->shm_perm.cuid && | 801 | current->euid != shp->shm_perm.cuid && |
789 | !capable(CAP_SYS_ADMIN)) { | 802 | !capable(CAP_SYS_ADMIN)) { |
790 | err=-EPERM; | 803 | err=-EPERM; |
791 | goto out_unlock_up; | 804 | goto out_unlock_up; |
792 | } | 805 | } |
793 | 806 | ||
794 | err = security_shm_shmctl(shp, cmd); | 807 | err = security_shm_shmctl(shp, cmd); |
795 | if (err) | 808 | if (err) |
796 | goto out_unlock_up; | 809 | goto out_unlock_up; |
797 | 810 | ||
798 | do_shm_rmid(ns, shp); | 811 | do_shm_rmid(ns, shp); |
799 | mutex_unlock(&shm_ids(ns).mutex); | 812 | mutex_unlock(&shm_ids(ns).mutex); |
800 | goto out; | 813 | goto out; |
801 | } | 814 | } |
802 | 815 | ||
803 | case IPC_SET: | 816 | case IPC_SET: |
804 | { | 817 | { |
805 | if (!buf) { | 818 | if (!buf) { |
806 | err = -EFAULT; | 819 | err = -EFAULT; |
807 | goto out; | 820 | goto out; |
808 | } | 821 | } |
809 | 822 | ||
810 | if (copy_shmid_from_user (&setbuf, buf, version)) { | 823 | if (copy_shmid_from_user (&setbuf, buf, version)) { |
811 | err = -EFAULT; | 824 | err = -EFAULT; |
812 | goto out; | 825 | goto out; |
813 | } | 826 | } |
814 | mutex_lock(&shm_ids(ns).mutex); | 827 | mutex_lock(&shm_ids(ns).mutex); |
815 | shp = shm_lock_check(ns, shmid); | 828 | shp = shm_lock_check(ns, shmid); |
816 | if (IS_ERR(shp)) { | 829 | if (IS_ERR(shp)) { |
817 | err = PTR_ERR(shp); | 830 | err = PTR_ERR(shp); |
818 | goto out_up; | 831 | goto out_up; |
819 | } | 832 | } |
820 | err = audit_ipc_obj(&(shp->shm_perm)); | 833 | err = audit_ipc_obj(&(shp->shm_perm)); |
821 | if (err) | 834 | if (err) |
822 | goto out_unlock_up; | 835 | goto out_unlock_up; |
823 | err = audit_ipc_set_perm(0, setbuf.uid, setbuf.gid, setbuf.mode); | 836 | err = audit_ipc_set_perm(0, setbuf.uid, setbuf.gid, setbuf.mode); |
824 | if (err) | 837 | if (err) |
825 | goto out_unlock_up; | 838 | goto out_unlock_up; |
826 | err=-EPERM; | 839 | err=-EPERM; |
827 | if (current->euid != shp->shm_perm.uid && | 840 | if (current->euid != shp->shm_perm.uid && |
828 | current->euid != shp->shm_perm.cuid && | 841 | current->euid != shp->shm_perm.cuid && |
829 | !capable(CAP_SYS_ADMIN)) { | 842 | !capable(CAP_SYS_ADMIN)) { |
830 | goto out_unlock_up; | 843 | goto out_unlock_up; |
831 | } | 844 | } |
832 | 845 | ||
833 | err = security_shm_shmctl(shp, cmd); | 846 | err = security_shm_shmctl(shp, cmd); |
834 | if (err) | 847 | if (err) |
835 | goto out_unlock_up; | 848 | goto out_unlock_up; |
836 | 849 | ||
837 | shp->shm_perm.uid = setbuf.uid; | 850 | shp->shm_perm.uid = setbuf.uid; |
838 | shp->shm_perm.gid = setbuf.gid; | 851 | shp->shm_perm.gid = setbuf.gid; |
839 | shp->shm_perm.mode = (shp->shm_perm.mode & ~S_IRWXUGO) | 852 | shp->shm_perm.mode = (shp->shm_perm.mode & ~S_IRWXUGO) |
840 | | (setbuf.mode & S_IRWXUGO); | 853 | | (setbuf.mode & S_IRWXUGO); |
841 | shp->shm_ctim = get_seconds(); | 854 | shp->shm_ctim = get_seconds(); |
842 | break; | 855 | break; |
843 | } | 856 | } |
844 | 857 | ||
845 | default: | 858 | default: |
846 | err = -EINVAL; | 859 | err = -EINVAL; |
847 | goto out; | 860 | goto out; |
848 | } | 861 | } |
849 | 862 | ||
850 | err = 0; | 863 | err = 0; |
851 | out_unlock_up: | 864 | out_unlock_up: |
852 | shm_unlock(shp); | 865 | shm_unlock(shp); |
853 | out_up: | 866 | out_up: |
854 | mutex_unlock(&shm_ids(ns).mutex); | 867 | mutex_unlock(&shm_ids(ns).mutex); |
855 | goto out; | 868 | goto out; |
856 | out_unlock: | 869 | out_unlock: |
857 | shm_unlock(shp); | 870 | shm_unlock(shp); |
858 | out: | 871 | out: |
859 | return err; | 872 | return err; |
860 | } | 873 | } |
861 | 874 | ||
862 | /* | 875 | /* |
863 | * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists. | 876 | * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists. |
864 | * | 877 | * |
865 | * NOTE! Despite the name, this is NOT a direct system call entrypoint. The | 878 | * NOTE! Despite the name, this is NOT a direct system call entrypoint. The |
866 | * "raddr" thing points to kernel space, and there has to be a wrapper around | 879 | * "raddr" thing points to kernel space, and there has to be a wrapper around |
867 | * this. | 880 | * this. |
868 | */ | 881 | */ |
869 | long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr) | 882 | long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr) |
870 | { | 883 | { |
871 | struct shmid_kernel *shp; | 884 | struct shmid_kernel *shp; |
872 | unsigned long addr; | 885 | unsigned long addr; |
873 | unsigned long size; | 886 | unsigned long size; |
874 | struct file * file; | 887 | struct file * file; |
875 | int err; | 888 | int err; |
876 | unsigned long flags; | 889 | unsigned long flags; |
877 | unsigned long prot; | 890 | unsigned long prot; |
878 | int acc_mode; | 891 | int acc_mode; |
879 | unsigned long user_addr; | 892 | unsigned long user_addr; |
880 | struct ipc_namespace *ns; | 893 | struct ipc_namespace *ns; |
881 | struct shm_file_data *sfd; | 894 | struct shm_file_data *sfd; |
882 | struct path path; | 895 | struct path path; |
883 | mode_t f_mode; | 896 | mode_t f_mode; |
884 | 897 | ||
885 | err = -EINVAL; | 898 | err = -EINVAL; |
886 | if (shmid < 0) | 899 | if (shmid < 0) |
887 | goto out; | 900 | goto out; |
888 | else if ((addr = (ulong)shmaddr)) { | 901 | else if ((addr = (ulong)shmaddr)) { |
889 | if (addr & (SHMLBA-1)) { | 902 | if (addr & (SHMLBA-1)) { |
890 | if (shmflg & SHM_RND) | 903 | if (shmflg & SHM_RND) |
891 | addr &= ~(SHMLBA-1); /* round down */ | 904 | addr &= ~(SHMLBA-1); /* round down */ |
892 | else | 905 | else |
893 | #ifndef __ARCH_FORCE_SHMLBA | 906 | #ifndef __ARCH_FORCE_SHMLBA |
894 | if (addr & ~PAGE_MASK) | 907 | if (addr & ~PAGE_MASK) |
895 | #endif | 908 | #endif |
896 | goto out; | 909 | goto out; |
897 | } | 910 | } |
898 | flags = MAP_SHARED | MAP_FIXED; | 911 | flags = MAP_SHARED | MAP_FIXED; |
899 | } else { | 912 | } else { |
900 | if ((shmflg & SHM_REMAP)) | 913 | if ((shmflg & SHM_REMAP)) |
901 | goto out; | 914 | goto out; |
902 | 915 | ||
903 | flags = MAP_SHARED; | 916 | flags = MAP_SHARED; |
904 | } | 917 | } |
905 | 918 | ||
906 | if (shmflg & SHM_RDONLY) { | 919 | if (shmflg & SHM_RDONLY) { |
907 | prot = PROT_READ; | 920 | prot = PROT_READ; |
908 | acc_mode = S_IRUGO; | 921 | acc_mode = S_IRUGO; |
909 | f_mode = FMODE_READ; | 922 | f_mode = FMODE_READ; |
910 | } else { | 923 | } else { |
911 | prot = PROT_READ | PROT_WRITE; | 924 | prot = PROT_READ | PROT_WRITE; |
912 | acc_mode = S_IRUGO | S_IWUGO; | 925 | acc_mode = S_IRUGO | S_IWUGO; |
913 | f_mode = FMODE_READ | FMODE_WRITE; | 926 | f_mode = FMODE_READ | FMODE_WRITE; |
914 | } | 927 | } |
915 | if (shmflg & SHM_EXEC) { | 928 | if (shmflg & SHM_EXEC) { |
916 | prot |= PROT_EXEC; | 929 | prot |= PROT_EXEC; |
917 | acc_mode |= S_IXUGO; | 930 | acc_mode |= S_IXUGO; |
918 | } | 931 | } |
919 | 932 | ||
920 | /* | 933 | /* |
921 | * We cannot rely on the fs check since SYSV IPC does have an | 934 | * We cannot rely on the fs check since SYSV IPC does have an |
922 | * additional creator id... | 935 | * additional creator id... |
923 | */ | 936 | */ |
924 | ns = current->nsproxy->ipc_ns; | 937 | ns = current->nsproxy->ipc_ns; |
925 | shp = shm_lock_check(ns, shmid); | 938 | shp = shm_lock_check(ns, shmid); |
926 | if (IS_ERR(shp)) { | 939 | if (IS_ERR(shp)) { |
927 | err = PTR_ERR(shp); | 940 | err = PTR_ERR(shp); |
928 | goto out; | 941 | goto out; |
929 | } | 942 | } |
930 | 943 | ||
931 | err = -EACCES; | 944 | err = -EACCES; |
932 | if (ipcperms(&shp->shm_perm, acc_mode)) | 945 | if (ipcperms(&shp->shm_perm, acc_mode)) |
933 | goto out_unlock; | 946 | goto out_unlock; |
934 | 947 | ||
935 | err = security_shm_shmat(shp, shmaddr, shmflg); | 948 | err = security_shm_shmat(shp, shmaddr, shmflg); |
936 | if (err) | 949 | if (err) |
937 | goto out_unlock; | 950 | goto out_unlock; |
938 | 951 | ||
939 | path.dentry = dget(shp->shm_file->f_path.dentry); | 952 | path.dentry = dget(shp->shm_file->f_path.dentry); |
940 | path.mnt = shp->shm_file->f_path.mnt; | 953 | path.mnt = shp->shm_file->f_path.mnt; |
941 | shp->shm_nattch++; | 954 | shp->shm_nattch++; |
942 | size = i_size_read(path.dentry->d_inode); | 955 | size = i_size_read(path.dentry->d_inode); |
943 | shm_unlock(shp); | 956 | shm_unlock(shp); |
944 | 957 | ||
945 | err = -ENOMEM; | 958 | err = -ENOMEM; |
946 | sfd = kzalloc(sizeof(*sfd), GFP_KERNEL); | 959 | sfd = kzalloc(sizeof(*sfd), GFP_KERNEL); |
947 | if (!sfd) | 960 | if (!sfd) |
948 | goto out_put_dentry; | 961 | goto out_put_dentry; |
949 | 962 | ||
950 | err = -ENOMEM; | 963 | err = -ENOMEM; |
951 | 964 | ||
952 | file = alloc_file(path.mnt, path.dentry, f_mode, &shm_file_operations); | 965 | file = alloc_file(path.mnt, path.dentry, f_mode, &shm_file_operations); |
953 | if (!file) | 966 | if (!file) |
954 | goto out_free; | 967 | goto out_free; |
955 | 968 | ||
956 | file->private_data = sfd; | 969 | file->private_data = sfd; |
957 | file->f_mapping = shp->shm_file->f_mapping; | 970 | file->f_mapping = shp->shm_file->f_mapping; |
958 | sfd->id = shp->shm_perm.id; | 971 | sfd->id = shp->shm_perm.id; |
959 | sfd->ns = get_ipc_ns(ns); | 972 | sfd->ns = get_ipc_ns(ns); |
960 | sfd->file = shp->shm_file; | 973 | sfd->file = shp->shm_file; |
961 | sfd->vm_ops = NULL; | 974 | sfd->vm_ops = NULL; |
962 | 975 | ||
963 | down_write(¤t->mm->mmap_sem); | 976 | down_write(¤t->mm->mmap_sem); |
964 | if (addr && !(shmflg & SHM_REMAP)) { | 977 | if (addr && !(shmflg & SHM_REMAP)) { |
965 | err = -EINVAL; | 978 | err = -EINVAL; |
966 | if (find_vma_intersection(current->mm, addr, addr + size)) | 979 | if (find_vma_intersection(current->mm, addr, addr + size)) |
967 | goto invalid; | 980 | goto invalid; |
968 | /* | 981 | /* |
969 | * If shm segment goes below stack, make sure there is some | 982 | * If shm segment goes below stack, make sure there is some |
970 | * space left for the stack to grow (at least 4 pages). | 983 | * space left for the stack to grow (at least 4 pages). |
971 | */ | 984 | */ |
972 | if (addr < current->mm->start_stack && | 985 | if (addr < current->mm->start_stack && |
973 | addr > current->mm->start_stack - size - PAGE_SIZE * 5) | 986 | addr > current->mm->start_stack - size - PAGE_SIZE * 5) |
974 | goto invalid; | 987 | goto invalid; |
975 | } | 988 | } |
976 | 989 | ||
977 | user_addr = do_mmap (file, addr, size, prot, flags, 0); | 990 | user_addr = do_mmap (file, addr, size, prot, flags, 0); |
978 | *raddr = user_addr; | 991 | *raddr = user_addr; |
979 | err = 0; | 992 | err = 0; |
980 | if (IS_ERR_VALUE(user_addr)) | 993 | if (IS_ERR_VALUE(user_addr)) |
981 | err = (long)user_addr; | 994 | err = (long)user_addr; |
982 | invalid: | 995 | invalid: |
983 | up_write(¤t->mm->mmap_sem); | 996 | up_write(¤t->mm->mmap_sem); |
984 | 997 | ||
985 | fput(file); | 998 | fput(file); |
986 | 999 | ||
987 | out_nattch: | 1000 | out_nattch: |
988 | mutex_lock(&shm_ids(ns).mutex); | 1001 | mutex_lock(&shm_ids(ns).mutex); |
989 | shp = shm_lock(ns, shmid); | 1002 | shp = shm_lock(ns, shmid); |
990 | BUG_ON(IS_ERR(shp)); | 1003 | BUG_ON(IS_ERR(shp)); |
991 | shp->shm_nattch--; | 1004 | shp->shm_nattch--; |
992 | if(shp->shm_nattch == 0 && | 1005 | if(shp->shm_nattch == 0 && |
993 | shp->shm_perm.mode & SHM_DEST) | 1006 | shp->shm_perm.mode & SHM_DEST) |
994 | shm_destroy(ns, shp); | 1007 | shm_destroy(ns, shp); |
995 | else | 1008 | else |
996 | shm_unlock(shp); | 1009 | shm_unlock(shp); |
997 | mutex_unlock(&shm_ids(ns).mutex); | 1010 | mutex_unlock(&shm_ids(ns).mutex); |
998 | 1011 | ||
999 | out: | 1012 | out: |
1000 | return err; | 1013 | return err; |
1001 | 1014 | ||
1002 | out_unlock: | 1015 | out_unlock: |
1003 | shm_unlock(shp); | 1016 | shm_unlock(shp); |
1004 | goto out; | 1017 | goto out; |
1005 | 1018 | ||
1006 | out_free: | 1019 | out_free: |
1007 | kfree(sfd); | 1020 | kfree(sfd); |
1008 | out_put_dentry: | 1021 | out_put_dentry: |
1009 | dput(path.dentry); | 1022 | dput(path.dentry); |
1010 | goto out_nattch; | 1023 | goto out_nattch; |
1011 | } | 1024 | } |
1012 | 1025 | ||
1013 | asmlinkage long sys_shmat(int shmid, char __user *shmaddr, int shmflg) | 1026 | asmlinkage long sys_shmat(int shmid, char __user *shmaddr, int shmflg) |
1014 | { | 1027 | { |
1015 | unsigned long ret; | 1028 | unsigned long ret; |
1016 | long err; | 1029 | long err; |
1017 | 1030 | ||
1018 | err = do_shmat(shmid, shmaddr, shmflg, &ret); | 1031 | err = do_shmat(shmid, shmaddr, shmflg, &ret); |
1019 | if (err) | 1032 | if (err) |
1020 | return err; | 1033 | return err; |
1021 | force_successful_syscall_return(); | 1034 | force_successful_syscall_return(); |
1022 | return (long)ret; | 1035 | return (long)ret; |
1023 | } | 1036 | } |
1024 | 1037 | ||
1025 | /* | 1038 | /* |
1026 | * detach and kill segment if marked destroyed. | 1039 | * detach and kill segment if marked destroyed. |
1027 | * The work is done in shm_close. | 1040 | * The work is done in shm_close. |
1028 | */ | 1041 | */ |
1029 | asmlinkage long sys_shmdt(char __user *shmaddr) | 1042 | asmlinkage long sys_shmdt(char __user *shmaddr) |
1030 | { | 1043 | { |
1031 | struct mm_struct *mm = current->mm; | 1044 | struct mm_struct *mm = current->mm; |
1032 | struct vm_area_struct *vma, *next; | 1045 | struct vm_area_struct *vma, *next; |
1033 | unsigned long addr = (unsigned long)shmaddr; | 1046 | unsigned long addr = (unsigned long)shmaddr; |
1034 | loff_t size = 0; | 1047 | loff_t size = 0; |
1035 | int retval = -EINVAL; | 1048 | int retval = -EINVAL; |
1036 | 1049 | ||
1037 | if (addr & ~PAGE_MASK) | 1050 | if (addr & ~PAGE_MASK) |
1038 | return retval; | 1051 | return retval; |
1039 | 1052 | ||
1040 | down_write(&mm->mmap_sem); | 1053 | down_write(&mm->mmap_sem); |
1041 | 1054 | ||
1042 | /* | 1055 | /* |
1043 | * This function tries to be smart and unmap shm segments that | 1056 | * This function tries to be smart and unmap shm segments that |
1044 | * were modified by partial mlock or munmap calls: | 1057 | * were modified by partial mlock or munmap calls: |
1045 | * - It first determines the size of the shm segment that should be | 1058 | * - It first determines the size of the shm segment that should be |
1046 | * unmapped: It searches for a vma that is backed by shm and that | 1059 | * unmapped: It searches for a vma that is backed by shm and that |
1047 | * started at address shmaddr. It records its size and then unmaps | 1060 | * started at address shmaddr. It records its size and then unmaps |
1048 | * it. | 1061 | * it. |
1049 | * - Then it unmaps all shm vmas that started at shmaddr and that | 1062 | * - Then it unmaps all shm vmas that started at shmaddr and that |
1050 | * are within the initially determined size. | 1063 | * are within the initially determined size. |
1051 | * Errors from do_munmap are ignored: the function only fails if | 1064 | * Errors from do_munmap are ignored: the function only fails if |
1052 | * it's called with invalid parameters or if it's called to unmap | 1065 | * it's called with invalid parameters or if it's called to unmap |
1053 | * a part of a vma. Both calls in this function are for full vmas, | 1066 | * a part of a vma. Both calls in this function are for full vmas, |
1054 | * the parameters are directly copied from the vma itself and always | 1067 | * the parameters are directly copied from the vma itself and always |
1055 | * valid - therefore do_munmap cannot fail. (famous last words?) | 1068 | * valid - therefore do_munmap cannot fail. (famous last words?) |
1056 | */ | 1069 | */ |
1057 | /* | 1070 | /* |
1058 | * If it had been mremap()'d, the starting address would not | 1071 | * If it had been mremap()'d, the starting address would not |
1059 | * match the usual checks anyway. So assume all vma's are | 1072 | * match the usual checks anyway. So assume all vma's are |
1060 | * above the starting address given. | 1073 | * above the starting address given. |
1061 | */ | 1074 | */ |
1062 | vma = find_vma(mm, addr); | 1075 | vma = find_vma(mm, addr); |
1063 | 1076 | ||
1064 | while (vma) { | 1077 | while (vma) { |
1065 | next = vma->vm_next; | 1078 | next = vma->vm_next; |
1066 | 1079 | ||
1067 | /* | 1080 | /* |
1068 | * Check if the starting address would match, i.e. it's | 1081 | * Check if the starting address would match, i.e. it's |
1069 | * a fragment created by mprotect() and/or munmap(), or | 1082 | * a fragment created by mprotect() and/or munmap(), or |
1070 | * otherwise it starts at this address with no hassles. | 1083 | * otherwise it starts at this address with no hassles. |
1071 | */ | 1084 | */ |
1072 | if ((vma->vm_ops == &shm_vm_ops) && | 1085 | if ((vma->vm_ops == &shm_vm_ops) && |
1073 | (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) { | 1086 | (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) { |
1074 | 1087 | ||
1075 | 1088 | ||
1076 | size = vma->vm_file->f_path.dentry->d_inode->i_size; | 1089 | size = vma->vm_file->f_path.dentry->d_inode->i_size; |
1077 | do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start); | 1090 | do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start); |
1078 | /* | 1091 | /* |
1079 | * We discovered the size of the shm segment, so | 1092 | * We discovered the size of the shm segment, so |
1080 | * break out of here and fall through to the next | 1093 | * break out of here and fall through to the next |
1081 | * loop that uses the size information to stop | 1094 | * loop that uses the size information to stop |
1082 | * searching for matching vma's. | 1095 | * searching for matching vma's. |
1083 | */ | 1096 | */ |
1084 | retval = 0; | 1097 | retval = 0; |
1085 | vma = next; | 1098 | vma = next; |
1086 | break; | 1099 | break; |
1087 | } | 1100 | } |
1088 | vma = next; | 1101 | vma = next; |
1089 | } | 1102 | } |
1090 | 1103 | ||
1091 | /* | 1104 | /* |
1092 | * We need look no further than the maximum address a fragment | 1105 | * We need look no further than the maximum address a fragment |
1093 | * could possibly have landed at. Also cast things to loff_t to | 1106 | * could possibly have landed at. Also cast things to loff_t to |
1094 | * prevent overflows and make comparisons vs. equal-width types. | 1107 | * prevent overflows and make comparisons vs. equal-width types. |
1095 | */ | 1108 | */ |
1096 | size = PAGE_ALIGN(size); | 1109 | size = PAGE_ALIGN(size); |
1097 | while (vma && (loff_t)(vma->vm_end - addr) <= size) { | 1110 | while (vma && (loff_t)(vma->vm_end - addr) <= size) { |
1098 | next = vma->vm_next; | 1111 | next = vma->vm_next; |
1099 | 1112 | ||
1100 | /* finding a matching vma now does not alter retval */ | 1113 | /* finding a matching vma now does not alter retval */ |
1101 | if ((vma->vm_ops == &shm_vm_ops) && | 1114 | if ((vma->vm_ops == &shm_vm_ops) && |
1102 | (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) | 1115 | (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) |
1103 | 1116 | ||
1104 | do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start); | 1117 | do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start); |
1105 | vma = next; | 1118 | vma = next; |
1106 | } | 1119 | } |
1107 | 1120 | ||
1108 | up_write(&mm->mmap_sem); | 1121 | up_write(&mm->mmap_sem); |
1109 | return retval; | 1122 | return retval; |
1110 | } | 1123 | } |
1111 | 1124 | ||
1112 | #ifdef CONFIG_PROC_FS | 1125 | #ifdef CONFIG_PROC_FS |
1113 | static int sysvipc_shm_proc_show(struct seq_file *s, void *it) | 1126 | static int sysvipc_shm_proc_show(struct seq_file *s, void *it) |
1114 | { | 1127 | { |
1115 | struct shmid_kernel *shp = it; | 1128 | struct shmid_kernel *shp = it; |
1116 | char *format; | 1129 | char *format; |
1117 | 1130 | ||
1118 | #define SMALL_STRING "%10d %10d %4o %10u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n" | 1131 | #define SMALL_STRING "%10d %10d %4o %10u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n" |
1119 | #define BIG_STRING "%10d %10d %4o %21u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n" | 1132 | #define BIG_STRING "%10d %10d %4o %21u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n" |
1120 | 1133 | ||
1121 | if (sizeof(size_t) <= sizeof(int)) | 1134 | if (sizeof(size_t) <= sizeof(int)) |
1122 | format = SMALL_STRING; | 1135 | format = SMALL_STRING; |
1123 | else | 1136 | else |
1124 | format = BIG_STRING; | 1137 | format = BIG_STRING; |
1125 | return seq_printf(s, format, | 1138 | return seq_printf(s, format, |
1126 | shp->shm_perm.key, | 1139 | shp->shm_perm.key, |
1127 | shp->shm_perm.id, | 1140 | shp->shm_perm.id, |
1128 | shp->shm_perm.mode, | 1141 | shp->shm_perm.mode, |
1129 | shp->shm_segsz, | 1142 | shp->shm_segsz, |
1130 | shp->shm_cprid, | 1143 | shp->shm_cprid, |
ipc/util.c
1 | /* | 1 | /* |
2 | * linux/ipc/util.c | 2 | * linux/ipc/util.c |
3 | * Copyright (C) 1992 Krishna Balasubramanian | 3 | * Copyright (C) 1992 Krishna Balasubramanian |
4 | * | 4 | * |
5 | * Sep 1997 - Call suser() last after "normal" permission checks so we | 5 | * Sep 1997 - Call suser() last after "normal" permission checks so we |
6 | * get BSD style process accounting right. | 6 | * get BSD style process accounting right. |
7 | * Occurs in several places in the IPC code. | 7 | * Occurs in several places in the IPC code. |
8 | * Chris Evans, <chris@ferret.lmh.ox.ac.uk> | 8 | * Chris Evans, <chris@ferret.lmh.ox.ac.uk> |
9 | * Nov 1999 - ipc helper functions, unified SMP locking | 9 | * Nov 1999 - ipc helper functions, unified SMP locking |
10 | * Manfred Spraul <manfred@colorfullife.com> | 10 | * Manfred Spraul <manfred@colorfullife.com> |
11 | * Oct 2002 - One lock per IPC id. RCU ipc_free for lock-free grow_ary(). | 11 | * Oct 2002 - One lock per IPC id. RCU ipc_free for lock-free grow_ary(). |
12 | * Mingming Cao <cmm@us.ibm.com> | 12 | * Mingming Cao <cmm@us.ibm.com> |
13 | * Mar 2006 - support for audit of ipc object properties | 13 | * Mar 2006 - support for audit of ipc object properties |
14 | * Dustin Kirkland <dustin.kirkland@us.ibm.com> | 14 | * Dustin Kirkland <dustin.kirkland@us.ibm.com> |
15 | * Jun 2006 - namespaces support | 15 | * Jun 2006 - namespaces support |
16 | * OpenVZ, SWsoft Inc. | 16 | * OpenVZ, SWsoft Inc. |
17 | * Pavel Emelianov <xemul@openvz.org> | 17 | * Pavel Emelianov <xemul@openvz.org> |
18 | */ | 18 | */ |
19 | 19 | ||
20 | #include <linux/mm.h> | 20 | #include <linux/mm.h> |
21 | #include <linux/shm.h> | 21 | #include <linux/shm.h> |
22 | #include <linux/init.h> | 22 | #include <linux/init.h> |
23 | #include <linux/msg.h> | 23 | #include <linux/msg.h> |
24 | #include <linux/vmalloc.h> | 24 | #include <linux/vmalloc.h> |
25 | #include <linux/slab.h> | 25 | #include <linux/slab.h> |
26 | #include <linux/capability.h> | 26 | #include <linux/capability.h> |
27 | #include <linux/highuid.h> | 27 | #include <linux/highuid.h> |
28 | #include <linux/security.h> | 28 | #include <linux/security.h> |
29 | #include <linux/rcupdate.h> | 29 | #include <linux/rcupdate.h> |
30 | #include <linux/workqueue.h> | 30 | #include <linux/workqueue.h> |
31 | #include <linux/seq_file.h> | 31 | #include <linux/seq_file.h> |
32 | #include <linux/proc_fs.h> | 32 | #include <linux/proc_fs.h> |
33 | #include <linux/audit.h> | 33 | #include <linux/audit.h> |
34 | #include <linux/nsproxy.h> | 34 | #include <linux/nsproxy.h> |
35 | 35 | ||
36 | #include <asm/unistd.h> | 36 | #include <asm/unistd.h> |
37 | 37 | ||
38 | #include "util.h" | 38 | #include "util.h" |
39 | 39 | ||
40 | struct ipc_proc_iface { | 40 | struct ipc_proc_iface { |
41 | const char *path; | 41 | const char *path; |
42 | const char *header; | 42 | const char *header; |
43 | int ids; | 43 | int ids; |
44 | int (*show)(struct seq_file *, void *); | 44 | int (*show)(struct seq_file *, void *); |
45 | }; | 45 | }; |
46 | 46 | ||
47 | struct ipc_namespace init_ipc_ns = { | 47 | struct ipc_namespace init_ipc_ns = { |
48 | .kref = { | 48 | .kref = { |
49 | .refcount = ATOMIC_INIT(2), | 49 | .refcount = ATOMIC_INIT(2), |
50 | }, | 50 | }, |
51 | }; | 51 | }; |
52 | 52 | ||
53 | static struct ipc_namespace *clone_ipc_ns(struct ipc_namespace *old_ns) | 53 | static struct ipc_namespace *clone_ipc_ns(struct ipc_namespace *old_ns) |
54 | { | 54 | { |
55 | int err; | 55 | int err; |
56 | struct ipc_namespace *ns; | 56 | struct ipc_namespace *ns; |
57 | 57 | ||
58 | err = -ENOMEM; | 58 | err = -ENOMEM; |
59 | ns = kmalloc(sizeof(struct ipc_namespace), GFP_KERNEL); | 59 | ns = kmalloc(sizeof(struct ipc_namespace), GFP_KERNEL); |
60 | if (ns == NULL) | 60 | if (ns == NULL) |
61 | goto err_mem; | 61 | goto err_mem; |
62 | 62 | ||
63 | err = sem_init_ns(ns); | 63 | err = sem_init_ns(ns); |
64 | if (err) | 64 | if (err) |
65 | goto err_sem; | 65 | goto err_sem; |
66 | err = msg_init_ns(ns); | 66 | err = msg_init_ns(ns); |
67 | if (err) | 67 | if (err) |
68 | goto err_msg; | 68 | goto err_msg; |
69 | err = shm_init_ns(ns); | 69 | err = shm_init_ns(ns); |
70 | if (err) | 70 | if (err) |
71 | goto err_shm; | 71 | goto err_shm; |
72 | 72 | ||
73 | kref_init(&ns->kref); | 73 | kref_init(&ns->kref); |
74 | return ns; | 74 | return ns; |
75 | 75 | ||
76 | err_shm: | 76 | err_shm: |
77 | msg_exit_ns(ns); | 77 | msg_exit_ns(ns); |
78 | err_msg: | 78 | err_msg: |
79 | sem_exit_ns(ns); | 79 | sem_exit_ns(ns); |
80 | err_sem: | 80 | err_sem: |
81 | kfree(ns); | 81 | kfree(ns); |
82 | err_mem: | 82 | err_mem: |
83 | return ERR_PTR(err); | 83 | return ERR_PTR(err); |
84 | } | 84 | } |
85 | 85 | ||
86 | struct ipc_namespace *copy_ipcs(unsigned long flags, struct ipc_namespace *ns) | 86 | struct ipc_namespace *copy_ipcs(unsigned long flags, struct ipc_namespace *ns) |
87 | { | 87 | { |
88 | struct ipc_namespace *new_ns; | 88 | struct ipc_namespace *new_ns; |
89 | 89 | ||
90 | BUG_ON(!ns); | 90 | BUG_ON(!ns); |
91 | get_ipc_ns(ns); | 91 | get_ipc_ns(ns); |
92 | 92 | ||
93 | if (!(flags & CLONE_NEWIPC)) | 93 | if (!(flags & CLONE_NEWIPC)) |
94 | return ns; | 94 | return ns; |
95 | 95 | ||
96 | new_ns = clone_ipc_ns(ns); | 96 | new_ns = clone_ipc_ns(ns); |
97 | 97 | ||
98 | put_ipc_ns(ns); | 98 | put_ipc_ns(ns); |
99 | return new_ns; | 99 | return new_ns; |
100 | } | 100 | } |
101 | 101 | ||
102 | void free_ipc_ns(struct kref *kref) | 102 | void free_ipc_ns(struct kref *kref) |
103 | { | 103 | { |
104 | struct ipc_namespace *ns; | 104 | struct ipc_namespace *ns; |
105 | 105 | ||
106 | ns = container_of(kref, struct ipc_namespace, kref); | 106 | ns = container_of(kref, struct ipc_namespace, kref); |
107 | sem_exit_ns(ns); | 107 | sem_exit_ns(ns); |
108 | msg_exit_ns(ns); | 108 | msg_exit_ns(ns); |
109 | shm_exit_ns(ns); | 109 | shm_exit_ns(ns); |
110 | kfree(ns); | 110 | kfree(ns); |
111 | } | 111 | } |
112 | 112 | ||
113 | /** | 113 | /** |
114 | * ipc_init - initialise IPC subsystem | 114 | * ipc_init - initialise IPC subsystem |
115 | * | 115 | * |
116 | * The various system5 IPC resources (semaphores, messages and shared | 116 | * The various system5 IPC resources (semaphores, messages and shared |
117 | * memory) are initialised | 117 | * memory) are initialised |
118 | */ | 118 | */ |
119 | 119 | ||
120 | static int __init ipc_init(void) | 120 | static int __init ipc_init(void) |
121 | { | 121 | { |
122 | sem_init(); | 122 | sem_init(); |
123 | msg_init(); | 123 | msg_init(); |
124 | shm_init(); | 124 | shm_init(); |
125 | return 0; | 125 | return 0; |
126 | } | 126 | } |
127 | __initcall(ipc_init); | 127 | __initcall(ipc_init); |
128 | 128 | ||
129 | /** | 129 | /** |
130 | * ipc_init_ids - initialise IPC identifiers | 130 | * ipc_init_ids - initialise IPC identifiers |
131 | * @ids: Identifier set | 131 | * @ids: Identifier set |
132 | * | 132 | * |
133 | * Set up the sequence range to use for the ipc identifier range (limited | 133 | * Set up the sequence range to use for the ipc identifier range (limited |
134 | * below IPCMNI) then initialise the ids idr. | 134 | * below IPCMNI) then initialise the ids idr. |
135 | */ | 135 | */ |
136 | 136 | ||
137 | void ipc_init_ids(struct ipc_ids *ids) | 137 | void ipc_init_ids(struct ipc_ids *ids) |
138 | { | 138 | { |
139 | mutex_init(&ids->mutex); | 139 | mutex_init(&ids->mutex); |
140 | 140 | ||
141 | ids->in_use = 0; | 141 | ids->in_use = 0; |
142 | ids->seq = 0; | 142 | ids->seq = 0; |
143 | { | 143 | { |
144 | int seq_limit = INT_MAX/SEQ_MULTIPLIER; | 144 | int seq_limit = INT_MAX/SEQ_MULTIPLIER; |
145 | if(seq_limit > USHRT_MAX) | 145 | if(seq_limit > USHRT_MAX) |
146 | ids->seq_max = USHRT_MAX; | 146 | ids->seq_max = USHRT_MAX; |
147 | else | 147 | else |
148 | ids->seq_max = seq_limit; | 148 | ids->seq_max = seq_limit; |
149 | } | 149 | } |
150 | 150 | ||
151 | idr_init(&ids->ipcs_idr); | 151 | idr_init(&ids->ipcs_idr); |
152 | } | 152 | } |
153 | 153 | ||
154 | #ifdef CONFIG_PROC_FS | 154 | #ifdef CONFIG_PROC_FS |
155 | static const struct file_operations sysvipc_proc_fops; | 155 | static const struct file_operations sysvipc_proc_fops; |
156 | /** | 156 | /** |
157 | * ipc_init_proc_interface - Create a proc interface for sysipc types using a seq_file interface. | 157 | * ipc_init_proc_interface - Create a proc interface for sysipc types using a seq_file interface. |
158 | * @path: Path in procfs | 158 | * @path: Path in procfs |
159 | * @header: Banner to be printed at the beginning of the file. | 159 | * @header: Banner to be printed at the beginning of the file. |
160 | * @ids: ipc id table to iterate. | 160 | * @ids: ipc id table to iterate. |
161 | * @show: show routine. | 161 | * @show: show routine. |
162 | */ | 162 | */ |
163 | void __init ipc_init_proc_interface(const char *path, const char *header, | 163 | void __init ipc_init_proc_interface(const char *path, const char *header, |
164 | int ids, int (*show)(struct seq_file *, void *)) | 164 | int ids, int (*show)(struct seq_file *, void *)) |
165 | { | 165 | { |
166 | struct proc_dir_entry *pde; | 166 | struct proc_dir_entry *pde; |
167 | struct ipc_proc_iface *iface; | 167 | struct ipc_proc_iface *iface; |
168 | 168 | ||
169 | iface = kmalloc(sizeof(*iface), GFP_KERNEL); | 169 | iface = kmalloc(sizeof(*iface), GFP_KERNEL); |
170 | if (!iface) | 170 | if (!iface) |
171 | return; | 171 | return; |
172 | iface->path = path; | 172 | iface->path = path; |
173 | iface->header = header; | 173 | iface->header = header; |
174 | iface->ids = ids; | 174 | iface->ids = ids; |
175 | iface->show = show; | 175 | iface->show = show; |
176 | 176 | ||
177 | pde = create_proc_entry(path, | 177 | pde = create_proc_entry(path, |
178 | S_IRUGO, /* world readable */ | 178 | S_IRUGO, /* world readable */ |
179 | NULL /* parent dir */); | 179 | NULL /* parent dir */); |
180 | if (pde) { | 180 | if (pde) { |
181 | pde->data = iface; | 181 | pde->data = iface; |
182 | pde->proc_fops = &sysvipc_proc_fops; | 182 | pde->proc_fops = &sysvipc_proc_fops; |
183 | } else { | 183 | } else { |
184 | kfree(iface); | 184 | kfree(iface); |
185 | } | 185 | } |
186 | } | 186 | } |
187 | #endif | 187 | #endif |
188 | 188 | ||
189 | /** | 189 | /** |
190 | * ipc_findkey - find a key in an ipc identifier set | 190 | * ipc_findkey - find a key in an ipc identifier set |
191 | * @ids: Identifier set | 191 | * @ids: Identifier set |
192 | * @key: The key to find | 192 | * @key: The key to find |
193 | * | 193 | * |
194 | * Requires ipc_ids.mutex locked. | 194 | * Requires ipc_ids.mutex locked. |
195 | * Returns the LOCKED pointer to the ipc structure if found or NULL | 195 | * Returns the LOCKED pointer to the ipc structure if found or NULL |
196 | * if not. | 196 | * if not. |
197 | * If key is found ipc contains its ipc structure | 197 | * If key is found ipc points to the owning ipc structure |
198 | */ | 198 | */ |
199 | 199 | ||
200 | static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key) | 200 | static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key) |
201 | { | 201 | { |
202 | struct kern_ipc_perm *ipc; | 202 | struct kern_ipc_perm *ipc; |
203 | int next_id; | 203 | int next_id; |
204 | int total; | 204 | int total; |
205 | 205 | ||
206 | for (total = 0, next_id = 0; total < ids->in_use; next_id++) { | 206 | for (total = 0, next_id = 0; total < ids->in_use; next_id++) { |
207 | ipc = idr_find(&ids->ipcs_idr, next_id); | 207 | ipc = idr_find(&ids->ipcs_idr, next_id); |
208 | 208 | ||
209 | if (ipc == NULL) | 209 | if (ipc == NULL) |
210 | continue; | 210 | continue; |
211 | 211 | ||
212 | if (ipc->key != key) { | 212 | if (ipc->key != key) { |
213 | total++; | 213 | total++; |
214 | continue; | 214 | continue; |
215 | } | 215 | } |
216 | 216 | ||
217 | ipc_lock_by_ptr(ipc); | 217 | ipc_lock_by_ptr(ipc); |
218 | return ipc; | 218 | return ipc; |
219 | } | 219 | } |
220 | 220 | ||
221 | return NULL; | 221 | return NULL; |
222 | } | 222 | } |
223 | 223 | ||
224 | /** | 224 | /** |
225 | * ipc_get_maxid - get the last assigned id | 225 | * ipc_get_maxid - get the last assigned id |
226 | * @ids: IPC identifier set | 226 | * @ids: IPC identifier set |
227 | * | 227 | * |
228 | * Called with ipc_ids.mutex held. | 228 | * Called with ipc_ids.mutex held. |
229 | */ | 229 | */ |
230 | 230 | ||
231 | int ipc_get_maxid(struct ipc_ids *ids) | 231 | int ipc_get_maxid(struct ipc_ids *ids) |
232 | { | 232 | { |
233 | struct kern_ipc_perm *ipc; | 233 | struct kern_ipc_perm *ipc; |
234 | int max_id = -1; | 234 | int max_id = -1; |
235 | int total, id; | 235 | int total, id; |
236 | 236 | ||
237 | if (ids->in_use == 0) | 237 | if (ids->in_use == 0) |
238 | return -1; | 238 | return -1; |
239 | 239 | ||
240 | if (ids->in_use == IPCMNI) | 240 | if (ids->in_use == IPCMNI) |
241 | return IPCMNI - 1; | 241 | return IPCMNI - 1; |
242 | 242 | ||
243 | /* Look for the last assigned id */ | 243 | /* Look for the last assigned id */ |
244 | total = 0; | 244 | total = 0; |
245 | for (id = 0; id < IPCMNI && total < ids->in_use; id++) { | 245 | for (id = 0; id < IPCMNI && total < ids->in_use; id++) { |
246 | ipc = idr_find(&ids->ipcs_idr, id); | 246 | ipc = idr_find(&ids->ipcs_idr, id); |
247 | if (ipc != NULL) { | 247 | if (ipc != NULL) { |
248 | max_id = id; | 248 | max_id = id; |
249 | total++; | 249 | total++; |
250 | } | 250 | } |
251 | } | 251 | } |
252 | return max_id; | 252 | return max_id; |
253 | } | 253 | } |
254 | 254 | ||
255 | /** | 255 | /** |
256 | * ipc_addid - add an IPC identifier | 256 | * ipc_addid - add an IPC identifier |
257 | * @ids: IPC identifier set | 257 | * @ids: IPC identifier set |
258 | * @new: new IPC permission set | 258 | * @new: new IPC permission set |
259 | * @size: limit for the number of used ids | 259 | * @size: limit for the number of used ids |
260 | * | 260 | * |
261 | * Add an entry 'new' to the IPC idr. The permissions object is | 261 | * Add an entry 'new' to the IPC ids idr. The permissions object is |
262 | * initialised and the first free entry is set up and the id assigned | 262 | * initialised and the first free entry is set up and the id assigned |
263 | * is returned. The list is returned in a locked state on success. | 263 | * is returned. The 'new' entry is returned in a locked state on success. |
264 | * On failure the list is not locked and -1 is returned. | 264 | * On failure the entry is not locked and -1 is returned. |
265 | * | 265 | * |
266 | * Called with ipc_ids.mutex held. | 266 | * Called with ipc_ids.mutex held. |
267 | */ | 267 | */ |
268 | 268 | ||
269 | int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size) | 269 | int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size) |
270 | { | 270 | { |
271 | int id, err; | 271 | int id, err; |
272 | 272 | ||
273 | /* | ||
274 | * rcu_dereference()() is not needed here since | ||
275 | * ipc_ids.mutex is held | ||
276 | */ | ||
277 | if (size > IPCMNI) | 273 | if (size > IPCMNI) |
278 | size = IPCMNI; | 274 | size = IPCMNI; |
279 | 275 | ||
280 | if (ids->in_use >= size) | 276 | if (ids->in_use >= size) |
281 | return -1; | 277 | return -1; |
282 | 278 | ||
283 | err = idr_get_new(&ids->ipcs_idr, new, &id); | 279 | err = idr_get_new(&ids->ipcs_idr, new, &id); |
284 | if (err) | 280 | if (err) |
285 | return -1; | 281 | return -1; |
286 | 282 | ||
287 | ids->in_use++; | 283 | ids->in_use++; |
288 | 284 | ||
289 | new->cuid = new->uid = current->euid; | 285 | new->cuid = new->uid = current->euid; |
290 | new->gid = new->cgid = current->egid; | 286 | new->gid = new->cgid = current->egid; |
291 | 287 | ||
292 | new->seq = ids->seq++; | 288 | new->seq = ids->seq++; |
293 | if(ids->seq > ids->seq_max) | 289 | if(ids->seq > ids->seq_max) |
294 | ids->seq = 0; | 290 | ids->seq = 0; |
295 | 291 | ||
296 | spin_lock_init(&new->lock); | 292 | spin_lock_init(&new->lock); |
297 | new->deleted = 0; | 293 | new->deleted = 0; |
298 | rcu_read_lock(); | 294 | rcu_read_lock(); |
299 | spin_lock(&new->lock); | 295 | spin_lock(&new->lock); |
300 | return id; | 296 | return id; |
301 | } | 297 | } |
302 | 298 | ||
303 | /** | 299 | /** |
304 | * ipcget_new - create a new ipc object | 300 | * ipcget_new - create a new ipc object |
305 | * @ns: namespace | 301 | * @ns: namespace |
306 | * @ids: identifer set | 302 | * @ids: IPC identifer set |
307 | * @ops: the actual creation routine to call | 303 | * @ops: the actual creation routine to call |
308 | * @params: its parameters | 304 | * @params: its parameters |
309 | * | 305 | * |
310 | * This routine is called sys_msgget, sys_semget() and sys_shmget() when | 306 | * This routine is called by sys_msgget, sys_semget() and sys_shmget() |
311 | * the key is IPC_PRIVATE | 307 | * when the key is IPC_PRIVATE. |
312 | */ | 308 | */ |
313 | int ipcget_new(struct ipc_namespace *ns, struct ipc_ids *ids, | 309 | int ipcget_new(struct ipc_namespace *ns, struct ipc_ids *ids, |
314 | struct ipc_ops *ops, struct ipc_params *params) | 310 | struct ipc_ops *ops, struct ipc_params *params) |
315 | { | 311 | { |
316 | int err; | 312 | int err; |
317 | 313 | ||
318 | err = idr_pre_get(&ids->ipcs_idr, GFP_KERNEL); | 314 | err = idr_pre_get(&ids->ipcs_idr, GFP_KERNEL); |
319 | 315 | ||
320 | if (!err) | 316 | if (!err) |
321 | return -ENOMEM; | 317 | return -ENOMEM; |
322 | 318 | ||
323 | mutex_lock(&ids->mutex); | 319 | mutex_lock(&ids->mutex); |
324 | err = ops->getnew(ns, params); | 320 | err = ops->getnew(ns, params); |
325 | mutex_unlock(&ids->mutex); | 321 | mutex_unlock(&ids->mutex); |
326 | 322 | ||
327 | return err; | 323 | return err; |
328 | } | 324 | } |
329 | 325 | ||
330 | /** | 326 | /** |
331 | * ipc_check_perms - check security and permissions for an IPC | 327 | * ipc_check_perms - check security and permissions for an IPC |
332 | * @ipcp: ipc permission set | 328 | * @ipcp: ipc permission set |
333 | * @ids: identifer set | ||
334 | * @ops: the actual security routine to call | 329 | * @ops: the actual security routine to call |
335 | * @params: its parameters | 330 | * @params: its parameters |
331 | * | ||
332 | * This routine is called by sys_msgget(), sys_semget() and sys_shmget() | ||
333 | * when the key is not IPC_PRIVATE and that key already exists in the | ||
334 | * ids IDR. | ||
335 | * | ||
336 | * On success, the IPC id is returned. | ||
337 | * | ||
338 | * It is called with ipc_ids.mutex and ipcp->lock held. | ||
336 | */ | 339 | */ |
337 | static int ipc_check_perms(struct kern_ipc_perm *ipcp, struct ipc_ops *ops, | 340 | static int ipc_check_perms(struct kern_ipc_perm *ipcp, struct ipc_ops *ops, |
338 | struct ipc_params *params) | 341 | struct ipc_params *params) |
339 | { | 342 | { |
340 | int err; | 343 | int err; |
341 | 344 | ||
342 | if (ipcperms(ipcp, params->flg)) | 345 | if (ipcperms(ipcp, params->flg)) |
343 | err = -EACCES; | 346 | err = -EACCES; |
344 | else { | 347 | else { |
345 | err = ops->associate(ipcp, params->flg); | 348 | err = ops->associate(ipcp, params->flg); |
346 | if (!err) | 349 | if (!err) |
347 | err = ipcp->id; | 350 | err = ipcp->id; |
348 | } | 351 | } |
349 | 352 | ||
350 | return err; | 353 | return err; |
351 | } | 354 | } |
352 | 355 | ||
353 | /** | 356 | /** |
354 | * ipcget_public - get an ipc object or create a new one | 357 | * ipcget_public - get an ipc object or create a new one |
355 | * @ns: namespace | 358 | * @ns: namespace |
356 | * @ids: identifer set | 359 | * @ids: IPC identifer set |
357 | * @ops: the actual creation routine to call | 360 | * @ops: the actual creation routine to call |
358 | * @params: its parameters | 361 | * @params: its parameters |
359 | * | 362 | * |
360 | * This routine is called sys_msgget, sys_semget() and sys_shmget() when | 363 | * This routine is called by sys_msgget, sys_semget() and sys_shmget() |
361 | * the key is not IPC_PRIVATE | 364 | * when the key is not IPC_PRIVATE. |
365 | * It adds a new entry if the key is not found and does some permission | ||
366 | * / security checkings if the key is found. | ||
367 | * | ||
368 | * On success, the ipc id is returned. | ||
362 | */ | 369 | */ |
363 | int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids, | 370 | int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids, |
364 | struct ipc_ops *ops, struct ipc_params *params) | 371 | struct ipc_ops *ops, struct ipc_params *params) |
365 | { | 372 | { |
366 | struct kern_ipc_perm *ipcp; | 373 | struct kern_ipc_perm *ipcp; |
367 | int flg = params->flg; | 374 | int flg = params->flg; |
368 | int err; | 375 | int err; |
369 | 376 | ||
370 | err = idr_pre_get(&ids->ipcs_idr, GFP_KERNEL); | 377 | err = idr_pre_get(&ids->ipcs_idr, GFP_KERNEL); |
371 | 378 | ||
372 | mutex_lock(&ids->mutex); | 379 | mutex_lock(&ids->mutex); |
373 | ipcp = ipc_findkey(ids, params->key); | 380 | ipcp = ipc_findkey(ids, params->key); |
374 | if (ipcp == NULL) { | 381 | if (ipcp == NULL) { |
375 | /* key not used */ | 382 | /* key not used */ |
376 | if (!(flg & IPC_CREAT)) | 383 | if (!(flg & IPC_CREAT)) |
377 | err = -ENOENT; | 384 | err = -ENOENT; |
378 | else if (!err) | 385 | else if (!err) |
379 | err = -ENOMEM; | 386 | err = -ENOMEM; |
380 | else | 387 | else |
381 | err = ops->getnew(ns, params); | 388 | err = ops->getnew(ns, params); |
382 | } else { | 389 | } else { |
383 | /* ipc object has been locked by ipc_findkey() */ | 390 | /* ipc object has been locked by ipc_findkey() */ |
384 | 391 | ||
385 | if (flg & IPC_CREAT && flg & IPC_EXCL) | 392 | if (flg & IPC_CREAT && flg & IPC_EXCL) |
386 | err = -EEXIST; | 393 | err = -EEXIST; |
387 | else { | 394 | else { |
388 | err = 0; | 395 | err = 0; |
389 | if (ops->more_checks) | 396 | if (ops->more_checks) |
390 | err = ops->more_checks(ipcp, params); | 397 | err = ops->more_checks(ipcp, params); |
391 | if (!err) | 398 | if (!err) |
399 | /* | ||
400 | * ipc_check_perms returns the IPC id on | ||
401 | * success | ||
402 | */ | ||
392 | err = ipc_check_perms(ipcp, ops, params); | 403 | err = ipc_check_perms(ipcp, ops, params); |
393 | } | 404 | } |
394 | ipc_unlock(ipcp); | 405 | ipc_unlock(ipcp); |
395 | } | 406 | } |
396 | mutex_unlock(&ids->mutex); | 407 | mutex_unlock(&ids->mutex); |
397 | 408 | ||
398 | return err; | 409 | return err; |
399 | } | 410 | } |
400 | 411 | ||
401 | 412 | ||
402 | /** | 413 | /** |
403 | * ipc_rmid - remove an IPC identifier | 414 | * ipc_rmid - remove an IPC identifier |
404 | * @ids: identifier set | 415 | * @ids: IPC identifier set |
405 | * @id: ipc perm structure containing the identifier to remove | 416 | * @ipcp: ipc perm structure containing the identifier to remove |
406 | * | 417 | * |
407 | * The identifier must be valid, and in use. The kernel will panic if | ||
408 | * fed an invalid identifier. The entry is removed and internal | ||
409 | * variables recomputed. | ||
410 | * ipc_ids.mutex and the spinlock for this ID are held before this | 418 | * ipc_ids.mutex and the spinlock for this ID are held before this |
411 | * function is called, and remain locked on the exit. | 419 | * function is called, and remain locked on the exit. |
412 | */ | 420 | */ |
413 | 421 | ||
414 | void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp) | 422 | void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp) |
415 | { | 423 | { |
416 | int lid = ipcid_to_idx(ipcp->id); | 424 | int lid = ipcid_to_idx(ipcp->id); |
417 | 425 | ||
418 | idr_remove(&ids->ipcs_idr, lid); | 426 | idr_remove(&ids->ipcs_idr, lid); |
419 | 427 | ||
420 | ids->in_use--; | 428 | ids->in_use--; |
421 | 429 | ||
422 | ipcp->deleted = 1; | 430 | ipcp->deleted = 1; |
423 | 431 | ||
424 | return; | 432 | return; |
425 | } | 433 | } |
426 | 434 | ||
427 | /** | 435 | /** |
428 | * ipc_alloc - allocate ipc space | 436 | * ipc_alloc - allocate ipc space |
429 | * @size: size desired | 437 | * @size: size desired |
430 | * | 438 | * |
431 | * Allocate memory from the appropriate pools and return a pointer to it. | 439 | * Allocate memory from the appropriate pools and return a pointer to it. |
432 | * NULL is returned if the allocation fails | 440 | * NULL is returned if the allocation fails |
433 | */ | 441 | */ |
434 | 442 | ||
435 | void* ipc_alloc(int size) | 443 | void* ipc_alloc(int size) |
436 | { | 444 | { |
437 | void* out; | 445 | void* out; |
438 | if(size > PAGE_SIZE) | 446 | if(size > PAGE_SIZE) |
439 | out = vmalloc(size); | 447 | out = vmalloc(size); |
440 | else | 448 | else |
441 | out = kmalloc(size, GFP_KERNEL); | 449 | out = kmalloc(size, GFP_KERNEL); |
442 | return out; | 450 | return out; |
443 | } | 451 | } |
444 | 452 | ||
445 | /** | 453 | /** |
446 | * ipc_free - free ipc space | 454 | * ipc_free - free ipc space |
447 | * @ptr: pointer returned by ipc_alloc | 455 | * @ptr: pointer returned by ipc_alloc |
448 | * @size: size of block | 456 | * @size: size of block |
449 | * | 457 | * |
450 | * Free a block created with ipc_alloc(). The caller must know the size | 458 | * Free a block created with ipc_alloc(). The caller must know the size |
451 | * used in the allocation call. | 459 | * used in the allocation call. |
452 | */ | 460 | */ |
453 | 461 | ||
454 | void ipc_free(void* ptr, int size) | 462 | void ipc_free(void* ptr, int size) |
455 | { | 463 | { |
456 | if(size > PAGE_SIZE) | 464 | if(size > PAGE_SIZE) |
457 | vfree(ptr); | 465 | vfree(ptr); |
458 | else | 466 | else |
459 | kfree(ptr); | 467 | kfree(ptr); |
460 | } | 468 | } |
461 | 469 | ||
462 | /* | 470 | /* |
463 | * rcu allocations: | 471 | * rcu allocations: |
464 | * There are three headers that are prepended to the actual allocation: | 472 | * There are three headers that are prepended to the actual allocation: |
465 | * - during use: ipc_rcu_hdr. | 473 | * - during use: ipc_rcu_hdr. |
466 | * - during the rcu grace period: ipc_rcu_grace. | 474 | * - during the rcu grace period: ipc_rcu_grace. |
467 | * - [only if vmalloc]: ipc_rcu_sched. | 475 | * - [only if vmalloc]: ipc_rcu_sched. |
468 | * Their lifetime doesn't overlap, thus the headers share the same memory. | 476 | * Their lifetime doesn't overlap, thus the headers share the same memory. |
469 | * Unlike a normal union, they are right-aligned, thus some container_of | 477 | * Unlike a normal union, they are right-aligned, thus some container_of |
470 | * forward/backward casting is necessary: | 478 | * forward/backward casting is necessary: |
471 | */ | 479 | */ |
472 | struct ipc_rcu_hdr | 480 | struct ipc_rcu_hdr |
473 | { | 481 | { |
474 | int refcount; | 482 | int refcount; |
475 | int is_vmalloc; | 483 | int is_vmalloc; |
476 | void *data[0]; | 484 | void *data[0]; |
477 | }; | 485 | }; |
478 | 486 | ||
479 | 487 | ||
480 | struct ipc_rcu_grace | 488 | struct ipc_rcu_grace |
481 | { | 489 | { |
482 | struct rcu_head rcu; | 490 | struct rcu_head rcu; |
483 | /* "void *" makes sure alignment of following data is sane. */ | 491 | /* "void *" makes sure alignment of following data is sane. */ |
484 | void *data[0]; | 492 | void *data[0]; |
485 | }; | 493 | }; |
486 | 494 | ||
487 | struct ipc_rcu_sched | 495 | struct ipc_rcu_sched |
488 | { | 496 | { |
489 | struct work_struct work; | 497 | struct work_struct work; |
490 | /* "void *" makes sure alignment of following data is sane. */ | 498 | /* "void *" makes sure alignment of following data is sane. */ |
491 | void *data[0]; | 499 | void *data[0]; |
492 | }; | 500 | }; |
493 | 501 | ||
494 | #define HDRLEN_KMALLOC (sizeof(struct ipc_rcu_grace) > sizeof(struct ipc_rcu_hdr) ? \ | 502 | #define HDRLEN_KMALLOC (sizeof(struct ipc_rcu_grace) > sizeof(struct ipc_rcu_hdr) ? \ |
495 | sizeof(struct ipc_rcu_grace) : sizeof(struct ipc_rcu_hdr)) | 503 | sizeof(struct ipc_rcu_grace) : sizeof(struct ipc_rcu_hdr)) |
496 | #define HDRLEN_VMALLOC (sizeof(struct ipc_rcu_sched) > HDRLEN_KMALLOC ? \ | 504 | #define HDRLEN_VMALLOC (sizeof(struct ipc_rcu_sched) > HDRLEN_KMALLOC ? \ |
497 | sizeof(struct ipc_rcu_sched) : HDRLEN_KMALLOC) | 505 | sizeof(struct ipc_rcu_sched) : HDRLEN_KMALLOC) |
498 | 506 | ||
499 | static inline int rcu_use_vmalloc(int size) | 507 | static inline int rcu_use_vmalloc(int size) |
500 | { | 508 | { |
501 | /* Too big for a single page? */ | 509 | /* Too big for a single page? */ |
502 | if (HDRLEN_KMALLOC + size > PAGE_SIZE) | 510 | if (HDRLEN_KMALLOC + size > PAGE_SIZE) |
503 | return 1; | 511 | return 1; |
504 | return 0; | 512 | return 0; |
505 | } | 513 | } |
506 | 514 | ||
507 | /** | 515 | /** |
508 | * ipc_rcu_alloc - allocate ipc and rcu space | 516 | * ipc_rcu_alloc - allocate ipc and rcu space |
509 | * @size: size desired | 517 | * @size: size desired |
510 | * | 518 | * |
511 | * Allocate memory for the rcu header structure + the object. | 519 | * Allocate memory for the rcu header structure + the object. |
512 | * Returns the pointer to the object. | 520 | * Returns the pointer to the object. |
513 | * NULL is returned if the allocation fails. | 521 | * NULL is returned if the allocation fails. |
514 | */ | 522 | */ |
515 | 523 | ||
516 | void* ipc_rcu_alloc(int size) | 524 | void* ipc_rcu_alloc(int size) |
517 | { | 525 | { |
518 | void* out; | 526 | void* out; |
519 | /* | 527 | /* |
520 | * We prepend the allocation with the rcu struct, and | 528 | * We prepend the allocation with the rcu struct, and |
521 | * workqueue if necessary (for vmalloc). | 529 | * workqueue if necessary (for vmalloc). |
522 | */ | 530 | */ |
523 | if (rcu_use_vmalloc(size)) { | 531 | if (rcu_use_vmalloc(size)) { |
524 | out = vmalloc(HDRLEN_VMALLOC + size); | 532 | out = vmalloc(HDRLEN_VMALLOC + size); |
525 | if (out) { | 533 | if (out) { |
526 | out += HDRLEN_VMALLOC; | 534 | out += HDRLEN_VMALLOC; |
527 | container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 1; | 535 | container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 1; |
528 | container_of(out, struct ipc_rcu_hdr, data)->refcount = 1; | 536 | container_of(out, struct ipc_rcu_hdr, data)->refcount = 1; |
529 | } | 537 | } |
530 | } else { | 538 | } else { |
531 | out = kmalloc(HDRLEN_KMALLOC + size, GFP_KERNEL); | 539 | out = kmalloc(HDRLEN_KMALLOC + size, GFP_KERNEL); |
532 | if (out) { | 540 | if (out) { |
533 | out += HDRLEN_KMALLOC; | 541 | out += HDRLEN_KMALLOC; |
534 | container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 0; | 542 | container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 0; |
535 | container_of(out, struct ipc_rcu_hdr, data)->refcount = 1; | 543 | container_of(out, struct ipc_rcu_hdr, data)->refcount = 1; |
536 | } | 544 | } |
537 | } | 545 | } |
538 | 546 | ||
539 | return out; | 547 | return out; |
540 | } | 548 | } |
541 | 549 | ||
542 | void ipc_rcu_getref(void *ptr) | 550 | void ipc_rcu_getref(void *ptr) |
543 | { | 551 | { |
544 | container_of(ptr, struct ipc_rcu_hdr, data)->refcount++; | 552 | container_of(ptr, struct ipc_rcu_hdr, data)->refcount++; |
545 | } | 553 | } |
546 | 554 | ||
547 | static void ipc_do_vfree(struct work_struct *work) | 555 | static void ipc_do_vfree(struct work_struct *work) |
548 | { | 556 | { |
549 | vfree(container_of(work, struct ipc_rcu_sched, work)); | 557 | vfree(container_of(work, struct ipc_rcu_sched, work)); |
550 | } | 558 | } |
551 | 559 | ||
552 | /** | 560 | /** |
553 | * ipc_schedule_free - free ipc + rcu space | 561 | * ipc_schedule_free - free ipc + rcu space |
554 | * @head: RCU callback structure for queued work | 562 | * @head: RCU callback structure for queued work |
555 | * | 563 | * |
556 | * Since RCU callback function is called in bh, | 564 | * Since RCU callback function is called in bh, |
557 | * we need to defer the vfree to schedule_work(). | 565 | * we need to defer the vfree to schedule_work(). |
558 | */ | 566 | */ |
559 | static void ipc_schedule_free(struct rcu_head *head) | 567 | static void ipc_schedule_free(struct rcu_head *head) |
560 | { | 568 | { |
561 | struct ipc_rcu_grace *grace = | 569 | struct ipc_rcu_grace *grace; |
562 | container_of(head, struct ipc_rcu_grace, rcu); | 570 | struct ipc_rcu_sched *sched; |
563 | struct ipc_rcu_sched *sched = | ||
564 | container_of(&(grace->data[0]), struct ipc_rcu_sched, data[0]); | ||
565 | 571 | ||
572 | grace = container_of(head, struct ipc_rcu_grace, rcu); | ||
573 | sched = container_of(&(grace->data[0]), struct ipc_rcu_sched, | ||
574 | data[0]); | ||
575 | |||
566 | INIT_WORK(&sched->work, ipc_do_vfree); | 576 | INIT_WORK(&sched->work, ipc_do_vfree); |
567 | schedule_work(&sched->work); | 577 | schedule_work(&sched->work); |
568 | } | 578 | } |
569 | 579 | ||
570 | /** | 580 | /** |
571 | * ipc_immediate_free - free ipc + rcu space | 581 | * ipc_immediate_free - free ipc + rcu space |
572 | * @head: RCU callback structure that contains pointer to be freed | 582 | * @head: RCU callback structure that contains pointer to be freed |
573 | * | 583 | * |
574 | * Free from the RCU callback context. | 584 | * Free from the RCU callback context. |
575 | */ | 585 | */ |
576 | static void ipc_immediate_free(struct rcu_head *head) | 586 | static void ipc_immediate_free(struct rcu_head *head) |
577 | { | 587 | { |
578 | struct ipc_rcu_grace *free = | 588 | struct ipc_rcu_grace *free = |
579 | container_of(head, struct ipc_rcu_grace, rcu); | 589 | container_of(head, struct ipc_rcu_grace, rcu); |
580 | kfree(free); | 590 | kfree(free); |
581 | } | 591 | } |
582 | 592 | ||
583 | void ipc_rcu_putref(void *ptr) | 593 | void ipc_rcu_putref(void *ptr) |
584 | { | 594 | { |
585 | if (--container_of(ptr, struct ipc_rcu_hdr, data)->refcount > 0) | 595 | if (--container_of(ptr, struct ipc_rcu_hdr, data)->refcount > 0) |
586 | return; | 596 | return; |
587 | 597 | ||
588 | if (container_of(ptr, struct ipc_rcu_hdr, data)->is_vmalloc) { | 598 | if (container_of(ptr, struct ipc_rcu_hdr, data)->is_vmalloc) { |
589 | call_rcu(&container_of(ptr, struct ipc_rcu_grace, data)->rcu, | 599 | call_rcu(&container_of(ptr, struct ipc_rcu_grace, data)->rcu, |
590 | ipc_schedule_free); | 600 | ipc_schedule_free); |
591 | } else { | 601 | } else { |
592 | call_rcu(&container_of(ptr, struct ipc_rcu_grace, data)->rcu, | 602 | call_rcu(&container_of(ptr, struct ipc_rcu_grace, data)->rcu, |
593 | ipc_immediate_free); | 603 | ipc_immediate_free); |
594 | } | 604 | } |
595 | } | 605 | } |
596 | 606 | ||
597 | /** | 607 | /** |
598 | * ipcperms - check IPC permissions | 608 | * ipcperms - check IPC permissions |
599 | * @ipcp: IPC permission set | 609 | * @ipcp: IPC permission set |
600 | * @flag: desired permission set. | 610 | * @flag: desired permission set. |
601 | * | 611 | * |
602 | * Check user, group, other permissions for access | 612 | * Check user, group, other permissions for access |
603 | * to ipc resources. return 0 if allowed | 613 | * to ipc resources. return 0 if allowed |
604 | */ | 614 | */ |
605 | 615 | ||
606 | int ipcperms (struct kern_ipc_perm *ipcp, short flag) | 616 | int ipcperms (struct kern_ipc_perm *ipcp, short flag) |
607 | { /* flag will most probably be 0 or S_...UGO from <linux/stat.h> */ | 617 | { /* flag will most probably be 0 or S_...UGO from <linux/stat.h> */ |
608 | int requested_mode, granted_mode, err; | 618 | int requested_mode, granted_mode, err; |
609 | 619 | ||
610 | if (unlikely((err = audit_ipc_obj(ipcp)))) | 620 | if (unlikely((err = audit_ipc_obj(ipcp)))) |
611 | return err; | 621 | return err; |
612 | requested_mode = (flag >> 6) | (flag >> 3) | flag; | 622 | requested_mode = (flag >> 6) | (flag >> 3) | flag; |
613 | granted_mode = ipcp->mode; | 623 | granted_mode = ipcp->mode; |
614 | if (current->euid == ipcp->cuid || current->euid == ipcp->uid) | 624 | if (current->euid == ipcp->cuid || current->euid == ipcp->uid) |
615 | granted_mode >>= 6; | 625 | granted_mode >>= 6; |
616 | else if (in_group_p(ipcp->cgid) || in_group_p(ipcp->gid)) | 626 | else if (in_group_p(ipcp->cgid) || in_group_p(ipcp->gid)) |
617 | granted_mode >>= 3; | 627 | granted_mode >>= 3; |
618 | /* is there some bit set in requested_mode but not in granted_mode? */ | 628 | /* is there some bit set in requested_mode but not in granted_mode? */ |
619 | if ((requested_mode & ~granted_mode & 0007) && | 629 | if ((requested_mode & ~granted_mode & 0007) && |
620 | !capable(CAP_IPC_OWNER)) | 630 | !capable(CAP_IPC_OWNER)) |
621 | return -1; | 631 | return -1; |
622 | 632 | ||
623 | return security_ipc_permission(ipcp, flag); | 633 | return security_ipc_permission(ipcp, flag); |
624 | } | 634 | } |
625 | 635 | ||
626 | /* | 636 | /* |
627 | * Functions to convert between the kern_ipc_perm structure and the | 637 | * Functions to convert between the kern_ipc_perm structure and the |
628 | * old/new ipc_perm structures | 638 | * old/new ipc_perm structures |
629 | */ | 639 | */ |
630 | 640 | ||
631 | /** | 641 | /** |
632 | * kernel_to_ipc64_perm - convert kernel ipc permissions to user | 642 | * kernel_to_ipc64_perm - convert kernel ipc permissions to user |
633 | * @in: kernel permissions | 643 | * @in: kernel permissions |
634 | * @out: new style IPC permissions | 644 | * @out: new style IPC permissions |
635 | * | 645 | * |
636 | * Turn the kernel object @in into a set of permissions descriptions | 646 | * Turn the kernel object @in into a set of permissions descriptions |
637 | * for returning to userspace (@out). | 647 | * for returning to userspace (@out). |
638 | */ | 648 | */ |
639 | 649 | ||
640 | 650 | ||
641 | void kernel_to_ipc64_perm (struct kern_ipc_perm *in, struct ipc64_perm *out) | 651 | void kernel_to_ipc64_perm (struct kern_ipc_perm *in, struct ipc64_perm *out) |
642 | { | 652 | { |
643 | out->key = in->key; | 653 | out->key = in->key; |
644 | out->uid = in->uid; | 654 | out->uid = in->uid; |
645 | out->gid = in->gid; | 655 | out->gid = in->gid; |
646 | out->cuid = in->cuid; | 656 | out->cuid = in->cuid; |
647 | out->cgid = in->cgid; | 657 | out->cgid = in->cgid; |
648 | out->mode = in->mode; | 658 | out->mode = in->mode; |
649 | out->seq = in->seq; | 659 | out->seq = in->seq; |
650 | } | 660 | } |
651 | 661 | ||
652 | /** | 662 | /** |
653 | * ipc64_perm_to_ipc_perm - convert old ipc permissions to new | 663 | * ipc64_perm_to_ipc_perm - convert new ipc permissions to old |
654 | * @in: new style IPC permissions | 664 | * @in: new style IPC permissions |
655 | * @out: old style IPC permissions | 665 | * @out: old style IPC permissions |
656 | * | 666 | * |
657 | * Turn the new style permissions object @in into a compatibility | 667 | * Turn the new style permissions object @in into a compatibility |
658 | * object and store it into the @out pointer. | 668 | * object and store it into the @out pointer. |
659 | */ | 669 | */ |
660 | 670 | ||
661 | void ipc64_perm_to_ipc_perm (struct ipc64_perm *in, struct ipc_perm *out) | 671 | void ipc64_perm_to_ipc_perm (struct ipc64_perm *in, struct ipc_perm *out) |
662 | { | 672 | { |
663 | out->key = in->key; | 673 | out->key = in->key; |
664 | SET_UID(out->uid, in->uid); | 674 | SET_UID(out->uid, in->uid); |
665 | SET_GID(out->gid, in->gid); | 675 | SET_GID(out->gid, in->gid); |
666 | SET_UID(out->cuid, in->cuid); | 676 | SET_UID(out->cuid, in->cuid); |
667 | SET_GID(out->cgid, in->cgid); | 677 | SET_GID(out->cgid, in->cgid); |
668 | out->mode = in->mode; | 678 | out->mode = in->mode; |
669 | out->seq = in->seq; | 679 | out->seq = in->seq; |
670 | } | 680 | } |
671 | 681 | ||
682 | /** | ||
683 | * ipc_lock - Lock an ipc structure | ||
684 | * @ids: IPC identifier set | ||
685 | * @id: ipc id to look for | ||
686 | * | ||
687 | * Look for an id in the ipc ids idr and lock the associated ipc object. | ||
688 | * | ||
689 | * ipc_ids.mutex is not necessarily held before this function is called, | ||
690 | * that's why we enter a RCU read section. | ||
691 | * The ipc object is locked on exit. | ||
692 | */ | ||
693 | |||
672 | struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id) | 694 | struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id) |
673 | { | 695 | { |
674 | struct kern_ipc_perm *out; | 696 | struct kern_ipc_perm *out; |
675 | int lid = ipcid_to_idx(id); | 697 | int lid = ipcid_to_idx(id); |
676 | 698 | ||
677 | rcu_read_lock(); | 699 | rcu_read_lock(); |
678 | out = idr_find(&ids->ipcs_idr, lid); | 700 | out = idr_find(&ids->ipcs_idr, lid); |
679 | if (out == NULL) { | 701 | if (out == NULL) { |
680 | rcu_read_unlock(); | 702 | rcu_read_unlock(); |
681 | return ERR_PTR(-EINVAL); | 703 | return ERR_PTR(-EINVAL); |
682 | } | 704 | } |
683 | 705 | ||
684 | spin_lock(&out->lock); | 706 | spin_lock(&out->lock); |
685 | 707 | ||
686 | /* ipc_rmid() may have already freed the ID while ipc_lock | 708 | /* ipc_rmid() may have already freed the ID while ipc_lock |
687 | * was spinning: here verify that the structure is still valid | 709 | * was spinning: here verify that the structure is still valid |
688 | */ | 710 | */ |
689 | if (out->deleted) { | 711 | if (out->deleted) { |
690 | spin_unlock(&out->lock); | 712 | spin_unlock(&out->lock); |
691 | rcu_read_unlock(); | 713 | rcu_read_unlock(); |
692 | return ERR_PTR(-EINVAL); | 714 | return ERR_PTR(-EINVAL); |
693 | } | 715 | } |
694 | 716 | ||
695 | return out; | 717 | return out; |
696 | } | 718 | } |
697 | 719 | ||
698 | #ifdef __ARCH_WANT_IPC_PARSE_VERSION | 720 | #ifdef __ARCH_WANT_IPC_PARSE_VERSION |
699 | 721 | ||
700 | 722 | ||
701 | /** | 723 | /** |
702 | * ipc_parse_version - IPC call version | 724 | * ipc_parse_version - IPC call version |
703 | * @cmd: pointer to command | 725 | * @cmd: pointer to command |
704 | * | 726 | * |
705 | * Return IPC_64 for new style IPC and IPC_OLD for old style IPC. | 727 | * Return IPC_64 for new style IPC and IPC_OLD for old style IPC. |
706 | * The @cmd value is turned from an encoding command and version into | 728 | * The @cmd value is turned from an encoding command and version into |
707 | * just the command code. | 729 | * just the command code. |
708 | */ | 730 | */ |
709 | 731 | ||
710 | int ipc_parse_version (int *cmd) | 732 | int ipc_parse_version (int *cmd) |
711 | { | 733 | { |
712 | if (*cmd & IPC_64) { | 734 | if (*cmd & IPC_64) { |
713 | *cmd ^= IPC_64; | 735 | *cmd ^= IPC_64; |
714 | return IPC_64; | 736 | return IPC_64; |
715 | } else { | 737 | } else { |
716 | return IPC_OLD; | 738 | return IPC_OLD; |
717 | } | 739 | } |
718 | } | 740 | } |
719 | 741 | ||
720 | #endif /* __ARCH_WANT_IPC_PARSE_VERSION */ | 742 | #endif /* __ARCH_WANT_IPC_PARSE_VERSION */ |
721 | 743 | ||
722 | #ifdef CONFIG_PROC_FS | 744 | #ifdef CONFIG_PROC_FS |
723 | struct ipc_proc_iter { | 745 | struct ipc_proc_iter { |
724 | struct ipc_namespace *ns; | 746 | struct ipc_namespace *ns; |
725 | struct ipc_proc_iface *iface; | 747 | struct ipc_proc_iface *iface; |
726 | }; | 748 | }; |
727 | 749 | ||
/*
 * This routine locks the ipc structure found at least at position pos.
 * Callers hold ids->mutex (taken in sysvipc_proc_start()), so in_use and
 * the idr contents are stable while we walk them.
 */
struct kern_ipc_perm *sysvipc_find_ipc(struct ipc_ids *ids, loff_t pos,
					loff_t *new_pos)
{
	struct kern_ipc_perm *ipc;
	int total, id;

	/* Count how many allocated entries live below position pos. */
	total = 0;
	for (id = 0; id < pos && total < ids->in_use; id++) {
		ipc = idr_find(&ids->ipcs_idr, id);
		if (ipc != NULL)
			total++;
	}

	/* All in-use entries sit below pos: nothing left to return. */
	if (total >= ids->in_use)
		return NULL;

	/* Return the first allocated entry at or above pos, locked. */
	for ( ; pos < IPCMNI; pos++) {
		ipc = idr_find(&ids->ipcs_idr, pos);
		if (ipc != NULL) {
			*new_pos = pos + 1;
			ipc_lock_by_ptr(ipc);
			return ipc;
		}
	}

	/* Out of range - return NULL to terminate iteration */
	return NULL;
}
759 | 781 | ||
760 | static void *sysvipc_proc_next(struct seq_file *s, void *it, loff_t *pos) | 782 | static void *sysvipc_proc_next(struct seq_file *s, void *it, loff_t *pos) |
761 | { | 783 | { |
762 | struct ipc_proc_iter *iter = s->private; | 784 | struct ipc_proc_iter *iter = s->private; |
763 | struct ipc_proc_iface *iface = iter->iface; | 785 | struct ipc_proc_iface *iface = iter->iface; |
764 | struct kern_ipc_perm *ipc = it; | 786 | struct kern_ipc_perm *ipc = it; |
765 | 787 | ||
766 | /* If we had an ipc id locked before, unlock it */ | 788 | /* If we had an ipc id locked before, unlock it */ |
767 | if (ipc && ipc != SEQ_START_TOKEN) | 789 | if (ipc && ipc != SEQ_START_TOKEN) |
768 | ipc_unlock(ipc); | 790 | ipc_unlock(ipc); |
769 | 791 | ||
770 | return sysvipc_find_ipc(iter->ns->ids[iface->ids], *pos, pos); | 792 | return sysvipc_find_ipc(iter->ns->ids[iface->ids], *pos, pos); |
771 | } | 793 | } |
772 | 794 | ||
773 | /* | 795 | /* |
774 | * File positions: pos 0 -> header, pos n -> ipc id + 1. | 796 | * File positions: pos 0 -> header, pos n -> ipc id = n - 1. |
775 | * SeqFile iterator: iterator value locked shp or SEQ_TOKEN_START. | 797 | * SeqFile iterator: iterator value locked ipc pointer or SEQ_TOKEN_START. |
776 | */ | 798 | */ |
777 | static void *sysvipc_proc_start(struct seq_file *s, loff_t *pos) | 799 | static void *sysvipc_proc_start(struct seq_file *s, loff_t *pos) |
778 | { | 800 | { |
779 | struct ipc_proc_iter *iter = s->private; | 801 | struct ipc_proc_iter *iter = s->private; |
780 | struct ipc_proc_iface *iface = iter->iface; | 802 | struct ipc_proc_iface *iface = iter->iface; |
781 | struct ipc_ids *ids; | 803 | struct ipc_ids *ids; |
782 | 804 | ||
783 | ids = iter->ns->ids[iface->ids]; | 805 | ids = iter->ns->ids[iface->ids]; |
784 | 806 | ||
785 | /* | 807 | /* |
786 | * Take the lock - this will be released by the corresponding | 808 | * Take the lock - this will be released by the corresponding |
787 | * call to stop(). | 809 | * call to stop(). |
788 | */ | 810 | */ |
789 | mutex_lock(&ids->mutex); | 811 | mutex_lock(&ids->mutex); |
790 | 812 | ||
791 | /* pos < 0 is invalid */ | 813 | /* pos < 0 is invalid */ |
792 | if (*pos < 0) | 814 | if (*pos < 0) |
793 | return NULL; | 815 | return NULL; |
794 | 816 | ||
795 | /* pos == 0 means header */ | 817 | /* pos == 0 means header */ |
796 | if (*pos == 0) | 818 | if (*pos == 0) |
797 | return SEQ_START_TOKEN; | 819 | return SEQ_START_TOKEN; |
798 | 820 | ||
799 | /* Find the (pos-1)th ipc */ | 821 | /* Find the (pos-1)th ipc */ |
800 | return sysvipc_find_ipc(ids, *pos - 1, pos); | 822 | return sysvipc_find_ipc(ids, *pos - 1, pos); |
801 | } | 823 | } |
802 | 824 | ||
803 | static void sysvipc_proc_stop(struct seq_file *s, void *it) | 825 | static void sysvipc_proc_stop(struct seq_file *s, void *it) |
804 | { | 826 | { |
805 | struct kern_ipc_perm *ipc = it; | 827 | struct kern_ipc_perm *ipc = it; |
806 | struct ipc_proc_iter *iter = s->private; | 828 | struct ipc_proc_iter *iter = s->private; |
807 | struct ipc_proc_iface *iface = iter->iface; | 829 | struct ipc_proc_iface *iface = iter->iface; |
808 | struct ipc_ids *ids; | 830 | struct ipc_ids *ids; |
809 | 831 | ||
810 | /* If we had a locked segment, release it */ | 832 | /* If we had a locked structure, release it */ |
811 | if (ipc && ipc != SEQ_START_TOKEN) | 833 | if (ipc && ipc != SEQ_START_TOKEN) |
812 | ipc_unlock(ipc); | 834 | ipc_unlock(ipc); |
813 | 835 | ||
814 | ids = iter->ns->ids[iface->ids]; | 836 | ids = iter->ns->ids[iface->ids]; |
815 | /* Release the lock we took in start() */ | 837 | /* Release the lock we took in start() */ |
816 | mutex_unlock(&ids->mutex); | 838 | mutex_unlock(&ids->mutex); |
817 | } | 839 | } |
818 | 840 | ||
819 | static int sysvipc_proc_show(struct seq_file *s, void *it) | 841 | static int sysvipc_proc_show(struct seq_file *s, void *it) |
820 | { | 842 | { |
821 | struct ipc_proc_iter *iter = s->private; | 843 | struct ipc_proc_iter *iter = s->private; |
822 | struct ipc_proc_iface *iface = iter->iface; | 844 | struct ipc_proc_iface *iface = iter->iface; |
823 | 845 | ||
824 | if (it == SEQ_START_TOKEN) | 846 | if (it == SEQ_START_TOKEN) |
825 | return seq_puts(s, iface->header); | 847 | return seq_puts(s, iface->header); |
826 | 848 | ||
827 | return iface->show(s, it); | 849 | return iface->show(s, it); |
828 | } | 850 | } |
829 | 851 | ||
/* seq_file operations shared by all sysvipc proc files */
static struct seq_operations sysvipc_proc_seqops = {
	.start = sysvipc_proc_start,
	.stop = sysvipc_proc_stop,
	.next = sysvipc_proc_next,
	.show = sysvipc_proc_show,
};
836 | 858 | ||
837 | static int sysvipc_proc_open(struct inode *inode, struct file *file) | 859 | static int sysvipc_proc_open(struct inode *inode, struct file *file) |
838 | { | 860 | { |
839 | int ret; | 861 | int ret; |
840 | struct seq_file *seq; | 862 | struct seq_file *seq; |
841 | struct ipc_proc_iter *iter; | 863 | struct ipc_proc_iter *iter; |
842 | 864 | ||
843 | ret = -ENOMEM; | 865 | ret = -ENOMEM; |
844 | iter = kmalloc(sizeof(*iter), GFP_KERNEL); | 866 | iter = kmalloc(sizeof(*iter), GFP_KERNEL); |
845 | if (!iter) | 867 | if (!iter) |
846 | goto out; | 868 | goto out; |
847 | 869 | ||
848 | ret = seq_open(file, &sysvipc_proc_seqops); | 870 | ret = seq_open(file, &sysvipc_proc_seqops); |
849 | if (ret) | 871 | if (ret) |
850 | goto out_kfree; | 872 | goto out_kfree; |
851 | 873 | ||
852 | seq = file->private_data; | 874 | seq = file->private_data; |
853 | seq->private = iter; | 875 | seq->private = iter; |
854 | 876 | ||
855 | iter->iface = PDE(inode)->data; | 877 | iter->iface = PDE(inode)->data; |
856 | iter->ns = get_ipc_ns(current->nsproxy->ipc_ns); | 878 | iter->ns = get_ipc_ns(current->nsproxy->ipc_ns); |
857 | out: | 879 | out: |
858 | return ret; | 880 | return ret; |
859 | out_kfree: | 881 | out_kfree: |
860 | kfree(iter); | 882 | kfree(iter); |
861 | goto out; | 883 | goto out; |
862 | } | 884 | } |
863 | 885 | ||
864 | static int sysvipc_proc_release(struct inode *inode, struct file *file) | 886 | static int sysvipc_proc_release(struct inode *inode, struct file *file) |
865 | { | 887 | { |
866 | struct seq_file *seq = file->private_data; | 888 | struct seq_file *seq = file->private_data; |
867 | struct ipc_proc_iter *iter = seq->private; | 889 | struct ipc_proc_iter *iter = seq->private; |
868 | put_ipc_ns(iter->ns); | 890 | put_ipc_ns(iter->ns); |
869 | return seq_release_private(inode, file); | 891 | return seq_release_private(inode, file); |
ipc/util.h
1 | /* | 1 | /* |
2 | * linux/ipc/util.h | 2 | * linux/ipc/util.h |
3 | * Copyright (C) 1999 Christoph Rohland | 3 | * Copyright (C) 1999 Christoph Rohland |
4 | * | 4 | * |
5 | * ipc helper functions (c) 1999 Manfred Spraul <manfred@colorfullife.com> | 5 | * ipc helper functions (c) 1999 Manfred Spraul <manfred@colorfullife.com> |
6 | * namespaces support. 2006 OpenVZ, SWsoft Inc. | 6 | * namespaces support. 2006 OpenVZ, SWsoft Inc. |
7 | * Pavel Emelianov <xemul@openvz.org> | 7 | * Pavel Emelianov <xemul@openvz.org> |
8 | */ | 8 | */ |
9 | 9 | ||
10 | #ifndef _IPC_UTIL_H | 10 | #ifndef _IPC_UTIL_H |
11 | #define _IPC_UTIL_H | 11 | #define _IPC_UTIL_H |
12 | 12 | ||
13 | #include <linux/idr.h> | 13 | #include <linux/idr.h> |
14 | #include <linux/err.h> | 14 | #include <linux/err.h> |
15 | 15 | ||
16 | #define USHRT_MAX 0xffff | 16 | #define USHRT_MAX 0xffff |
17 | #define SEQ_MULTIPLIER (IPCMNI) | 17 | #define SEQ_MULTIPLIER (IPCMNI) |
18 | 18 | ||
19 | void sem_init (void); | 19 | void sem_init (void); |
20 | void msg_init (void); | 20 | void msg_init (void); |
21 | void shm_init (void); | 21 | void shm_init (void); |
22 | 22 | ||
23 | int sem_init_ns(struct ipc_namespace *ns); | 23 | int sem_init_ns(struct ipc_namespace *ns); |
24 | int msg_init_ns(struct ipc_namespace *ns); | 24 | int msg_init_ns(struct ipc_namespace *ns); |
25 | int shm_init_ns(struct ipc_namespace *ns); | 25 | int shm_init_ns(struct ipc_namespace *ns); |
26 | 26 | ||
27 | void sem_exit_ns(struct ipc_namespace *ns); | 27 | void sem_exit_ns(struct ipc_namespace *ns); |
28 | void msg_exit_ns(struct ipc_namespace *ns); | 28 | void msg_exit_ns(struct ipc_namespace *ns); |
29 | void shm_exit_ns(struct ipc_namespace *ns); | 29 | void shm_exit_ns(struct ipc_namespace *ns); |
30 | 30 | ||
/* One set of ipc identifiers (sem, msg or shm) within a namespace. */
struct ipc_ids {
	int in_use;		/* number of ipc entries currently allocated */
	unsigned short seq;	/* next sequence number for id generation */
	unsigned short seq_max;	/* highest usable sequence number */
	struct mutex mutex;	/* protects this set of ids */
	struct idr ipcs_idr;	/* maps slot index -> kern_ipc_perm */
};
38 | 38 | ||
/*
 * Structure that holds the parameters needed by the ipc operations
 * (see struct ipc_ops below)
 */
struct ipc_params {
	key_t key;		/* user-supplied key, possibly IPC_PRIVATE */
	int flg;		/* ipc flags as passed to sys_*get() */
	union {
		size_t size;	/* for shared memories */
		int nsems;	/* for semaphores */
	} u;			/* holds the getnew() specific param */
};
51 | 51 | ||
/*
 * Structure that holds some ipc operations. This structure is used to unify
 * the calls to sys_msgget(), sys_semget(), sys_shmget()
 *      . routine to call to create a new ipc object. Can be one of newque,
 *        newary, newseg
 *      . routine to call to check permissions for a new ipc object.
 *        Can be one of security_msg_associate, security_sem_associate,
 *        security_shm_associate
 *      . routine to call for an extra check if needed
 */
struct ipc_ops {
	int (*getnew) (struct ipc_namespace *, struct ipc_params *);
	int (*associate) (struct kern_ipc_perm *, int);
	int (*more_checks) (struct kern_ipc_perm *, struct ipc_params *);
};
67 | 67 | ||
68 | struct seq_file; | 68 | struct seq_file; |
69 | 69 | ||
70 | void ipc_init_ids(struct ipc_ids *); | 70 | void ipc_init_ids(struct ipc_ids *); |
71 | #ifdef CONFIG_PROC_FS | 71 | #ifdef CONFIG_PROC_FS |
72 | void __init ipc_init_proc_interface(const char *path, const char *header, | 72 | void __init ipc_init_proc_interface(const char *path, const char *header, |
73 | int ids, int (*show)(struct seq_file *, void *)); | 73 | int ids, int (*show)(struct seq_file *, void *)); |
74 | #else | 74 | #else |
75 | #define ipc_init_proc_interface(path, header, ids, show) do {} while (0) | 75 | #define ipc_init_proc_interface(path, header, ids, show) do {} while (0) |
76 | #endif | 76 | #endif |
77 | 77 | ||
78 | #define IPC_SEM_IDS 0 | 78 | #define IPC_SEM_IDS 0 |
79 | #define IPC_MSG_IDS 1 | 79 | #define IPC_MSG_IDS 1 |
80 | #define IPC_SHM_IDS 2 | 80 | #define IPC_SHM_IDS 2 |
81 | 81 | ||
82 | #define ipcid_to_idx(id) ((id) % SEQ_MULTIPLIER) | 82 | #define ipcid_to_idx(id) ((id) % SEQ_MULTIPLIER) |
83 | 83 | ||
84 | /* must be called with ids->mutex acquired.*/ | 84 | /* must be called with ids->mutex acquired.*/ |
85 | int ipc_addid(struct ipc_ids *, struct kern_ipc_perm *, int); | 85 | int ipc_addid(struct ipc_ids *, struct kern_ipc_perm *, int); |
86 | int ipc_get_maxid(struct ipc_ids *); | 86 | int ipc_get_maxid(struct ipc_ids *); |
87 | 87 | ||
88 | /* must be called with both locks acquired. */ | 88 | /* must be called with both locks acquired. */ |
89 | void ipc_rmid(struct ipc_ids *, struct kern_ipc_perm *); | 89 | void ipc_rmid(struct ipc_ids *, struct kern_ipc_perm *); |
90 | 90 | ||
91 | int ipcperms (struct kern_ipc_perm *ipcp, short flg); | 91 | /* must be called with ipcp locked */ |
92 | int ipcperms(struct kern_ipc_perm *ipcp, short flg); | ||
92 | 93 | ||
93 | /* for rare, potentially huge allocations. | 94 | /* for rare, potentially huge allocations. |
94 | * both function can sleep | 95 | * both function can sleep |
95 | */ | 96 | */ |
96 | void* ipc_alloc(int size); | 97 | void* ipc_alloc(int size); |
97 | void ipc_free(void* ptr, int size); | 98 | void ipc_free(void* ptr, int size); |
98 | 99 | ||
99 | /* | 100 | /* |
100 | * For allocation that need to be freed by RCU. | 101 | * For allocation that need to be freed by RCU. |
101 | * Objects are reference counted, they start with reference count 1. | 102 | * Objects are reference counted, they start with reference count 1. |
102 | * getref increases the refcount, the putref call that reduces the recount | 103 | * getref increases the refcount, the putref call that reduces the recount |
103 | * to 0 schedules the rcu destruction. Caller must guarantee locking. | 104 | * to 0 schedules the rcu destruction. Caller must guarantee locking. |
104 | */ | 105 | */ |
105 | void* ipc_rcu_alloc(int size); | 106 | void* ipc_rcu_alloc(int size); |
106 | void ipc_rcu_getref(void *ptr); | 107 | void ipc_rcu_getref(void *ptr); |
107 | void ipc_rcu_putref(void *ptr); | 108 | void ipc_rcu_putref(void *ptr); |
108 | 109 | ||
109 | struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int); | 110 | struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int); |
110 | 111 | ||
111 | void kernel_to_ipc64_perm(struct kern_ipc_perm *in, struct ipc64_perm *out); | 112 | void kernel_to_ipc64_perm(struct kern_ipc_perm *in, struct ipc64_perm *out); |
112 | void ipc64_perm_to_ipc_perm(struct ipc64_perm *in, struct ipc_perm *out); | 113 | void ipc64_perm_to_ipc_perm(struct ipc64_perm *in, struct ipc_perm *out); |
113 | 114 | ||
114 | #if defined(__ia64__) || defined(__x86_64__) || defined(__hppa__) || defined(__XTENSA__) | 115 | #if defined(__ia64__) || defined(__x86_64__) || defined(__hppa__) || defined(__XTENSA__) |
115 | /* On IA-64, we always use the "64-bit version" of the IPC structures. */ | 116 | /* On IA-64, we always use the "64-bit version" of the IPC structures. */ |
116 | # define ipc_parse_version(cmd) IPC_64 | 117 | # define ipc_parse_version(cmd) IPC_64 |
117 | #else | 118 | #else |
118 | int ipc_parse_version (int *cmd); | 119 | int ipc_parse_version (int *cmd); |
119 | #endif | 120 | #endif |
120 | 121 | ||
121 | extern void free_msg(struct msg_msg *msg); | 122 | extern void free_msg(struct msg_msg *msg); |
122 | extern struct msg_msg *load_msg(const void __user *src, int len); | 123 | extern struct msg_msg *load_msg(const void __user *src, int len); |
123 | extern int store_msg(void __user *dest, struct msg_msg *msg, int len); | 124 | extern int store_msg(void __user *dest, struct msg_msg *msg, int len); |
124 | extern int ipcget_new(struct ipc_namespace *, struct ipc_ids *, | 125 | extern int ipcget_new(struct ipc_namespace *, struct ipc_ids *, |
125 | struct ipc_ops *, struct ipc_params *); | 126 | struct ipc_ops *, struct ipc_params *); |
126 | extern int ipcget_public(struct ipc_namespace *, struct ipc_ids *, | 127 | extern int ipcget_public(struct ipc_namespace *, struct ipc_ids *, |
127 | struct ipc_ops *, struct ipc_params *); | 128 | struct ipc_ops *, struct ipc_params *); |
128 | 129 | ||
129 | static inline int ipc_buildid(struct ipc_ids *ids, int id, int seq) | 130 | static inline int ipc_buildid(struct ipc_ids *ids, int id, int seq) |
130 | { | 131 | { |
131 | return SEQ_MULTIPLIER * seq + id; | 132 | return SEQ_MULTIPLIER * seq + id; |
132 | } | 133 | } |
133 | 134 | ||
135 | /* | ||
136 | * Must be called with ipcp locked | ||
137 | */ | ||
134 | static inline int ipc_checkid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp, | 138 | static inline int ipc_checkid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp, |
135 | int uid) | 139 | int uid) |
136 | { | 140 | { |
137 | if (uid / SEQ_MULTIPLIER != ipcp->seq) | 141 | if (uid / SEQ_MULTIPLIER != ipcp->seq) |
138 | return 1; | 142 | return 1; |
139 | return 0; | 143 | return 0; |
140 | } | 144 | } |
141 | 145 | ||
/*
 * Lock an ipc structure through a pointer we already hold.
 * The rcu read section is entered first so that the rcu-freed object
 * (see ipc_rcu_putref()) cannot go away while we take its spinlock.
 */
static inline void ipc_lock_by_ptr(struct kern_ipc_perm *perm)
{
	rcu_read_lock();
	spin_lock(&perm->lock);
}
147 | 151 | ||
/*
 * Undo ipc_lock()/ipc_lock_by_ptr(): release the per-object spinlock,
 * then leave the rcu read section.
 */
static inline void ipc_unlock(struct kern_ipc_perm *perm)
{
	spin_unlock(&perm->lock);
	rcu_read_unlock();
}
153 | 157 | ||
154 | static inline struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, | 158 | static inline struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, |
155 | int id) | 159 | int id) |
156 | { | 160 | { |
157 | struct kern_ipc_perm *out; | 161 | struct kern_ipc_perm *out; |
158 | 162 | ||
159 | out = ipc_lock(ids, id); | 163 | out = ipc_lock(ids, id); |
160 | if (IS_ERR(out)) | 164 | if (IS_ERR(out)) |
161 | return out; | 165 | return out; |
162 | 166 | ||
163 | if (ipc_checkid(ids, out, id)) { | 167 | if (ipc_checkid(ids, out, id)) { |
164 | ipc_unlock(out); | 168 | ipc_unlock(out); |
165 | return ERR_PTR(-EIDRM); | 169 | return ERR_PTR(-EIDRM); |
166 | } | 170 | } |
167 | 171 | ||
168 | return out; | 172 | return out; |
169 | } | 173 | } |
170 | 174 | ||
175 | /** | ||
176 | * ipcget - Common sys_*get() code | ||
177 | * @ns : namsepace | ||
178 | * @ids : IPC identifier set | ||
179 | * @ops : operations to be called on ipc object creation, permission checks | ||
180 | * and further checks | ||
181 | * @params : the parameters needed by the previous operations. | ||
182 | * | ||
183 | * Common routine called by sys_msgget(), sys_semget() and sys_shmget(). | ||
184 | */ | ||
171 | static inline int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids, | 185 | static inline int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids, |
172 | struct ipc_ops *ops, struct ipc_params *params) | 186 | struct ipc_ops *ops, struct ipc_params *params) |
173 | { | 187 | { |
174 | if (params->key == IPC_PRIVATE) | 188 | if (params->key == IPC_PRIVATE) |
175 | return ipcget_new(ns, ids, ops, params); | 189 | return ipcget_new(ns, ids, ops, params); |
176 | else | 190 | else |
177 | return ipcget_public(ns, ids, ops, params); | 191 | return ipcget_public(ns, ids, ops, params); |
178 | } | 192 | } |
179 | 193 | ||
180 | #endif | 194 | #endif |
181 | 195 |