Blame view

kernel/futex.c 49.1 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
8
9
10
  /*
   *  Fast Userspace Mutexes (which I call "Futexes!").
   *  (C) Rusty Russell, IBM 2002
   *
   *  Generalized futexes, futex requeueing, misc fixes by Ingo Molnar
   *  (C) Copyright 2003 Red Hat Inc, All Rights Reserved
   *
   *  Removed page pinning, fix privately mapped COW pages and other cleanups
   *  (C) Copyright 2003, 2004 Jamie Lokier
   *
0771dfefc   Ingo Molnar   [PATCH] lightweig...
11
12
13
14
   *  Robust futex support started by Ingo Molnar
   *  (C) Copyright 2006 Red Hat Inc, All Rights Reserved
   *  Thanks to Thomas Gleixner for suggestions, analysis and fixes.
   *
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
15
16
17
18
   *  PI-futex support started by Ingo Molnar and Thomas Gleixner
   *  Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
   *  Copyright (C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
   *
34f01cc1f   Eric Dumazet   FUTEX: new PRIVAT...
19
20
21
   *  PRIVATE futexes by Eric Dumazet
   *  Copyright (C) 2007 Eric Dumazet <dada1@cosmosbay.com>
   *
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
   *  Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly
   *  enough at me, Linus for the original (flawed) idea, Matthew
   *  Kirkwood for proof-of-concept implementation.
   *
   *  "The futexes are also cursed."
   *  "But they come in a choice of three flavours!"
   *
   *  This program is free software; you can redistribute it and/or modify
   *  it under the terms of the GNU General Public License as published by
   *  the Free Software Foundation; either version 2 of the License, or
   *  (at your option) any later version.
   *
   *  This program is distributed in the hope that it will be useful,
   *  but WITHOUT ANY WARRANTY; without even the implied warranty of
   *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   *  GNU General Public License for more details.
   *
   *  You should have received a copy of the GNU General Public License
   *  along with this program; if not, write to the Free Software
   *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   */
  #include <linux/slab.h>
  #include <linux/poll.h>
  #include <linux/fs.h>
  #include <linux/file.h>
  #include <linux/jhash.h>
  #include <linux/init.h>
  #include <linux/futex.h>
  #include <linux/mount.h>
  #include <linux/pagemap.h>
  #include <linux/syscalls.h>
7ed20e1ad   Jesper Juhl   [PATCH] convert t...
53
  #include <linux/signal.h>
9adef58b1   Rusty Russell   futex: get_futex_...
54
  #include <linux/module.h>
fd5eea421   Andrey Mirkin   change inotifyfs ...
55
  #include <linux/magic.h>
b488893a3   Pavel Emelyanov   pid namespaces: c...
56
57
  #include <linux/pid.h>
  #include <linux/nsproxy.h>
4732efbeb   Jakub Jelinek   [PATCH] FUTEX_WAK...
58
  #include <asm/futex.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
59

c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
60
  #include "rtmutex_common.h"
a0c1e9073   Thomas Gleixner   futex: runtime en...
61
  int __read_mostly futex_cmpxchg_enabled;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
62
63
64
  #define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
  
  /*
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
   * Priority Inheritance state:
   */
  struct futex_pi_state {
  	/*
  	 * list of 'owned' pi_state instances - these have to be
  	 * cleaned up in do_exit() if the task exits prematurely:
  	 */
  	struct list_head list;
  
  	/*
  	 * The PI object:
  	 */
  	struct rt_mutex pi_mutex;
  
  	/* Task this state is attached to; NULL while it is not attached */
  	struct task_struct *owner;
  	/* Reference count; references are dropped via free_pi_state() */
  	atomic_t refcount;
  
  	/* Futex key, stored so exit cleanup can locate the hash bucket */
  	union futex_key key;
  };
  
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
86
87
88
89
   * We use this hashed waitqueue instead of a normal wait_queue_t, so
   * we can wake only the relevant ones (hashed queues may be shared).
   *
   * A futex_q has a woken state, just like tasks have TASK_RUNNING.
ec92d0829   Pierre Peiffer   futex priority ba...
90
   * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
91
   * The order of wakup is always to make the first condition true, then
73500ac54   Darren Hart   futex: rename fie...
92
   * wake up q->waiter, then make the second condition true.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
93
94
   */
  struct futex_q {
ec92d0829   Pierre Peiffer   futex priority ba...
95
  	struct plist_node list;
73500ac54   Darren Hart   futex: rename fie...
96
97
  	/* There can only be a single waiter */
  	wait_queue_head_t waiter;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
98

e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
99
  	/* Which hash list lock to use: */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
100
  	spinlock_t *lock_ptr;
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
101
  	/* Key which the futex is hashed on: */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
102
  	union futex_key key;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
103
104
105
  	/* Optional priority inheritance state: */
  	struct futex_pi_state *pi_state;
  	struct task_struct *task;
cd689985c   Thomas Gleixner   futex: Add bitset...
106
107
108
  
  	/* Bitset for the optional bitmasked wakeup */
  	u32 bitset;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
109
110
111
112
113
114
  };
  
  /*
   * Split the global futex_lock into every hash list lock.
   */
  struct futex_hash_bucket {
ec92d0829   Pierre Peiffer   futex priority ba...
115
116
  	spinlock_t lock;
  	struct plist_head chain;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
117
118
119
  };
  
  static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS];
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
  /*
   * We hash on the keys returned from get_futex_key (see below).
   */
  static struct futex_hash_bucket *hash_futex(union futex_key *key)
  {
  	u32 hash = jhash2((u32*)&key->both.word,
  			  (sizeof(key->both.word)+sizeof(key->both.ptr))/4,
  			  key->both.offset);
  	return &futex_queues[hash & ((1 << FUTEX_HASHBITS)-1)];
  }
  
  /*
   * Compare two futex keys member by member.
   * Returns 1 when word, ptr and offset all agree, 0 otherwise.
   */
  static inline int match_futex(union futex_key *key1, union futex_key *key2)
  {
  	if (key1->both.word != key2->both.word)
  		return 0;
  	if (key1->both.ptr != key2->both.ptr)
  		return 0;
  
  	return key1->both.offset == key2->both.offset;
  }
38d47c1b7   Peter Zijlstra   futex: rely on ge...
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
  /*
   * Pin the object a key refers to: the inode for inode-based keys, the
   * mm for FUT_OFF_MMSHARED keys.  Keys with a NULL ptr or with neither
   * flag set take no reference.
   * Can be called while holding spinlocks.
   */
  static void get_futex_key_refs(union futex_key *key)
  {
  	int kind;
  
  	if (!key->both.ptr)
  		return;
  
  	/* The two flag bits in the offset tell us what kind of key this is. */
  	kind = key->both.offset & (FUT_OFF_INODE | FUT_OFF_MMSHARED);
  
  	if (kind == FUT_OFF_INODE)
  		atomic_inc(&key->shared.inode->i_count);
  	else if (kind == FUT_OFF_MMSHARED)
  		atomic_inc(&key->private.mm->mm_count);
  }
  
  /*
   * Drop a reference to the resource addressed by a key.
   * The hash bucket spinlock must not be held.
   */
  static void drop_futex_key_refs(union futex_key *key)
  {
90621c40c   Darren Hart   futex: catch cert...
166
167
168
  	if (!key->both.ptr) {
  		/* If we're here then we tried to put a key we failed to get */
  		WARN_ON_ONCE(1);
38d47c1b7   Peter Zijlstra   futex: rely on ge...
169
  		return;
90621c40c   Darren Hart   futex: catch cert...
170
  	}
38d47c1b7   Peter Zijlstra   futex: rely on ge...
171
172
173
174
175
176
177
178
179
180
  
  	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
  	case FUT_OFF_INODE:
  		iput(key->shared.inode);
  		break;
  	case FUT_OFF_MMSHARED:
  		mmdrop(key->private.mm);
  		break;
  	}
  }
34f01cc1f   Eric Dumazet   FUTEX: new PRIVAT...
181
182
183
184
185
186
187
188
189
  /**
   * get_futex_key - Get parameters which are the keys for a futex.
   * @uaddr: virtual address of the futex
   * @shared: NULL for a PROCESS_PRIVATE futex,
   *	&current->mm->mmap_sem for a PROCESS_SHARED futex
   * @key: address where result is stored.
   *
   * Returns a negative error code or 0
   * The key words are stored in *key on success.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
190
   *
f3a43f3f6   Josef "Jeff" Sipek   [PATCH] kernel: c...
191
   * For shared mappings, it's (page->index, vma->vm_file->f_path.dentry->d_inode,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
192
193
194
   * offset_within_page).  For private mappings, it's (uaddr, current->mm).
   * We can usually work out the index without swapping in the page.
   *
34f01cc1f   Eric Dumazet   FUTEX: new PRIVAT...
195
196
197
   * fshared is NULL for PROCESS_PRIVATE futexes
   * For other futexes, it points to &current->mm->mmap_sem and
   * caller must have taken the reader lock. but NOT any spinlocks.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
198
   */
c2f9f2015   Peter Zijlstra   futex: cleanup fs...
199
  static int get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
200
  {
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
201
  	unsigned long address = (unsigned long)uaddr;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
202
  	struct mm_struct *mm = current->mm;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
203
204
205
206
207
208
  	struct page *page;
  	int err;
  
  	/*
  	 * The futex address must be "naturally" aligned.
  	 */
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
209
  	key->both.offset = address % PAGE_SIZE;
34f01cc1f   Eric Dumazet   FUTEX: new PRIVAT...
210
  	if (unlikely((address % sizeof(u32)) != 0))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
211
  		return -EINVAL;
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
212
  	address -= key->both.offset;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
213
214
  
  	/*
34f01cc1f   Eric Dumazet   FUTEX: new PRIVAT...
215
216
217
218
219
220
221
222
223
224
225
  	 * PROCESS_PRIVATE futexes are fast.
  	 * As the mm cannot disappear under us and the 'key' only needs
  	 * virtual address, we dont even have to find the underlying vma.
  	 * Note : We do have to check 'uaddr' is a valid user address,
  	 *        but access_ok() should be faster than find_vma()
  	 */
  	if (!fshared) {
  		if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))))
  			return -EFAULT;
  		key->private.mm = mm;
  		key->private.address = address;
42569c399   Peter Zijlstra   futex: fixup get_...
226
  		get_futex_key_refs(key);
34f01cc1f   Eric Dumazet   FUTEX: new PRIVAT...
227
228
  		return 0;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
229

38d47c1b7   Peter Zijlstra   futex: rely on ge...
230
  again:
734b05b10   Peter Zijlstra   futex: use fast_g...
231
  	err = get_user_pages_fast(address, 1, 0, &page);
38d47c1b7   Peter Zijlstra   futex: rely on ge...
232
233
234
235
236
237
238
239
240
  	if (err < 0)
  		return err;
  
  	lock_page(page);
  	if (!page->mapping) {
  		unlock_page(page);
  		put_page(page);
  		goto again;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
241
242
243
244
245
246
  
  	/*
  	 * Private mappings are handled in a simple way.
  	 *
  	 * NOTE: When userspace waits on a MAP_SHARED mapping, even if
  	 * it's a read-only handle, it's expected that futexes attach to
38d47c1b7   Peter Zijlstra   futex: rely on ge...
247
  	 * the object not the particular process.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
248
  	 */
38d47c1b7   Peter Zijlstra   futex: rely on ge...
249
250
  	if (PageAnon(page)) {
  		key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
251
  		key->private.mm = mm;
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
252
  		key->private.address = address;
38d47c1b7   Peter Zijlstra   futex: rely on ge...
253
254
255
256
  	} else {
  		key->both.offset |= FUT_OFF_INODE; /* inode-based key */
  		key->shared.inode = page->mapping->host;
  		key->shared.pgoff = page->index;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
257
  	}
38d47c1b7   Peter Zijlstra   futex: rely on ge...
258
  	get_futex_key_refs(key);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
259

38d47c1b7   Peter Zijlstra   futex: rely on ge...
260
261
262
  	unlock_page(page);
  	put_page(page);
  	return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
263
  }
38d47c1b7   Peter Zijlstra   futex: rely on ge...
264
  /*
   * Release the references taken by a successful get_futex_key().
   * @fshared is accepted for symmetry with get_futex_key() but unused.
   */
  static inline
  void put_futex_key(int fshared, union futex_key *key)
  {
  	drop_futex_key_refs(key);
  }
36cf3b5c3   Thomas Gleixner   FUTEX: Tidy up th...
269
270
271
272
273
274
275
276
277
278
279
280
  /*
   * Compare-and-exchange the user-space futex word at @uaddr from @uval
   * to @newval with page faults disabled.  Returns whatever
   * futex_atomic_cmpxchg_inatomic() returns; callers in this file compare
   * it against -EFAULT and against @uval.
   */
  static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval)
  {
  	u32 observed;
  
  	pagefault_disable();
  	observed = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
  	pagefault_enable();
  
  	return observed;
  }
  
  /*
   * Read the u32 futex word at user address @from into @dest with page
   * faults disabled.
   *
   * Returns 0 on success, or -EFAULT if the copy did not fully succeed.
   */
  static int get_futex_value_locked(u32 *dest, u32 __user *from)
  {
  	int ret;
  
  	pagefault_disable();
  	ret = __copy_from_user_inatomic(dest, from, sizeof(u32));
  	pagefault_enable();
  
  	return ret ? -EFAULT : 0;
  }
  
  /*
34f01cc1f   Eric Dumazet   FUTEX: new PRIVAT...
291
   * Fault handling.
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
292
   */
c2f9f2015   Peter Zijlstra   futex: cleanup fs...
293
  static int futex_handle_fault(unsigned long address, int attempt)
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
294
295
296
  {
  	struct vm_area_struct * vma;
  	struct mm_struct *mm = current->mm;
34f01cc1f   Eric Dumazet   FUTEX: new PRIVAT...
297
  	int ret = -EFAULT;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
298

34f01cc1f   Eric Dumazet   FUTEX: new PRIVAT...
299
300
  	if (attempt > 2)
  		return ret;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
301

61270708e   Peter Zijlstra   futex: reduce mma...
302
  	down_read(&mm->mmap_sem);
34f01cc1f   Eric Dumazet   FUTEX: new PRIVAT...
303
304
305
  	vma = find_vma(mm, address);
  	if (vma && address >= vma->vm_start &&
  	    (vma->vm_flags & VM_WRITE)) {
83c54070e   Nick Piggin   mm: fault feedbac...
306
307
308
309
310
311
312
313
314
  		int fault;
  		fault = handle_mm_fault(mm, vma, address, 1);
  		if (unlikely((fault & VM_FAULT_ERROR))) {
  #if 0
  			/* XXX: let's do this when we verify it is OK */
  			if (ret & VM_FAULT_OOM)
  				ret = -ENOMEM;
  #endif
  		} else {
34f01cc1f   Eric Dumazet   FUTEX: new PRIVAT...
315
  			ret = 0;
83c54070e   Nick Piggin   mm: fault feedbac...
316
317
318
319
  			if (fault & VM_FAULT_MAJOR)
  				current->maj_flt++;
  			else
  				current->min_flt++;
34f01cc1f   Eric Dumazet   FUTEX: new PRIVAT...
320
  		}
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
321
  	}
61270708e   Peter Zijlstra   futex: reduce mma...
322
  	up_read(&mm->mmap_sem);
34f01cc1f   Eric Dumazet   FUTEX: new PRIVAT...
323
  	return ret;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
324
325
326
327
328
329
330
331
332
333
334
  }
  
  /*
   * PI code:
   */
  static int refill_pi_state_cache(void)
  {
  	struct futex_pi_state *pi_state;
  
  	if (likely(current->pi_state_cache))
  		return 0;
4668edc33   Burman Yan   [PATCH] kernel co...
335
  	pi_state = kzalloc(sizeof(*pi_state), GFP_KERNEL);
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
336
337
338
  
  	if (!pi_state)
  		return -ENOMEM;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
339
340
341
342
  	INIT_LIST_HEAD(&pi_state->list);
  	/* pi_mutex gets initialized later */
  	pi_state->owner = NULL;
  	atomic_set(&pi_state->refcount, 1);
38d47c1b7   Peter Zijlstra   futex: rely on ge...
343
  	pi_state->key = FUTEX_KEY_INIT;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
  
  	current->pi_state_cache = pi_state;
  
  	return 0;
  }
  
  /*
   * Hand out the pi_state cached on the current task and empty the cache.
   * Warns if the cache was empty; the (possibly NULL) cached pointer is
   * returned either way.
   */
  static struct futex_pi_state * alloc_pi_state(void)
  {
  	struct futex_pi_state *cached = current->pi_state_cache;
  
  	WARN_ON(!cached);
  	current->pi_state_cache = NULL;
  
  	return cached;
  }
  
  /*
   * Drop one reference on @pi_state.  When the last reference goes away
   * the state is detached from its owner (if it still has one) and then
   * either recycled into current->pi_state_cache or freed.
   */
  static void free_pi_state(struct futex_pi_state *pi_state)
  {
  	/* Nothing more to do unless this was the final reference. */
  	if (!atomic_dec_and_test(&pi_state->refcount))
  		return;
  
  	/*
  	 * A NULL owner most probably means the owner is dying and has
  	 * already cleaned up the pi_state itself.
  	 */
  	if (pi_state->owner) {
  		spin_lock_irq(&pi_state->owner->pi_lock);
  		list_del_init(&pi_state->list);
  		spin_unlock_irq(&pi_state->owner->pi_lock);
  
  		rt_mutex_proxy_unlock(&pi_state->pi_mutex, pi_state->owner);
  	}
  
  	if (!current->pi_state_cache) {
  		/*
  		 * Recycle: the list is already empty, so clear the owner
  		 * and put the refcount back from 0 to 1.
  		 */
  		pi_state->owner = NULL;
  		atomic_set(&pi_state->refcount, 1);
  		current->pi_state_cache = pi_state;
  		return;
  	}
  
  	/* The cache slot is occupied, so really free this one. */
  	kfree(pi_state);
  }
  
  /*
   * Look up the task based on what TID userspace gave us.
   * We dont trust it.
   *
   * Returns the task with a reference taken (the caller must
   * put_task_struct() it), or ERR_PTR(-ESRCH) if no such task exists or
   * the caller's euid matches neither the target's euid nor its uid.
   */
  static struct task_struct * futex_find_get_task(pid_t pid)
  {
  	struct task_struct *p;
  	const struct cred *cred = current_cred(), *pcred;
  
  	rcu_read_lock();
  	p = find_task_by_vpid(pid);
  	if (!p) {
  		p = ERR_PTR(-ESRCH);
  	} else {
  		pcred = __task_cred(p);
  		if (cred->euid != pcred->euid &&
  		    cred->euid != pcred->uid)
  			p = ERR_PTR(-ESRCH);
  		else
  			get_task_struct(p);
  	}
  
  	rcu_read_unlock();
  
  	return p;
  }
  
  /*
   * This task is holding PI mutexes at exit time => bad.
   * Kernel cleans up PI-state, but userspace is likely hosed.
   * (Robust-futex cleanup is separate and might save the day for userspace.)
   */
  void exit_pi_state_list(struct task_struct *curr)
  {
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
425
426
  	struct list_head *next, *head = &curr->pi_state_list;
  	struct futex_pi_state *pi_state;
627371d73   Ingo Molnar   [PATCH] pi-futex:...
427
  	struct futex_hash_bucket *hb;
38d47c1b7   Peter Zijlstra   futex: rely on ge...
428
  	union futex_key key = FUTEX_KEY_INIT;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
429

a0c1e9073   Thomas Gleixner   futex: runtime en...
430
431
  	if (!futex_cmpxchg_enabled)
  		return;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
432
433
434
  	/*
  	 * We are a ZOMBIE and nobody can enqueue itself on
  	 * pi_state_list anymore, but we have to be careful
627371d73   Ingo Molnar   [PATCH] pi-futex:...
435
  	 * versus waiters unqueueing themselves:
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
436
437
438
439
440
441
442
  	 */
  	spin_lock_irq(&curr->pi_lock);
  	while (!list_empty(head)) {
  
  		next = head->next;
  		pi_state = list_entry(next, struct futex_pi_state, list);
  		key = pi_state->key;
627371d73   Ingo Molnar   [PATCH] pi-futex:...
443
  		hb = hash_futex(&key);
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
444
  		spin_unlock_irq(&curr->pi_lock);
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
445
446
447
  		spin_lock(&hb->lock);
  
  		spin_lock_irq(&curr->pi_lock);
627371d73   Ingo Molnar   [PATCH] pi-futex:...
448
449
450
451
  		/*
  		 * We dropped the pi-lock, so re-check whether this
  		 * task still owns the PI-state:
  		 */
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
452
453
454
455
  		if (head->next != next) {
  			spin_unlock(&hb->lock);
  			continue;
  		}
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
456
  		WARN_ON(pi_state->owner != curr);
627371d73   Ingo Molnar   [PATCH] pi-futex:...
457
458
  		WARN_ON(list_empty(&pi_state->list));
  		list_del_init(&pi_state->list);
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
459
460
461
462
463
464
465
466
467
468
469
470
471
  		pi_state->owner = NULL;
  		spin_unlock_irq(&curr->pi_lock);
  
  		rt_mutex_unlock(&pi_state->pi_mutex);
  
  		spin_unlock(&hb->lock);
  
  		spin_lock_irq(&curr->pi_lock);
  	}
  	spin_unlock_irq(&curr->pi_lock);
  }
  
  static int
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
472
473
  lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
  		union futex_key *key, struct futex_pi_state **ps)
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
474
475
476
  {
  	struct futex_pi_state *pi_state = NULL;
  	struct futex_q *this, *next;
ec92d0829   Pierre Peiffer   futex priority ba...
477
  	struct plist_head *head;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
478
  	struct task_struct *p;
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
479
  	pid_t pid = uval & FUTEX_TID_MASK;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
480
481
  
  	head = &hb->chain;
ec92d0829   Pierre Peiffer   futex priority ba...
482
  	plist_for_each_entry_safe(this, next, head, list) {
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
483
  		if (match_futex(&this->key, key)) {
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
484
485
486
487
488
  			/*
  			 * Another waiter already exists - bump up
  			 * the refcount and return its pi_state:
  			 */
  			pi_state = this->pi_state;
06a9ec291   Thomas Gleixner   [PATCH] pi-futex:...
489
490
491
492
493
  			/*
  			 * Userspace might have messed up non PI and PI futexes
  			 */
  			if (unlikely(!pi_state))
  				return -EINVAL;
627371d73   Ingo Molnar   [PATCH] pi-futex:...
494
  			WARN_ON(!atomic_read(&pi_state->refcount));
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
495
496
  			WARN_ON(pid && pi_state->owner &&
  				pi_state->owner->pid != pid);
627371d73   Ingo Molnar   [PATCH] pi-futex:...
497

c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
498
  			atomic_inc(&pi_state->refcount);
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
499
  			*ps = pi_state;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
500
501
502
503
504
505
  
  			return 0;
  		}
  	}
  
  	/*
e3f2ddeac   Ingo Molnar   [PATCH] pi-futex:...
506
  	 * We are the first waiter - try to look up the real owner and attach
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
507
  	 * the new pi_state to it, but bail out when TID = 0
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
508
  	 */
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
509
  	if (!pid)
e3f2ddeac   Ingo Molnar   [PATCH] pi-futex:...
510
  		return -ESRCH;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
511
  	p = futex_find_get_task(pid);
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
  	if (IS_ERR(p))
  		return PTR_ERR(p);
  
  	/*
  	 * We need to look at the task state flags to figure out,
  	 * whether the task is exiting. To protect against the do_exit
  	 * change of the task flags, we do this protected by
  	 * p->pi_lock:
  	 */
  	spin_lock_irq(&p->pi_lock);
  	if (unlikely(p->flags & PF_EXITING)) {
  		/*
  		 * The task is on the way out. When PF_EXITPIDONE is
  		 * set, we know that the task has finished the
  		 * cleanup:
  		 */
  		int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN;
  
  		spin_unlock_irq(&p->pi_lock);
  		put_task_struct(p);
  		return ret;
  	}
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
534
535
536
537
538
539
540
541
542
543
  
  	pi_state = alloc_pi_state();
  
  	/*
  	 * Initialize the pi_mutex in locked state and make 'p'
  	 * the owner of it:
  	 */
  	rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);
  
  	/* Store the key for possible exit cleanups: */
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
544
  	pi_state->key = *key;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
545

627371d73   Ingo Molnar   [PATCH] pi-futex:...
546
  	WARN_ON(!list_empty(&pi_state->list));
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
547
548
549
550
551
  	list_add(&pi_state->list, &p->pi_state_list);
  	pi_state->owner = p;
  	spin_unlock_irq(&p->pi_lock);
  
  	put_task_struct(p);
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
552
  	*ps = pi_state;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
553
554
555
556
557
  
  	return 0;
  }
  
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
558
559
560
561
562
   * The hash bucket lock must be held when this is called.
   * Afterwards, the futex_q must not be accessed.
   */
  static void wake_futex(struct futex_q *q)
  {
ec92d0829   Pierre Peiffer   futex priority ba...
563
  	plist_del(&q->list, &q->list.plist);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
564
565
  	/*
  	 * The lock in wake_up_all() is a crucial memory barrier after the
ec92d0829   Pierre Peiffer   futex priority ba...
566
  	 * plist_del() and also before assigning to q->lock_ptr.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
567
  	 */
73500ac54   Darren Hart   futex: rename fie...
568
  	wake_up(&q->waiter);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
569
570
571
  	/*
  	 * The waiting task can free the futex_q as soon as this is written,
  	 * without taking any locks.  This must come last.
8e31108b9   Andrew Morton   [PATCH] Fix memor...
572
573
574
575
576
  	 *
  	 * A memory barrier is required here to prevent the following store
  	 * to lock_ptr from getting ahead of the wakeup. Clearing the lock
  	 * at the end of wake_up_all() does not prevent this store from
  	 * moving.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
577
  	 */
ccdea2f88   Ralf Baechle   [PATCH] futex: re...
578
  	smp_wmb();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
579
580
  	q->lock_ptr = NULL;
  }
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
581
582
583
584
585
586
587
588
  /*
   * Hand a PI futex over to its next owner.
   *
   * @uaddr: user-space address of the futex word
   * @uval:  value the caller read from the futex word
   * @this:  queued waiter whose pi_state is being handed over
   *
   * Unless FUTEX_OWNER_DIED is set in @uval, the futex word is switched to
   * the new owner's TID with FUTEX_WAITERS set.  The pi_state is then moved
   * from the old owner's list to the new owner's and the rt_mutex unlocked.
   *
   * Returns 0 on success, -EINVAL if @this has no pi_state or the futex
   * word no longer holds @uval, -EFAULT if the futex word update faulted.
   */
  static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
  {
  	struct task_struct *new_owner;
  	struct futex_pi_state *pi_state = this->pi_state;
  	u32 curval, newval;
  
  	if (!pi_state)
  		return -EINVAL;
  
  	spin_lock(&pi_state->pi_mutex.wait_lock);
  	new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
  
  	/*
  	 * This happens when we have stolen the lock and the original
  	 * pending owner did not enqueue itself back on the rt_mutex.
  	 * Thats not a tragedy. We know that way, that a lock waiter
  	 * is on the fly. We make the futex_q waiter the pending owner.
  	 */
  	if (!new_owner)
  		new_owner = this->task;
  
  	/*
  	 * We pass it to the next owner. (The WAITERS bit is always
  	 * kept enabled while there is PI state around. We must also
  	 * preserve the owner died bit.)
  	 */
  	if (!(uval & FUTEX_OWNER_DIED)) {
  		int ret = 0;
  
  		newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
  
  		curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
  
  		if (curval == -EFAULT)
  			ret = -EFAULT;
  		else if (curval != uval)
  			ret = -EINVAL;
  		if (ret) {
  			spin_unlock(&pi_state->pi_mutex.wait_lock);
  			return ret;
  		}
  	}
  
  	/* Unhook the pi_state from the old owner ... */
  	spin_lock_irq(&pi_state->owner->pi_lock);
  	WARN_ON(list_empty(&pi_state->list));
  	list_del_init(&pi_state->list);
  	spin_unlock_irq(&pi_state->owner->pi_lock);
  
  	/* ... and attach it to the new one */
  	spin_lock_irq(&new_owner->pi_lock);
  	WARN_ON(!list_empty(&pi_state->list));
  	list_add(&pi_state->list, &new_owner->pi_state_list);
  	pi_state->owner = new_owner;
  	spin_unlock_irq(&new_owner->pi_lock);
  
  	spin_unlock(&pi_state->pi_mutex.wait_lock);
  	rt_mutex_unlock(&pi_state->pi_mutex);
  
  	return 0;
  }
  
  /*
   * Release a PI futex that has no waiters by switching the futex word
   * at @uaddr from @uval to 0.
   *
   * Returns 0 on success, -EFAULT if the user access faulted, or -EAGAIN
   * if the futex word no longer held @uval.
   */
  static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
  {
  	u32 oldval;
  
  	/*
  	 * There is no waiter, so we unlock the futex. The owner died
  	 * bit has not to be preserved here. We are the owner:
  	 */
  	oldval = cmpxchg_futex_value_locked(uaddr, uval, 0);
  
  	if (oldval == -EFAULT)
  		return oldval;
  	if (oldval != uval)
  		return -EAGAIN;
  
  	return 0;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
655
  /*
8b8f319fc   Ingo Molnar   [PATCH] lockdep: ...
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
   * Express the locking dependencies for lockdep:
   */
  /*
   * Lock two hash buckets, always taking the lower-addressed one first.
   * The second lock uses SINGLE_DEPTH_NESTING.  When both arguments are
   * the same bucket it is locked only once.
   */
  static inline void
  double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
  {
  	struct futex_hash_bucket *first = hb1;
  	struct futex_hash_bucket *second = hb2;
  
  	/* Order the pair by address. */
  	if (hb1 > hb2) {
  		first = hb2;
  		second = hb1;
  	}
  
  	spin_lock(&first->lock);
  	if (first != second)
  		spin_lock_nested(&second->lock, SINGLE_DEPTH_NESTING);
  }
  
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
672
673
674
   * Wake up all waiters hashed on the physical page that is mapped
   * to this virtual address:
   */
c2f9f2015   Peter Zijlstra   futex: cleanup fs...
675
  static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
676
  {
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
677
  	struct futex_hash_bucket *hb;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
678
  	struct futex_q *this, *next;
ec92d0829   Pierre Peiffer   futex priority ba...
679
  	struct plist_head *head;
38d47c1b7   Peter Zijlstra   futex: rely on ge...
680
  	union futex_key key = FUTEX_KEY_INIT;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
681
  	int ret;
cd689985c   Thomas Gleixner   futex: Add bitset...
682
683
  	if (!bitset)
  		return -EINVAL;
34f01cc1f   Eric Dumazet   FUTEX: new PRIVAT...
684
  	ret = get_futex_key(uaddr, fshared, &key);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
685
686
  	if (unlikely(ret != 0))
  		goto out;
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
687
688
689
  	hb = hash_futex(&key);
  	spin_lock(&hb->lock);
  	head = &hb->chain;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
690

ec92d0829   Pierre Peiffer   futex priority ba...
691
  	plist_for_each_entry_safe(this, next, head, list) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
692
  		if (match_futex (&this->key, &key)) {
ed6f7b10e   Ingo Molnar   [PATCH] pi-futex:...
693
694
695
696
  			if (this->pi_state) {
  				ret = -EINVAL;
  				break;
  			}
cd689985c   Thomas Gleixner   futex: Add bitset...
697
698
699
700
  
  			/* Check if one of the bits is set in both bitsets */
  			if (!(this->bitset & bitset))
  				continue;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
701
702
703
704
705
  			wake_futex(this);
  			if (++ret >= nr_wake)
  				break;
  		}
  	}
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
706
  	spin_unlock(&hb->lock);
38d47c1b7   Peter Zijlstra   futex: rely on ge...
707
  	put_futex_key(fshared, &key);
42d35d48c   Darren Hart   futex: make futex...
708
  out:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
709
710
711
712
  	return ret;
  }
  
  /*
4732efbeb   Jakub Jelinek   [PATCH] FUTEX_WAK...
713
714
715
   * Wake up all waiters hashed on the physical page that is mapped
   * to this virtual address:
   */
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
716
  static int
c2f9f2015   Peter Zijlstra   futex: cleanup fs...
717
  futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
718
  	      int nr_wake, int nr_wake2, int op)
4732efbeb   Jakub Jelinek   [PATCH] FUTEX_WAK...
719
  {
38d47c1b7   Peter Zijlstra   futex: rely on ge...
720
  	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
721
  	struct futex_hash_bucket *hb1, *hb2;
ec92d0829   Pierre Peiffer   futex priority ba...
722
  	struct plist_head *head;
4732efbeb   Jakub Jelinek   [PATCH] FUTEX_WAK...
723
724
725
726
  	struct futex_q *this, *next;
  	int ret, op_ret, attempt = 0;
  
  retryfull:
34f01cc1f   Eric Dumazet   FUTEX: new PRIVAT...
727
  	ret = get_futex_key(uaddr1, fshared, &key1);
4732efbeb   Jakub Jelinek   [PATCH] FUTEX_WAK...
728
729
  	if (unlikely(ret != 0))
  		goto out;
34f01cc1f   Eric Dumazet   FUTEX: new PRIVAT...
730
  	ret = get_futex_key(uaddr2, fshared, &key2);
4732efbeb   Jakub Jelinek   [PATCH] FUTEX_WAK...
731
  	if (unlikely(ret != 0))
42d35d48c   Darren Hart   futex: make futex...
732
  		goto out_put_key1;
4732efbeb   Jakub Jelinek   [PATCH] FUTEX_WAK...
733

e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
734
735
  	hb1 = hash_futex(&key1);
  	hb2 = hash_futex(&key2);
4732efbeb   Jakub Jelinek   [PATCH] FUTEX_WAK...
736
737
  
  retry:
8b8f319fc   Ingo Molnar   [PATCH] lockdep: ...
738
  	double_lock_hb(hb1, hb2);
4732efbeb   Jakub Jelinek   [PATCH] FUTEX_WAK...
739

e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
740
  	op_ret = futex_atomic_op_inuser(op, uaddr2);
4732efbeb   Jakub Jelinek   [PATCH] FUTEX_WAK...
741
  	if (unlikely(op_ret < 0)) {
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
742
  		u32 dummy;
4732efbeb   Jakub Jelinek   [PATCH] FUTEX_WAK...
743

e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
744
745
746
  		spin_unlock(&hb1->lock);
  		if (hb1 != hb2)
  			spin_unlock(&hb2->lock);
4732efbeb   Jakub Jelinek   [PATCH] FUTEX_WAK...
747

7ee1dd3fe   David Howells   [PATCH] FRV: Make...
748
  #ifndef CONFIG_MMU
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
749
750
751
752
  		/*
  		 * we don't get EFAULT from MMU faults if we don't have an MMU,
  		 * but we might get them from range checking
  		 */
7ee1dd3fe   David Howells   [PATCH] FRV: Make...
753
  		ret = op_ret;
42d35d48c   Darren Hart   futex: make futex...
754
  		goto out_put_keys;
7ee1dd3fe   David Howells   [PATCH] FRV: Make...
755
  #endif
796f8d9b9   David Gibson   [PATCH] FUTEX_WAK...
756
757
  		if (unlikely(op_ret != -EFAULT)) {
  			ret = op_ret;
42d35d48c   Darren Hart   futex: make futex...
758
  			goto out_put_keys;
796f8d9b9   David Gibson   [PATCH] FUTEX_WAK...
759
  		}
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
760
761
  		/*
  		 * futex_atomic_op_inuser needs to both read and write
4732efbeb   Jakub Jelinek   [PATCH] FUTEX_WAK...
762
763
764
  		 * *(int __user *)uaddr2, but we can't modify it
  		 * non-atomically.  Therefore, if get_user below is not
  		 * enough, we need to handle the fault ourselves, while
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
765
766
  		 * still holding the mmap_sem.
  		 */
4732efbeb   Jakub Jelinek   [PATCH] FUTEX_WAK...
767
  		if (attempt++) {
34f01cc1f   Eric Dumazet   FUTEX: new PRIVAT...
768
  			ret = futex_handle_fault((unsigned long)uaddr2,
c2f9f2015   Peter Zijlstra   futex: cleanup fs...
769
  						 attempt);
34f01cc1f   Eric Dumazet   FUTEX: new PRIVAT...
770
  			if (ret)
42d35d48c   Darren Hart   futex: make futex...
771
  				goto out_put_keys;
4732efbeb   Jakub Jelinek   [PATCH] FUTEX_WAK...
772
773
  			goto retry;
  		}
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
774
  		ret = get_user(dummy, uaddr2);
4732efbeb   Jakub Jelinek   [PATCH] FUTEX_WAK...
775
776
777
778
779
  		if (ret)
  			return ret;
  
  		goto retryfull;
  	}
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
780
  	head = &hb1->chain;
4732efbeb   Jakub Jelinek   [PATCH] FUTEX_WAK...
781

ec92d0829   Pierre Peiffer   futex priority ba...
782
  	plist_for_each_entry_safe(this, next, head, list) {
4732efbeb   Jakub Jelinek   [PATCH] FUTEX_WAK...
783
784
785
786
787
788
789
790
  		if (match_futex (&this->key, &key1)) {
  			wake_futex(this);
  			if (++ret >= nr_wake)
  				break;
  		}
  	}
  
  	if (op_ret > 0) {
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
791
  		head = &hb2->chain;
4732efbeb   Jakub Jelinek   [PATCH] FUTEX_WAK...
792
793
  
  		op_ret = 0;
ec92d0829   Pierre Peiffer   futex priority ba...
794
  		plist_for_each_entry_safe(this, next, head, list) {
4732efbeb   Jakub Jelinek   [PATCH] FUTEX_WAK...
795
796
797
798
799
800
801
802
  			if (match_futex (&this->key, &key2)) {
  				wake_futex(this);
  				if (++op_ret >= nr_wake2)
  					break;
  			}
  		}
  		ret += op_ret;
  	}
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
803
804
805
  	spin_unlock(&hb1->lock);
  	if (hb1 != hb2)
  		spin_unlock(&hb2->lock);
42d35d48c   Darren Hart   futex: make futex...
806
  out_put_keys:
38d47c1b7   Peter Zijlstra   futex: rely on ge...
807
  	put_futex_key(fshared, &key2);
42d35d48c   Darren Hart   futex: make futex...
808
  out_put_key1:
38d47c1b7   Peter Zijlstra   futex: rely on ge...
809
  	put_futex_key(fshared, &key1);
42d35d48c   Darren Hart   futex: make futex...
810
  out:
4732efbeb   Jakub Jelinek   [PATCH] FUTEX_WAK...
811
812
813
814
  	return ret;
  }
  
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
815
816
817
   * Requeue all waiters hashed on one physical page to another
   * physical page.
   */
c2f9f2015   Peter Zijlstra   futex: cleanup fs...
818
  static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
819
  			 int nr_wake, int nr_requeue, u32 *cmpval)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
820
  {
38d47c1b7   Peter Zijlstra   futex: rely on ge...
821
  	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
822
  	struct futex_hash_bucket *hb1, *hb2;
ec92d0829   Pierre Peiffer   futex priority ba...
823
  	struct plist_head *head1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
824
825
  	struct futex_q *this, *next;
  	int ret, drop_count = 0;
42d35d48c   Darren Hart   futex: make futex...
826
  retry:
34f01cc1f   Eric Dumazet   FUTEX: new PRIVAT...
827
  	ret = get_futex_key(uaddr1, fshared, &key1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
828
829
  	if (unlikely(ret != 0))
  		goto out;
34f01cc1f   Eric Dumazet   FUTEX: new PRIVAT...
830
  	ret = get_futex_key(uaddr2, fshared, &key2);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
831
  	if (unlikely(ret != 0))
42d35d48c   Darren Hart   futex: make futex...
832
  		goto out_put_key1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
833

e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
834
835
  	hb1 = hash_futex(&key1);
  	hb2 = hash_futex(&key2);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
836

8b8f319fc   Ingo Molnar   [PATCH] lockdep: ...
837
  	double_lock_hb(hb1, hb2);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
838

e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
839
840
  	if (likely(cmpval != NULL)) {
  		u32 curval;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
841

e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
842
  		ret = get_futex_value_locked(&curval, uaddr1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
843
844
  
  		if (unlikely(ret)) {
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
845
846
847
  			spin_unlock(&hb1->lock);
  			if (hb1 != hb2)
  				spin_unlock(&hb2->lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
848

e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
849
  			ret = get_user(curval, uaddr1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
850
851
852
  
  			if (!ret)
  				goto retry;
42d35d48c   Darren Hart   futex: make futex...
853
  			goto out_put_keys;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
854
  		}
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
855
  		if (curval != *cmpval) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
856
857
858
859
  			ret = -EAGAIN;
  			goto out_unlock;
  		}
  	}
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
860
  	head1 = &hb1->chain;
ec92d0829   Pierre Peiffer   futex priority ba...
861
  	plist_for_each_entry_safe(this, next, head1, list) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
862
863
864
865
866
  		if (!match_futex (&this->key, &key1))
  			continue;
  		if (++ret <= nr_wake) {
  			wake_futex(this);
  		} else {
59e0e0ace   Sebastien Dugue   [PATCH] futex_req...
867
868
869
870
871
  			/*
  			 * If key1 and key2 hash to the same bucket, no need to
  			 * requeue.
  			 */
  			if (likely(head1 != &hb2->chain)) {
ec92d0829   Pierre Peiffer   futex priority ba...
872
873
  				plist_del(&this->list, &hb1->chain);
  				plist_add(&this->list, &hb2->chain);
59e0e0ace   Sebastien Dugue   [PATCH] futex_req...
874
  				this->lock_ptr = &hb2->lock;
ec92d0829   Pierre Peiffer   futex priority ba...
875
876
877
  #ifdef CONFIG_DEBUG_PI_LIST
  				this->list.plist.lock = &hb2->lock;
  #endif
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
878
  			}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
879
  			this->key = key2;
9adef58b1   Rusty Russell   futex: get_futex_...
880
  			get_futex_key_refs(&key2);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
881
882
883
884
  			drop_count++;
  
  			if (ret - nr_wake >= nr_requeue)
  				break;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
885
886
887
888
  		}
  	}
  
  out_unlock:
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
889
890
891
  	spin_unlock(&hb1->lock);
  	if (hb1 != hb2)
  		spin_unlock(&hb2->lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
892

9adef58b1   Rusty Russell   futex: get_futex_...
893
  	/* drop_futex_key_refs() must be called outside the spinlocks. */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
894
  	while (--drop_count >= 0)
9adef58b1   Rusty Russell   futex: get_futex_...
895
  		drop_futex_key_refs(&key1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
896

42d35d48c   Darren Hart   futex: make futex...
897
  out_put_keys:
38d47c1b7   Peter Zijlstra   futex: rely on ge...
898
  	put_futex_key(fshared, &key2);
42d35d48c   Darren Hart   futex: make futex...
899
  out_put_key1:
38d47c1b7   Peter Zijlstra   futex: rely on ge...
900
  	put_futex_key(fshared, &key1);
42d35d48c   Darren Hart   futex: make futex...
901
  out:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
902
903
904
905
  	return ret;
  }
  
  /* The key must be already stored in q->key. */
82af7aca5   Eric Sesterhenn   Removal of FUTEX_FD
906
  static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
907
  {
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
908
  	struct futex_hash_bucket *hb;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
909

73500ac54   Darren Hart   futex: rename fie...
910
  	init_waitqueue_head(&q->waiter);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
911

9adef58b1   Rusty Russell   futex: get_futex_...
912
  	get_futex_key_refs(&q->key);
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
913
914
  	hb = hash_futex(&q->key);
  	q->lock_ptr = &hb->lock;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
915

e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
916
917
  	spin_lock(&hb->lock);
  	return hb;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
918
  }
82af7aca5   Eric Sesterhenn   Removal of FUTEX_FD
919
  static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
920
  {
ec92d0829   Pierre Peiffer   futex priority ba...
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
  	int prio;
  
  	/*
  	 * The priority used to register this element is
  	 * - either the real thread-priority for the real-time threads
  	 * (i.e. threads with a priority lower than MAX_RT_PRIO)
  	 * - or MAX_RT_PRIO for non-RT threads.
  	 * Thus, all RT-threads are woken first in priority order, and
  	 * the others are woken last, in FIFO order.
  	 */
  	prio = min(current->normal_prio, MAX_RT_PRIO);
  
  	plist_node_init(&q->list, prio);
  #ifdef CONFIG_DEBUG_PI_LIST
  	q->list.plist.lock = &hb->lock;
  #endif
  	plist_add(&q->list, &hb->chain);
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
938
  	q->task = current;
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
939
  	spin_unlock(&hb->lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
940
941
942
  }
  
  static inline void
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
943
  queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
944
  {
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
945
  	spin_unlock(&hb->lock);
9adef58b1   Rusty Russell   futex: get_futex_...
946
  	drop_futex_key_refs(&q->key);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
947
948
949
950
951
952
  }
  
  /*
   * queue_me and unqueue_me must be called as a pair, each
   * exactly once.  They are called with the hashed spinlock held.
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
953
954
955
  /* Return 1 if we were still queued (ie. 0 means we were woken) */
  static int unqueue_me(struct futex_q *q)
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
956
  	spinlock_t *lock_ptr;
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
957
  	int ret = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
958
959
  
  	/* In the common case we don't take the spinlock, which is nice. */
42d35d48c   Darren Hart   futex: make futex...
960
  retry:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
961
  	lock_ptr = q->lock_ptr;
e91467ecd   Christian Borntraeger   [PATCH] bug in fu...
962
  	barrier();
c80544dc0   Stephen Hemminger   sparse pointer us...
963
  	if (lock_ptr != NULL) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
  		spin_lock(lock_ptr);
  		/*
  		 * q->lock_ptr can change between reading it and
  		 * spin_lock(), causing us to take the wrong lock.  This
  		 * corrects the race condition.
  		 *
  		 * Reasoning goes like this: if we have the wrong lock,
  		 * q->lock_ptr must have changed (maybe several times)
  		 * between reading it and the spin_lock().  It can
  		 * change again after the spin_lock() but only if it was
  		 * already changed before the spin_lock().  It cannot,
  		 * however, change back to the original value.  Therefore
  		 * we can detect whether we acquired the correct lock.
  		 */
  		if (unlikely(lock_ptr != q->lock_ptr)) {
  			spin_unlock(lock_ptr);
  			goto retry;
  		}
ec92d0829   Pierre Peiffer   futex priority ba...
982
983
  		WARN_ON(plist_node_empty(&q->list));
  		plist_del(&q->list, &q->list.plist);
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
984
985
  
  		BUG_ON(q->pi_state);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
986
987
988
  		spin_unlock(lock_ptr);
  		ret = 1;
  	}
9adef58b1   Rusty Russell   futex: get_futex_...
989
  	drop_futex_key_refs(&q->key);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
990
991
  	return ret;
  }
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
992
993
  /*
   * PI futexes can not be requeued and must remove themself from the
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
994
995
   * hash bucket. The hash bucket lock (i.e. lock_ptr) is held on entry
   * and dropped here.
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
996
   */
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
997
  static void unqueue_me_pi(struct futex_q *q)
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
998
  {
ec92d0829   Pierre Peiffer   futex priority ba...
999
1000
  	WARN_ON(plist_node_empty(&q->list));
  	plist_del(&q->list, &q->list.plist);
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1001
1002
1003
1004
  
  	BUG_ON(!q->pi_state);
  	free_pi_state(q->pi_state);
  	q->pi_state = NULL;
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
1005
  	spin_unlock(q->lock_ptr);
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1006

9adef58b1   Rusty Russell   futex: get_futex_...
1007
  	drop_futex_key_refs(&q->key);
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1008
  }
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
1009
  /*
cdf71a10c   Thomas Gleixner   futex: Prevent st...
1010
   * Fixup the pi_state owner with the new owner.
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
1011
   *
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
1012
1013
   * Must be called with hash bucket lock held and mm->sem held for non
   * private futexes.
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
1014
   */
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
1015
  static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
c2f9f2015   Peter Zijlstra   futex: cleanup fs...
1016
  				struct task_struct *newowner, int fshared)
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
1017
  {
cdf71a10c   Thomas Gleixner   futex: Prevent st...
1018
  	u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
1019
  	struct futex_pi_state *pi_state = q->pi_state;
1b7558e45   Thomas Gleixner   futexes: fix faul...
1020
  	struct task_struct *oldowner = pi_state->owner;
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
1021
  	u32 uval, curval, newval;
1b7558e45   Thomas Gleixner   futexes: fix faul...
1022
  	int ret, attempt = 0;
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
1023
1024
  
  	/* Owner died? */
1b7558e45   Thomas Gleixner   futexes: fix faul...
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
  	if (!pi_state->owner)
  		newtid |= FUTEX_OWNER_DIED;
  
  	/*
  	 * We are here either because we stole the rtmutex from the
  	 * pending owner or we are the pending owner which failed to
  	 * get the rtmutex. We have to replace the pending owner TID
  	 * in the user space variable. This must be atomic as we have
  	 * to preserve the owner died bit here.
  	 *
  	 * Note: We write the user space value _before_ changing the
  	 * pi_state because we can fault here. Imagine swapped out
  	 * pages or a fork, which was running right before we acquired
  	 * mmap_sem, that marked all the anonymous memory readonly for
  	 * cow.
  	 *
  	 * Modifying pi_state _before_ the user space value would
  	 * leave the pi_state in an inconsistent state when we fault
  	 * here, because we need to drop the hash bucket lock to
  	 * handle the fault. This might be observed in the PID check
  	 * in lookup_pi_state.
  	 */
  retry:
  	if (get_futex_value_locked(&uval, uaddr))
  		goto handle_fault;
  
  	while (1) {
  		newval = (uval & FUTEX_OWNER_DIED) | newtid;
  
  		curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
  
  		if (curval == -EFAULT)
  			goto handle_fault;
  		if (curval == uval)
  			break;
  		uval = curval;
  	}
  
  	/*
  	 * We fixed up user space. Now we need to fix the pi_state
  	 * itself.
  	 */
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
1067
1068
1069
1070
1071
  	if (pi_state->owner != NULL) {
  		spin_lock_irq(&pi_state->owner->pi_lock);
  		WARN_ON(list_empty(&pi_state->list));
  		list_del_init(&pi_state->list);
  		spin_unlock_irq(&pi_state->owner->pi_lock);
1b7558e45   Thomas Gleixner   futexes: fix faul...
1072
  	}
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
1073

cdf71a10c   Thomas Gleixner   futex: Prevent st...
1074
  	pi_state->owner = newowner;
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
1075

cdf71a10c   Thomas Gleixner   futex: Prevent st...
1076
  	spin_lock_irq(&newowner->pi_lock);
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
1077
  	WARN_ON(!list_empty(&pi_state->list));
cdf71a10c   Thomas Gleixner   futex: Prevent st...
1078
1079
  	list_add(&pi_state->list, &newowner->pi_state_list);
  	spin_unlock_irq(&newowner->pi_lock);
1b7558e45   Thomas Gleixner   futexes: fix faul...
1080
  	return 0;
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
1081

d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
1082
  	/*
1b7558e45   Thomas Gleixner   futexes: fix faul...
1083
1084
1085
1086
1087
1088
1089
1090
  	 * To handle the page fault we need to drop the hash bucket
  	 * lock here. That gives the other task (either the pending
  	 * owner itself or the task which stole the rtmutex) the
  	 * chance to try the fixup of the pi_state. So once we are
  	 * back from handling the fault we need to check the pi_state
  	 * after reacquiring the hash bucket lock and before trying to
  	 * do another fixup. When the fixup has been done already we
  	 * simply return.
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
1091
  	 */
1b7558e45   Thomas Gleixner   futexes: fix faul...
1092
1093
  handle_fault:
  	spin_unlock(q->lock_ptr);
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
1094

c2f9f2015   Peter Zijlstra   futex: cleanup fs...
1095
  	ret = futex_handle_fault((unsigned long)uaddr, attempt++);
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
1096

1b7558e45   Thomas Gleixner   futexes: fix faul...
1097
  	spin_lock(q->lock_ptr);
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
1098

1b7558e45   Thomas Gleixner   futexes: fix faul...
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
  	/*
  	 * Check if someone else fixed it for us:
  	 */
  	if (pi_state->owner != oldowner)
  		return 0;
  
  	if (ret)
  		return ret;
  
  	goto retry;
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
1109
  }
34f01cc1f   Eric Dumazet   FUTEX: new PRIVAT...
1110
1111
  /*
   * In case we must use restart_block to restart a futex_wait,
ce6bd420f   Steven Rostedt   futex: fix for fu...
1112
   * we encode the futex's shared capability in the restart block's 'flags'
34f01cc1f   Eric Dumazet   FUTEX: new PRIVAT...
1113
   */
1acdac104   Thomas Gleixner   futex: make clock...
1114
1115
  #define FLAGS_SHARED		0x01
  #define FLAGS_CLOCKRT		0x02
34f01cc1f   Eric Dumazet   FUTEX: new PRIVAT...
1116

72c1bbf30   Nick Piggin   futex: restartabl...
1117
  static long futex_wait_restart(struct restart_block *restart);
36cf3b5c3   Thomas Gleixner   FUTEX: Tidy up th...
1118

c2f9f2015   Peter Zijlstra   futex: cleanup fs...
1119
  /*
   * Block the current task on the futex at @uaddr as long as the futex
   * word still contains @val.
   *
   * @abs_time, if non-NULL, is an absolute timeout measured against
   * CLOCK_REALTIME when @clockrt is set and CLOCK_MONOTONIC otherwise.
   * @bitset is stored in the queue entry; a zero bitset is rejected.
   *
   * Returns 0 if we were woken, -EINVAL for a zero bitset, -EWOULDBLOCK if
   * the futex word did not contain @val, -ETIMEDOUT if the timeout fired,
   * -ERESTARTSYS if we stopped waiting for another reason and no timeout
   * was given, -ERESTART_RESTARTBLOCK in that case when a timeout was given
   * (the restart block is set up to call futex_wait_restart()), or the
   * error from get_futex_key() / get_user().
   */
  static int futex_wait(u32 __user *uaddr, int fshared,
  		      u32 val, ktime_t *abs_time, u32 bitset, int clockrt)
  {
  	struct task_struct *curr = current;
  	struct restart_block *restart;
  	DECLARE_WAITQUEUE(wait, curr);
  	struct futex_hash_bucket *hb;
  	struct hrtimer_sleeper t;
  	struct futex_q q;
  	int timed_out = 0;
  	u32 uval;
  	int ret;

  	if (!bitset)
  		return -EINVAL;

  	q.pi_state = NULL;
  	q.bitset = bitset;
  retry:
  	q.key = FUTEX_KEY_INIT;
  	ret = get_futex_key(uaddr, fshared, &q.key);
  	if (unlikely(ret != 0))
  		return ret;

  	hb = queue_lock(&q);

  	/*
  	 * Access the page AFTER the futex is queued.
  	 * Order is important:
  	 *
  	 *   Userspace waiter: val = var; if (cond(val)) futex_wait(&var, val);
  	 *   Userspace waker:  if (cond(var)) { var = new; futex_wake(&var); }
  	 *
  	 * The basic logical guarantee of a futex is that it blocks ONLY
  	 * if cond(var) is known to be true at the time of blocking, for
  	 * any cond.  If we queued after testing *uaddr, that would open
  	 * a race condition where we could block indefinitely with
  	 * cond(var) false, which would violate the guarantee.
  	 *
  	 * A consequence is that futex_wait() can return zero and absorb
  	 * a wakeup when *uaddr != val on entry to the syscall.  This is
  	 * rare, but normal.
  	 *
  	 * NOTE(review): the original comment states that for shared
  	 * futexes the mmap semaphore is held here, so the mapping cannot
  	 * have changed since get_futex_key() - confirm this still holds.
  	 */
  	ret = get_futex_value_locked(&uval, uaddr);
  	if (unlikely(ret)) {
  		/*
  		 * Reading the word faulted with the bucket locked: back
  		 * out completely, touch the word with get_user() and start
  		 * over if that succeeds.
  		 */
  		queue_unlock(&q, hb);
  		put_futex_key(fshared, &q.key);

  		ret = get_user(uval, uaddr);
  		if (ret)
  			return ret;
  		goto retry;
  	}

  	if (unlikely(uval != val)) {
  		queue_unlock(&q, hb);
  		ret = -EWOULDBLOCK;
  		goto out_put_key;
  	}

  	/* Only actually queue if *uaddr contained val.  */
  	queue_me(&q, hb);

  	/*
  	 * There might have been scheduling since the queue_me(), as we
  	 * cannot hold a spinlock across the get_user() in case it
  	 * faults, and we cannot just set TASK_INTERRUPTIBLE state when
  	 * queueing ourselves into the futex hash.  This code thus has to
  	 * rely on the futex_wake() code removing us from hash when it
  	 * wakes us up.
  	 */

  	/* add_wait_queue is the barrier after __set_current_state. */
  	__set_current_state(TASK_INTERRUPTIBLE);
  	add_wait_queue(&q.waiter, &wait);
  	/*
  	 * !plist_node_empty() is safe here without any lock.
  	 * q.lock_ptr != 0 is not safe, because of ordering against wakeup.
  	 */
  	if (likely(!plist_node_empty(&q.list))) {
  		if (abs_time) {
  			/* Real-time tasks get no timer slack. */
  			unsigned long slack = rt_task(current) ?
  					0 : current->timer_slack_ns;

  			hrtimer_init_on_stack(&t.timer,
  					      clockrt ? CLOCK_REALTIME :
  					      CLOCK_MONOTONIC,
  					      HRTIMER_MODE_ABS);
  			hrtimer_init_sleeper(&t, current);
  			hrtimer_set_expires_range_ns(&t.timer, *abs_time, slack);

  			hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS);
  			if (!hrtimer_active(&t.timer))
  				t.task = NULL;

  			/*
  			 * the timer could have already expired, in which
  			 * case current would be flagged for rescheduling.
  			 * Don't bother calling schedule.
  			 */
  			if (likely(t.task))
  				schedule();

  			hrtimer_cancel(&t.timer);

  			/* Flag if a timeout occurred */
  			timed_out = (t.task == NULL);

  			destroy_hrtimer_on_stack(&t.timer);
  		} else {
  			schedule();
  		}
  	}
  	__set_current_state(TASK_RUNNING);

  	/*
  	 * NOTE: we don't remove ourselves from the waitqueue because
  	 * we are the only user of it.
  	 */

  	if (!unqueue_me(&q)) {
  		/* If we were woken (and unqueued), we succeeded, whatever. */
  		ret = 0;
  	} else if (timed_out) {
  		ret = -ETIMEDOUT;
  	} else if (!abs_time) {
  		/*
  		 * We expect signal_pending(current), but another thread may
  		 * have handled it for us already.
  		 */
  		ret = -ERESTARTSYS;
  	} else {
  		/* Timed wait: save the arguments for futex_wait_restart(). */
  		restart = &current_thread_info()->restart_block;
  		restart->fn = futex_wait_restart;
  		restart->futex.uaddr = (u32 *)uaddr;
  		restart->futex.val = val;
  		restart->futex.time = abs_time->tv64;
  		restart->futex.bitset = bitset;
  		restart->futex.flags = 0;

  		if (fshared)
  			restart->futex.flags |= FLAGS_SHARED;
  		if (clockrt)
  			restart->futex.flags |= FLAGS_CLOCKRT;

  		ret = -ERESTART_RESTARTBLOCK;
  	}

  out_put_key:
  	put_futex_key(fshared, &q.key);
  	return ret;
  }
72c1bbf30   Nick Piggin   futex: restartabl...
1279
1280
1281
  
  static long futex_wait_restart(struct restart_block *restart)
  {
ce6bd420f   Steven Rostedt   futex: fix for fu...
1282
  	u32 __user *uaddr = (u32 __user *)restart->futex.uaddr;
c2f9f2015   Peter Zijlstra   futex: cleanup fs...
1283
  	int fshared = 0;
ce6bd420f   Steven Rostedt   futex: fix for fu...
1284
  	ktime_t t;
72c1bbf30   Nick Piggin   futex: restartabl...
1285

ce6bd420f   Steven Rostedt   futex: fix for fu...
1286
  	t.tv64 = restart->futex.time;
72c1bbf30   Nick Piggin   futex: restartabl...
1287
  	restart->fn = do_no_restart_syscall;
ce6bd420f   Steven Rostedt   futex: fix for fu...
1288
  	if (restart->futex.flags & FLAGS_SHARED)
c2f9f2015   Peter Zijlstra   futex: cleanup fs...
1289
  		fshared = 1;
cd689985c   Thomas Gleixner   futex: Add bitset...
1290
  	return (long)futex_wait(uaddr, fshared, restart->futex.val, &t,
1acdac104   Thomas Gleixner   futex: make clock...
1291
1292
  				restart->futex.bitset,
  				restart->futex.flags & FLAGS_CLOCKRT);
72c1bbf30   Nick Piggin   futex: restartabl...
1293
  }
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1294
1295
1296
1297
1298
1299
  /*
   * Userspace tried a 0 -> TID atomic transition of the futex value
   * and failed. The kernel side here does the whole locking operation:
   * if there are waiters then it will block, it does PI, etc. (Due to
   * races the kernel might see a 0 value of the futex too.)
   */
c2f9f2015   Peter Zijlstra   futex: cleanup fs...
1300
  static int futex_lock_pi(u32 __user *uaddr, int fshared,
34f01cc1f   Eric Dumazet   FUTEX: new PRIVAT...
1301
  			 int detect, ktime_t *time, int trylock)
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1302
  {
c5780e976   Thomas Gleixner   [PATCH] Use the c...
1303
  	struct hrtimer_sleeper timeout, *to = NULL;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1304
1305
1306
1307
  	struct task_struct *curr = current;
  	struct futex_hash_bucket *hb;
  	u32 uval, newval, curval;
  	struct futex_q q;
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
1308
  	int ret, lock_taken, ownerdied = 0, attempt = 0;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1309
1310
1311
  
  	if (refill_pi_state_cache())
  		return -ENOMEM;
c19384b5b   Pierre Peiffer   Make futex_wait()...
1312
  	if (time) {
c5780e976   Thomas Gleixner   [PATCH] Use the c...
1313
  		to = &timeout;
237fc6e7a   Thomas Gleixner   add hrtimer speci...
1314
1315
  		hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME,
  				      HRTIMER_MODE_ABS);
c5780e976   Thomas Gleixner   [PATCH] Use the c...
1316
  		hrtimer_init_sleeper(to, current);
cc584b213   Arjan van de Ven   hrtimer: convert ...
1317
  		hrtimer_set_expires(&to->timer, *time);
c5780e976   Thomas Gleixner   [PATCH] Use the c...
1318
  	}
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1319
  	q.pi_state = NULL;
42d35d48c   Darren Hart   futex: make futex...
1320
  retry:
38d47c1b7   Peter Zijlstra   futex: rely on ge...
1321
  	q.key = FUTEX_KEY_INIT;
34f01cc1f   Eric Dumazet   FUTEX: new PRIVAT...
1322
  	ret = get_futex_key(uaddr, fshared, &q.key);
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1323
  	if (unlikely(ret != 0))
42d35d48c   Darren Hart   futex: make futex...
1324
  		goto out;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1325

42d35d48c   Darren Hart   futex: make futex...
1326
  retry_unlocked:
82af7aca5   Eric Sesterhenn   Removal of FUTEX_FD
1327
  	hb = queue_lock(&q);
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1328

42d35d48c   Darren Hart   futex: make futex...
1329
  retry_locked:
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
1330
  	ret = lock_taken = 0;
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
1331

c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1332
1333
1334
1335
1336
  	/*
  	 * To avoid races, we attempt to take the lock here again
  	 * (by doing a 0 -> TID atomic cmpxchg), while holding all
  	 * the locks. It will most likely not succeed.
  	 */
b488893a3   Pavel Emelyanov   pid namespaces: c...
1337
  	newval = task_pid_vnr(current);
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1338

36cf3b5c3   Thomas Gleixner   FUTEX: Tidy up th...
1339
  	curval = cmpxchg_futex_value_locked(uaddr, 0, newval);
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1340
1341
1342
  
  	if (unlikely(curval == -EFAULT))
  		goto uaddr_faulted;
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
1343
1344
1345
1346
  	/*
  	 * Detect deadlocks. In case of REQUEUE_PI this is a valid
  	 * situation and we return success to user space.
  	 */
b488893a3   Pavel Emelyanov   pid namespaces: c...
1347
  	if (unlikely((curval & FUTEX_TID_MASK) == task_pid_vnr(current))) {
bd197234b   Thomas Gleixner   Revert "futex_req...
1348
  		ret = -EDEADLK;
42d35d48c   Darren Hart   futex: make futex...
1349
  		goto out_unlock_put_key;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1350
1351
1352
  	}
  
  	/*
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
1353
  	 * Surprise - we got the lock. Just return to userspace:
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1354
1355
  	 */
  	if (unlikely(!curval))
42d35d48c   Darren Hart   futex: make futex...
1356
  		goto out_unlock_put_key;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1357
1358
  
  	uval = curval;
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
1359

d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
1360
  	/*
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
1361
1362
  	 * Set the WAITERS flag, so the owner will know it has someone
  	 * to wake at next unlock
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
1363
  	 */
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
1364
1365
1366
1367
  	newval = curval | FUTEX_WAITERS;
  
  	/*
  	 * There are two cases, where a futex might have no owner (the
bd197234b   Thomas Gleixner   Revert "futex_req...
1368
1369
1370
  	 * owner TID is 0): OWNER_DIED. We take over the futex in this
  	 * case. We also do an unconditional take over, when the owner
  	 * of the futex died.
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
1371
1372
1373
1374
  	 *
  	 * This is safe as we are protected by the hash bucket lock !
  	 */
  	if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) {
bd197234b   Thomas Gleixner   Revert "futex_req...
1375
  		/* Keep the OWNER_DIED bit */
b488893a3   Pavel Emelyanov   pid namespaces: c...
1376
  		newval = (curval & ~FUTEX_TID_MASK) | task_pid_vnr(current);
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
1377
1378
1379
  		ownerdied = 0;
  		lock_taken = 1;
  	}
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1380

36cf3b5c3   Thomas Gleixner   FUTEX: Tidy up th...
1381
  	curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1382
1383
1384
1385
1386
  
  	if (unlikely(curval == -EFAULT))
  		goto uaddr_faulted;
  	if (unlikely(curval != uval))
  		goto retry_locked;
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
1387
  	/*
bd197234b   Thomas Gleixner   Revert "futex_req...
1388
  	 * We took the lock due to owner died take over.
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
1389
  	 */
bd197234b   Thomas Gleixner   Revert "futex_req...
1390
  	if (unlikely(lock_taken))
42d35d48c   Darren Hart   futex: make futex...
1391
  		goto out_unlock_put_key;
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
1392

c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1393
1394
1395
1396
  	/*
  	 * We dont have the lock. Look up the PI state (or create it if
  	 * we are the first waiter):
  	 */
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
1397
  	ret = lookup_pi_state(uval, hb, &q.key, &q.pi_state);
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1398
1399
  
  	if (unlikely(ret)) {
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
1400
  		switch (ret) {
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1401

778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
1402
1403
1404
1405
1406
1407
  		case -EAGAIN:
  			/*
  			 * Task is exiting and we just wait for the
  			 * exit to complete.
  			 */
  			queue_unlock(&q, hb);
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
1408
1409
  			cond_resched();
  			goto retry;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1410

778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
1411
1412
1413
1414
1415
1416
1417
  		case -ESRCH:
  			/*
  			 * No owner found for this futex. Check if the
  			 * OWNER_DIED bit is set to figure out whether
  			 * this is a robust futex or not.
  			 */
  			if (get_futex_value_locked(&curval, uaddr))
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1418
  				goto uaddr_faulted;
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
1419
1420
1421
1422
1423
1424
1425
1426
  
  			/*
  			 * We simply start over in case of a robust
  			 * futex. The code above will take the futex
  			 * and return happy.
  			 */
  			if (curval & FUTEX_OWNER_DIED) {
  				ownerdied = 1;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1427
  				goto retry_locked;
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
1428
1429
  			}
  		default:
42d35d48c   Darren Hart   futex: make futex...
1430
  			goto out_unlock_put_key;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1431
  		}
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1432
1433
1434
1435
1436
  	}
  
  	/*
  	 * Only actually queue now that the atomic ops are done:
  	 */
82af7aca5   Eric Sesterhenn   Removal of FUTEX_FD
1437
  	queue_me(&q, hb);
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1438

c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
  	WARN_ON(!q.pi_state);
  	/*
  	 * Block on the PI mutex:
  	 */
  	if (!trylock)
  		ret = rt_mutex_timed_lock(&q.pi_state->pi_mutex, to, 1);
  	else {
  		ret = rt_mutex_trylock(&q.pi_state->pi_mutex);
  		/* Fixup the trylock return value: */
  		ret = ret ? 0 : -EWOULDBLOCK;
  	}
a99e4e413   Vernon Mauery   [PATCH] pi-futex:...
1450
  	spin_lock(q.lock_ptr);
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1451

778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
1452
1453
1454
1455
1456
1457
1458
  	if (!ret) {
  		/*
  		 * Got the lock. We might not be the anticipated owner
  		 * if we did a lock-steal - fix up the PI-state in
  		 * that case:
  		 */
  		if (q.pi_state->owner != curr)
1b7558e45   Thomas Gleixner   futexes: fix faul...
1459
  			ret = fixup_pi_state_owner(uaddr, &q, curr, fshared);
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
1460
  	} else {
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1461
1462
  		/*
  		 * Catch the rare case, where the lock was released
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
1463
1464
  		 * when we were on the way back before we locked the
  		 * hash bucket.
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1465
  		 */
cdf71a10c   Thomas Gleixner   futex: Prevent st...
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
  		if (q.pi_state->owner == curr) {
  			/*
  			 * Try to get the rt_mutex now. This might
  			 * fail as some other task acquired the
  			 * rt_mutex after we removed ourself from the
  			 * rt_mutex waiters list.
  			 */
  			if (rt_mutex_trylock(&q.pi_state->pi_mutex))
  				ret = 0;
  			else {
  				/*
  				 * pi_state is incorrect, some other
  				 * task did a lock steal and we
  				 * returned due to timeout or signal
  				 * without taking the rt_mutex. Too
  				 * late. We can access the
  				 * rt_mutex_owner without locking, as
  				 * the other task is now blocked on
  				 * the hash bucket lock. Fix the state
  				 * up.
  				 */
  				struct task_struct *owner;
  				int res;
  
  				owner = rt_mutex_owner(&q.pi_state->pi_mutex);
1b7558e45   Thomas Gleixner   futexes: fix faul...
1491
1492
  				res = fixup_pi_state_owner(uaddr, &q, owner,
  							   fshared);
cdf71a10c   Thomas Gleixner   futex: Prevent st...
1493

cdf71a10c   Thomas Gleixner   futex: Prevent st...
1494
1495
1496
1497
  				/* propagate -EFAULT, if the fixup failed */
  				if (res)
  					ret = res;
  			}
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
  		} else {
  			/*
  			 * Paranoia check. If we did not take the lock
  			 * in the trylock above, then we should not be
  			 * the owner of the rtmutex, neither the real
  			 * nor the pending one:
  			 */
  			if (rt_mutex_owner(&q.pi_state->pi_mutex) == curr)
  				printk(KERN_ERR "futex_lock_pi: ret = %d "
  				       "pi-mutex: %p pi-state %p
  ", ret,
  				       q.pi_state->pi_mutex.owner,
  				       q.pi_state->owner);
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1511
  		}
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1512
  	}
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
1513
1514
  	/* Unqueue and drop the lock */
  	unqueue_me_pi(&q);
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1515

237fc6e7a   Thomas Gleixner   add hrtimer speci...
1516
1517
  	if (to)
  		destroy_hrtimer_on_stack(&to->timer);
c5780e976   Thomas Gleixner   [PATCH] Use the c...
1518
  	return ret != -EINTR ? ret : -ERESTARTNOINTR;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1519

42d35d48c   Darren Hart   futex: make futex...
1520
  out_unlock_put_key:
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1521
  	queue_unlock(&q, hb);
42d35d48c   Darren Hart   futex: make futex...
1522
  out_put_key:
38d47c1b7   Peter Zijlstra   futex: rely on ge...
1523
  	put_futex_key(fshared, &q.key);
42d35d48c   Darren Hart   futex: make futex...
1524
  out:
237fc6e7a   Thomas Gleixner   add hrtimer speci...
1525
1526
  	if (to)
  		destroy_hrtimer_on_stack(&to->timer);
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1527
  	return ret;
42d35d48c   Darren Hart   futex: make futex...
1528
  uaddr_faulted:
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1529
  	/*
b56863630   Darren Hart   futex: clean up f...
1530
1531
1532
1533
1534
  	 * We have to r/w  *(int __user *)uaddr, and we have to modify it
  	 * atomically.  Therefore, if we continue to fault after get_user()
  	 * below, we need to handle the fault ourselves, while still holding
  	 * the mmap_sem.  This can occur if the uaddr is under contention as
  	 * we have to drop the mmap_sem in order to call get_user().
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1535
  	 */
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
1536
  	queue_unlock(&q, hb);
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1537
  	if (attempt++) {
c2f9f2015   Peter Zijlstra   futex: cleanup fs...
1538
  		ret = futex_handle_fault((unsigned long)uaddr, attempt);
34f01cc1f   Eric Dumazet   FUTEX: new PRIVAT...
1539
  		if (ret)
42d35d48c   Darren Hart   futex: make futex...
1540
  			goto out_put_key;
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
1541
  		goto retry_unlocked;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1542
  	}
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1543
  	ret = get_user(uval, uaddr);
b56863630   Darren Hart   futex: clean up f...
1544
  	if (!ret)
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1545
  		goto retry;
237fc6e7a   Thomas Gleixner   add hrtimer speci...
1546
1547
  	if (to)
  		destroy_hrtimer_on_stack(&to->timer);
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1548
1549
1550
1551
  	return ret;
  }
  
  /*
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1552
1553
1554
1555
   * Userspace attempted a TID -> 0 atomic transition, and failed.
   * This is the in-kernel slowpath: we look up the PI state (if any),
   * and do the rt-mutex unlock.
   */
c2f9f2015   Peter Zijlstra   futex: cleanup fs...
1556
  static int futex_unlock_pi(u32 __user *uaddr, int fshared)
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1557
1558
1559
1560
  {
  	struct futex_hash_bucket *hb;
  	struct futex_q *this, *next;
  	u32 uval;
ec92d0829   Pierre Peiffer   futex priority ba...
1561
  	struct plist_head *head;
38d47c1b7   Peter Zijlstra   futex: rely on ge...
1562
  	union futex_key key = FUTEX_KEY_INIT;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1563
1564
1565
1566
1567
1568
1569
1570
  	int ret, attempt = 0;
  
  retry:
  	if (get_user(uval, uaddr))
  		return -EFAULT;
  	/*
  	 * We release only a lock we actually own:
  	 */
b488893a3   Pavel Emelyanov   pid namespaces: c...
1571
  	if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current))
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1572
  		return -EPERM;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1573

34f01cc1f   Eric Dumazet   FUTEX: new PRIVAT...
1574
  	ret = get_futex_key(uaddr, fshared, &key);
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1575
1576
1577
1578
  	if (unlikely(ret != 0))
  		goto out;
  
  	hb = hash_futex(&key);
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
1579
  retry_unlocked:
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1580
  	spin_lock(&hb->lock);
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1581
1582
1583
1584
1585
  	/*
  	 * To avoid races, try to do the TID -> 0 atomic transition
  	 * again. If it succeeds then we can return without waking
  	 * anyone else up:
  	 */
36cf3b5c3   Thomas Gleixner   FUTEX: Tidy up th...
1586
  	if (!(uval & FUTEX_OWNER_DIED))
b488893a3   Pavel Emelyanov   pid namespaces: c...
1587
  		uval = cmpxchg_futex_value_locked(uaddr, task_pid_vnr(current), 0);
36cf3b5c3   Thomas Gleixner   FUTEX: Tidy up th...
1588

c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1589
1590
1591
1592
1593
1594
1595
  
  	if (unlikely(uval == -EFAULT))
  		goto pi_faulted;
  	/*
  	 * Rare case: we managed to release the lock atomically,
  	 * no need to wake anyone else up:
  	 */
b488893a3   Pavel Emelyanov   pid namespaces: c...
1596
  	if (unlikely(uval == task_pid_vnr(current)))
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1597
1598
1599
1600
1601
1602
1603
  		goto out_unlock;
  
  	/*
  	 * Ok, other tasks may need to be woken up - check waiters
  	 * and do the wakeup if necessary:
  	 */
  	head = &hb->chain;
ec92d0829   Pierre Peiffer   futex priority ba...
1604
  	plist_for_each_entry_safe(this, next, head, list) {
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
  		if (!match_futex (&this->key, &key))
  			continue;
  		ret = wake_futex_pi(uaddr, uval, this);
  		/*
  		 * The atomic access to the futex value
  		 * generated a pagefault, so retry the
  		 * user-access and the wakeup:
  		 */
  		if (ret == -EFAULT)
  			goto pi_faulted;
  		goto out_unlock;
  	}
  	/*
  	 * No waiters - kernel unlocks the futex:
  	 */
e3f2ddeac   Ingo Molnar   [PATCH] pi-futex:...
1620
1621
1622
1623
1624
  	if (!(uval & FUTEX_OWNER_DIED)) {
  		ret = unlock_futex_pi(uaddr, uval);
  		if (ret == -EFAULT)
  			goto pi_faulted;
  	}
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1625
1626
1627
  
  out_unlock:
  	spin_unlock(&hb->lock);
38d47c1b7   Peter Zijlstra   futex: rely on ge...
1628
  	put_futex_key(fshared, &key);
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1629

42d35d48c   Darren Hart   futex: make futex...
1630
  out:
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1631
1632
1633
1634
  	return ret;
  
  pi_faulted:
  	/*
b56863630   Darren Hart   futex: clean up f...
1635
1636
1637
1638
1639
  	 * We have to r/w  *(int __user *)uaddr, and we have to modify it
  	 * atomically.  Therefore, if we continue to fault after get_user()
  	 * below, we need to handle the fault ourselves, while still holding
  	 * the mmap_sem.  This can occur if the uaddr is under contention as
  	 * we have to drop the mmap_sem in order to call get_user().
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1640
  	 */
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
1641
  	spin_unlock(&hb->lock);
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1642
  	if (attempt++) {
c2f9f2015   Peter Zijlstra   futex: cleanup fs...
1643
  		ret = futex_handle_fault((unsigned long)uaddr, attempt);
34f01cc1f   Eric Dumazet   FUTEX: new PRIVAT...
1644
  		if (ret)
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
1645
  			goto out;
187226f57   John Stultz   futex_unlock_pi()...
1646
  		uval = 0;
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
1647
  		goto retry_unlocked;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1648
  	}
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1649
  	ret = get_user(uval, uaddr);
b56863630   Darren Hart   futex: clean up f...
1650
  	if (!ret)
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1651
  		goto retry;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1652
1653
  	return ret;
  }
0771dfefc   Ingo Molnar   [PATCH] lightweig...
1654
1655
1656
1657
1658
1659
1660
  /*
   * Support for robust futexes: the kernel cleans up held futexes at
   * thread exit time.
   *
   * Implementation: user-space maintains a per-thread list of locks it
   * is holding. Upon do_exit(), the kernel carefully walks this list,
   * and marks all locks that are owned by this thread with the
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1661
   * FUTEX_OWNER_DIED bit, and wakes up a waiter (if any). The list is
0771dfefc   Ingo Molnar   [PATCH] lightweig...
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
   * always manipulated with the lock held, so the list is private and
   * per-thread. Userspace also maintains a per-thread 'list_op_pending'
   * field, to allow the kernel to clean up if the thread dies after
   * acquiring the lock, but just before it could have added itself to
   * the list. There can only be one such pending lock.
   */
  
  /**
   * sys_set_robust_list - set the robust-futex list head of a task
   * @head: pointer to the list-head
   * @len: length of the list-head, as userspace expects
   */
836f92adf   Heiko Carstens   [CVE-2009-0029] S...
1674
1675
  SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
  		size_t, len)
0771dfefc   Ingo Molnar   [PATCH] lightweig...
1676
  {
a0c1e9073   Thomas Gleixner   futex: runtime en...
1677
1678
  	if (!futex_cmpxchg_enabled)
  		return -ENOSYS;
0771dfefc   Ingo Molnar   [PATCH] lightweig...
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
  	/*
  	 * The kernel knows only one size for now:
  	 */
  	if (unlikely(len != sizeof(*head)))
  		return -EINVAL;
  
  	current->robust_list = head;
  
  	return 0;
  }
  
  /**
   * sys_get_robust_list - get the robust-futex list head of a task
   * @pid: pid of the process [zero for current task]
   * @head_ptr: pointer to a list-head pointer, the kernel fills it in
   * @len_ptr: pointer to a length field, the kernel fills in the header size
   */
836f92adf   Heiko Carstens   [CVE-2009-0029] S...
1696
1697
1698
  SYSCALL_DEFINE3(get_robust_list, int, pid,
  		struct robust_list_head __user * __user *, head_ptr,
  		size_t __user *, len_ptr)
0771dfefc   Ingo Molnar   [PATCH] lightweig...
1699
  {
ba46df984   Al Viro   [PATCH] __user an...
1700
  	struct robust_list_head __user *head;
0771dfefc   Ingo Molnar   [PATCH] lightweig...
1701
  	unsigned long ret;
c69e8d9c0   David Howells   CRED: Use RCU to ...
1702
  	const struct cred *cred = current_cred(), *pcred;
0771dfefc   Ingo Molnar   [PATCH] lightweig...
1703

a0c1e9073   Thomas Gleixner   futex: runtime en...
1704
1705
  	if (!futex_cmpxchg_enabled)
  		return -ENOSYS;
0771dfefc   Ingo Molnar   [PATCH] lightweig...
1706
1707
1708
1709
1710
1711
  	if (!pid)
  		head = current->robust_list;
  	else {
  		struct task_struct *p;
  
  		ret = -ESRCH;
aaa2a97eb   Oleg Nesterov   [PATCH] sys_get_r...
1712
  		rcu_read_lock();
228ebcbe6   Pavel Emelyanov   Uninline find_tas...
1713
  		p = find_task_by_vpid(pid);
0771dfefc   Ingo Molnar   [PATCH] lightweig...
1714
1715
1716
  		if (!p)
  			goto err_unlock;
  		ret = -EPERM;
c69e8d9c0   David Howells   CRED: Use RCU to ...
1717
1718
1719
  		pcred = __task_cred(p);
  		if (cred->euid != pcred->euid &&
  		    cred->euid != pcred->uid &&
76aac0e9a   David Howells   CRED: Wrap task c...
1720
  		    !capable(CAP_SYS_PTRACE))
0771dfefc   Ingo Molnar   [PATCH] lightweig...
1721
1722
  			goto err_unlock;
  		head = p->robust_list;
aaa2a97eb   Oleg Nesterov   [PATCH] sys_get_r...
1723
  		rcu_read_unlock();
0771dfefc   Ingo Molnar   [PATCH] lightweig...
1724
1725
1726
1727
1728
1729
1730
  	}
  
  	if (put_user(sizeof(*head), len_ptr))
  		return -EFAULT;
  	return put_user(head, head_ptr);
  
  err_unlock:
aaa2a97eb   Oleg Nesterov   [PATCH] sys_get_r...
1731
  	rcu_read_unlock();
0771dfefc   Ingo Molnar   [PATCH] lightweig...
1732
1733
1734
1735
1736
1737
1738
1739
  
  	return ret;
  }
  
  /*
   * Process a futex-list entry, check whether it's owned by the
   * dying task, and do notification if so:
   */
e3f2ddeac   Ingo Molnar   [PATCH] pi-futex:...
1740
  int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
0771dfefc   Ingo Molnar   [PATCH] lightweig...
1741
  {
e3f2ddeac   Ingo Molnar   [PATCH] pi-futex:...
1742
  	u32 uval, nval, mval;
0771dfefc   Ingo Molnar   [PATCH] lightweig...
1743

8f17d3a50   Ingo Molnar   [PATCH] lightweig...
1744
1745
  retry:
  	if (get_user(uval, uaddr))
0771dfefc   Ingo Molnar   [PATCH] lightweig...
1746
  		return -1;
b488893a3   Pavel Emelyanov   pid namespaces: c...
1747
  	if ((uval & FUTEX_TID_MASK) == task_pid_vnr(curr)) {
0771dfefc   Ingo Molnar   [PATCH] lightweig...
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
  		/*
  		 * Ok, this dying thread is truly holding a futex
  		 * of interest. Set the OWNER_DIED bit atomically
  		 * via cmpxchg, and if the value had FUTEX_WAITERS
  		 * set, wake up a waiter (if any). (We have to do a
  		 * futex_wake() even if OWNER_DIED is already set -
  		 * to handle the rare but possible case of recursive
  		 * thread-death.) The rest of the cleanup is done in
  		 * userspace.
  		 */
e3f2ddeac   Ingo Molnar   [PATCH] pi-futex:...
1758
1759
  		mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
  		nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, mval);
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1760
1761
1762
1763
  		if (nval == -EFAULT)
  			return -1;
  
  		if (nval != uval)
8f17d3a50   Ingo Molnar   [PATCH] lightweig...
1764
  			goto retry;
0771dfefc   Ingo Molnar   [PATCH] lightweig...
1765

e3f2ddeac   Ingo Molnar   [PATCH] pi-futex:...
1766
1767
1768
1769
  		/*
  		 * Wake robust non-PI futexes here. The wakeup of
  		 * PI futexes happens in exit_pi_state():
  		 */
36cf3b5c3   Thomas Gleixner   FUTEX: Tidy up th...
1770
  		if (!pi && (uval & FUTEX_WAITERS))
c2f9f2015   Peter Zijlstra   futex: cleanup fs...
1771
  			futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
0771dfefc   Ingo Molnar   [PATCH] lightweig...
1772
1773
1774
1775
1776
  	}
  	return 0;
  }
  
  /*
   * Read one robust-list pointer from user space and split it into the
   * entry address (*entry, bit 0 cleared) and the PI flag (*pi, bit 0).
   *
   * Returns 0 on success, -EFAULT if the user word cannot be read; the
   * outputs are left untouched in that case.
   */
  static inline int fetch_robust_entry(struct robust_list __user **entry,
  				     struct robust_list __user * __user *head,
  				     int *pi)
  {
  	unsigned long raw;

  	if (get_user(raw, (unsigned long __user *)head))
  		return -EFAULT;

  	/* Bit 0 signals a PI futex; the remaining bits are the pointer. */
  	*entry = (void __user *)(raw & ~1UL);
  	*pi = raw & 1;

  	return 0;
  }
  
  /*
0771dfefc   Ingo Molnar   [PATCH] lightweig...
1793
1794
1795
1796
1797
1798
1799
1800
   * Walk curr->robust_list (very carefully, it's a userspace list!)
   * and mark any locks found there dead, and notify any waiters.
   *
   * We silently return on any sign of list-walking problem.
   */
  void exit_robust_list(struct task_struct *curr)
  {
  	struct robust_list_head __user *head = curr->robust_list;
9f96cb1e8   Martin Schwidefsky   robust futex thre...
1801
1802
  	struct robust_list __user *entry, *next_entry, *pending;
  	unsigned int limit = ROBUST_LIST_LIMIT, pi, next_pi, pip;
0771dfefc   Ingo Molnar   [PATCH] lightweig...
1803
  	unsigned long futex_offset;
9f96cb1e8   Martin Schwidefsky   robust futex thre...
1804
  	int rc;
0771dfefc   Ingo Molnar   [PATCH] lightweig...
1805

a0c1e9073   Thomas Gleixner   futex: runtime en...
1806
1807
  	if (!futex_cmpxchg_enabled)
  		return;
0771dfefc   Ingo Molnar   [PATCH] lightweig...
1808
1809
1810
1811
  	/*
  	 * Fetch the list head (which was registered earlier, via
  	 * sys_set_robust_list()):
  	 */
e3f2ddeac   Ingo Molnar   [PATCH] pi-futex:...
1812
  	if (fetch_robust_entry(&entry, &head->list.next, &pi))
0771dfefc   Ingo Molnar   [PATCH] lightweig...
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
  		return;
  	/*
  	 * Fetch the relative futex offset:
  	 */
  	if (get_user(futex_offset, &head->futex_offset))
  		return;
  	/*
  	 * Fetch any possibly pending lock-add first, and handle it
  	 * if it exists:
  	 */
e3f2ddeac   Ingo Molnar   [PATCH] pi-futex:...
1823
  	if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
0771dfefc   Ingo Molnar   [PATCH] lightweig...
1824
  		return;
e3f2ddeac   Ingo Molnar   [PATCH] pi-futex:...
1825

9f96cb1e8   Martin Schwidefsky   robust futex thre...
1826
  	next_entry = NULL;	/* avoid warning with gcc */
0771dfefc   Ingo Molnar   [PATCH] lightweig...
1827
1828
  	while (entry != &head->list) {
  		/*
9f96cb1e8   Martin Schwidefsky   robust futex thre...
1829
1830
1831
1832
1833
  		 * Fetch the next entry in the list before calling
  		 * handle_futex_death:
  		 */
  		rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi);
  		/*
0771dfefc   Ingo Molnar   [PATCH] lightweig...
1834
  		 * A pending lock might already be on the list, so
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1835
  		 * don't process it twice:
0771dfefc   Ingo Molnar   [PATCH] lightweig...
1836
1837
  		 */
  		if (entry != pending)
ba46df984   Al Viro   [PATCH] __user an...
1838
  			if (handle_futex_death((void __user *)entry + futex_offset,
e3f2ddeac   Ingo Molnar   [PATCH] pi-futex:...
1839
  						curr, pi))
0771dfefc   Ingo Molnar   [PATCH] lightweig...
1840
  				return;
9f96cb1e8   Martin Schwidefsky   robust futex thre...
1841
  		if (rc)
0771dfefc   Ingo Molnar   [PATCH] lightweig...
1842
  			return;
9f96cb1e8   Martin Schwidefsky   robust futex thre...
1843
1844
  		entry = next_entry;
  		pi = next_pi;
0771dfefc   Ingo Molnar   [PATCH] lightweig...
1845
1846
1847
1848
1849
1850
1851
1852
  		/*
  		 * Avoid excessively long or circular lists:
  		 */
  		if (!--limit)
  			break;
  
  		cond_resched();
  	}
9f96cb1e8   Martin Schwidefsky   robust futex thre...
1853
1854
1855
1856
  
  	if (pending)
  		handle_futex_death((void __user *)pending + futex_offset,
  				   curr, pip);
0771dfefc   Ingo Molnar   [PATCH] lightweig...
1857
  }
c19384b5b   Pierre Peiffer   Make futex_wait()...
1858
  /*
   * do_futex - decode a futex operation word and dispatch to its handler.
   *
   * @op holds the command (extracted with FUTEX_CMD_MASK) together with the
   * FUTEX_PRIVATE_FLAG and FUTEX_CLOCK_REALTIME modifier bits.  The meaning
   * of @val, @val2, @val3, @uaddr2 and @timeout depends on the command; they
   * are handed to the selected handler as-is, except that FUTEX_WAIT and
   * FUTEX_WAKE force @val3 to FUTEX_BITSET_MATCH_ANY.
   *
   * Returns the handler's result.  -ENOSYS is returned for an unknown
   * command, for FUTEX_CLOCK_REALTIME combined with anything other than
   * FUTEX_WAIT_BITSET, and for the PI commands while futex_cmpxchg_enabled
   * is clear.
   */
  long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
  		u32 __user *uaddr2, u32 val2, u32 val3)
  {
  	int cmd = op & FUTEX_CMD_MASK;
  	/* A futex is treated as shared unless the private flag is given. */
  	int fshared = !(op & FUTEX_PRIVATE_FLAG);
  	int clockrt = op & FUTEX_CLOCK_REALTIME;
  	/* Stays -ENOSYS for unknown commands and for disabled PI support. */
  	int ret = -ENOSYS;

  	/* The realtime clock modifier is accepted for FUTEX_WAIT_BITSET only. */
  	if (clockrt && cmd != FUTEX_WAIT_BITSET)
  		return -ENOSYS;

  	switch (cmd) {
  	case FUTEX_WAIT:
  		/* A plain wait is a bitset wait that matches every bit. */
  		val3 = FUTEX_BITSET_MATCH_ANY;
  		/* fall through */
  	case FUTEX_WAIT_BITSET:
  		ret = futex_wait(uaddr, fshared, val, timeout, val3, clockrt);
  		break;

  	case FUTEX_WAKE:
  		/* A plain wake is a bitset wake that matches every bit. */
  		val3 = FUTEX_BITSET_MATCH_ANY;
  		/* fall through */
  	case FUTEX_WAKE_BITSET:
  		ret = futex_wake(uaddr, fshared, val, val3);
  		break;

  	case FUTEX_REQUEUE:
  		/* No comparison value is supplied for the plain requeue. */
  		ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL);
  		break;

  	case FUTEX_CMP_REQUEUE:
  		ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3);
  		break;

  	case FUTEX_WAKE_OP:
  		ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3);
  		break;

  	/* The PI commands are only dispatched when futex_cmpxchg_enabled is set. */
  	case FUTEX_LOCK_PI:
  		if (!futex_cmpxchg_enabled)
  			break;
  		ret = futex_lock_pi(uaddr, fshared, val, timeout, 0);
  		break;

  	case FUTEX_UNLOCK_PI:
  		if (!futex_cmpxchg_enabled)
  			break;
  		ret = futex_unlock_pi(uaddr, fshared);
  		break;

  	case FUTEX_TRYLOCK_PI:
  		if (!futex_cmpxchg_enabled)
  			break;
  		ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1);
  		break;

  	default:
  		/* Unknown command: ret already holds -ENOSYS. */
  		break;
  	}

  	return ret;
  }
17da2bd90   Heiko Carstens   [CVE-2009-0029] S...
1909
1910
1911
  /*
   * futex system call entry point.
   *
   * The 'utime' argument is overloaded by command:
   *  - FUTEX_WAIT, FUTEX_LOCK_PI, FUTEX_WAIT_BITSET: if non-NULL it points
   *    to a user-space timespec that is copied in, validated and converted
   *    to a ktime_t timeout.  For FUTEX_WAIT the value is added to
   *    ktime_get(); for the other two it is passed on as converted.
   *  - FUTEX_REQUEUE, FUTEX_CMP_REQUEUE: the pointer value itself is the
   *    requeue parameter.
   *  - FUTEX_WAKE_OP: the pointer value itself is the number of waiters
   *    to wake.
   *
   * Returns -EFAULT if the timespec cannot be copied from user space,
   * -EINVAL if it fails timespec_valid(), otherwise whatever do_futex()
   * returns.
   */
  SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
  		struct timespec __user *, utime, u32 __user *, uaddr2,
  		u32, val3)
  {
  	int cmd = op & FUTEX_CMD_MASK;
  	ktime_t expiry, *timeout = NULL;
  	struct timespec ts;
  	u32 val2 = 0;

  	switch (cmd) {
  	case FUTEX_WAIT:
  	case FUTEX_LOCK_PI:
  	case FUTEX_WAIT_BITSET:
  		/* A NULL utime means no timeout is passed down. */
  		if (!utime)
  			break;
  		if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
  			return -EFAULT;
  		if (!timespec_valid(&ts))
  			return -EINVAL;

  		expiry = timespec_to_ktime(ts);
  		/* Only FUTEX_WAIT has the current time added to its timeout. */
  		if (cmd == FUTEX_WAIT)
  			expiry = ktime_add_safe(ktime_get(), expiry);
  		timeout = &expiry;
  		break;

  	case FUTEX_REQUEUE:
  	case FUTEX_CMP_REQUEUE:
  	case FUTEX_WAKE_OP:
  		/* Here 'utime' carries an integer, not a pointer. */
  		val2 = (u32) (unsigned long) utime;
  		break;

  	default:
  		break;
  	}

  	return do_futex(uaddr, op, val, timeout, uaddr2, val2, val3);
  }
f6d107fb1   Benjamin Herrenschmidt   Give futex init a...
1940
  /*
   * futex_init - one-time futex setup.
   *
   * Probes whether the architecture's atomic futex cmpxchg works and then
   * initialises the chain and lock of every entry in futex_queues.
   * Always returns 0.
   */
  static int __init futex_init(void)
  {
  	u32 probe;
  	int i = 0;

  	/*
  	 * Deliberately provoke a failure: some architectures only detect
  	 * at runtime whether futex_atomic_cmpxchg_inatomic() is usable,
  	 * and we need to know before entering any of the complex code
  	 * paths (and to refuse robust list registration when it is not).
  	 * A NULL address is guaranteed to fault, so a functional
  	 * implementation yields -EFAULT while a non-functional one
  	 * yields -ENOSYS.
  	 */
  	probe = cmpxchg_futex_value_locked(NULL, 0, 0);
  	if (probe == -EFAULT)
  		futex_cmpxchg_enabled = 1;

  	/* Set up the waiter list head and its lock for each queue entry. */
  	while (i < ARRAY_SIZE(futex_queues)) {
  		plist_head_init(&futex_queues[i].chain, &futex_queues[i].lock);
  		spin_lock_init(&futex_queues[i].lock);
  		i++;
  	}

  	return 0;
  }
f6d107fb1   Benjamin Herrenschmidt   Give futex init a...
1964
  /* Register futex_init() with the kernel's initcall mechanism. */
  __initcall(futex_init);