Blame view

kernel/futex.c 68.1 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
8
9
10
  /*
   *  Fast Userspace Mutexes (which I call "Futexes!").
   *  (C) Rusty Russell, IBM 2002
   *
   *  Generalized futexes, futex requeueing, misc fixes by Ingo Molnar
   *  (C) Copyright 2003 Red Hat Inc, All Rights Reserved
   *
   *  Removed page pinning, fix privately mapped COW pages and other cleanups
   *  (C) Copyright 2003, 2004 Jamie Lokier
   *
0771dfefc   Ingo Molnar   [PATCH] lightweig...
11
12
13
14
   *  Robust futex support started by Ingo Molnar
   *  (C) Copyright 2006 Red Hat Inc, All Rights Reserved
   *  Thanks to Thomas Gleixner for suggestions, analysis and fixes.
   *
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
15
16
17
18
   *  PI-futex support started by Ingo Molnar and Thomas Gleixner
   *  Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
   *  Copyright (C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
   *
34f01cc1f   Eric Dumazet   FUTEX: new PRIVAT...
19
20
21
   *  PRIVATE futexes by Eric Dumazet
   *  Copyright (C) 2007 Eric Dumazet <dada1@cosmosbay.com>
   *
52400ba94   Darren Hart   futex: add requeu...
22
23
24
25
   *  Requeue-PI support by Darren Hart <dvhltc@us.ibm.com>
   *  Copyright (C) IBM Corporation, 2009
   *  Thanks to Thomas Gleixner for conceptual design and careful reviews.
   *
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
   *  Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly
   *  enough at me, Linus for the original (flawed) idea, Matthew
   *  Kirkwood for proof-of-concept implementation.
   *
   *  "The futexes are also cursed."
   *  "But they come in a choice of three flavours!"
   *
   *  This program is free software; you can redistribute it and/or modify
   *  it under the terms of the GNU General Public License as published by
   *  the Free Software Foundation; either version 2 of the License, or
   *  (at your option) any later version.
   *
   *  This program is distributed in the hope that it will be useful,
   *  but WITHOUT ANY WARRANTY; without even the implied warranty of
   *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   *  GNU General Public License for more details.
   *
   *  You should have received a copy of the GNU General Public License
   *  along with this program; if not, write to the Free Software
   *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   */
  #include <linux/slab.h>
  #include <linux/poll.h>
  #include <linux/fs.h>
  #include <linux/file.h>
  #include <linux/jhash.h>
  #include <linux/init.h>
  #include <linux/futex.h>
  #include <linux/mount.h>
  #include <linux/pagemap.h>
  #include <linux/syscalls.h>
7ed20e1ad   Jesper Juhl   [PATCH] convert t...
57
  #include <linux/signal.h>
9adef58b1   Rusty Russell   futex: get_futex_...
58
  #include <linux/module.h>
fd5eea421   Andrey Mirkin   change inotifyfs ...
59
  #include <linux/magic.h>
b488893a3   Pavel Emelyanov   pid namespaces: c...
60
61
  #include <linux/pid.h>
  #include <linux/nsproxy.h>
4732efbeb   Jakub Jelinek   [PATCH] FUTEX_WAK...
62
  #include <asm/futex.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
63

c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
64
  #include "rtmutex_common.h"
a0c1e9073   Thomas Gleixner   futex: runtime en...
65
  /*
   * Flag tested by exit_pi_state_list(), which returns immediately while
   * it is zero.
   * NOTE(review): presumably set once at init time after probing that
   * futex_atomic_cmpxchg_inatomic() works - confirm against the init code.
   */
  int __read_mostly futex_cmpxchg_enabled;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
66
67
68
  /*
   * log2 of the number of hash buckets: futex_queues[] holds
   * 1 << FUTEX_HASHBITS entries, i.e. 16 when CONFIG_BASE_SMALL is
   * non-zero and 256 otherwise.
   */
  #define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
  
  /*
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
   * Priority Inheritance state:
   */
  struct futex_pi_state {
  	/*
  	 * Node on the owner task's pi_state_list.  Entries that are still
  	 * linked when the owner exits are torn down by
  	 * exit_pi_state_list():
  	 */
  	struct list_head list;
  
  	/*
  	 * The PI object: the rt_mutex that backs this futex in the kernel.
  	 */
  	struct rt_mutex pi_mutex;
  
  	/* Task recorded as owner; set to NULL once the owner is cleaned up */
  	struct task_struct *owner;
  	/* Reference count; free_pi_state() frees or recycles the object at 0 */
  	atomic_t refcount;
  
  	/* Futex key, kept so exit cleanup can find the hash bucket */
  	union futex_key key;
  };
d8d88fbb1   Darren Hart   futex: Correct fu...
88
89
90
91
92
93
94
95
96
97
98
  /**
   * struct futex_q - The hashed futex queue entry, one per waiting task
   * @task:		the task waiting on the futex
   * @lock_ptr:		the hash bucket lock
   * @key:		the key the futex is hashed on
   * @pi_state:		optional priority inheritance state
   * @rt_waiter:		rt_waiter storage for use with requeue_pi
   * @requeue_pi_key:	the requeue_pi target futex key
   * @bitset:		bitset for the optional bitmasked wakeup
   *
   * We use this hashed waitqueue, instead of a normal wait_queue_t, so
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
99
100
101
   * we can wake only the relevant ones (hashed queues may be shared).
   *
   * A futex_q has a woken state, just like tasks have TASK_RUNNING.
ec92d0829   Pierre Peiffer   futex priority ba...
102
   * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
103
   * The order of wakeup is always to make the first condition true, then
d8d88fbb1   Darren Hart   futex: Correct fu...
104
105
106
107
   * the second.
   *
   * PI futexes are typically woken before they are removed from the hash list via
   * the rt_mutex code. See unqueue_me_pi().
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
108
109
   */
  struct futex_q {
ec92d0829   Pierre Peiffer   futex priority ba...
110
  	/* plist node linking this waiter into futex_hash_bucket.chain */
  	struct plist_node list;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
111

d8d88fbb1   Darren Hart   futex: Correct fu...
112
  	/* the task waiting on the futex; wake_futex() wakes it */
  	struct task_struct *task;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
113
  	/* the hash bucket lock; wake_futex() stores NULL here on wakeup */
  	spinlock_t *lock_ptr;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
114
  	/* the key the futex is hashed on */
  	union futex_key key;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
115
  	/* optional priority inheritance state (NULL for non-PI waiters) */
  	struct futex_pi_state *pi_state;
52400ba94   Darren Hart   futex: add requeu...
116
  	/* rt_waiter storage for use with requeue_pi */
  	struct rt_mutex_waiter *rt_waiter;
84bc4af59   Darren Hart   futex: Detect mis...
117
  	/* the requeue_pi target futex key */
  	union futex_key *requeue_pi_key;
cd689985c   Thomas Gleixner   futex: Add bitset...
118
  	/* bitset for the optional bitmasked wakeup */
  	u32 bitset;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
119
120
121
  };
  
  /*
b2d0994b1   Darren Hart   futex: update fut...
122
123
124
   * Hash buckets are shared by all the futex_keys that hash to the same
   * location.  Each key may have multiple futex_q structures, one for each task
   * waiting on a futex.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
125
126
   */
  struct futex_hash_bucket {
ec92d0829   Pierre Peiffer   futex priority ba...
127
128
  	/* protects 'chain'; futex_q.lock_ptr points at this lock */
  	spinlock_t lock;
  	/* the futex_q entries of every key that hashes to this bucket */
  	struct plist_head chain;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
129
130
131
  };
  
  /* The global hash table, indexed by hash_futex(): 1 << FUTEX_HASHBITS buckets */
  static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS];
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
  /*
   * We hash on the keys returned from get_futex_key (see below).
   */
  static struct futex_hash_bucket *hash_futex(union futex_key *key)
  {
  	u32 hash = jhash2((u32*)&key->both.word,
  			  (sizeof(key->both.word)+sizeof(key->both.ptr))/4,
  			  key->both.offset);
  	return &futex_queues[hash & ((1 << FUTEX_HASHBITS)-1)];
  }
  
  /*
   * Return 1 if two futex_keys are equal, 0 otherwise.
   */
  static inline int match_futex(union futex_key *key1, union futex_key *key2)
  {
2bc872036   Darren Hart   futex: Check for ...
148
149
  	return (key1 && key2
  		&& key1->both.word == key2->both.word
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
150
151
152
  		&& key1->both.ptr == key2->both.ptr
  		&& key1->both.offset == key2->both.offset);
  }
38d47c1b7   Peter Zijlstra   futex: rely on ge...
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
  /*
   * Take a reference to the resource addressed by a key.
   *
   * Keys flagged FUT_OFF_INODE bump the inode's i_count, keys flagged
   * FUT_OFF_MMSHARED bump the mm's mm_count.  A key whose 'ptr' is NULL,
   * or whose offset matches neither flag, is left untouched.
   *
   * Only atomic increments are done here, so this can be called while
   * holding spinlocks.
   */
  static void get_futex_key_refs(union futex_key *key)
  {
  	int kind;

  	if (!key->both.ptr)
  		return;

  	kind = key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED);

  	if (kind == FUT_OFF_INODE)
  		atomic_inc(&key->shared.inode->i_count);
  	else if (kind == FUT_OFF_MMSHARED)
  		atomic_inc(&key->private.mm->mm_count);
  }
  
  /*
   * Drop a reference to the resource addressed by a key.
   * The hash bucket spinlock must not be held.
   */
  static void drop_futex_key_refs(union futex_key *key)
  {
90621c40c   Darren Hart   futex: catch cert...
179
180
181
  	if (!key->both.ptr) {
  		/* If we're here then we tried to put a key we failed to get */
  		WARN_ON_ONCE(1);
38d47c1b7   Peter Zijlstra   futex: rely on ge...
182
  		return;
90621c40c   Darren Hart   futex: catch cert...
183
  	}
38d47c1b7   Peter Zijlstra   futex: rely on ge...
184
185
186
187
188
189
190
191
192
193
  
  	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
  	case FUT_OFF_INODE:
  		iput(key->shared.inode);
  		break;
  	case FUT_OFF_MMSHARED:
  		mmdrop(key->private.mm);
  		break;
  	}
  }
34f01cc1f   Eric Dumazet   FUTEX: new PRIVAT...
194
  /**
d96ee56ce   Darren Hart   futex: Make funct...
195
196
197
198
   * get_futex_key() - Get parameters which are the keys for a futex
   * @uaddr:	virtual address of the futex
   * @fshared:	0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
   * @key:	address where result is stored.
34f01cc1f   Eric Dumazet   FUTEX: new PRIVAT...
199
200
201
   *
   * Returns a negative error code or 0
   * The key words are stored in *key on success.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
202
   *
f3a43f3f6   Josef "Jeff" Sipek   [PATCH] kernel: c...
203
   * For shared mappings, it's (page->index, vma->vm_file->f_path.dentry->d_inode,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
204
205
206
   * offset_within_page).  For private mappings, it's (uaddr, current->mm).
   * We can usually work out the index without swapping in the page.
   *
b2d0994b1   Darren Hart   futex: update fut...
207
   * lock_page() might sleep, so the caller must not hold a spinlock.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
208
   */
64d1304a6   Thomas Gleixner   futex: setup writ...
209
  static int
7485d0d37   KOSAKI Motohiro   futexes: Remove r...
210
  get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
211
  {
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
212
  	unsigned long address = (unsigned long)uaddr;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
213
  	struct mm_struct *mm = current->mm;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
214
215
216
217
218
219
  	struct page *page;
  	int err;
  
  	/*
  	 * The futex address must be "naturally" aligned.
  	 */
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
220
  	key->both.offset = address % PAGE_SIZE;
34f01cc1f   Eric Dumazet   FUTEX: new PRIVAT...
221
  	if (unlikely((address % sizeof(u32)) != 0))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
222
  		return -EINVAL;
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
223
  	address -= key->both.offset;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
224
225
  
  	/*
34f01cc1f   Eric Dumazet   FUTEX: new PRIVAT...
226
227
228
229
230
231
232
  	 * PROCESS_PRIVATE futexes are fast.
  	 * As the mm cannot disappear under us and the 'key' only needs
  	 * virtual address, we dont even have to find the underlying vma.
  	 * Note : We do have to check 'uaddr' is a valid user address,
  	 *        but access_ok() should be faster than find_vma()
  	 */
  	if (!fshared) {
7485d0d37   KOSAKI Motohiro   futexes: Remove r...
233
  		if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))))
34f01cc1f   Eric Dumazet   FUTEX: new PRIVAT...
234
235
236
  			return -EFAULT;
  		key->private.mm = mm;
  		key->private.address = address;
42569c399   Peter Zijlstra   futex: fixup get_...
237
  		get_futex_key_refs(key);
34f01cc1f   Eric Dumazet   FUTEX: new PRIVAT...
238
239
  		return 0;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
240

38d47c1b7   Peter Zijlstra   futex: rely on ge...
241
  again:
7485d0d37   KOSAKI Motohiro   futexes: Remove r...
242
  	err = get_user_pages_fast(address, 1, 1, &page);
38d47c1b7   Peter Zijlstra   futex: rely on ge...
243
244
  	if (err < 0)
  		return err;
ce2ae53b7   Sonny Rao   futexes: Fix infi...
245
  	page = compound_head(page);
38d47c1b7   Peter Zijlstra   futex: rely on ge...
246
247
248
249
250
251
  	lock_page(page);
  	if (!page->mapping) {
  		unlock_page(page);
  		put_page(page);
  		goto again;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
252
253
254
255
256
257
  
  	/*
  	 * Private mappings are handled in a simple way.
  	 *
  	 * NOTE: When userspace waits on a MAP_SHARED mapping, even if
  	 * it's a read-only handle, it's expected that futexes attach to
38d47c1b7   Peter Zijlstra   futex: rely on ge...
258
  	 * the object not the particular process.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
259
  	 */
38d47c1b7   Peter Zijlstra   futex: rely on ge...
260
261
  	if (PageAnon(page)) {
  		key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
262
  		key->private.mm = mm;
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
263
  		key->private.address = address;
38d47c1b7   Peter Zijlstra   futex: rely on ge...
264
265
266
267
  	} else {
  		key->both.offset |= FUT_OFF_INODE; /* inode-based key */
  		key->shared.inode = page->mapping->host;
  		key->shared.pgoff = page->index;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
268
  	}
38d47c1b7   Peter Zijlstra   futex: rely on ge...
269
  	get_futex_key_refs(key);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
270

38d47c1b7   Peter Zijlstra   futex: rely on ge...
271
272
273
  	unlock_page(page);
  	put_page(page);
  	return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
274
  }
38d47c1b7   Peter Zijlstra   futex: rely on ge...
275
  /*
   * Release a key obtained from get_futex_key() by dropping the
   * reference it holds.  @fshared is accepted but not used here.
   */
  static inline void put_futex_key(int fshared, union futex_key *key)
  {
  	drop_futex_key_refs(key);
  }
d96ee56ce   Darren Hart   futex: Make funct...
280
281
  /**
   * fault_in_user_writeable() - Fault in user address and verify RW access
d0725992c   Thomas Gleixner   futex: Fix the wr...
282
283
284
285
286
287
288
289
290
291
292
293
   * @uaddr:	pointer to faulting user space address
   *
   * Slow path to fixup the fault we just took in the atomic write
   * access to @uaddr.
   *
   * We have no generic implementation of a non destructive write to the
   * user address. We know that we faulted in the atomic pagefault
   * disabled section so we can as well avoid the #PF overhead by
   * calling get_user_pages() right away.
   */
  static int fault_in_user_writeable(u32 __user *uaddr)
  {
  	struct mm_struct *mm = current->mm;
  	unsigned long addr = (unsigned long)uaddr;
  	int rc;

  	/*
  	 * Ask for one page with write access; no page or vma array is
  	 * passed in, only the result code is of interest.
  	 */
  	down_read(&mm->mmap_sem);
  	rc = get_user_pages(current, mm, addr, 1, 1, 0, NULL, NULL);
  	up_read(&mm->mmap_sem);

  	/* Negative values are errors; anything else is reported as 0. */
  	if (rc < 0)
  		return rc;
  	return 0;
  }
4b1c486b3   Darren Hart   futex: add helper...
303
304
  /**
   * futex_top_waiter() - Return the highest priority waiter on a futex
d96ee56ce   Darren Hart   futex: Make funct...
305
306
   * @hb:		the hash bucket the futex_q's reside in
   * @key:	the futex key (to distinguish it from other futexes' futex_q's)
4b1c486b3   Darren Hart   futex: add helper...
307
308
309
310
311
312
313
314
315
316
317
318
319
320
   *
   * Must be called with the hb lock held.
   */
  static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb,
  					union futex_key *key)
  {
  	struct futex_q *this;
  
  	plist_for_each_entry(this, &hb->chain, list) {
  		if (match_futex(&this->key, key))
  			return this;
  	}
  	return NULL;
  }
36cf3b5c3   Thomas Gleixner   FUTEX: Tidy up th...
321
322
323
324
325
326
327
328
329
330
331
332
  /*
   * Run futex_atomic_cmpxchg_inatomic() on the user word at @uaddr with
   * page faults disabled, trying to replace @uval by @newval.
   *
   * The raw result of the cmpxchg helper is handed back; callers in this
   * file compare it against -EFAULT to detect a fault.
   */
  static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval)
  {
  	u32 observed;

  	pagefault_disable();
  	observed = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
  	pagefault_enable();

  	return observed;
  }
  
  /*
   * Read the u32 at user address @from into *@dest with page faults
   * disabled.
   *
   * Returns 0 on success, -EFAULT when the copy reports a failure.
   */
  static int get_futex_value_locked(u32 *dest, u32 __user *from)
  {
  	int left;

  	pagefault_disable();
  	left = __copy_from_user_inatomic(dest, from, sizeof(u32));
  	pagefault_enable();

  	if (left)
  		return -EFAULT;
  	return 0;
  }
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
341
342
343
344
345
346
347
348
349
350
  
  /*
   * PI code:
   */
  static int refill_pi_state_cache(void)
  {
  	struct futex_pi_state *pi_state;
  
  	if (likely(current->pi_state_cache))
  		return 0;
4668edc33   Burman Yan   [PATCH] kernel co...
351
  	pi_state = kzalloc(sizeof(*pi_state), GFP_KERNEL);
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
352
353
354
  
  	if (!pi_state)
  		return -ENOMEM;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
355
356
357
358
  	INIT_LIST_HEAD(&pi_state->list);
  	/* pi_mutex gets initialized later */
  	pi_state->owner = NULL;
  	atomic_set(&pi_state->refcount, 1);
38d47c1b7   Peter Zijlstra   futex: rely on ge...
359
  	pi_state->key = FUTEX_KEY_INIT;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
  
  	current->pi_state_cache = pi_state;
  
  	return 0;
  }
  
  /*
   * Hand out the pi_state cached in current->pi_state_cache and empty
   * the cache.  Warns when the cache is empty; the (then NULL) pointer
   * is returned regardless.
   */
  static struct futex_pi_state * alloc_pi_state(void)
  {
  	struct futex_pi_state *cached = current->pi_state_cache;

  	WARN_ON(!cached);
  	current->pi_state_cache = NULL;

  	return cached;
  }
  
  /*
   * Drop one reference on @pi_state.
   *
   * When the last reference goes away the pi_state is unlinked from its
   * owner (if it still has one), the pi_mutex is proxy-unlocked, and the
   * object is either freed or parked in current->pi_state_cache for
   * reuse.
   */
  static void free_pi_state(struct futex_pi_state *pi_state)
  {
  	/* Other references remain: nothing else to do. */
  	if (!atomic_dec_and_test(&pi_state->refcount))
  		return;

  	/*
  	 * If pi_state->owner is NULL, the owner is most probably dying
  	 * and has cleaned up the pi_state already
  	 */
  	if (pi_state->owner) {
  		raw_spin_lock_irq(&pi_state->owner->pi_lock);
  		list_del_init(&pi_state->list);
  		raw_spin_unlock_irq(&pi_state->owner->pi_lock);

  		rt_mutex_proxy_unlock(&pi_state->pi_mutex, pi_state->owner);
  	}

  	/* The per-task cache holds a single object; free any surplus. */
  	if (current->pi_state_cache) {
  		kfree(pi_state);
  		return;
  	}

  	/*
  	 * pi_state->list is already empty.
  	 * clear pi_state->owner.
  	 * refcount is at 0 - put it back to 1.
  	 */
  	pi_state->owner = NULL;
  	atomic_set(&pi_state->refcount, 1);
  	current->pi_state_cache = pi_state;
  }
  
  /*
   * Look up the task based on what TID userspace gave us.
   * We don't trust it.
   *
   * The lookup runs under rcu_read_lock().  Returns the task with a
   * reference taken via get_task_struct(), or NULL when
   * find_task_by_vpid() finds nothing for @pid.
   */
  static struct task_struct * futex_find_get_task(pid_t pid)
  {
  	struct task_struct *task;

  	rcu_read_lock();
  	task = find_task_by_vpid(pid);
  	if (task != NULL)
  		get_task_struct(task);
  	rcu_read_unlock();

  	return task;
  }
  
  /*
   * This task is holding PI mutexes at exit time => bad.
   * Kernel cleans up PI-state, but userspace is likely hosed.
   * (Robust-futex cleanup is separate and might save the day for userspace.)
   */
  void exit_pi_state_list(struct task_struct *curr)
  {
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
431
432
  	struct list_head *next, *head = &curr->pi_state_list;
  	struct futex_pi_state *pi_state;
627371d73   Ingo Molnar   [PATCH] pi-futex:...
433
  	struct futex_hash_bucket *hb;
38d47c1b7   Peter Zijlstra   futex: rely on ge...
434
  	union futex_key key = FUTEX_KEY_INIT;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
435

a0c1e9073   Thomas Gleixner   futex: runtime en...
436
437
  	if (!futex_cmpxchg_enabled)
  		return;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
438
439
440
  	/*
  	 * We are a ZOMBIE and nobody can enqueue itself on
  	 * pi_state_list anymore, but we have to be careful
627371d73   Ingo Molnar   [PATCH] pi-futex:...
441
  	 * versus waiters unqueueing themselves:
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
442
  	 */
1d6154825   Thomas Gleixner   sched: Convert pi...
443
  	raw_spin_lock_irq(&curr->pi_lock);
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
444
445
446
447
448
  	while (!list_empty(head)) {
  
  		next = head->next;
  		pi_state = list_entry(next, struct futex_pi_state, list);
  		key = pi_state->key;
627371d73   Ingo Molnar   [PATCH] pi-futex:...
449
  		hb = hash_futex(&key);
1d6154825   Thomas Gleixner   sched: Convert pi...
450
  		raw_spin_unlock_irq(&curr->pi_lock);
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
451

c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
452
  		spin_lock(&hb->lock);
1d6154825   Thomas Gleixner   sched: Convert pi...
453
  		raw_spin_lock_irq(&curr->pi_lock);
627371d73   Ingo Molnar   [PATCH] pi-futex:...
454
455
456
457
  		/*
  		 * We dropped the pi-lock, so re-check whether this
  		 * task still owns the PI-state:
  		 */
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
458
459
460
461
  		if (head->next != next) {
  			spin_unlock(&hb->lock);
  			continue;
  		}
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
462
  		WARN_ON(pi_state->owner != curr);
627371d73   Ingo Molnar   [PATCH] pi-futex:...
463
464
  		WARN_ON(list_empty(&pi_state->list));
  		list_del_init(&pi_state->list);
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
465
  		pi_state->owner = NULL;
1d6154825   Thomas Gleixner   sched: Convert pi...
466
  		raw_spin_unlock_irq(&curr->pi_lock);
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
467
468
469
470
  
  		rt_mutex_unlock(&pi_state->pi_mutex);
  
  		spin_unlock(&hb->lock);
1d6154825   Thomas Gleixner   sched: Convert pi...
471
  		raw_spin_lock_irq(&curr->pi_lock);
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
472
  	}
1d6154825   Thomas Gleixner   sched: Convert pi...
473
  	raw_spin_unlock_irq(&curr->pi_lock);
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
474
475
476
  }
  
  static int
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
477
478
  lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
  		union futex_key *key, struct futex_pi_state **ps)
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
479
480
481
  {
  	struct futex_pi_state *pi_state = NULL;
  	struct futex_q *this, *next;
ec92d0829   Pierre Peiffer   futex priority ba...
482
  	struct plist_head *head;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
483
  	struct task_struct *p;
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
484
  	pid_t pid = uval & FUTEX_TID_MASK;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
485
486
  
  	head = &hb->chain;
ec92d0829   Pierre Peiffer   futex priority ba...
487
  	plist_for_each_entry_safe(this, next, head, list) {
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
488
  		if (match_futex(&this->key, key)) {
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
489
490
491
492
493
  			/*
  			 * Another waiter already exists - bump up
  			 * the refcount and return its pi_state:
  			 */
  			pi_state = this->pi_state;
06a9ec291   Thomas Gleixner   [PATCH] pi-futex:...
494
495
496
497
498
  			/*
  			 * Userspace might have messed up non PI and PI futexes
  			 */
  			if (unlikely(!pi_state))
  				return -EINVAL;
627371d73   Ingo Molnar   [PATCH] pi-futex:...
499
  			WARN_ON(!atomic_read(&pi_state->refcount));
59647b6ac   Thomas Gleixner   futex: Handle fut...
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
  
  			/*
  			 * When pi_state->owner is NULL then the owner died
  			 * and another waiter is on the fly. pi_state->owner
  			 * is fixed up by the task which acquires
  			 * pi_state->rt_mutex.
  			 *
  			 * We do not check for pid == 0 which can happen when
  			 * the owner died and robust_list_exit() cleared the
  			 * TID.
  			 */
  			if (pid && pi_state->owner) {
  				/*
  				 * Bail out if user space manipulated the
  				 * futex value.
  				 */
  				if (pid != task_pid_vnr(pi_state->owner))
  					return -EINVAL;
  			}
627371d73   Ingo Molnar   [PATCH] pi-futex:...
519

c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
520
  			atomic_inc(&pi_state->refcount);
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
521
  			*ps = pi_state;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
522
523
524
525
526
527
  
  			return 0;
  		}
  	}
  
  	/*
e3f2ddeac   Ingo Molnar   [PATCH] pi-futex:...
528
  	 * We are the first waiter - try to look up the real owner and attach
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
529
  	 * the new pi_state to it, but bail out when TID = 0
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
530
  	 */
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
531
  	if (!pid)
e3f2ddeac   Ingo Molnar   [PATCH] pi-futex:...
532
  		return -ESRCH;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
533
  	p = futex_find_get_task(pid);
7a0ea09ad   Michal Hocko   futex: futex_find...
534
535
  	if (!p)
  		return -ESRCH;
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
536
537
538
539
540
541
542
  
  	/*
  	 * We need to look at the task state flags to figure out,
  	 * whether the task is exiting. To protect against the do_exit
  	 * change of the task flags, we do this protected by
  	 * p->pi_lock:
  	 */
1d6154825   Thomas Gleixner   sched: Convert pi...
543
  	raw_spin_lock_irq(&p->pi_lock);
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
544
545
546
547
548
549
550
  	if (unlikely(p->flags & PF_EXITING)) {
  		/*
  		 * The task is on the way out. When PF_EXITPIDONE is
  		 * set, we know that the task has finished the
  		 * cleanup:
  		 */
  		int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN;
1d6154825   Thomas Gleixner   sched: Convert pi...
551
  		raw_spin_unlock_irq(&p->pi_lock);
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
552
553
554
  		put_task_struct(p);
  		return ret;
  	}
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
555
556
557
558
559
560
561
562
563
564
  
  	pi_state = alloc_pi_state();
  
  	/*
  	 * Initialize the pi_mutex in locked state and make 'p'
  	 * the owner of it:
  	 */
  	rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);
  
  	/* Store the key for possible exit cleanups: */
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
565
  	pi_state->key = *key;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
566

627371d73   Ingo Molnar   [PATCH] pi-futex:...
567
  	WARN_ON(!list_empty(&pi_state->list));
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
568
569
  	list_add(&pi_state->list, &p->pi_state_list);
  	pi_state->owner = p;
1d6154825   Thomas Gleixner   sched: Convert pi...
570
  	raw_spin_unlock_irq(&p->pi_lock);
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
571
572
  
  	put_task_struct(p);
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
573
  	*ps = pi_state;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
574
575
576
  
  	return 0;
  }
1a52084d0   Darren Hart   futex: split out ...
577
  /**
d96ee56ce   Darren Hart   futex: Make funct...
578
   * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex
bab5bc9e8   Darren Hart   futex: fixup unlo...
579
580
581
582
583
584
585
586
   * @uaddr:		the pi futex user address
   * @hb:			the pi futex hash bucket
   * @key:		the futex key associated with uaddr and hb
   * @ps:			the pi_state pointer where we store the result of the
   *			lookup
   * @task:		the task to perform the atomic lock work for.  This will
   *			be "current" except in the case of requeue pi.
   * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
1a52084d0   Darren Hart   futex: split out ...
587
588
589
590
591
592
593
594
595
596
597
   *
   * Returns:
   *  0 - ready to wait
   *  1 - acquired the lock
   * <0 - error
   *
   * The hb->lock and futex_key refs shall be held by the caller.
   */
  static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
  				union futex_key *key,
  				struct futex_pi_state **ps,
  				struct task_struct *task, int set_waiters)
  {
  	int lock_taken, ret, ownerdied = 0;
  	u32 uval, newval, curval;

  retry:
  	ret = 0;
  	lock_taken = 0;

  	/*
  	 * To avoid races, we attempt to take the lock here again
  	 * (by doing a 0 -> TID atomic cmpxchg), while holding all
  	 * the locks. It will most likely not succeed.
  	 */
  	newval = task_pid_vnr(task);
  	if (set_waiters)
  		newval |= FUTEX_WAITERS;

  	curval = cmpxchg_futex_value_locked(uaddr, 0, newval);

  	if (unlikely(curval == -EFAULT))
  		return -EFAULT;

  	/*
  	 * Detect deadlocks: the TID stored in the futex is already ours.
  	 */
  	if ((unlikely((curval & FUTEX_TID_MASK) == task_pid_vnr(task))))
  		return -EDEADLK;

  	/*
  	 * Surprise - we got the lock. Just return to userspace:
  	 */
  	if (unlikely(!curval))
  		return 1;

  	uval = curval;

  	/*
  	 * Set the FUTEX_WAITERS flag, so the owner will know it has someone
  	 * to wake at the next unlock.
  	 */
  	newval = curval | FUTEX_WAITERS;

  	/*
  	 * The futex is taken over in two situations: the TID field of the
  	 * user value is 0 (no owner recorded), or an earlier pass set
  	 * 'ownerdied' after finding FUTEX_OWNER_DIED in the value.
  	 *
  	 * This is safe as we are protected by the hash bucket lock !
  	 */
  	if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) {
  		/* Keep the OWNER_DIED bit */
  		newval = (curval & ~FUTEX_TID_MASK) | task_pid_vnr(task);
  		ownerdied = 0;
  		lock_taken = 1;
  	}

  	curval = cmpxchg_futex_value_locked(uaddr, uval, newval);

  	if (unlikely(curval == -EFAULT))
  		return -EFAULT;
  	/* The user value changed under us: start over. */
  	if (unlikely(curval != uval))
  		goto retry;

  	/*
  	 * We took the lock due to owner died take over.
  	 */
  	if (unlikely(lock_taken))
  		return 1;

  	/*
  	 * We don't have the lock. Look up the PI state (or create it if
  	 * we are the first waiter):
  	 */
  	ret = lookup_pi_state(uval, hb, key, ps);

  	if (unlikely(ret == -ESRCH)) {
  		/*
  		 * No owner found for this futex. Check if the
  		 * OWNER_DIED bit is set to figure out whether
  		 * this is a robust futex or not.
  		 */
  		if (get_futex_value_locked(&curval, uaddr))
  			return -EFAULT;

  		/*
  		 * We simply start over in case of a robust
  		 * futex. The code above will take the futex
  		 * and return happy.
  		 */
  		if (curval & FUTEX_OWNER_DIED) {
  			ownerdied = 1;
  			goto retry;
  		}
  	}

  	/* 0 (ready to wait) or the error from lookup_pi_state(). */
  	return ret;
  }
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
701
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
702
703
704
705
706
   * The hash bucket lock must be held when this is called.
   * Afterwards, the futex_q must not be accessed.
   */
  static void wake_futex(struct futex_q *q)
  {
f1a11e057   Thomas Gleixner   futex: remove the...
707
  	struct task_struct *p = q->task;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
708
  	/*
f1a11e057   Thomas Gleixner   futex: remove the...
709
710
711
712
713
  	 * We set q->lock_ptr = NULL _before_ we wake up the task. If
  	 * a non futex wake up happens on another CPU then the task
  	 * might exit and p would dereference a non existing task
  	 * struct. Prevent this by holding a reference on p across the
  	 * wake up.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
714
  	 */
f1a11e057   Thomas Gleixner   futex: remove the...
715
716
717
  	get_task_struct(p);
  
  	plist_del(&q->list, &q->list.plist);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
718
  	/*
f1a11e057   Thomas Gleixner   futex: remove the...
719
720
721
722
  	 * The waiting task can free the futex_q as soon as
  	 * q->lock_ptr = NULL is written, without taking any locks. A
  	 * memory barrier is required here to prevent the following
  	 * store to lock_ptr from getting ahead of the plist_del.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
723
  	 */
ccdea2f88   Ralf Baechle   [PATCH] futex: re...
724
  	smp_wmb();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
725
  	q->lock_ptr = NULL;
f1a11e057   Thomas Gleixner   futex: remove the...
726
727
728
  
  	wake_up_state(p, TASK_NORMAL);
  	put_task_struct(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
729
  }
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
730
731
732
733
734
735
736
737
  static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
  {
  	struct futex_pi_state *pi_state = this->pi_state;
  	struct task_struct *prev_owner, *new_owner;
  	u32 curval, newval;

  	/*
  	 * Without a pi_state, or with a pi_state that current does not
  	 * own, the futex is inconsistent: user space fiddled with the
  	 * futex value.
  	 */
  	if (!pi_state || pi_state->owner != current)
  		return -EINVAL;

  	raw_spin_lock(&pi_state->pi_mutex.wait_lock);

  	/*
  	 * The rt_mutex may report no next owner. This happens when we
  	 * have stolen the lock and the original pending owner did not
  	 * enqueue itself back on the rt_mutex. In that case the futex_q
  	 * waiter becomes the pending owner.
  	 */
  	new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
  	if (!new_owner)
  		new_owner = this->task;

  	/*
  	 * Hand the user space value over to the next owner, keeping the
  	 * WAITERS bit set. The word is left untouched when the owner
  	 * died bit is set in uval.
  	 */
  	if (!(uval & FUTEX_OWNER_DIED)) {
  		newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
  		curval = cmpxchg_futex_value_locked(uaddr, uval, newval);

  		/* Fault, or the value changed under us: back out. */
  		if (curval == -EFAULT || curval != uval) {
  			raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
  			return curval == -EFAULT ? -EFAULT : -EINVAL;
  		}
  	}

  	/* Unlink the pi_state from the list of the previous owner ... */
  	prev_owner = pi_state->owner;
  	raw_spin_lock_irq(&prev_owner->pi_lock);
  	WARN_ON(list_empty(&pi_state->list));
  	list_del_init(&pi_state->list);
  	raw_spin_unlock_irq(&prev_owner->pi_lock);

  	/* ... and attach it to the new owner. */
  	raw_spin_lock_irq(&new_owner->pi_lock);
  	WARN_ON(!list_empty(&pi_state->list));
  	list_add(&pi_state->list, &new_owner->pi_state_list);
  	pi_state->owner = new_owner;
  	raw_spin_unlock_irq(&new_owner->pi_lock);

  	raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
  	rt_mutex_unlock(&pi_state->pi_mutex);

  	return 0;
  }
  
  static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
  {
  	/*
  	 * No waiter is queued, so the caller (the owner) releases the
  	 * futex by storing 0. The owner died bit is deliberately not
  	 * carried over.
  	 */
  	u32 prev = cmpxchg_futex_value_locked(uaddr, uval, 0);

  	if (prev == -EFAULT)
  		return -EFAULT;

  	/* The user space value changed under us: let the caller retry. */
  	return prev != uval ? -EAGAIN : 0;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
811
  /*
8b8f319fc   Ingo Molnar   [PATCH] lockdep: ...
812
813
814
815
816
817
818
819
820
821
822
823
824
825
   * Express the locking dependencies for lockdep:
   */
  static inline void
  double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
  {
  	struct futex_hash_bucket *first = hb1, *second = hb2;

  	/* Always take the bucket with the lower address first. */
  	if (hb1 > hb2) {
  		first = hb2;
  		second = hb1;
  	}

  	spin_lock(&first->lock);
  	/* Both keys may hash to one bucket; lock it only once then. */
  	if (first != second)
  		spin_lock_nested(&second->lock, SINGLE_DEPTH_NESTING);
  }
5eb3dc62f   Darren Hart   futex: add double...
826
827
828
  static inline void
  double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
  {
f061d3515   Darren Hart   futex: remove the...
829
  	spin_unlock(&hb1->lock);
88f502fed   Ingo Molnar   futex: remove the...
830
831
  	if (hb1 != hb2)
  		spin_unlock(&hb2->lock);
5eb3dc62f   Darren Hart   futex: add double...
832
  }
8b8f319fc   Ingo Molnar   [PATCH] lockdep: ...
833
  /*
b2d0994b1   Darren Hart   futex: update fut...
834
   * Wake up waiters matching bitset queued on this futex (uaddr).
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
835
   */
c2f9f2015   Peter Zijlstra   futex: cleanup fs...
836
  static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
  {
  	union futex_key key = FUTEX_KEY_INIT;
  	struct futex_hash_bucket *bucket;
  	struct futex_q *waiter, *tmp;
  	struct plist_head *chain;
  	int ret;

  	/* An empty bitset can never match any waiter. */
  	if (!bitset)
  		return -EINVAL;

  	ret = get_futex_key(uaddr, fshared, &key);
  	if (unlikely(ret != 0))
  		return ret;

  	bucket = hash_futex(&key);
  	spin_lock(&bucket->lock);
  	chain = &bucket->chain;

  	/* ret is 0 here and doubles as the count of woken waiters. */
  	plist_for_each_entry_safe(waiter, tmp, chain, list) {
  		if (!match_futex(&waiter->key, &key))
  			continue;

  		/* PI waiters must not be woken through this path. */
  		if (waiter->pi_state || waiter->rt_waiter) {
  			ret = -EINVAL;
  			break;
  		}

  		/* Skip waiters sharing no bit with the caller's bitset. */
  		if (!(waiter->bitset & bitset))
  			continue;

  		wake_futex(waiter);
  		if (++ret >= nr_wake)
  			break;
  	}

  	spin_unlock(&bucket->lock);
  	put_futex_key(fshared, &key);
  	return ret;
  }
  
  /*
4732efbeb   Jakub Jelinek   [PATCH] FUTEX_WAK...
874
875
876
   * Wake up all waiters hashed on the physical page that is mapped
   * to this virtual address:
   */
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
877
  static int
c2f9f2015   Peter Zijlstra   futex: cleanup fs...
878
  futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
879
  	      int nr_wake, int nr_wake2, int op)
4732efbeb   Jakub Jelinek   [PATCH] FUTEX_WAK...
880
  {
38d47c1b7   Peter Zijlstra   futex: rely on ge...
881
  	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
882
  	struct futex_hash_bucket *hb1, *hb2;
ec92d0829   Pierre Peiffer   futex priority ba...
883
  	struct plist_head *head;
4732efbeb   Jakub Jelinek   [PATCH] FUTEX_WAK...
884
  	struct futex_q *this, *next;
e4dc5b7a3   Darren Hart   futex: clean up f...
885
  	int ret, op_ret;
4732efbeb   Jakub Jelinek   [PATCH] FUTEX_WAK...
886

e4dc5b7a3   Darren Hart   futex: clean up f...
887
  retry:
7485d0d37   KOSAKI Motohiro   futexes: Remove r...
888
  	ret = get_futex_key(uaddr1, fshared, &key1);
4732efbeb   Jakub Jelinek   [PATCH] FUTEX_WAK...
889
890
  	if (unlikely(ret != 0))
  		goto out;
7485d0d37   KOSAKI Motohiro   futexes: Remove r...
891
  	ret = get_futex_key(uaddr2, fshared, &key2);
4732efbeb   Jakub Jelinek   [PATCH] FUTEX_WAK...
892
  	if (unlikely(ret != 0))
42d35d48c   Darren Hart   futex: make futex...
893
  		goto out_put_key1;
4732efbeb   Jakub Jelinek   [PATCH] FUTEX_WAK...
894

e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
895
896
  	hb1 = hash_futex(&key1);
  	hb2 = hash_futex(&key2);
4732efbeb   Jakub Jelinek   [PATCH] FUTEX_WAK...
897

e4dc5b7a3   Darren Hart   futex: clean up f...
898
  retry_private:
eaaea8036   Thomas Gleixner   futex: Fix lockin...
899
  	double_lock_hb(hb1, hb2);
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
900
  	op_ret = futex_atomic_op_inuser(op, uaddr2);
4732efbeb   Jakub Jelinek   [PATCH] FUTEX_WAK...
901
  	if (unlikely(op_ret < 0)) {
4732efbeb   Jakub Jelinek   [PATCH] FUTEX_WAK...
902

5eb3dc62f   Darren Hart   futex: add double...
903
  		double_unlock_hb(hb1, hb2);
4732efbeb   Jakub Jelinek   [PATCH] FUTEX_WAK...
904

7ee1dd3fe   David Howells   [PATCH] FRV: Make...
905
  #ifndef CONFIG_MMU
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
906
907
908
909
  		/*
  		 * we don't get EFAULT from MMU faults if we don't have an MMU,
  		 * but we might get them from range checking
  		 */
7ee1dd3fe   David Howells   [PATCH] FRV: Make...
910
  		ret = op_ret;
42d35d48c   Darren Hart   futex: make futex...
911
  		goto out_put_keys;
7ee1dd3fe   David Howells   [PATCH] FRV: Make...
912
  #endif
796f8d9b9   David Gibson   [PATCH] FUTEX_WAK...
913
914
  		if (unlikely(op_ret != -EFAULT)) {
  			ret = op_ret;
42d35d48c   Darren Hart   futex: make futex...
915
  			goto out_put_keys;
796f8d9b9   David Gibson   [PATCH] FUTEX_WAK...
916
  		}
d0725992c   Thomas Gleixner   futex: Fix the wr...
917
  		ret = fault_in_user_writeable(uaddr2);
4732efbeb   Jakub Jelinek   [PATCH] FUTEX_WAK...
918
  		if (ret)
de87fcc12   Darren Hart   futex: additional...
919
  			goto out_put_keys;
4732efbeb   Jakub Jelinek   [PATCH] FUTEX_WAK...
920

e4dc5b7a3   Darren Hart   futex: clean up f...
921
922
  		if (!fshared)
  			goto retry_private;
de87fcc12   Darren Hart   futex: additional...
923
924
  		put_futex_key(fshared, &key2);
  		put_futex_key(fshared, &key1);
e4dc5b7a3   Darren Hart   futex: clean up f...
925
  		goto retry;
4732efbeb   Jakub Jelinek   [PATCH] FUTEX_WAK...
926
  	}
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
927
  	head = &hb1->chain;
4732efbeb   Jakub Jelinek   [PATCH] FUTEX_WAK...
928

ec92d0829   Pierre Peiffer   futex priority ba...
929
  	plist_for_each_entry_safe(this, next, head, list) {
4732efbeb   Jakub Jelinek   [PATCH] FUTEX_WAK...
930
931
932
933
934
935
936
937
  		if (match_futex (&this->key, &key1)) {
  			wake_futex(this);
  			if (++ret >= nr_wake)
  				break;
  		}
  	}
  
  	if (op_ret > 0) {
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
938
  		head = &hb2->chain;
4732efbeb   Jakub Jelinek   [PATCH] FUTEX_WAK...
939
940
  
  		op_ret = 0;
ec92d0829   Pierre Peiffer   futex priority ba...
941
  		plist_for_each_entry_safe(this, next, head, list) {
4732efbeb   Jakub Jelinek   [PATCH] FUTEX_WAK...
942
943
944
945
946
947
948
949
  			if (match_futex (&this->key, &key2)) {
  				wake_futex(this);
  				if (++op_ret >= nr_wake2)
  					break;
  			}
  		}
  		ret += op_ret;
  	}
5eb3dc62f   Darren Hart   futex: add double...
950
  	double_unlock_hb(hb1, hb2);
42d35d48c   Darren Hart   futex: make futex...
951
  out_put_keys:
38d47c1b7   Peter Zijlstra   futex: rely on ge...
952
  	put_futex_key(fshared, &key2);
42d35d48c   Darren Hart   futex: make futex...
953
  out_put_key1:
38d47c1b7   Peter Zijlstra   futex: rely on ge...
954
  	put_futex_key(fshared, &key1);
42d35d48c   Darren Hart   futex: make futex...
955
  out:
4732efbeb   Jakub Jelinek   [PATCH] FUTEX_WAK...
956
957
  	return ret;
  }
9121e4783   Darren Hart   futex: distangle ...
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
  /**
   * requeue_futex() - Move a futex_q from one hash bucket to another
   * @q:		the futex_q being requeued
   * @hb1:	the hash_bucket @q is currently queued on
   * @hb2:	the hash_bucket @q is moved to
   * @key2:	the key @q is filed under after the move
   *
   * A reference on @key2 is taken and @q->key is overwritten with it.
   */
  static inline
  void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
  		   struct futex_hash_bucket *hb2, union futex_key *key2)
  {
  	/*
  	 * The list only needs to be touched when the source and the
  	 * target are different buckets.
  	 */
  	if (likely(hb1 != hb2)) {
  		plist_del(&q->list, &hb1->chain);
  		plist_add(&q->list, &hb2->chain);
  		q->lock_ptr = &hb2->lock;
  #ifdef CONFIG_DEBUG_PI_LIST
  		q->list.plist.spinlock = &hb2->lock;
  #endif
  	}

  	get_futex_key_refs(key2);
  	q->key = *key2;
  }
52400ba94   Darren Hart   futex: add requeu...
985
986
  /**
   * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
d96ee56ce   Darren Hart   futex: Make funct...
987
988
989
   * @q:		the futex_q
   * @key:	the key of the requeue target futex
   * @hb:		the hash_bucket of the requeue target futex
52400ba94   Darren Hart   futex: add requeu...
990
991
992
993
994
   *
   * During futex_requeue, with requeue_pi=1, it is possible to acquire the
   * target futex if it is uncontended or via a lock steal.  Set the futex_q key
   * to the requeue target futex so the waiter can detect the wakeup on the right
   * futex, but remove it from the hb and NULL the rt_waiter so it can detect
beda2c7ea   Darren Hart   futex: Update fut...
995
996
997
   * atomic lock acquisition.  Set the q->lock_ptr to the requeue target hb->lock
   * to protect access to the pi_state to fixup the owner later.  Must be called
   * with both q->lock_ptr and hb->lock held.
52400ba94   Darren Hart   futex: add requeu...
998
999
   */
  static inline
beda2c7ea   Darren Hart   futex: Update fut...
1000
1001
  void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
  			   struct futex_hash_bucket *hb)
52400ba94   Darren Hart   futex: add requeu...
1002
  {
52400ba94   Darren Hart   futex: add requeu...
1003
1004
1005
1006
1007
1008
1009
1010
  	get_futex_key_refs(key);
  	q->key = *key;
  
  	WARN_ON(plist_node_empty(&q->list));
  	plist_del(&q->list, &q->list.plist);
  
  	WARN_ON(!q->rt_waiter);
  	q->rt_waiter = NULL;
beda2c7ea   Darren Hart   futex: Update fut...
1011
1012
  	q->lock_ptr = &hb->lock;
  #ifdef CONFIG_DEBUG_PI_LIST
a26724591   Thomas Gleixner   plist: Make plist...
1013
  	q->list.plist.spinlock = &hb->lock;
beda2c7ea   Darren Hart   futex: Update fut...
1014
  #endif
f1a11e057   Thomas Gleixner   futex: remove the...
1015
  	wake_up_state(q->task, TASK_NORMAL);
52400ba94   Darren Hart   futex: add requeu...
1016
1017
1018
1019
  }
  
  /**
   * futex_proxy_trylock_atomic() - Attempt an atomic lock for the top waiter
bab5bc9e8   Darren Hart   futex: fixup unlo...
1020
1021
1022
1023
1024
1025
1026
   * @pifutex:		the user address of the to futex
   * @hb1:		the from futex hash bucket, must be locked by the caller
   * @hb2:		the to futex hash bucket, must be locked by the caller
   * @key1:		the from futex key
   * @key2:		the to futex key
   * @ps:			address to store the pi_state pointer
   * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
52400ba94   Darren Hart   futex: add requeu...
1027
1028
   *
   * Try and get the lock on behalf of the top waiter if we can do it atomically.
bab5bc9e8   Darren Hart   futex: fixup unlo...
1029
1030
1031
   * Wake the top waiter if we succeed.  If the caller specified set_waiters,
   * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit.
   * hb1 and hb2 must be held by the caller.
52400ba94   Darren Hart   futex: add requeu...
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
   *
   * Returns:
   *  0 - failed to acquire the lock atomically
   *  1 - acquired the lock
   * <0 - error
   */
  static int futex_proxy_trylock_atomic(u32 __user *pifutex,
  				 struct futex_hash_bucket *hb1,
  				 struct futex_hash_bucket *hb2,
  				 union futex_key *key1, union futex_key *key2,
  				 struct futex_pi_state **ps, int set_waiters)
  {
  	struct futex_q *waiter;
  	u32 uval;
  	int res;

  	/* Only the fault status of this read is used, not the value. */
  	if (get_futex_value_locked(&uval, pifutex))
  		return -EFAULT;

  	/*
  	 * Pick the top waiter queued on the source futex. Whether the
  	 * FUTEX_WAITERS bit gets forced on the target is decided by the
  	 * caller through set_waiters, which is passed on unchanged to
  	 * futex_lock_pi_atomic() below.
  	 */
  	waiter = futex_top_waiter(hb1, key1);
  	if (!waiter)
  		return 0;	/* nobody is waiting, nothing to do */

  	/* The waiter must have asked to be requeued to exactly this futex. */
  	if (!match_futex(waiter->requeue_pi_key, key2))
  		return -EINVAL;

  	/*
  	 * Attempt the lock on behalf of the waiter's task. On success
  	 * (1) wake the waiter; any other result is handed back as is.
  	 */
  	res = futex_lock_pi_atomic(pifutex, hb2, key2, ps, waiter->task,
  				   set_waiters);
  	if (res == 1)
  		requeue_pi_wake_futex(waiter, key2, hb2);

  	return res;
  }
  
  /**
   * futex_requeue() - Requeue waiters from uaddr1 to uaddr2
   * uaddr1:	source futex user address
   * uaddr2:	target futex user address
   * nr_wake:	number of waiters to wake (must be 1 for requeue_pi)
   * nr_requeue:	number of waiters to requeue (0-INT_MAX)
   * requeue_pi:	if we are attempting to requeue from a non-pi futex to a
   * 		pi futex (pi to pi requeue is not supported)
   *
   * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire
   * uaddr2 atomically on behalf of the top waiter.
   *
   * Returns:
   * >=0 - on success, the number of tasks requeued or woken
   *  <0 - on error
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1094
   */
c2f9f2015   Peter Zijlstra   futex: cleanup fs...
1095
  static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
52400ba94   Darren Hart   futex: add requeu...
1096
1097
  			 int nr_wake, int nr_requeue, u32 *cmpval,
  			 int requeue_pi)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1098
  {
38d47c1b7   Peter Zijlstra   futex: rely on ge...
1099
  	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
52400ba94   Darren Hart   futex: add requeu...
1100
1101
  	int drop_count = 0, task_count = 0, ret;
  	struct futex_pi_state *pi_state = NULL;
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
1102
  	struct futex_hash_bucket *hb1, *hb2;
ec92d0829   Pierre Peiffer   futex priority ba...
1103
  	struct plist_head *head1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1104
  	struct futex_q *this, *next;
52400ba94   Darren Hart   futex: add requeu...
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
  	u32 curval2;
  
  	if (requeue_pi) {
  		/*
  		 * requeue_pi requires a pi_state, try to allocate it now
  		 * without any locks in case it fails.
  		 */
  		if (refill_pi_state_cache())
  			return -ENOMEM;
  		/*
  		 * requeue_pi must wake as many tasks as it can, up to nr_wake
  		 * + nr_requeue, since it acquires the rt_mutex prior to
  		 * returning to userspace, so as to not leave the rt_mutex with
  		 * waiters and no owner.  However, second and third wake-ups
  		 * cannot be predicted as they involve race conditions with the
  		 * first wake and a fault while looking up the pi_state.  Both
  		 * pthread_cond_signal() and pthread_cond_broadcast() should
  		 * use nr_wake=1.
  		 */
  		if (nr_wake != 1)
  			return -EINVAL;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1127

42d35d48c   Darren Hart   futex: make futex...
1128
  retry:
52400ba94   Darren Hart   futex: add requeu...
1129
1130
1131
1132
1133
1134
1135
1136
  	if (pi_state != NULL) {
  		/*
  		 * We will have to lookup the pi_state again, so free this one
  		 * to keep the accounting correct.
  		 */
  		free_pi_state(pi_state);
  		pi_state = NULL;
  	}
7485d0d37   KOSAKI Motohiro   futexes: Remove r...
1137
  	ret = get_futex_key(uaddr1, fshared, &key1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1138
1139
  	if (unlikely(ret != 0))
  		goto out;
7485d0d37   KOSAKI Motohiro   futexes: Remove r...
1140
  	ret = get_futex_key(uaddr2, fshared, &key2);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1141
  	if (unlikely(ret != 0))
42d35d48c   Darren Hart   futex: make futex...
1142
  		goto out_put_key1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1143

e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
1144
1145
  	hb1 = hash_futex(&key1);
  	hb2 = hash_futex(&key2);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1146

e4dc5b7a3   Darren Hart   futex: clean up f...
1147
  retry_private:
8b8f319fc   Ingo Molnar   [PATCH] lockdep: ...
1148
  	double_lock_hb(hb1, hb2);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1149

e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
1150
1151
  	if (likely(cmpval != NULL)) {
  		u32 curval;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1152

e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
1153
  		ret = get_futex_value_locked(&curval, uaddr1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1154
1155
  
  		if (unlikely(ret)) {
5eb3dc62f   Darren Hart   futex: add double...
1156
  			double_unlock_hb(hb1, hb2);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1157

e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
1158
  			ret = get_user(curval, uaddr1);
e4dc5b7a3   Darren Hart   futex: clean up f...
1159
1160
  			if (ret)
  				goto out_put_keys;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1161

e4dc5b7a3   Darren Hart   futex: clean up f...
1162
1163
  			if (!fshared)
  				goto retry_private;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1164

e4dc5b7a3   Darren Hart   futex: clean up f...
1165
1166
1167
  			put_futex_key(fshared, &key2);
  			put_futex_key(fshared, &key1);
  			goto retry;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1168
  		}
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
1169
  		if (curval != *cmpval) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1170
1171
1172
1173
  			ret = -EAGAIN;
  			goto out_unlock;
  		}
  	}
52400ba94   Darren Hart   futex: add requeu...
1174
  	if (requeue_pi && (task_count - nr_wake < nr_requeue)) {
bab5bc9e8   Darren Hart   futex: fixup unlo...
1175
1176
1177
1178
1179
1180
  		/*
  		 * Attempt to acquire uaddr2 and wake the top waiter. If we
  		 * intend to requeue waiters, force setting the FUTEX_WAITERS
  		 * bit.  We force this here where we are able to easily handle
  		 * faults rather in the requeue loop below.
  		 */
52400ba94   Darren Hart   futex: add requeu...
1181
  		ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
bab5bc9e8   Darren Hart   futex: fixup unlo...
1182
  						 &key2, &pi_state, nr_requeue);
52400ba94   Darren Hart   futex: add requeu...
1183
1184
1185
1186
1187
1188
1189
1190
1191
  
  		/*
  		 * At this point the top_waiter has either taken uaddr2 or is
  		 * waiting on it.  If the former, then the pi_state will not
  		 * exist yet, look it up one more time to ensure we have a
  		 * reference to it.
  		 */
  		if (ret == 1) {
  			WARN_ON(pi_state);
89061d3d5   Darren Hart   futex: Move drop_...
1192
  			drop_count++;
52400ba94   Darren Hart   futex: add requeu...
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
  			task_count++;
  			ret = get_futex_value_locked(&curval2, uaddr2);
  			if (!ret)
  				ret = lookup_pi_state(curval2, hb2, &key2,
  						      &pi_state);
  		}
  
  		switch (ret) {
  		case 0:
  			break;
  		case -EFAULT:
  			double_unlock_hb(hb1, hb2);
  			put_futex_key(fshared, &key2);
  			put_futex_key(fshared, &key1);
d0725992c   Thomas Gleixner   futex: Fix the wr...
1207
  			ret = fault_in_user_writeable(uaddr2);
52400ba94   Darren Hart   futex: add requeu...
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
  			if (!ret)
  				goto retry;
  			goto out;
  		case -EAGAIN:
  			/* The owner was exiting, try again. */
  			double_unlock_hb(hb1, hb2);
  			put_futex_key(fshared, &key2);
  			put_futex_key(fshared, &key1);
  			cond_resched();
  			goto retry;
  		default:
  			goto out_unlock;
  		}
  	}
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
1222
  	head1 = &hb1->chain;
ec92d0829   Pierre Peiffer   futex priority ba...
1223
  	plist_for_each_entry_safe(this, next, head1, list) {
52400ba94   Darren Hart   futex: add requeu...
1224
1225
1226
1227
  		if (task_count - nr_wake >= nr_requeue)
  			break;
  
  		if (!match_futex(&this->key, &key1))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1228
  			continue;
52400ba94   Darren Hart   futex: add requeu...
1229

392741e0a   Darren Hart   futex: Fix handli...
1230
1231
1232
1233
1234
1235
1236
1237
1238
  		/*
  		 * FUTEX_WAIT_REQEUE_PI and FUTEX_CMP_REQUEUE_PI should always
  		 * be paired with each other and no other futex ops.
  		 */
  		if ((requeue_pi && !this->rt_waiter) ||
  		    (!requeue_pi && this->rt_waiter)) {
  			ret = -EINVAL;
  			break;
  		}
52400ba94   Darren Hart   futex: add requeu...
1239
1240
1241
1242
1243
1244
1245
  
  		/*
  		 * Wake nr_wake waiters.  For requeue_pi, if we acquired the
  		 * lock, we already woke the top_waiter.  If not, it will be
  		 * woken by futex_unlock_pi().
  		 */
  		if (++task_count <= nr_wake && !requeue_pi) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1246
  			wake_futex(this);
52400ba94   Darren Hart   futex: add requeu...
1247
1248
  			continue;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1249

84bc4af59   Darren Hart   futex: Detect mis...
1250
1251
1252
1253
1254
  		/* Ensure we requeue to the expected futex for requeue_pi. */
  		if (requeue_pi && !match_futex(this->requeue_pi_key, &key2)) {
  			ret = -EINVAL;
  			break;
  		}
52400ba94   Darren Hart   futex: add requeu...
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
  		/*
  		 * Requeue nr_requeue waiters and possibly one more in the case
  		 * of requeue_pi if we couldn't acquire the lock atomically.
  		 */
  		if (requeue_pi) {
  			/* Prepare the waiter to take the rt_mutex. */
  			atomic_inc(&pi_state->refcount);
  			this->pi_state = pi_state;
  			ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
  							this->rt_waiter,
  							this->task, 1);
  			if (ret == 1) {
  				/* We got the lock. */
beda2c7ea   Darren Hart   futex: Update fut...
1268
  				requeue_pi_wake_futex(this, &key2, hb2);
89061d3d5   Darren Hart   futex: Move drop_...
1269
  				drop_count++;
52400ba94   Darren Hart   futex: add requeu...
1270
1271
1272
1273
1274
1275
1276
  				continue;
  			} else if (ret) {
  				/* -EDEADLK */
  				this->pi_state = NULL;
  				free_pi_state(pi_state);
  				goto out_unlock;
  			}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1277
  		}
52400ba94   Darren Hart   futex: add requeu...
1278
1279
  		requeue_futex(this, hb1, hb2, &key2);
  		drop_count++;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1280
1281
1282
  	}
  
  out_unlock:
5eb3dc62f   Darren Hart   futex: add double...
1283
  	double_unlock_hb(hb1, hb2);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1284

cd84a42f3   Darren Hart   futex: comment re...
1285
1286
1287
1288
1289
1290
  	/*
  	 * drop_futex_key_refs() must be called outside the spinlocks. During
  	 * the requeue we moved futex_q's from the hash bucket at key1 to the
  	 * one at key2 and updated their key pointer.  We no longer need to
  	 * hold the references to key1.
  	 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1291
  	while (--drop_count >= 0)
9adef58b1   Rusty Russell   futex: get_futex_...
1292
  		drop_futex_key_refs(&key1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1293

42d35d48c   Darren Hart   futex: make futex...
1294
  out_put_keys:
38d47c1b7   Peter Zijlstra   futex: rely on ge...
1295
  	put_futex_key(fshared, &key2);
42d35d48c   Darren Hart   futex: make futex...
1296
  out_put_key1:
38d47c1b7   Peter Zijlstra   futex: rely on ge...
1297
  	put_futex_key(fshared, &key1);
42d35d48c   Darren Hart   futex: make futex...
1298
  out:
52400ba94   Darren Hart   futex: add requeu...
1299
1300
1301
  	if (pi_state != NULL)
  		free_pi_state(pi_state);
  	return ret ? ret : task_count;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1302
1303
1304
  }
  
  /* The key must be already stored in q->key. */
82af7aca5   Eric Sesterhenn   Removal of FUTEX_FD
1305
  static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1306
  {
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
1307
  	struct futex_hash_bucket *hb;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1308

9adef58b1   Rusty Russell   futex: get_futex_...
1309
  	get_futex_key_refs(&q->key);
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
1310
1311
  	hb = hash_futex(&q->key);
  	q->lock_ptr = &hb->lock;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1312

e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
1313
1314
  	spin_lock(&hb->lock);
  	return hb;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1315
  }
d40d65c8d   Darren Hart   futex: Correct qu...
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
  /*
   * queue_unlock() - Back out of a queue_lock() without enqueueing @q
   * @q:	futex_q whose key reference is dropped
   * @hb:	hash bucket whose lock is released (callers in this file pass the
   *	bucket returned by queue_lock(@q))
   *
   * Counterpart of queue_lock(): that function takes a reference on q->key
   * and then hb->lock; this one releases hb->lock and then drops the key
   * reference.
   */
  static inline void
  queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb)
  {
  	spin_unlock(&hb->lock);
  	drop_futex_key_refs(&q->key);
  }
  
  /**
   * queue_me() - Enqueue the futex_q on the futex_hash_bucket
   * @q:	The futex_q to enqueue
   * @hb:	The destination hash bucket
   *
   * The hb->lock must be held by the caller, and is released here. A call to
   * queue_me() is typically paired with exactly one call to unqueue_me().  The
   * exceptions involve the PI related operations, which may use unqueue_me_pi()
   * or nothing if the unqueue is done as part of the wake process and the unqueue
   * state is implicit in the state of woken task (see futex_wait_requeue_pi() for
   * an example).
   */
82af7aca5   Eric Sesterhenn   Removal of FUTEX_FD
1335
  static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1336
  {
ec92d0829   Pierre Peiffer   futex priority ba...
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
  	int prio;
  
  	/*
  	 * The priority used to register this element is
  	 * - either the real thread-priority for the real-time threads
  	 * (i.e. threads with a priority lower than MAX_RT_PRIO)
  	 * - or MAX_RT_PRIO for non-RT threads.
  	 * Thus, all RT-threads are woken first in priority order, and
  	 * the others are woken last, in FIFO order.
  	 */
  	prio = min(current->normal_prio, MAX_RT_PRIO);
  
  	plist_node_init(&q->list, prio);
  #ifdef CONFIG_DEBUG_PI_LIST
a26724591   Thomas Gleixner   plist: Make plist...
1351
  	q->list.plist.spinlock = &hb->lock;
ec92d0829   Pierre Peiffer   futex priority ba...
1352
1353
  #endif
  	plist_add(&q->list, &hb->chain);
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1354
  	q->task = current;
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
1355
  	spin_unlock(&hb->lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1356
  }
d40d65c8d   Darren Hart   futex: Correct qu...
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
  /**
   * unqueue_me() - Remove the futex_q from its futex_hash_bucket
   * @q:	The futex_q to unqueue
   *
   * The q->lock_ptr must not be held by the caller. A call to unqueue_me() must
   * be paired with exactly one earlier call to queue_me().
   *
   * Returns:
   *   1 - if the futex_q was still queued (and we unqueued it)
   *   0 - if the futex_q was already removed by the waking thread
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1367
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1368
1369
  static int unqueue_me(struct futex_q *q)
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1370
  	spinlock_t *lock_ptr;
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
1371
  	int ret = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1372
1373
  
  	/* In the common case we don't take the spinlock, which is nice. */
42d35d48c   Darren Hart   futex: make futex...
1374
  retry:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1375
  	lock_ptr = q->lock_ptr;
e91467ecd   Christian Borntraeger   [PATCH] bug in fu...
1376
  	barrier();
c80544dc0   Stephen Hemminger   sparse pointer us...
1377
  	if (lock_ptr != NULL) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
  		spin_lock(lock_ptr);
  		/*
  		 * q->lock_ptr can change between reading it and
  		 * spin_lock(), causing us to take the wrong lock.  This
  		 * corrects the race condition.
  		 *
  		 * Reasoning goes like this: if we have the wrong lock,
  		 * q->lock_ptr must have changed (maybe several times)
  		 * between reading it and the spin_lock().  It can
  		 * change again after the spin_lock() but only if it was
  		 * already changed before the spin_lock().  It cannot,
  		 * however, change back to the original value.  Therefore
  		 * we can detect whether we acquired the correct lock.
  		 */
  		if (unlikely(lock_ptr != q->lock_ptr)) {
  			spin_unlock(lock_ptr);
  			goto retry;
  		}
ec92d0829   Pierre Peiffer   futex priority ba...
1396
1397
  		WARN_ON(plist_node_empty(&q->list));
  		plist_del(&q->list, &q->list.plist);
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1398
1399
  
  		BUG_ON(q->pi_state);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1400
1401
1402
  		spin_unlock(lock_ptr);
  		ret = 1;
  	}
9adef58b1   Rusty Russell   futex: get_futex_...
1403
  	drop_futex_key_refs(&q->key);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1404
1405
  	return ret;
  }
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1406
1407
  /*
   * PI futexes cannot be requeued and must remove themselves from the
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
1408
1409
   * hash bucket. The hash bucket lock (i.e. lock_ptr) is held on entry
   * and dropped here.
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1410
   */
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
1411
  static void unqueue_me_pi(struct futex_q *q)
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1412
  {
ec92d0829   Pierre Peiffer   futex priority ba...
1413
1414
  	WARN_ON(plist_node_empty(&q->list));
  	plist_del(&q->list, &q->list.plist);
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1415
1416
1417
1418
  
  	BUG_ON(!q->pi_state);
  	free_pi_state(q->pi_state);
  	q->pi_state = NULL;
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
1419
  	spin_unlock(q->lock_ptr);
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1420

9adef58b1   Rusty Russell   futex: get_futex_...
1421
  	drop_futex_key_refs(&q->key);
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1422
  }
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
1423
  /*
cdf71a10c   Thomas Gleixner   futex: Prevent st...
1424
   * Fixup the pi_state owner with the new owner.
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
1425
   *
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
1426
1427
   * Must be called with hash bucket lock held and mm->sem held for non
   * private futexes.
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
1428
   */
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
1429
  static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
c2f9f2015   Peter Zijlstra   futex: cleanup fs...
1430
  				struct task_struct *newowner, int fshared)
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
1431
  {
cdf71a10c   Thomas Gleixner   futex: Prevent st...
1432
  	u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
1433
  	struct futex_pi_state *pi_state = q->pi_state;
1b7558e45   Thomas Gleixner   futexes: fix faul...
1434
  	struct task_struct *oldowner = pi_state->owner;
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
1435
  	u32 uval, curval, newval;
e4dc5b7a3   Darren Hart   futex: clean up f...
1436
  	int ret;
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
1437
1438
  
  	/* Owner died? */
1b7558e45   Thomas Gleixner   futexes: fix faul...
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
  	if (!pi_state->owner)
  		newtid |= FUTEX_OWNER_DIED;
  
  	/*
  	 * We are here either because we stole the rtmutex from the
  	 * pending owner or we are the pending owner which failed to
  	 * get the rtmutex. We have to replace the pending owner TID
  	 * in the user space variable. This must be atomic as we have
  	 * to preserve the owner died bit here.
  	 *
b2d0994b1   Darren Hart   futex: update fut...
1449
1450
1451
  	 * Note: We write the user space value _before_ changing the pi_state
  	 * because we can fault here. Imagine swapped out pages or a fork
  	 * that marked all the anonymous memory readonly for cow.
1b7558e45   Thomas Gleixner   futexes: fix faul...
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
  	 *
  	 * Modifying pi_state _before_ the user space value would
  	 * leave the pi_state in an inconsistent state when we fault
  	 * here, because we need to drop the hash bucket lock to
  	 * handle the fault. This might be observed in the PID check
  	 * in lookup_pi_state.
  	 */
  retry:
  	if (get_futex_value_locked(&uval, uaddr))
  		goto handle_fault;
  
  	while (1) {
  		newval = (uval & FUTEX_OWNER_DIED) | newtid;
  
  		curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
  
  		if (curval == -EFAULT)
  			goto handle_fault;
  		if (curval == uval)
  			break;
  		uval = curval;
  	}
  
  	/*
  	 * We fixed up user space. Now we need to fix the pi_state
  	 * itself.
  	 */
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
1479
  	if (pi_state->owner != NULL) {
1d6154825   Thomas Gleixner   sched: Convert pi...
1480
  		raw_spin_lock_irq(&pi_state->owner->pi_lock);
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
1481
1482
  		WARN_ON(list_empty(&pi_state->list));
  		list_del_init(&pi_state->list);
1d6154825   Thomas Gleixner   sched: Convert pi...
1483
  		raw_spin_unlock_irq(&pi_state->owner->pi_lock);
1b7558e45   Thomas Gleixner   futexes: fix faul...
1484
  	}
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
1485

cdf71a10c   Thomas Gleixner   futex: Prevent st...
1486
  	pi_state->owner = newowner;
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
1487

1d6154825   Thomas Gleixner   sched: Convert pi...
1488
  	raw_spin_lock_irq(&newowner->pi_lock);
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
1489
  	WARN_ON(!list_empty(&pi_state->list));
cdf71a10c   Thomas Gleixner   futex: Prevent st...
1490
  	list_add(&pi_state->list, &newowner->pi_state_list);
1d6154825   Thomas Gleixner   sched: Convert pi...
1491
  	raw_spin_unlock_irq(&newowner->pi_lock);
1b7558e45   Thomas Gleixner   futexes: fix faul...
1492
  	return 0;
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
1493

d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
1494
  	/*
1b7558e45   Thomas Gleixner   futexes: fix faul...
1495
1496
1497
1498
1499
1500
1501
1502
  	 * To handle the page fault we need to drop the hash bucket
  	 * lock here. That gives the other task (either the pending
  	 * owner itself or the task which stole the rtmutex) the
  	 * chance to try the fixup of the pi_state. So once we are
  	 * back from handling the fault we need to check the pi_state
  	 * after reacquiring the hash bucket lock and before trying to
  	 * do another fixup. When the fixup has been done already we
  	 * simply return.
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
1503
  	 */
1b7558e45   Thomas Gleixner   futexes: fix faul...
1504
1505
  handle_fault:
  	spin_unlock(q->lock_ptr);
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
1506

d0725992c   Thomas Gleixner   futex: Fix the wr...
1507
  	ret = fault_in_user_writeable(uaddr);
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
1508

1b7558e45   Thomas Gleixner   futexes: fix faul...
1509
  	spin_lock(q->lock_ptr);
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
1510

1b7558e45   Thomas Gleixner   futexes: fix faul...
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
  	/*
  	 * Check if someone else fixed it for us:
  	 */
  	if (pi_state->owner != oldowner)
  		return 0;
  
  	if (ret)
  		return ret;
  
  	goto retry;
d0aa7a70b   Pierre Peiffer   futex_requeue_pi ...
1521
  }
34f01cc1f   Eric Dumazet   FUTEX: new PRIVAT...
1522
1523
  /*
   * In case we must use restart_block to restart a futex_wait,
ce6bd420f   Steven Rostedt   futex: fix for fu...
1524
   * we encode in the 'flags' shared capability
34f01cc1f   Eric Dumazet   FUTEX: new PRIVAT...
1525
   */
1acdac104   Thomas Gleixner   futex: make clock...
1526
1527
  #define FLAGS_SHARED		0x01
  #define FLAGS_CLOCKRT		0x02
a72188d8a   Darren Hart   futex: add FUTEX_...
1528
  #define FLAGS_HAS_TIMEOUT	0x04
34f01cc1f   Eric Dumazet   FUTEX: new PRIVAT...
1529

72c1bbf30   Nick Piggin   futex: restartabl...
1530
  static long futex_wait_restart(struct restart_block *restart);
36cf3b5c3   Thomas Gleixner   FUTEX: Tidy up th...
1531

ca5f9524d   Darren Hart   futex: separate f...
1532
  /**
dd9739980   Darren Hart   futex: split out ...
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
   * fixup_owner() - Post lock pi_state and corner case management
   * @uaddr:	user address of the futex
   * @fshared:	whether the futex is shared (1) or not (0)
   * @q:		futex_q (contains pi_state and access to the rt_mutex)
   * @locked:	if the attempt to take the rt_mutex succeeded (1) or not (0)
   *
   * After attempting to lock an rt_mutex, this function is called to cleanup
   * the pi_state owner as well as handle race conditions that may allow us to
   * acquire the lock. Must be called with the hb lock held.
   *
   * Returns:
   *  1 - success, lock taken
   *  0 - success, lock not taken
   * <0 - on error (-EFAULT)
   */
  static int fixup_owner(u32 __user *uaddr, int fshared, struct futex_q *q,
  		       int locked)
  {
  	struct task_struct *owner;
  	int ret = 0;
  
  	if (locked) {
  		/*
  		 * Got the lock. We might not be the anticipated owner if we
  		 * did a lock-steal - fix up the PI-state in that case:
  		 */
  		if (q->pi_state->owner != current)
  			ret = fixup_pi_state_owner(uaddr, q, current, fshared);
  		goto out;
  	}
  
  	/*
  	 * Catch the rare case, where the lock was released when we were on the
  	 * way back before we locked the hash bucket.
  	 */
  	if (q->pi_state->owner == current) {
  		/*
  		 * Try to get the rt_mutex now. This might fail as some other
  		 * task acquired the rt_mutex after we removed ourselves from
  		 * the rt_mutex waiters list.
  		 */
  		if (rt_mutex_trylock(&q->pi_state->pi_mutex)) {
  			locked = 1;
  			goto out;
  		}
  
  		/*
  		 * pi_state is incorrect, some other task did a lock steal and
  		 * we returned due to timeout or signal without taking the
  		 * rt_mutex. Too late. We can access the rt_mutex_owner without
  		 * locking, as the other task is now blocked on the hash bucket
  		 * lock. Fix the state up.
  		 */
  		owner = rt_mutex_owner(&q->pi_state->pi_mutex);
  		ret = fixup_pi_state_owner(uaddr, q, owner, fshared);
  		goto out;
  	}
  
  	/*
  	 * Paranoia check. If we did not take the lock, then we should not be
  	 * the owner, nor the pending owner, of the rt_mutex.
  	 */
  	if (rt_mutex_owner(&q->pi_state->pi_mutex) == current)
  		printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p "
  				"pi-state %p\n", ret,
  				q->pi_state->pi_mutex.owner,
  				q->pi_state->owner);
  
  out:
  	/*
  	 * An error from fixup_pi_state_owner() takes precedence; otherwise
  	 * report whether we ended up holding the rt_mutex (1) or not (0).
  	 */
  	return ret ? ret : locked;
  }
  
  /**
ca5f9524d   Darren Hart   futex: separate f...
1607
1608
1609
1610
   * futex_wait_queue_me() - queue_me() and wait for wakeup, timeout, or signal
   * @hb:		the futex hash bucket, must be locked by the caller
   * @q:		the futex_q to queue up on
   * @timeout:	the prepared hrtimer_sleeper, or null for no timeout
ca5f9524d   Darren Hart   futex: separate f...
1611
1612
   */
  static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
f1a11e057   Thomas Gleixner   futex: remove the...
1613
  				struct hrtimer_sleeper *timeout)
ca5f9524d   Darren Hart   futex: separate f...
1614
  {
9beba3c54   Darren Hart   futex: Add memory...
1615
1616
1617
1618
1619
1620
  	/*
  	 * The task state is guaranteed to be set before another task can
  	 * wake it. set_current_state() is implemented using set_mb() and
  	 * queue_me() calls spin_unlock() upon completion, both serializing
  	 * access to the hash list and forcing another memory barrier.
  	 */
f1a11e057   Thomas Gleixner   futex: remove the...
1621
  	set_current_state(TASK_INTERRUPTIBLE);
0729e1961   Darren Hart   futex: Fix wakeup...
1622
  	queue_me(q, hb);
ca5f9524d   Darren Hart   futex: separate f...
1623
1624
1625
1626
1627
1628
1629
1630
1631
  
  	/* Arm the timer */
  	if (timeout) {
  		hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
  		if (!hrtimer_active(&timeout->timer))
  			timeout->task = NULL;
  	}
  
  	/*
0729e1961   Darren Hart   futex: Fix wakeup...
1632
1633
  	 * If we have been removed from the hash list, then another task
  	 * has tried to wake us, and we can skip the call to schedule().
ca5f9524d   Darren Hart   futex: separate f...
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
  	 */
  	if (likely(!plist_node_empty(&q->list))) {
  		/*
  		 * If the timer has already expired, current will already be
  		 * flagged for rescheduling. Only call schedule if there
  		 * is no timeout, or if it has yet to expire.
  		 */
  		if (!timeout || timeout->task)
  			schedule();
  	}
  	__set_current_state(TASK_RUNNING);
  }
f801073f8   Darren Hart   futex: split out ...
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
  /**
   * futex_wait_setup() - Prepare to wait on a futex
   * @uaddr:	the futex userspace address
   * @val:	the expected value
   * @fshared:	whether the futex is shared (1) or not (0)
   * @q:		the associated futex_q
   * @hb:		storage for hash_bucket pointer to be returned to caller
   *
   * Setup the futex_q and locate the hash_bucket.  Get the futex value and
   * compare it with the expected value.  Handle atomic faults internally.
   * Return with the hb lock held and a q.key reference on success, and unlocked
   * with no q.key reference on failure.
   *
   * Returns:
   *  0 - uaddr contains val and hb has been locked
   * <0 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked
   */
  static int futex_wait_setup(u32 __user *uaddr, u32 val, int fshared,
  			   struct futex_q *q, struct futex_hash_bucket **hb)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1665
  {
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
1666
1667
  	u32 uval;
  	int ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1668

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1669
  	/*
b2d0994b1   Darren Hart   futex: update fut...
1670
  	 * Access the page AFTER the hash-bucket is locked.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
  	 * Order is important:
  	 *
  	 *   Userspace waiter: val = var; if (cond(val)) futex_wait(&var, val);
  	 *   Userspace waker:  if (cond(var)) { var = new; futex_wake(&var); }
  	 *
  	 * The basic logical guarantee of a futex is that it blocks ONLY
  	 * if cond(var) is known to be true at the time of blocking, for
  	 * any cond.  If we queued after testing *uaddr, that would open
  	 * a race condition where we could block indefinitely with
  	 * cond(var) false, which would violate the guarantee.
  	 *
  	 * A consequence is that futex_wait() can return zero and absorb
  	 * a wakeup when *uaddr != val on entry to the syscall.  This is
  	 * rare, but normal.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1685
  	 */
f801073f8   Darren Hart   futex: split out ...
1686
1687
  retry:
  	q->key = FUTEX_KEY_INIT;
7485d0d37   KOSAKI Motohiro   futexes: Remove r...
1688
  	ret = get_futex_key(uaddr, fshared, &q->key);
f801073f8   Darren Hart   futex: split out ...
1689
  	if (unlikely(ret != 0))
a5a2a0c7f   Darren Hart   futex: fix futex_...
1690
  		return ret;
f801073f8   Darren Hart   futex: split out ...
1691
1692
1693
  
  retry_private:
  	*hb = queue_lock(q);
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
1694
  	ret = get_futex_value_locked(&uval, uaddr);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1695

f801073f8   Darren Hart   futex: split out ...
1696
1697
  	if (ret) {
  		queue_unlock(q, *hb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1698

e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
1699
  		ret = get_user(uval, uaddr);
e4dc5b7a3   Darren Hart   futex: clean up f...
1700
  		if (ret)
f801073f8   Darren Hart   futex: split out ...
1701
  			goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1702

e4dc5b7a3   Darren Hart   futex: clean up f...
1703
1704
  		if (!fshared)
  			goto retry_private;
f801073f8   Darren Hart   futex: split out ...
1705
  		put_futex_key(fshared, &q->key);
e4dc5b7a3   Darren Hart   futex: clean up f...
1706
  		goto retry;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1707
  	}
ca5f9524d   Darren Hart   futex: separate f...
1708

f801073f8   Darren Hart   futex: split out ...
1709
1710
1711
  	if (uval != val) {
  		queue_unlock(q, *hb);
  		ret = -EWOULDBLOCK;
2fff78c78   Peter Zijlstra   futex: fix refere...
1712
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1713

f801073f8   Darren Hart   futex: split out ...
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
  out:
  	if (ret)
  		put_futex_key(fshared, &q->key);
  	return ret;
  }
  
  static int futex_wait(u32 __user *uaddr, int fshared,
  		      u32 val, ktime_t *abs_time, u32 bitset, int clockrt)
  {
  	struct hrtimer_sleeper timeout, *to = NULL;
f801073f8   Darren Hart   futex: split out ...
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
  	struct restart_block *restart;
  	struct futex_hash_bucket *hb;
  	struct futex_q q;
  	int ret;
  
  	if (!bitset)
  		return -EINVAL;
  
  	q.pi_state = NULL;
  	q.bitset = bitset;
52400ba94   Darren Hart   futex: add requeu...
1734
  	q.rt_waiter = NULL;
84bc4af59   Darren Hart   futex: Detect mis...
1735
  	q.requeue_pi_key = NULL;
f801073f8   Darren Hart   futex: split out ...
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
  
  	if (abs_time) {
  		to = &timeout;
  
  		hrtimer_init_on_stack(&to->timer, clockrt ? CLOCK_REALTIME :
  				      CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
  		hrtimer_init_sleeper(to, current);
  		hrtimer_set_expires_range_ns(&to->timer, *abs_time,
  					     current->timer_slack_ns);
  	}
d58e6576b   Thomas Gleixner   futex: Handle spu...
1746
  retry:
f801073f8   Darren Hart   futex: split out ...
1747
1748
1749
1750
  	/* Prepare to wait on uaddr. */
  	ret = futex_wait_setup(uaddr, val, fshared, &q, &hb);
  	if (ret)
  		goto out;
ca5f9524d   Darren Hart   futex: separate f...
1751
  	/* queue_me and wait for wakeup, timeout, or a signal. */
f1a11e057   Thomas Gleixner   futex: remove the...
1752
  	futex_wait_queue_me(hb, &q, to);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1753
1754
  
  	/* If we were woken (and unqueued), we succeeded, whatever. */
2fff78c78   Peter Zijlstra   futex: fix refere...
1755
  	ret = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1756
  	if (!unqueue_me(&q))
2fff78c78   Peter Zijlstra   futex: fix refere...
1757
1758
  		goto out_put_key;
  	ret = -ETIMEDOUT;
ca5f9524d   Darren Hart   futex: separate f...
1759
  	if (to && !to->task)
2fff78c78   Peter Zijlstra   futex: fix refere...
1760
  		goto out_put_key;
72c1bbf30   Nick Piggin   futex: restartabl...
1761

e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
1762
  	/*
d58e6576b   Thomas Gleixner   futex: Handle spu...
1763
1764
  	 * We expect signal_pending(current), but we might be the
  	 * victim of a spurious wakeup as well.
e2970f2fb   Ingo Molnar   [PATCH] pi-futex:...
1765
  	 */
d58e6576b   Thomas Gleixner   futex: Handle spu...
1766
1767
1768
1769
  	if (!signal_pending(current)) {
  		put_futex_key(fshared, &q.key);
  		goto retry;
  	}
2fff78c78   Peter Zijlstra   futex: fix refere...
1770
  	ret = -ERESTARTSYS;
c19384b5b   Pierre Peiffer   Make futex_wait()...
1771
  	if (!abs_time)
2fff78c78   Peter Zijlstra   futex: fix refere...
1772
  		goto out_put_key;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1773

2fff78c78   Peter Zijlstra   futex: fix refere...
1774
1775
1776
1777
1778
1779
  	restart = &current_thread_info()->restart_block;
  	restart->fn = futex_wait_restart;
  	restart->futex.uaddr = (u32 *)uaddr;
  	restart->futex.val = val;
  	restart->futex.time = abs_time->tv64;
  	restart->futex.bitset = bitset;
a72188d8a   Darren Hart   futex: add FUTEX_...
1780
  	restart->futex.flags = FLAGS_HAS_TIMEOUT;
2fff78c78   Peter Zijlstra   futex: fix refere...
1781
1782
1783
1784
1785
  
  	if (fshared)
  		restart->futex.flags |= FLAGS_SHARED;
  	if (clockrt)
  		restart->futex.flags |= FLAGS_CLOCKRT;
42d35d48c   Darren Hart   futex: make futex...
1786

2fff78c78   Peter Zijlstra   futex: fix refere...
1787
1788
1789
1790
  	ret = -ERESTART_RESTARTBLOCK;
  
  out_put_key:
  	put_futex_key(fshared, &q.key);
42d35d48c   Darren Hart   futex: make futex...
1791
  out:
ca5f9524d   Darren Hart   futex: separate f...
1792
1793
1794
1795
  	if (to) {
  		hrtimer_cancel(&to->timer);
  		destroy_hrtimer_on_stack(&to->timer);
  	}
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1796
1797
  	return ret;
  }
72c1bbf30   Nick Piggin   futex: restartabl...
1798
1799
1800
  
  static long futex_wait_restart(struct restart_block *restart)
  {
ce6bd420f   Steven Rostedt   futex: fix for fu...
1801
  	u32 __user *uaddr = (u32 __user *)restart->futex.uaddr;
c2f9f2015   Peter Zijlstra   futex: cleanup fs...
1802
  	int fshared = 0;
a72188d8a   Darren Hart   futex: add FUTEX_...
1803
  	ktime_t t, *tp = NULL;
72c1bbf30   Nick Piggin   futex: restartabl...
1804

a72188d8a   Darren Hart   futex: add FUTEX_...
1805
1806
1807
1808
  	if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
  		t.tv64 = restart->futex.time;
  		tp = &t;
  	}
72c1bbf30   Nick Piggin   futex: restartabl...
1809
  	restart->fn = do_no_restart_syscall;
ce6bd420f   Steven Rostedt   futex: fix for fu...
1810
  	if (restart->futex.flags & FLAGS_SHARED)
c2f9f2015   Peter Zijlstra   futex: cleanup fs...
1811
  		fshared = 1;
a72188d8a   Darren Hart   futex: add FUTEX_...
1812
  	return (long)futex_wait(uaddr, fshared, restart->futex.val, tp,
1acdac104   Thomas Gleixner   futex: make clock...
1813
1814
  				restart->futex.bitset,
  				restart->futex.flags & FLAGS_CLOCKRT);
72c1bbf30   Nick Piggin   futex: restartabl...
1815
  }
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1816
1817
1818
1819
1820
1821
  /*
   * Userspace tried a 0 -> TID atomic transition of the futex value
   * and failed. The kernel side here does the whole locking operation:
   * if there are waiters then it will block, it does PI, etc. (Due to
   * races the kernel might see a 0 value of the futex too.)
   */
c2f9f2015   Peter Zijlstra   futex: cleanup fs...
1822
  static int futex_lock_pi(u32 __user *uaddr, int fshared,
34f01cc1f   Eric Dumazet   FUTEX: new PRIVAT...
1823
  			 int detect, ktime_t *time, int trylock)
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1824
  {
c5780e976   Thomas Gleixner   [PATCH] Use the c...
1825
  	struct hrtimer_sleeper timeout, *to = NULL;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1826
  	struct futex_hash_bucket *hb;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1827
  	struct futex_q q;
dd9739980   Darren Hart   futex: split out ...
1828
  	int res, ret;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1829
1830
1831
  
  	if (refill_pi_state_cache())
  		return -ENOMEM;
c19384b5b   Pierre Peiffer   Make futex_wait()...
1832
  	if (time) {
c5780e976   Thomas Gleixner   [PATCH] Use the c...
1833
  		to = &timeout;
237fc6e7a   Thomas Gleixner   add hrtimer speci...
1834
1835
  		hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME,
  				      HRTIMER_MODE_ABS);
c5780e976   Thomas Gleixner   [PATCH] Use the c...
1836
  		hrtimer_init_sleeper(to, current);
cc584b213   Arjan van de Ven   hrtimer: convert ...
1837
  		hrtimer_set_expires(&to->timer, *time);
c5780e976   Thomas Gleixner   [PATCH] Use the c...
1838
  	}
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1839
  	q.pi_state = NULL;
52400ba94   Darren Hart   futex: add requeu...
1840
  	q.rt_waiter = NULL;
84bc4af59   Darren Hart   futex: Detect mis...
1841
  	q.requeue_pi_key = NULL;
42d35d48c   Darren Hart   futex: make futex...
1842
  retry:
38d47c1b7   Peter Zijlstra   futex: rely on ge...
1843
  	q.key = FUTEX_KEY_INIT;
7485d0d37   KOSAKI Motohiro   futexes: Remove r...
1844
  	ret = get_futex_key(uaddr, fshared, &q.key);
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1845
  	if (unlikely(ret != 0))
42d35d48c   Darren Hart   futex: make futex...
1846
  		goto out;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1847

e4dc5b7a3   Darren Hart   futex: clean up f...
1848
  retry_private:
82af7aca5   Eric Sesterhenn   Removal of FUTEX_FD
1849
  	hb = queue_lock(&q);
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1850

bab5bc9e8   Darren Hart   futex: fixup unlo...
1851
  	ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0);
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1852
  	if (unlikely(ret)) {
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
1853
  		switch (ret) {
1a52084d0   Darren Hart   futex: split out ...
1854
1855
1856
1857
1858
1859
  		case 1:
  			/* We got the lock. */
  			ret = 0;
  			goto out_unlock_put_key;
  		case -EFAULT:
  			goto uaddr_faulted;
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
1860
1861
1862
1863
1864
1865
  		case -EAGAIN:
  			/*
  			 * Task is exiting and we just wait for the
  			 * exit to complete.
  			 */
  			queue_unlock(&q, hb);
de87fcc12   Darren Hart   futex: additional...
1866
  			put_futex_key(fshared, &q.key);
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
1867
1868
  			cond_resched();
  			goto retry;
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
1869
  		default:
42d35d48c   Darren Hart   futex: make futex...
1870
  			goto out_unlock_put_key;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1871
  		}
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1872
1873
1874
1875
1876
  	}
  
  	/*
  	 * Only actually queue now that the atomic ops are done:
  	 */
82af7aca5   Eric Sesterhenn   Removal of FUTEX_FD
1877
  	queue_me(&q, hb);
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1878

c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
  	WARN_ON(!q.pi_state);
  	/*
  	 * Block on the PI mutex:
  	 */
  	if (!trylock)
  		ret = rt_mutex_timed_lock(&q.pi_state->pi_mutex, to, 1);
  	else {
  		ret = rt_mutex_trylock(&q.pi_state->pi_mutex);
  		/* Fixup the trylock return value: */
  		ret = ret ? 0 : -EWOULDBLOCK;
  	}
a99e4e413   Vernon Mauery   [PATCH] pi-futex:...
1890
  	spin_lock(q.lock_ptr);
dd9739980   Darren Hart   futex: split out ...
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
  	/*
  	 * Fixup the pi_state owner and possibly acquire the lock if we
  	 * haven't already.
  	 */
  	res = fixup_owner(uaddr, fshared, &q, !ret);
  	/*
  	 * If fixup_owner() returned an error, propagate that.  If it acquired
  	 * the lock, clear our -ETIMEDOUT or -EINTR.
  	 */
  	if (res)
  		ret = (res < 0) ? res : 0;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1902

e8f6386c0   Darren Hart   futex: unlock bef...
1903
  	/*
dd9739980   Darren Hart   futex: split out ...
1904
1905
  	 * If fixup_owner() faulted and was unable to handle the fault, unlock
  	 * it and return the fault to userspace.
e8f6386c0   Darren Hart   futex: unlock bef...
1906
1907
1908
  	 */
  	if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current))
  		rt_mutex_unlock(&q.pi_state->pi_mutex);
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
1909
1910
  	/* Unqueue and drop the lock */
  	unqueue_me_pi(&q);
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1911

5ecb01cfd   Mikael Pettersson   futex_lock_pi() k...
1912
  	goto out_put_key;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1913

42d35d48c   Darren Hart   futex: make futex...
1914
  out_unlock_put_key:
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1915
  	queue_unlock(&q, hb);
42d35d48c   Darren Hart   futex: make futex...
1916
  out_put_key:
38d47c1b7   Peter Zijlstra   futex: rely on ge...
1917
  	put_futex_key(fshared, &q.key);
42d35d48c   Darren Hart   futex: make futex...
1918
  out:
237fc6e7a   Thomas Gleixner   add hrtimer speci...
1919
1920
  	if (to)
  		destroy_hrtimer_on_stack(&to->timer);
dd9739980   Darren Hart   futex: split out ...
1921
  	return ret != -EINTR ? ret : -ERESTARTNOINTR;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1922

42d35d48c   Darren Hart   futex: make futex...
1923
  uaddr_faulted:
778e9a9c3   Alexey Kuznetsov   pi-futex: fix exi...
1924
  	queue_unlock(&q, hb);
d0725992c   Thomas Gleixner   futex: Fix the wr...
1925
  	ret = fault_in_user_writeable(uaddr);
e4dc5b7a3   Darren Hart   futex: clean up f...
1926
1927
  	if (ret)
  		goto out_put_key;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1928

e4dc5b7a3   Darren Hart   futex: clean up f...
1929
1930
1931
1932
1933
  	if (!fshared)
  		goto retry_private;
  
  	put_futex_key(fshared, &q.key);
  	goto retry;
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1934
1935
1936
  }
  
  /*
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1937
1938
1939
1940
   * Userspace attempted a TID -> 0 atomic transition, and failed.
   * This is the in-kernel slowpath: we look up the PI state (if any),
   * and do the rt-mutex unlock.
   */
c2f9f2015   Peter Zijlstra   futex: cleanup fs...
1941
  static int futex_unlock_pi(u32 __user *uaddr, int fshared)
  {
  	union futex_key key = FUTEX_KEY_INIT;
  	struct futex_hash_bucket *hb;
  	struct futex_q *waiter, *tmp;
  	u32 uval;
  	int ret;
  
  retry:
  	/* Read the current futex word; bail out if userspace memory is bad. */
  	if (get_user(uval, uaddr))
  		return -EFAULT;
  
  	/* Only the task whose TID is stored in the word may unlock it. */
  	if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current))
  		return -EPERM;
  
  	ret = get_futex_key(uaddr, fshared, &key);
  	if (unlikely(ret))
  		goto out;
  
  	hb = hash_futex(&key);
  	spin_lock(&hb->lock);
  
  	/*
  	 * Under the bucket lock, retry the TID -> 0 transition ourselves
  	 * (skipped when OWNER_DIED is set).  From here on uval holds whatever
  	 * the cmpxchg reported, not the value read above.
  	 */
  	if (!(uval & FUTEX_OWNER_DIED))
  		uval = cmpxchg_futex_value_locked(uaddr, task_pid_vnr(current), 0);
  
  	if (unlikely(uval == -EFAULT))
  		goto pi_faulted;
  
  	/*
  	 * Rare: the word was exactly our TID, so the cmpxchg released the
  	 * lock and nobody needs a wakeup.
  	 */
  	if (unlikely(uval == task_pid_vnr(current)))
  		goto out_unlock;
  
  	/*
  	 * Otherwise look for a waiter queued on this key and hand the lock
  	 * over to the first match.
  	 */
  	plist_for_each_entry_safe(waiter, tmp, &hb->chain, list) {
  		if (!match_futex(&waiter->key, &key))
  			continue;
  
  		ret = wake_futex_pi(uaddr, uval, waiter);
  		/* A fault on the user word means: fault it in and start over. */
  		if (ret == -EFAULT)
  			goto pi_faulted;
  		goto out_unlock;
  	}
  
  	/* No waiter found: the kernel unlocks the futex word itself. */
  	if (!(uval & FUTEX_OWNER_DIED)) {
  		ret = unlock_futex_pi(uaddr, uval);
  		if (ret == -EFAULT)
  			goto pi_faulted;
  	}
  
  out_unlock:
  	spin_unlock(&hb->lock);
  	put_futex_key(fshared, &key);
  
  out:
  	return ret;
  
  pi_faulted:
  	/* Drop everything we hold before touching user memory again. */
  	spin_unlock(&hb->lock);
  	put_futex_key(fshared, &key);
  
  	ret = fault_in_user_writeable(uaddr);
  	if (ret)
  		return ret;
  
  	goto retry;
  }
52400ba94   Darren Hart   futex: add requeu...
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
  /**
   * handle_early_requeue_pi_wakeup() - Detect early wakeup on the initial futex
   * @hb:		the hash_bucket futex_q was originally enqueued on
   * @q:		the futex_q woken while waiting to be requeued
   * @key2:	the futex_key of the requeue target futex
   * @timeout:	the timeout associated with the wait (NULL if none)
   *
   * Detect if the task was woken on the initial futex as opposed to the requeue
   * target futex.  If so, determine if it was a timeout or a signal that caused
   * the wakeup and return the appropriate error code to the caller.  Must be
   * called with the hb lock held.
   *
   * Returns
   *  0 - no early wakeup detected
1c840c149   Thomas Gleixner   futex: fix restar...
2040
   * <0 - -ETIMEDOUT, -ERESTARTNOINTR, or -EWOULDBLOCK (spurious wakeup)
52400ba94   Darren Hart   futex: add requeu...
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
   */
  static inline
  int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
  				   struct futex_q *q, union futex_key *key2,
  				   struct hrtimer_sleeper *timeout)
  {
  	int ret = 0;
  
  	/*
  	 * With the hb lock held, we avoid races while we process the wakeup.
  	 * We only need to hold hb (and not hb2) to ensure atomicity as the
  	 * wakeup code can't change q.key from uaddr to uaddr2 if we hold hb.
  	 * It can't be requeued from uaddr2 to something else since we don't
  	 * support a PI aware source futex for requeue.
  	 */
  	if (!match_futex(&q->key, key2)) {
  		WARN_ON(q->lock_ptr && (&hb->lock != q->lock_ptr));
  		/*
  		 * We were woken prior to requeue by a timeout or a signal.
  		 * Unqueue the futex_q and determine which it was.
  		 */
  		plist_del(&q->list, &q->list.plist);
52400ba94   Darren Hart   futex: add requeu...
2063

d58e6576b   Thomas Gleixner   futex: Handle spu...
2064
  		/* Handle spurious wakeups gracefully */
11df6dddc   Thomas Gleixner   futex: Fix spurio...
2065
  		ret = -EWOULDBLOCK;
52400ba94   Darren Hart   futex: add requeu...
2066
2067
  		if (timeout && !timeout->task)
  			ret = -ETIMEDOUT;
d58e6576b   Thomas Gleixner   futex: Handle spu...
2068
  		else if (signal_pending(current))
1c840c149   Thomas Gleixner   futex: fix restar...
2069
  			ret = -ERESTARTNOINTR;
52400ba94   Darren Hart   futex: add requeu...
2070
2071
2072
2073
2074
2075
  	}
  	return ret;
  }
  
  /**
   * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2
56ec1607b   Darren Hart   futex: Correct fu...
2076
   * @uaddr:	the futex we initially wait on (non-pi)
52400ba94   Darren Hart   futex: add requeu...
2077
2078
2079
2080
   * @fshared:	whether the futexes are shared (1) or not (0).  They must be
   * 		the same type, no requeueing from private to shared, etc.
   * @val:	the expected value of uaddr
   * @abs_time:	absolute timeout
56ec1607b   Darren Hart   futex: Correct fu...
2081
   * @bitset:	32 bit wakeup bitset set by userspace, defaults to all
52400ba94   Darren Hart   futex: add requeu...
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
   * @clockrt:	whether to use CLOCK_REALTIME (1) or CLOCK_MONOTONIC (0)
   * @uaddr2:	the pi futex we will take prior to returning to user-space
   *
   * The caller will wait on uaddr and will be requeued by futex_requeue() to
   * uaddr2 which must be PI aware.  Normal wakeup will wake on uaddr2 and
   * complete the acquisition of the rt_mutex prior to returning to userspace.
   * This ensures the rt_mutex maintains an owner when it has waiters; without
   * one, the pi logic wouldn't know which task to boost/deboost, if there was a
   * need to.
   *
   * We call schedule in futex_wait_queue_me() when we enqueue and return there
   * via the following:
   * 1) wakeup on uaddr2 after an atomic lock acquisition by futex_requeue()
cc6db4e60   Darren Hart   futex: Correct fu...
2095
2096
2097
   * 2) wakeup on uaddr2 after a requeue
   * 3) signal
   * 4) timeout
52400ba94   Darren Hart   futex: add requeu...
2098
   *
cc6db4e60   Darren Hart   futex: Correct fu...
2099
   * If 3, cleanup and return -ERESTARTNOINTR.
52400ba94   Darren Hart   futex: add requeu...
2100
2101
2102
2103
2104
2105
2106
   *
   * If 2, we may then block on trying to take the rt_mutex and return via:
   * 5) successful lock
   * 6) signal
   * 7) timeout
   * 8) other lock acquisition failure
   *
cc6db4e60   Darren Hart   futex: Correct fu...
2107
   * If 6, return -EWOULDBLOCK (restarting the syscall would do the same).
52400ba94   Darren Hart   futex: add requeu...
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
   *
   * If 4 or 7, we cleanup and return with -ETIMEDOUT.
   *
   * Returns:
   *  0 - On success
   * <0 - On error
   */
  static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
  				 u32 val, ktime_t *abs_time, u32 bitset,
  				 int clockrt, u32 __user *uaddr2)
  {
  	struct hrtimer_sleeper timeout, *to = NULL;
  	struct rt_mutex_waiter rt_waiter;
  	/* Only set on the path where we finish the proxy lock ourselves. */
  	struct rt_mutex *pi_mutex = NULL;
  	struct futex_hash_bucket *hb;
  	union futex_key key2;
  	struct futex_q q;
  	int res, ret;
  
  	if (!bitset)
  		return -EINVAL;
  
  	if (abs_time) {
  		to = &timeout;
  		hrtimer_init_on_stack(&to->timer, clockrt ? CLOCK_REALTIME :
  				      CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
  		hrtimer_init_sleeper(to, current);
  		hrtimer_set_expires_range_ns(&to->timer, *abs_time,
  					     current->timer_slack_ns);
  	}
  
  	/*
  	 * The waiter is allocated on our stack, manipulated by the requeue
  	 * code while we sleep on uaddr.
  	 */
  	debug_rt_mutex_init_waiter(&rt_waiter);
  	rt_waiter.task = NULL;
  
  	key2 = FUTEX_KEY_INIT;
  	ret = get_futex_key(uaddr2, fshared, &key2);
  	if (unlikely(ret != 0))
  		goto out;
  
  	q.pi_state = NULL;
  	q.bitset = bitset;
  	q.rt_waiter = &rt_waiter;
  	q.requeue_pi_key = &key2;
  
  	/* Prepare to wait on uaddr. */
  	ret = futex_wait_setup(uaddr, val, fshared, &q, &hb);
  	if (ret)
  		goto out_key2;
  
  	/* Queue the futex_q, drop the hb lock, wait for wakeup. */
  	futex_wait_queue_me(hb, &q, to);
  
  	spin_lock(&hb->lock);
  	ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
  	spin_unlock(&hb->lock);
  	if (ret)
  		goto out_put_keys;
  
  	/*
  	 * In order for us to be here, we know our q.key == key2, and since
  	 * we took the hb->lock above, we also know that futex_requeue() has
  	 * completed and we no longer have to concern ourselves with a wakeup
  	 * race with the atomic proxy lock acquisition by the requeue code.
  	 */
  
  	/* Check if the requeue code acquired the second futex for us. */
  	if (!q.rt_waiter) {
  		/*
  		 * Got the lock. We might not be the anticipated owner if we
  		 * did a lock-steal - fix up the PI-state in that case.
  		 */
  		if (q.pi_state && (q.pi_state->owner != current)) {
  			spin_lock(q.lock_ptr);
  			ret = fixup_pi_state_owner(uaddr2, &q, current,
  						   fshared);
  			spin_unlock(q.lock_ptr);
  		}
  	} else {
  		/*
  		 * We have been woken up by futex_unlock_pi(), a timeout, or a
  		 * signal.  futex_unlock_pi() will not destroy the lock_ptr nor
  		 * the pi_state.
  		 */
  		/*
  		 * Check the pointer itself: the address of the member
  		 * (&q.pi_state) is never NULL, so testing it could never warn.
  		 */
  		WARN_ON(!q.pi_state);
  		pi_mutex = &q.pi_state->pi_mutex;
  		ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1);
  		debug_rt_mutex_free_waiter(&rt_waiter);
  
  		spin_lock(q.lock_ptr);
  		/*
  		 * Fixup the pi_state owner and possibly acquire the lock if we
  		 * haven't already.
  		 */
  		res = fixup_owner(uaddr2, fshared, &q, !ret);
  		/*
  		 * If fixup_owner() returned an error, propagate that.  If it
  		 * acquired the lock, clear -ETIMEDOUT or -EINTR.
  		 */
  		if (res)
  			ret = (res < 0) ? res : 0;
  
  		/* Unqueue and drop the lock. */
  		unqueue_me_pi(&q);
  	}
  
  	/*
  	 * If fixup_pi_state_owner() faulted and was unable to handle the
  	 * fault, unlock the rt_mutex and return the fault to userspace.
  	 */
  	if (ret == -EFAULT) {
  		/*
  		 * pi_mutex is still NULL when the fault came from the
  		 * "requeue code acquired the lock for us" branch above, so
  		 * make sure we have a mutex before asking who owns it.
  		 */
  		if (pi_mutex && rt_mutex_owner(pi_mutex) == current)
  			rt_mutex_unlock(pi_mutex);
  	} else if (ret == -EINTR) {
  		/*
  		 * We've already been requeued, but cannot restart by calling
  		 * futex_lock_pi() directly. We could restart this syscall, but
  		 * it would detect that the user space "val" changed and return
  		 * -EWOULDBLOCK.  Save the overhead of the restart and return
  		 * -EWOULDBLOCK directly.
  		 */
  		ret = -EWOULDBLOCK;
  	}
  
  out_put_keys:
  	put_futex_key(fshared, &q.key);
  out_key2:
  	put_futex_key(fshared, &key2);
  
  out:
  	if (to) {
  		hrtimer_cancel(&to->timer);
  		destroy_hrtimer_on_stack(&to->timer);
  	}
  	return ret;
  }
0771dfefc   Ingo Molnar   [PATCH] lightweig...
2244
2245
2246
2247
2248
2249
2250
  /*
   * Support for robust futexes: the kernel cleans up held futexes at
   * thread exit time.
   *
   * Implementation: user-space maintains a per-thread list of locks it
   * is holding. Upon do_exit(), the kernel carefully walks this list,
   * and marks all locks that are owned by this thread with the
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
2251
   * FUTEX_OWNER_DIED bit, and wakes up a waiter (if any). The list is
0771dfefc   Ingo Molnar   [PATCH] lightweig...
2252
2253
2254
2255
2256
2257
2258
2259
   * always manipulated with the lock held, so the list is private and
   * per-thread. Userspace also maintains a per-thread 'list_op_pending'
   * field, to allow the kernel to clean up if the thread dies after
   * acquiring the lock, but just before it could have added itself to
   * the list. There can only be one such pending lock.
   */
  
  /**
d96ee56ce   Darren Hart   futex: Make funct...
2260
2261
2262
   * sys_set_robust_list() - Set the robust-futex list head of a task
   * @head:	pointer to the list-head
   * @len:	length of the list-head, as userspace expects
0771dfefc   Ingo Molnar   [PATCH] lightweig...
2263
   */
836f92adf   Heiko Carstens   [CVE-2009-0029] S...
2264
2265
  SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
  		size_t, len)
  {
  	/* Refused with -ENOSYS unless the futex cmpxchg probe succeeded. */
  	if (!futex_cmpxchg_enabled)
  		return -ENOSYS;
  
  	/* Exactly one list-head layout is understood for now. */
  	if (unlikely(len != sizeof(struct robust_list_head)))
  		return -EINVAL;
  
  	/* Just remember the user pointer on the calling task. */
  	current->robust_list = head;
  	return 0;
  }
  
  /**
d96ee56ce   Darren Hart   futex: Make funct...
2281
2282
2283
2284
   * sys_get_robust_list() - Get the robust-futex list head of a task
   * @pid:	pid of the process [zero for current task]
   * @head_ptr:	pointer to a list-head pointer, the kernel fills it in
   * @len_ptr:	pointer to a length field, the kernel fills in the header size
0771dfefc   Ingo Molnar   [PATCH] lightweig...
2285
   */
836f92adf   Heiko Carstens   [CVE-2009-0029] S...
2286
2287
2288
  SYSCALL_DEFINE3(get_robust_list, int, pid,
  		struct robust_list_head __user * __user *, head_ptr,
  		size_t __user *, len_ptr)
  {
  	const struct cred *cred = current_cred(), *pcred;
  	struct robust_list_head __user *head;
  
  	if (!futex_cmpxchg_enabled)
  		return -ENOSYS;
  
  	if (pid) {
  		struct task_struct *p;
  
  		/* The task lookup and credential peek happen under RCU. */
  		rcu_read_lock();
  
  		p = find_task_by_vpid(pid);
  		if (!p) {
  			rcu_read_unlock();
  			return -ESRCH;
  		}
  
  		/*
  		 * Allowed when our euid matches the target's euid or uid,
  		 * or when we hold CAP_SYS_PTRACE.
  		 */
  		pcred = __task_cred(p);
  		if (cred->euid != pcred->euid &&
  		    cred->euid != pcred->uid &&
  		    !capable(CAP_SYS_PTRACE)) {
  			rcu_read_unlock();
  			return -EPERM;
  		}
  
  		head = p->robust_list;
  		rcu_read_unlock();
  	} else {
  		/* pid == 0 means the calling task. */
  		head = current->robust_list;
  	}
  
  	/* Report the header size first, then the list-head pointer. */
  	if (put_user(sizeof(*head), len_ptr))
  		return -EFAULT;
  	return put_user(head, head_ptr);
  }
  
  /*
   * Process a futex-list entry, check whether it's owned by the
   * dying task, and do notification if so:
   */
e3f2ddeac   Ingo Molnar   [PATCH] pi-futex:...
2330
  int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
  {
  	u32 cur, marked, seen;
  
  	for (;;) {
  		if (get_user(cur, uaddr))
  			return -1;
  
  		/* Not held by the dying task: nothing to do. */
  		if ((cur & FUTEX_TID_MASK) != task_pid_vnr(curr))
  			return 0;
  
  		/*
  		 * The dying thread really holds this futex.  Atomically
  		 * replace the word with OWNER_DIED, keeping only the
  		 * FUTEX_WAITERS bit.  The rest of the cleanup is left to
  		 * userspace.
  		 */
  		marked = (cur & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
  		seen = futex_atomic_cmpxchg_inatomic(uaddr, cur, marked);
  
  		if (seen == -EFAULT)
  			return -1;
  
  		/* The word changed underneath us: re-read and try again. */
  		if (seen == cur)
  			break;
  	}
  
  	/*
  	 * Wake robust non-PI futexes here. The wakeup of
  	 * PI futexes happens in exit_pi_state():
  	 */
  	if (!pi && (cur & FUTEX_WAITERS))
  		futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
  
  	return 0;
  }
  
  /*
e3f2ddeac   Ingo Molnar   [PATCH] pi-futex:...
2367
2368
2369
   * Fetch a robust-list pointer. Bit 0 signals PI futexes:
   */
  static inline int fetch_robust_entry(struct robust_list __user **entry,
  				     struct robust_list __user * __user *head,
  				     int *pi)
  {
  	unsigned long raw;
  
  	/* Read the user-space pointer as a plain word. */
  	if (get_user(raw, (unsigned long __user *)head))
  		return -EFAULT;
  
  	/* Bit 0 is the PI flag; the remaining bits are the entry pointer. */
  	*pi = raw & 1;
  	*entry = (void __user *)(raw & ~1UL);
  
  	return 0;
  }
  
  /*
0771dfefc   Ingo Molnar   [PATCH] lightweig...
2383
2384
2385
2386
2387
2388
2389
2390
   * Walk curr->robust_list (very carefully, it's a userspace list!)
   * and mark any locks found there dead, and notify any waiters.
   *
   * We silently return on any sign of list-walking problem.
   */
  void exit_robust_list(struct task_struct *curr)
  {
  	struct robust_list_head __user *head = curr->robust_list;
9f96cb1e8   Martin Schwidefsky   robust futex thre...
2391
2392
  	struct robust_list __user *entry, *next_entry, *pending;
  	unsigned int limit = ROBUST_LIST_LIMIT, pi, next_pi, pip;
0771dfefc   Ingo Molnar   [PATCH] lightweig...
2393
  	unsigned long futex_offset;
9f96cb1e8   Martin Schwidefsky   robust futex thre...
2394
  	int rc;
0771dfefc   Ingo Molnar   [PATCH] lightweig...
2395

a0c1e9073   Thomas Gleixner   futex: runtime en...
2396
2397
  	if (!futex_cmpxchg_enabled)
  		return;
0771dfefc   Ingo Molnar   [PATCH] lightweig...
2398
2399
2400
2401
  	/*
  	 * Fetch the list head (which was registered earlier, via
  	 * sys_set_robust_list()):
  	 */
e3f2ddeac   Ingo Molnar   [PATCH] pi-futex:...
2402
  	if (fetch_robust_entry(&entry, &head->list.next, &pi))
0771dfefc   Ingo Molnar   [PATCH] lightweig...
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
  		return;
  	/*
  	 * Fetch the relative futex offset:
  	 */
  	if (get_user(futex_offset, &head->futex_offset))
  		return;
  	/*
  	 * Fetch any possibly pending lock-add first, and handle it
  	 * if it exists:
  	 */
e3f2ddeac   Ingo Molnar   [PATCH] pi-futex:...
2413
  	if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
0771dfefc   Ingo Molnar   [PATCH] lightweig...
2414
  		return;
e3f2ddeac   Ingo Molnar   [PATCH] pi-futex:...
2415

9f96cb1e8   Martin Schwidefsky   robust futex thre...
2416
  	next_entry = NULL;	/* avoid warning with gcc */
0771dfefc   Ingo Molnar   [PATCH] lightweig...
2417
2418
  	while (entry != &head->list) {
  		/*
9f96cb1e8   Martin Schwidefsky   robust futex thre...
2419
2420
2421
2422
2423
  		 * Fetch the next entry in the list before calling
  		 * handle_futex_death:
  		 */
  		rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi);
  		/*
0771dfefc   Ingo Molnar   [PATCH] lightweig...
2424
  		 * A pending lock might already be on the list, so
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
2425
  		 * don't process it twice:
0771dfefc   Ingo Molnar   [PATCH] lightweig...
2426
2427
  		 */
  		if (entry != pending)
ba46df984   Al Viro   [PATCH] __user an...
2428
  			if (handle_futex_death((void __user *)entry + futex_offset,
e3f2ddeac   Ingo Molnar   [PATCH] pi-futex:...
2429
  						curr, pi))
0771dfefc   Ingo Molnar   [PATCH] lightweig...
2430
  				return;
9f96cb1e8   Martin Schwidefsky   robust futex thre...
2431
  		if (rc)
0771dfefc   Ingo Molnar   [PATCH] lightweig...
2432
  			return;
9f96cb1e8   Martin Schwidefsky   robust futex thre...
2433
2434
  		entry = next_entry;
  		pi = next_pi;
0771dfefc   Ingo Molnar   [PATCH] lightweig...
2435
2436
2437
2438
2439
2440
2441
2442
  		/*
  		 * Avoid excessively long or circular lists:
  		 */
  		if (!--limit)
  			break;
  
  		cond_resched();
  	}
9f96cb1e8   Martin Schwidefsky   robust futex thre...
2443
2444
2445
2446
  
  	if (pending)
  		handle_futex_death((void __user *)pending + futex_offset,
  				   curr, pip);
0771dfefc   Ingo Molnar   [PATCH] lightweig...
2447
  }
c19384b5b   Pierre Peiffer   Make futex_wait()...
2448
  long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
  		u32 __user *uaddr2, u32 val2, u32 val3)
  {
  	int cmd = op & FUTEX_CMD_MASK;
  	int clockrt = op & FUTEX_CLOCK_REALTIME;
  	/* Shared unless the caller set FUTEX_PRIVATE_FLAG. */
  	int fshared = (op & FUTEX_PRIVATE_FLAG) ? 0 : 1;
  
  	/* FUTEX_CLOCK_REALTIME is only accepted for these two commands. */
  	if (clockrt && cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI)
  		return -ENOSYS;
  
  	switch (cmd) {
  	case FUTEX_WAIT:
  		/* Plain wait is a bitset wait that matches any bit. */
  		return futex_wait(uaddr, fshared, val, timeout,
  				  FUTEX_BITSET_MATCH_ANY, clockrt);
  	case FUTEX_WAIT_BITSET:
  		return futex_wait(uaddr, fshared, val, timeout, val3, clockrt);
  
  	case FUTEX_WAKE:
  		/* Plain wake is a bitset wake that matches any bit. */
  		return futex_wake(uaddr, fshared, val, FUTEX_BITSET_MATCH_ANY);
  	case FUTEX_WAKE_BITSET:
  		return futex_wake(uaddr, fshared, val, val3);
  
  	case FUTEX_REQUEUE:
  		return futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 0);
  	case FUTEX_CMP_REQUEUE:
  		return futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3,
  				     0);
  	case FUTEX_CMP_REQUEUE_PI:
  		return futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3,
  				     1);
  
  	case FUTEX_WAKE_OP:
  		return futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3);
  
  	/* The PI lock/unlock commands report -ENOSYS without cmpxchg. */
  	case FUTEX_LOCK_PI:
  		if (!futex_cmpxchg_enabled)
  			return -ENOSYS;
  		return futex_lock_pi(uaddr, fshared, val, timeout, 0);
  	case FUTEX_UNLOCK_PI:
  		if (!futex_cmpxchg_enabled)
  			return -ENOSYS;
  		return futex_unlock_pi(uaddr, fshared);
  	case FUTEX_TRYLOCK_PI:
  		if (!futex_cmpxchg_enabled)
  			return -ENOSYS;
  		return futex_lock_pi(uaddr, fshared, 0, timeout, 1);
  
  	case FUTEX_WAIT_REQUEUE_PI:
  		return futex_wait_requeue_pi(uaddr, fshared, val, timeout,
  					     FUTEX_BITSET_MATCH_ANY, clockrt,
  					     uaddr2);
  
  	default:
  		return -ENOSYS;
  	}
  }
17da2bd90   Heiko Carstens   [CVE-2009-0029] S...
2509
2510
2511
  SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
  		struct timespec __user *, utime, u32 __user *, uaddr2,
  		u32, val3)
  {
  	int cmd = op & FUTEX_CMD_MASK;
  	ktime_t t, *tp = NULL;
  	struct timespec ts;
  	u32 val2 = 0;
  
  	/*
  	 * The 'utime' argument is overloaded: it is a timeout pointer for the
  	 * waiting/locking commands and a plain integer for the others below.
  	 */
  	switch (cmd) {
  	case FUTEX_WAIT:
  	case FUTEX_LOCK_PI:
  	case FUTEX_WAIT_BITSET:
  	case FUTEX_WAIT_REQUEUE_PI:
  		/* A NULL timeout pointer means no timeout is passed down. */
  		if (!utime)
  			break;
  
  		if (copy_from_user(&ts, utime, sizeof(ts)))
  			return -EFAULT;
  		if (!timespec_valid(&ts))
  			return -EINVAL;
  
  		t = timespec_to_ktime(ts);
  		/* Only FUTEX_WAIT has the current time added to its value. */
  		if (cmd == FUTEX_WAIT)
  			t = ktime_add_safe(ktime_get(), t);
  		tp = &t;
  		break;
  
  	case FUTEX_REQUEUE:
  	case FUTEX_CMP_REQUEUE:
  	case FUTEX_CMP_REQUEUE_PI:
  	case FUTEX_WAKE_OP:
  		/*
  		 * requeue parameter in 'utime' if cmd == FUTEX_*_REQUEUE_*.
  		 * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP.
  		 */
  		val2 = (u32) (unsigned long) utime;
  		break;
  
  	default:
  		break;
  	}
  
  	return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
  }
f6d107fb1   Benjamin Herrenschmidt   Give futex init a...
2541
  /*
   * One-time futex setup: probe whether the architecture provides a
   * working futex cmpxchg and initialise every entry of futex_queues.
   * Always returns 0.
   */
  static int __init futex_init(void)
  {
  	u32 probe;
  	int idx = 0;

  	/*
  	 * Deliberately issue a cmpxchg on a NULL user address. A functional
  	 * futex_atomic_cmpxchg_inatomic() implementation faults and reports
  	 * -EFAULT, while a non functional one reports -ENOSYS. Some
  	 * architectures only detect this at runtime, so find out here,
  	 * before any of the complex code paths are entered, and so that
  	 * registration of robust lists can be prevented when it is missing.
  	 */
  	probe = cmpxchg_futex_value_locked(NULL, 0, 0);
  	if (probe == -EFAULT)
  		futex_cmpxchg_enabled = 1;

  	/* Set up the priority list head and its lock for each queue. */
  	while (idx < ARRAY_SIZE(futex_queues)) {
  		plist_head_init(&futex_queues[idx].chain,
  				&futex_queues[idx].lock);
  		spin_lock_init(&futex_queues[idx].lock);
  		idx++;
  	}

  	return 0;
  }
f6d107fb1   Benjamin Herrenschmidt   Give futex init a...
2565
  /* Register futex_init() as an initcall. */
  __initcall(futex_init);