kernel/locking/rwsem-xadd.c

  /* rwsem.c: R/W semaphores: contention handling functions
   *
   * Written by David Howells (dhowells@redhat.com).
   * Derived from arch/i386/kernel/semaphore.c
 *
 * Writer lock-stealing by Alex Shi <alex.shi@intel.com>
 * and Michel Lespinasse <walken@google.com>
 *
 * Optimistic spinning by Tim Chen <tim.c.chen@intel.com>
 * and Davidlohr Bueso <davidlohr@hp.com>. Based on mutexes.
   */
  #include <linux/rwsem.h>
  #include <linux/sched.h>
  #include <linux/init.h>
#include <linux/export.h>
#include <linux/sched/rt.h>
#include <linux/osq_lock.h>

#include "rwsem.h"

  /*
   * Guide to the rw_semaphore's count field for common values.
   * (32-bit case illustrated, similar for 64-bit)
   *
   * 0x0000000X	(1) X readers active or attempting lock, no writer waiting
   *		    X = #active_readers + #readers attempting to lock
   *		    (X*ACTIVE_BIAS)
   *
   * 0x00000000	rwsem is unlocked, and no one is waiting for the lock or
   *		attempting to read lock or write lock.
   *
   * 0xffff000X	(1) X readers active or attempting lock, with waiters for lock
   *		    X = #active readers + # readers attempting lock
   *		    (X*ACTIVE_BIAS + WAITING_BIAS)
   *		(2) 1 writer attempting lock, no waiters for lock
   *		    X-1 = #active readers + #readers attempting lock
   *		    ((X-1)*ACTIVE_BIAS + ACTIVE_WRITE_BIAS)
   *		(3) 1 writer active, no waiters for lock
   *		    X-1 = #active readers + #readers attempting lock
   *		    ((X-1)*ACTIVE_BIAS + ACTIVE_WRITE_BIAS)
   *
   * 0xffff0001	(1) 1 reader active or attempting lock, waiters for lock
   *		    (WAITING_BIAS + ACTIVE_BIAS)
   *		(2) 1 writer active or attempting lock, no waiters for lock
   *		    (ACTIVE_WRITE_BIAS)
   *
   * 0xffff0000	(1) There are writers or readers queued but none active
   *		    or in the process of attempting lock.
   *		    (WAITING_BIAS)
   *		Note: writer can attempt to steal lock for this count by adding
   *		ACTIVE_WRITE_BIAS in cmpxchg and checking the old count
   *
   * 0xfffe0001	(1) 1 writer active, or attempting lock. Waiters on queue.
   *		    (ACTIVE_WRITE_BIAS + WAITING_BIAS)
   *
 * Note: Readers attempt to lock by adding ACTIVE_BIAS in down_read and
 *	 checking that the count becomes more than 0 for successful lock
 *	 acquisition, i.e. the case where there are only readers or nobody
 *	 has the lock (1st and 2nd case above).
   *
 *	 Writers attempt to lock by adding ACTIVE_WRITE_BIAS in down_write and
 *	 checking that the count becomes ACTIVE_WRITE_BIAS for successful lock
 *	 acquisition (i.e. nobody else has the lock or is attempting it).  If
   *	 unsuccessful, in rwsem_down_write_failed, we'll check to see if there
   *	 are only waiters but none active (5th case above), and attempt to
   *	 steal the lock.
   *
   */
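
/*
 * For reference, a sketch of the bias constants the guide above refers to,
 * as typically defined for the 32-bit case in the per-arch rwsem headers
 * (illustrative only; they are not defined in this file):
 *
 *	#define RWSEM_UNLOCKED_VALUE		0x00000000L
 *	#define RWSEM_ACTIVE_BIAS		0x00000001L
 *	#define RWSEM_ACTIVE_MASK		0x0000ffffL
 *	#define RWSEM_WAITING_BIAS		(-RWSEM_ACTIVE_MASK-1)
 *	#define RWSEM_ACTIVE_READ_BIAS		RWSEM_ACTIVE_BIAS
 *	#define RWSEM_ACTIVE_WRITE_BIAS	(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
 *
 * e.g. a single writer holding the lock gives 0xffff0000 + 0x00000001 =
 * 0xffff0001, matching the ACTIVE_WRITE_BIAS case above.
 */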
  
  /*
   * Initialize an rwsem:
   */
  void __init_rwsem(struct rw_semaphore *sem, const char *name,
  		  struct lock_class_key *key)
  {
  #ifdef CONFIG_DEBUG_LOCK_ALLOC
  	/*
  	 * Make sure we are not reinitializing a held semaphore:
  	 */
  	debug_check_no_locks_freed((void *)sem, sizeof(*sem));
	lockdep_init_map(&sem->dep_map, name, key, 0);
#endif
	atomic_long_set(&sem->count, RWSEM_UNLOCKED_VALUE);
	raw_spin_lock_init(&sem->wait_lock);
	INIT_LIST_HEAD(&sem->wait_list);
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
	sem->owner = NULL;
	osq_lock_init(&sem->osq);
#endif
  }
  
  EXPORT_SYMBOL(__init_rwsem);
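
/*
 * Typical initialization, for context: the init_rwsem() wrapper in
 * include/linux/rwsem.h supplies the name and lockdep class key, roughly:
 *
 *	#define init_rwsem(sem)					\
 *	do {							\
 *		static struct lock_class_key __key;		\
 *								\
 *		__init_rwsem((sem), #sem, &__key);		\
 *	} while (0)
 */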
  enum rwsem_waiter_type {
  	RWSEM_WAITING_FOR_WRITE,
  	RWSEM_WAITING_FOR_READ
  };

struct rwsem_waiter {
	struct list_head list;
	struct task_struct *task;
	enum rwsem_waiter_type type;
};

  enum rwsem_wake_type {
  	RWSEM_WAKE_ANY,		/* Wake whatever's at head of wait list */
  	RWSEM_WAKE_READERS,	/* Wake readers only */
  	RWSEM_WAKE_READ_OWNED	/* Waker thread holds the read lock */
  };
/*
 * handle the lock release when processes blocked on it can now run
 * - if we come here from up_xxxx(), then:
 *   - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed)
 *   - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so)
 * - there must be someone on the queue
 * - the wait_lock must be held by the caller
 * - tasks are marked for wakeup, the caller must later invoke wake_up_q()
 *   to actually wake up the blocked task(s) and drop the reference count,
 *   preferably when the wait_lock is released
 * - woken process blocks are discarded from the list after having their
 *   task zeroed
 * - writers are only marked for wakeup if wake_type is RWSEM_WAKE_ANY
 */
  static void __rwsem_mark_wake(struct rw_semaphore *sem,
  			      enum rwsem_wake_type wake_type,
  			      struct wake_q_head *wake_q)
{
	struct rwsem_waiter *waiter, *tmp;
	long oldcount, woken = 0, adjustment = 0;

	/*
	 * Take a peek at the queue head waiter such that we can determine
	 * the wakeup(s) to perform.
	 */
	waiter = list_first_entry(&sem->wait_list, struct rwsem_waiter, list);

  	if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
		if (wake_type == RWSEM_WAKE_ANY) {
			/*
			 * Mark writer at the front of the queue for wakeup.
			 * Until the task is actually awoken later by the
			 * caller, other writers are able to steal it.
			 * Readers, on the other hand, will block as they
			 * will notice the queued writer.
			 */
			wake_q_add(wake_q, waiter->task);
		}

		return;
	}

	/*
	 * Writers might steal the lock before we grant it to the next reader.
	 * We prefer to do the first reader grant before counting readers
	 * so we can bail out early if a writer stole the lock.
	 */
  	if (wake_type != RWSEM_WAKE_READ_OWNED) {
  		adjustment = RWSEM_ACTIVE_READ_BIAS;
   try_reader_grant:
  		oldcount = atomic_long_fetch_add(adjustment, &sem->count);
  		if (unlikely(oldcount < RWSEM_WAITING_BIAS)) {
  			/*
  			 * If the count is still less than RWSEM_WAITING_BIAS
  			 * after removing the adjustment, it is assumed that
  			 * a writer has stolen the lock. We have to undo our
  			 * reader grant.
  			 */
  			if (atomic_long_add_return(-adjustment, &sem->count) <
  			    RWSEM_WAITING_BIAS)
				return;

  			/* Last active locker left. Retry waking readers. */
  			goto try_reader_grant;
  		}
  		/*
  		 * It is not really necessary to set it to reader-owned here,
  		 * but it gives the spinners an early indication that the
  		 * readers now have the lock.
  		 */
  		rwsem_set_reader_owned(sem);
	}

	/*
	 * Grant read locks to all of the readers at the front of the queue.
	 * We know that woken will be at least 1 as we accounted for above.
	 * Note we increment the 'active part' of the count by the number of
	 * readers before waking any processes up.
	 */
  	list_for_each_entry_safe(waiter, tmp, &sem->wait_list, list) {
  		struct task_struct *tsk;

		if (waiter->type == RWSEM_WAITING_FOR_WRITE)
			break;
		woken++;
		tsk = waiter->task;
  
  		wake_q_add(wake_q, tsk);
		list_del(&waiter->list);
		/*
		 * Ensure that the last operation is setting the reader
		 * waiter to nil such that rwsem_down_read_failed() cannot
		 * race with do_exit() by always holding a reference count
		 * to the task to wake up.
		 */
  		smp_store_release(&waiter->task, NULL);
	}

  	adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
  	if (list_empty(&sem->wait_list)) {
  		/* hit end of list above */
  		adjustment -= RWSEM_WAITING_BIAS;
  	}
  
  	if (adjustment)
  		atomic_long_add(adjustment, &sem->count);
}

  /*
 * Wait for the read lock to be granted
 */
  __visible
  struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
{
	long count, adjustment = -RWSEM_ACTIVE_READ_BIAS;
	struct rwsem_waiter waiter;
	struct task_struct *tsk = current;
	WAKE_Q(wake_q);

	waiter.task = tsk;
	waiter.type = RWSEM_WAITING_FOR_READ;

	raw_spin_lock_irq(&sem->wait_lock);
  	if (list_empty(&sem->wait_list))
  		adjustment += RWSEM_WAITING_BIAS;
	list_add_tail(&waiter.list, &sem->wait_list);

	/* we're now waiting on the lock, but no longer actively locking */
	count = atomic_long_add_return(adjustment, &sem->count);

  	/*
  	 * If there are no active locks, wake the front queued process(es).
  	 *
  	 * If there are no writers and we are first in the queue,
	 * wake our own waiter to join the existing active readers!
  	 */
  	if (count == RWSEM_WAITING_BIAS ||
  	    (count > RWSEM_WAITING_BIAS &&
  	     adjustment != -RWSEM_ACTIVE_READ_BIAS))
		__rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);

	raw_spin_unlock_irq(&sem->wait_lock);
	wake_up_q(&wake_q);

	/* wait to be given the lock */
  	while (true) {
  		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
		if (!waiter.task)
			break;
		schedule();
  	}
	__set_task_state(tsk, TASK_RUNNING);
  	return sem;
  }
  EXPORT_SYMBOL(rwsem_down_read_failed);
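
/*
 * For context, the fast path that leads here is roughly (a sketch based on
 * the asm-generic implementation, not part of this file):
 *
 *	static inline void __down_read(struct rw_semaphore *sem)
 *	{
 *		if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0))
 *			rwsem_down_read_failed(sem);
 *	}
 *
 * i.e. a reader adds ACTIVE_READ_BIAS and only takes this slow path when
 * the resulting count is negative (a writer is active or waiting).
 */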

  /*
   * This function must be called with the sem->wait_lock held to prevent
   * race conditions between checking the rwsem wait list and setting the
   * sem->count accordingly.
   */
static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
{
	/*
	 * Avoid trying to acquire write lock if count isn't RWSEM_WAITING_BIAS.
	 */
  	if (count != RWSEM_WAITING_BIAS)
  		return false;
  
  	/*
  	 * Acquire the lock by trying to set it to ACTIVE_WRITE_BIAS. If there
  	 * are other tasks on the wait list, we need to add on WAITING_BIAS.
  	 */
  	count = list_is_singular(&sem->wait_list) ?
  			RWSEM_ACTIVE_WRITE_BIAS :
  			RWSEM_ACTIVE_WRITE_BIAS + RWSEM_WAITING_BIAS;
	if (atomic_long_cmpxchg_acquire(&sem->count, RWSEM_WAITING_BIAS, count)
							== RWSEM_WAITING_BIAS) {
		rwsem_set_owner(sem);
		return true;
	}

  	return false;
  }
  #ifdef CONFIG_RWSEM_SPIN_ON_OWNER
/*
 * Try to acquire the write lock before the writer has been put on the
 * wait queue.
 */
static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
{
  	long old, count = atomic_long_read(&sem->count);

	while (true) {
		if (!(count == 0 || count == RWSEM_WAITING_BIAS))
			return false;
		old = atomic_long_cmpxchg_acquire(&sem->count, count,
				      count + RWSEM_ACTIVE_WRITE_BIAS);
  		if (old == count) {
  			rwsem_set_owner(sem);
			return true;
		}
  
  		count = old;
  	}
  }
  
  static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
  {
  	struct task_struct *owner;
	bool ret = true;

	if (need_resched())
		return false;
  
  	rcu_read_lock();
	owner = READ_ONCE(sem->owner);
	if (!rwsem_owner_is_writer(owner)) {
		/*
		 * Don't spin if the rwsem is reader-owned.
		 */
		ret = !rwsem_owner_is_reader(owner);
		goto done;
	}

  	ret = owner->on_cpu;
  done:
  	rcu_read_unlock();
  	return ret;
}

  /*
   * Return true only if we can still spin on the owner field of the rwsem.
   */
  static noinline bool rwsem_spin_on_owner(struct rw_semaphore *sem)
{
  	struct task_struct *owner = READ_ONCE(sem->owner);
  
  	if (!rwsem_owner_is_writer(owner))
  		goto out;
  	rcu_read_lock();
  	while (sem->owner == owner) {
		/*
		 * Ensure we emit the owner->on_cpu dereference _after_
		 * checking that sem->owner still matches owner. If that
		 * fails, owner might point to free()d memory; if it still
		 * matches, the rcu_read_lock() ensures the memory stays
		 * valid.
		 */
  		barrier();
  
  		/* abort spinning when need_resched or owner is not running */
  		if (!owner->on_cpu || need_resched()) {
  			rcu_read_unlock();
  			return false;
  		}

		cpu_relax_lowlatency();
  	}
  	rcu_read_unlock();
out:
	/*
  	 * If there is a new owner or the owner is not set, we continue
  	 * spinning.
	 */
	return !rwsem_owner_is_reader(READ_ONCE(sem->owner));
  }
  
  static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
  {
  	bool taken = false;
  
  	preempt_disable();
  
  	/* sem->wait_lock should not be held when doing optimistic spinning */
  	if (!rwsem_can_spin_on_owner(sem))
  		goto done;
  
  	if (!osq_lock(&sem->osq))
  		goto done;
  	/*
  	 * Optimistically spin on the owner field and attempt to acquire the
  	 * lock whenever the owner changes. Spinning will be stopped when:
  	 *  1) the owning writer isn't running; or
  	 *  2) readers own the lock as we can't determine if they are
  	 *     actively running or not.
  	 */
  	while (rwsem_spin_on_owner(sem)) {
		/*
		 * Try to acquire the lock
		 */
  		if (rwsem_try_write_lock_unqueued(sem)) {
  			taken = true;
  			break;
  		}
  
  		/*
  		 * When there's no owner, we might have preempted between the
  		 * owner acquiring the lock and setting the owner field. If
		 * we're an RT task, that will live-lock because we won't let
  		 * the owner complete.
  		 */
  		if (!sem->owner && (need_resched() || rt_task(current)))
  			break;
  
  		/*
  		 * The cpu_relax() call is a compiler barrier which forces
  		 * everything in this loop to be re-loaded. We don't need
  		 * memory barriers as we'll eventually observe the right
  		 * values at the cost of a few extra spins.
  		 */
		cpu_relax_lowlatency();
  	}
  	osq_unlock(&sem->osq);
  done:
  	preempt_enable();
  	return taken;
  }
  /*
 * Return true if the rwsem has an active spinner
   */
  static inline bool rwsem_has_spinner(struct rw_semaphore *sem)
  {
  	return osq_is_locked(&sem->osq);
  }
  #else
  static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
  {
  	return false;
  }
  
  static inline bool rwsem_has_spinner(struct rw_semaphore *sem)
  {
  	return false;
  }
  #endif
  
  /*
   * Wait until we successfully acquire the write lock
 */
  static inline struct rw_semaphore *
  __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
{
	long count;
	bool waiting = true; /* any queued threads before us */
	struct rwsem_waiter waiter;
	struct rw_semaphore *ret = sem;
	WAKE_Q(wake_q);

	/* undo write bias from down_write operation, stop active locking */
  	count = atomic_long_sub_return(RWSEM_ACTIVE_WRITE_BIAS, &sem->count);
  
  	/* do optimistic spinning and steal lock if possible */
  	if (rwsem_optimistic_spin(sem))
  		return sem;
  
  	/*
	 * Optimistic spinning failed; proceed to the slowpath
  	 * and block until we can acquire the sem.
  	 */
  	waiter.task = current;
	waiter.type = RWSEM_WAITING_FOR_WRITE;

	raw_spin_lock_irq(&sem->wait_lock);

	/* account for this before adding a new element to the list */
	if (list_empty(&sem->wait_list))
		waiting = false;

  	list_add_tail(&waiter.list, &sem->wait_list);
  
  	/* we're now waiting on the lock, but no longer actively locking */
	if (waiting) {
		count = atomic_long_read(&sem->count);

  		/*
		 * If there were already threads queued before us and there are
		 * no active writers, the lock must be read owned; so we try
		 * to wake any readers that were queued ahead of us.
		 */
  		if (count > RWSEM_WAITING_BIAS) {
  			WAKE_Q(wake_q);
			__rwsem_mark_wake(sem, RWSEM_WAKE_READERS, &wake_q);
  			/*
  			 * The wakeup is normally called _after_ the wait_lock
  			 * is released, but given that we are proactively waking
  			 * readers we can deal with the wake_q overhead as it is
  			 * similar to releasing and taking the wait_lock again
  			 * for attempting rwsem_try_write_lock().
  			 */
  			wake_up_q(&wake_q);
  		}

	} else
		count = atomic_long_add_return(RWSEM_WAITING_BIAS, &sem->count);

	/* wait until we successfully acquire the lock */
	set_current_state(state);
  	while (true) {
		if (rwsem_try_write_lock(count, sem))
			break;
		raw_spin_unlock_irq(&sem->wait_lock);
  
  		/* Block until there are no active lockers. */
  		do {
			if (signal_pending_state(state, current))
				goto out_nolock;
			schedule();
			set_current_state(state);
		} while ((count = atomic_long_read(&sem->count)) & RWSEM_ACTIVE_MASK);

		raw_spin_lock_irq(&sem->wait_lock);
  	}
	__set_current_state(TASK_RUNNING);
  	list_del(&waiter.list);
  	raw_spin_unlock_irq(&sem->wait_lock);
  	return ret;
  
  out_nolock:
  	__set_current_state(TASK_RUNNING);
  	raw_spin_lock_irq(&sem->wait_lock);
  	list_del(&waiter.list);
  	if (list_empty(&sem->wait_list))
		atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count);
	else
		__rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
	raw_spin_unlock_irq(&sem->wait_lock);
	wake_up_q(&wake_q);
  
  	return ERR_PTR(-EINTR);
  }
  
  __visible struct rw_semaphore * __sched
  rwsem_down_write_failed(struct rw_semaphore *sem)
  {
  	return __rwsem_down_write_failed_common(sem, TASK_UNINTERRUPTIBLE);
  }
  EXPORT_SYMBOL(rwsem_down_write_failed);
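
/*
 * For context, the corresponding fast path is roughly (a sketch based on
 * the asm-generic implementation, not part of this file):
 *
 *	static inline void __down_write(struct rw_semaphore *sem)
 *	{
 *		long tmp;
 *
 *		tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS,
 *						     &sem->count);
 *		if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS))
 *			rwsem_down_write_failed(sem);
 *	}
 *
 * i.e. the writer takes this slow path unless the count was exactly zero
 * (no readers, writers or waiters) before the add.
 */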
  __visible struct rw_semaphore * __sched
  rwsem_down_write_failed_killable(struct rw_semaphore *sem)
  {
  	return __rwsem_down_write_failed_common(sem, TASK_KILLABLE);
  }
  EXPORT_SYMBOL(rwsem_down_write_failed_killable);
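
/*
 * The killable variant is reached the same way; the fast path only has to
 * propagate the error, roughly (a sketch, not part of this file):
 *
 *	static inline int __down_write_killable(struct rw_semaphore *sem)
 *	{
 *		if (unlikely(atomic_long_add_return_acquire(
 *				RWSEM_ACTIVE_WRITE_BIAS, &sem->count) !=
 *				RWSEM_ACTIVE_WRITE_BIAS))
 *			if (IS_ERR(rwsem_down_write_failed_killable(sem)))
 *				return -EINTR;
 *		return 0;
 *	}
 *
 * which is where the ERR_PTR(-EINTR) from out_nolock above ends up.
 */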
  /*
   * handle waking up a waiter on the semaphore
   * - up_read/up_write has decremented the active part of count if we come here
   */
  __visible
  struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
{
	unsigned long flags;
	WAKE_Q(wake_q);

  	/*
  	 * If a spinner is present, it is not necessary to do the wakeup.
  	 * Try to do wakeup only if the trylock succeeds to minimize
  	 * spinlock contention which may introduce too much delay in the
  	 * unlock operation.
  	 *
  	 *    spinning writer		up_write/up_read caller
  	 *    ---------------		-----------------------
  	 * [S]   osq_unlock()		[L]   osq
  	 *	 MB			      RMB
  	 * [RmW] rwsem_try_write_lock() [RmW] spin_trylock(wait_lock)
  	 *
  	 * Here, it is important to make sure that there won't be a missed
  	 * wakeup while the rwsem is free and the only spinning writer goes
  	 * to sleep without taking the rwsem. Even when the spinning writer
  	 * is just going to break out of the waiting loop, it will still do
  	 * a trylock in rwsem_down_write_failed() before sleeping. IOW, if
  	 * rwsem_has_spinner() is true, it will guarantee at least one
  	 * trylock attempt on the rwsem later on.
  	 */
  	if (rwsem_has_spinner(sem)) {
  		/*
  		 * The smp_rmb() here is to make sure that the spinner
  		 * state is consulted before reading the wait_lock.
  		 */
  		smp_rmb();
  		if (!raw_spin_trylock_irqsave(&sem->wait_lock, flags))
  			return sem;
  		goto locked;
  	}
	raw_spin_lock_irqsave(&sem->wait_lock, flags);
  locked:
  	if (!list_empty(&sem->wait_list))
  		__rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
  	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
  	wake_up_q(&wake_q);
  	return sem;
  }
  EXPORT_SYMBOL(rwsem_wake);
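
/*
 * For context, rwsem_wake() is reached from the unlock fast paths, roughly
 * (a sketch based on the asm-generic implementation, not part of this
 * file):
 *
 *	static inline void __up_write(struct rw_semaphore *sem)
 *	{
 *		if (unlikely(atomic_long_sub_return_release(
 *				RWSEM_ACTIVE_WRITE_BIAS, &sem->count) < 0))
 *			rwsem_wake(sem);
 *	}
 *
 * A negative result means the waiting part of the count is set, i.e.
 * someone is queued and may need waking.
 */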
  
  /*
   * downgrade a write lock into a read lock
   * - caller incremented waiting part of count and discovered it still negative
   * - just wake up any readers at the front of the queue
   */
  __visible
  struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
{
	unsigned long flags;
	WAKE_Q(wake_q);

  	raw_spin_lock_irqsave(&sem->wait_lock, flags);
  	if (!list_empty(&sem->wait_list))
  		__rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q);
  	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
  	wake_up_q(&wake_q);
  	return sem;
  }
  EXPORT_SYMBOL(rwsem_downgrade_wake);
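
/*
 * For context, the downgrade fast path is roughly (a sketch based on the
 * asm-generic implementation, not part of this file):
 *
 *	static inline void __downgrade_write(struct rw_semaphore *sem)
 *	{
 *		long tmp;
 *
 *		tmp = atomic_long_add_return_release(-RWSEM_WAITING_BIAS,
 *						     &sem->count);
 *		if (tmp < 0)
 *			rwsem_downgrade_wake(sem);
 *	}
 *
 * Adding -RWSEM_WAITING_BIAS converts ACTIVE_WRITE_BIAS into ACTIVE_BIAS;
 * the result stays negative only if waiters are still queued.
 */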