Blame view

lib/rwsem.c 8.07 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
8
9
  /* rwsem.c: R/W semaphores: contention handling functions
   *
   * Written by David Howells (dhowells@redhat.com).
   * Derived from arch/i386/kernel/semaphore.c
   */
  #include <linux/rwsem.h>
  #include <linux/sched.h>
  #include <linux/init.h>
  #include <linux/module.h>
4ea2176df   Ingo Molnar   [PATCH] lockdep: ...
10
11
12
13
14
15
16
17
18
19
20
  /*
   * Initialize an rwsem:
   */
  void __init_rwsem(struct rw_semaphore *sem, const char *name,
  		  struct lock_class_key *key)
  {
  #ifdef CONFIG_DEBUG_LOCK_ALLOC
  	/*
  	 * Make sure we are not reinitializing a held semaphore:
  	 */
  	debug_check_no_locks_freed((void *)sem, sizeof(*sem));
4dfbb9d8c   Peter Zijlstra   Lockdep: add lock...
21
  	lockdep_init_map(&sem->dep_map, name, key, 0);
4ea2176df   Ingo Molnar   [PATCH] lockdep: ...
22
23
  #endif
  	sem->count = RWSEM_UNLOCKED_VALUE;
ddb6c9b58   Thomas Gleixner   locking, rwsem: A...
24
  	raw_spin_lock_init(&sem->wait_lock);
4ea2176df   Ingo Molnar   [PATCH] lockdep: ...
25
26
27
28
  	INIT_LIST_HEAD(&sem->wait_list);
  }
  
  EXPORT_SYMBOL(__init_rwsem);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
29
30
31
32
33
34
35
  struct rwsem_waiter {
  	struct list_head list;
  	struct task_struct *task;
  	unsigned int flags;
  #define RWSEM_WAITING_FOR_READ	0x00000001
  #define RWSEM_WAITING_FOR_WRITE	0x00000002
  };
70bdc6e06   Michel Lespinasse   rwsem: lighter ac...
36
37
38
39
40
41
42
  /* Wake types for __rwsem_do_wake().  Note that RWSEM_WAKE_NO_ACTIVE and
   * RWSEM_WAKE_READ_OWNED imply that the spinlock must have been kept held
   * since the rwsem value was observed.
   */
  #define RWSEM_WAKE_ANY        0 /* Wake whatever's at head of wait list */
  #define RWSEM_WAKE_NO_ACTIVE  1 /* rwsem was observed with no active thread */
  #define RWSEM_WAKE_READ_OWNED 2 /* rwsem was observed to be read owned */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
43
44
45
46
47
  /*
   * handle the lock release when processes blocked on it that can now run
   * - if we come here from up_xxxx(), then:
   *   - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed)
   *   - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so)
345af7bf3   Michel Lespinasse   rwsem: fully sepa...
48
   * - there must be someone on the queue
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
49
50
51
52
   * - the spinlock must be held by the caller
   * - woken process blocks are discarded from the list after having task zeroed
   * - writers are only woken if downgrading is false
   */
70bdc6e06   Michel Lespinasse   rwsem: lighter ac...
53
54
  static struct rw_semaphore *
  __rwsem_do_wake(struct rw_semaphore *sem, int wake_type)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
55
56
57
58
  {
  	struct rwsem_waiter *waiter;
  	struct task_struct *tsk;
  	struct list_head *next;
fd41b3343   Michel Lespinasse   rwsem: let RWSEM_...
59
  	signed long oldcount, woken, loop, adjustment;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
60

345af7bf3   Michel Lespinasse   rwsem: fully sepa...
61
62
63
  	waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
  	if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE))
  		goto readers_only;
70bdc6e06   Michel Lespinasse   rwsem: lighter ac...
64
  	if (wake_type == RWSEM_WAKE_READ_OWNED)
424acaaeb   Michel Lespinasse   rwsem: wake queue...
65
66
67
  		/* Another active reader was observed, so wakeup is not
  		 * likely to succeed. Save the atomic op.
  		 */
345af7bf3   Michel Lespinasse   rwsem: fully sepa...
68
  		goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
69

345af7bf3   Michel Lespinasse   rwsem: fully sepa...
70
71
72
  	/* There's a writer at the front of the queue - try to grant it the
  	 * write lock.  However, we only wake this writer if we can transition
  	 * the active part of the count from 0 -> 1
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
73
  	 */
fd41b3343   Michel Lespinasse   rwsem: let RWSEM_...
74
75
76
  	adjustment = RWSEM_ACTIVE_WRITE_BIAS;
  	if (waiter->list.next == &sem->wait_list)
  		adjustment -= RWSEM_WAITING_BIAS;
345af7bf3   Michel Lespinasse   rwsem: fully sepa...
77
   try_again_write:
fd41b3343   Michel Lespinasse   rwsem: let RWSEM_...
78
  	oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
79
  	if (oldcount & RWSEM_ACTIVE_MASK)
345af7bf3   Michel Lespinasse   rwsem: fully sepa...
80
81
  		/* Someone grabbed the sem already */
  		goto undo_write;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
82
83
84
85
86
87
88
  
  	/* We must be careful not to touch 'waiter' after we set ->task = NULL.
  	 * It is an allocated on the waiter's stack and may become invalid at
  	 * any time after that point (due to a wakeup from another source).
  	 */
  	list_del(&waiter->list);
  	tsk = waiter->task;
d59dd4620   Andrew Morton   [PATCH] use smp_m...
89
  	smp_mb();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
90
91
92
93
  	waiter->task = NULL;
  	wake_up_process(tsk);
  	put_task_struct(tsk);
  	goto out;
345af7bf3   Michel Lespinasse   rwsem: fully sepa...
94
   readers_only:
70bdc6e06   Michel Lespinasse   rwsem: lighter ac...
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
  	/* If we come here from up_xxxx(), another thread might have reached
  	 * rwsem_down_failed_common() before we acquired the spinlock and
  	 * woken up a waiter, making it now active.  We prefer to check for
  	 * this first in order to not spend too much time with the spinlock
  	 * held if we're not going to be able to wake up readers in the end.
  	 *
  	 * Note that we do not need to update the rwsem count: any writer
  	 * trying to acquire rwsem will run rwsem_down_write_failed() due
  	 * to the waiting threads and block trying to acquire the spinlock.
  	 *
  	 * We use a dummy atomic update in order to acquire the cache line
  	 * exclusively since we expect to succeed and run the final rwsem
  	 * count adjustment pretty soon.
  	 */
  	if (wake_type == RWSEM_WAKE_ANY &&
424acaaeb   Michel Lespinasse   rwsem: wake queue...
110
111
  	    rwsem_atomic_update(0, sem) < RWSEM_WAITING_BIAS)
  		/* Someone grabbed the sem for write already */
70bdc6e06   Michel Lespinasse   rwsem: lighter ac...
112
  		goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
113

345af7bf3   Michel Lespinasse   rwsem: fully sepa...
114
115
116
  	/* Grant an infinite number of read locks to the readers at the front
  	 * of the queue.  Note we increment the 'active part' of the count by
  	 * the number of readers before waking any processes up.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
117
  	 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
118
119
120
121
122
123
124
125
126
127
128
  	woken = 0;
  	do {
  		woken++;
  
  		if (waiter->list.next == &sem->wait_list)
  			break;
  
  		waiter = list_entry(waiter->list.next,
  					struct rwsem_waiter, list);
  
  	} while (waiter->flags & RWSEM_WAITING_FOR_READ);
fd41b3343   Michel Lespinasse   rwsem: let RWSEM_...
129
130
131
132
  	adjustment = woken * RWSEM_ACTIVE_READ_BIAS;
  	if (waiter->flags & RWSEM_WAITING_FOR_READ)
  		/* hit end of list above */
  		adjustment -= RWSEM_WAITING_BIAS;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
133

fd41b3343   Michel Lespinasse   rwsem: let RWSEM_...
134
  	rwsem_atomic_add(adjustment, sem);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
135
136
  
  	next = sem->wait_list.next;
fd41b3343   Michel Lespinasse   rwsem: let RWSEM_...
137
  	for (loop = woken; loop > 0; loop--) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
138
139
140
  		waiter = list_entry(next, struct rwsem_waiter, list);
  		next = waiter->list.next;
  		tsk = waiter->task;
d59dd4620   Andrew Morton   [PATCH] use smp_m...
141
  		smp_mb();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
142
143
144
145
146
147
148
149
150
  		waiter->task = NULL;
  		wake_up_process(tsk);
  		put_task_struct(tsk);
  	}
  
  	sem->wait_list.next = next;
  	next->prev = &sem->wait_list;
  
   out:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
151
  	return sem;
91af70814   Michel Lespinasse   rwsem: Test for n...
152
153
  	/* undo the change to the active count, but check for a transition
  	 * 1->0 */
345af7bf3   Michel Lespinasse   rwsem: fully sepa...
154
   undo_write:
fd41b3343   Michel Lespinasse   rwsem: let RWSEM_...
155
  	if (rwsem_atomic_update(-adjustment, sem) & RWSEM_ACTIVE_MASK)
345af7bf3   Michel Lespinasse   rwsem: fully sepa...
156
157
  		goto out;
  	goto try_again_write;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
158
159
160
161
162
  }
  
  /*
   * wait for a lock to be granted
   */
c7af77b58   Livio Soares   sched: mark rwsem...
163
  static struct rw_semaphore __sched *
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
164
  rwsem_down_failed_common(struct rw_semaphore *sem,
a8618a0e8   Michel Lespinasse   rwsem: smaller wr...
165
  			 unsigned int flags, signed long adjustment)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
166
  {
a8618a0e8   Michel Lespinasse   rwsem: smaller wr...
167
  	struct rwsem_waiter waiter;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
168
169
170
171
172
173
  	struct task_struct *tsk = current;
  	signed long count;
  
  	set_task_state(tsk, TASK_UNINTERRUPTIBLE);
  
  	/* set up my own style of waitqueue */
ddb6c9b58   Thomas Gleixner   locking, rwsem: A...
174
  	raw_spin_lock_irq(&sem->wait_lock);
a8618a0e8   Michel Lespinasse   rwsem: smaller wr...
175
176
  	waiter.task = tsk;
  	waiter.flags = flags;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
177
  	get_task_struct(tsk);
fd41b3343   Michel Lespinasse   rwsem: let RWSEM_...
178
179
  	if (list_empty(&sem->wait_list))
  		adjustment += RWSEM_WAITING_BIAS;
a8618a0e8   Michel Lespinasse   rwsem: smaller wr...
180
  	list_add_tail(&waiter.list, &sem->wait_list);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
181

70bdc6e06   Michel Lespinasse   rwsem: lighter ac...
182
  	/* we're now waiting on the lock, but no longer actively locking */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
183
  	count = rwsem_atomic_update(adjustment, sem);
424acaaeb   Michel Lespinasse   rwsem: wake queue...
184
185
186
187
188
189
190
  	/* If there are no active locks, wake the front queued process(es) up.
  	 *
  	 * Alternatively, if we're called from a failed down_write(), there
  	 * were already threads queued before us and there are no active
  	 * writers, the lock must be read owned; so we try to wake any read
  	 * locks that were queued ahead of us. */
  	if (count == RWSEM_WAITING_BIAS)
70bdc6e06   Michel Lespinasse   rwsem: lighter ac...
191
  		sem = __rwsem_do_wake(sem, RWSEM_WAKE_NO_ACTIVE);
424acaaeb   Michel Lespinasse   rwsem: wake queue...
192
193
194
  	else if (count > RWSEM_WAITING_BIAS &&
  		 adjustment == -RWSEM_ACTIVE_WRITE_BIAS)
  		sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
195

ddb6c9b58   Thomas Gleixner   locking, rwsem: A...
196
  	raw_spin_unlock_irq(&sem->wait_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
197
198
199
  
  	/* wait to be given the lock */
  	for (;;) {
a8618a0e8   Michel Lespinasse   rwsem: smaller wr...
200
  		if (!waiter.task)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
201
202
203
204
205
206
207
208
209
210
211
212
213
  			break;
  		schedule();
  		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
  	}
  
  	tsk->state = TASK_RUNNING;
  
  	return sem;
  }
  
  /*
   * wait for the read lock to be granted
   */
d12337542   Thomas Gleixner   rwsem: Remove red...
214
  struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
215
  {
a8618a0e8   Michel Lespinasse   rwsem: smaller wr...
216
217
  	return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_READ,
  					-RWSEM_ACTIVE_READ_BIAS);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
218
219
220
221
222
  }
  
  /*
   * wait for the write lock to be granted
   */
d12337542   Thomas Gleixner   rwsem: Remove red...
223
  struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
224
  {
a8618a0e8   Michel Lespinasse   rwsem: smaller wr...
225
226
  	return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_WRITE,
  					-RWSEM_ACTIVE_WRITE_BIAS);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
227
228
229
230
231
232
  }
  
  /*
   * handle waking up a waiter on the semaphore
   * - up_read/up_write has decremented the active part of count if we come here
   */
d12337542   Thomas Gleixner   rwsem: Remove red...
233
  struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
234
235
  {
  	unsigned long flags;
ddb6c9b58   Thomas Gleixner   locking, rwsem: A...
236
  	raw_spin_lock_irqsave(&sem->wait_lock, flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
237
238
239
  
  	/* do nothing if list empty */
  	if (!list_empty(&sem->wait_list))
70bdc6e06   Michel Lespinasse   rwsem: lighter ac...
240
  		sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
241

ddb6c9b58   Thomas Gleixner   locking, rwsem: A...
242
  	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
243

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
244
245
246
247
248
249
250
251
  	return sem;
  }
  
  /*
   * downgrade a write lock into a read lock
   * - caller incremented waiting part of count and discovered it still negative
   * - just wake up any readers at the front of the queue
   */
d12337542   Thomas Gleixner   rwsem: Remove red...
252
  struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
253
254
  {
  	unsigned long flags;
ddb6c9b58   Thomas Gleixner   locking, rwsem: A...
255
  	raw_spin_lock_irqsave(&sem->wait_lock, flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
256
257
258
  
  	/* do nothing if list empty */
  	if (!list_empty(&sem->wait_list))
70bdc6e06   Michel Lespinasse   rwsem: lighter ac...
259
  		sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
260

ddb6c9b58   Thomas Gleixner   locking, rwsem: A...
261
  	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
262

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
263
264
265
266
267
268
269
  	return sem;
  }
  
  EXPORT_SYMBOL(rwsem_down_read_failed);
  EXPORT_SYMBOL(rwsem_down_write_failed);
  EXPORT_SYMBOL(rwsem_wake);
  EXPORT_SYMBOL(rwsem_downgrade_wake);