lib/rwsem.c

/* rwsem.c: R/W semaphores: contention handling functions
 *
 * Written by David Howells (dhowells@redhat.com).
 * Derived from arch/i386/kernel/semaphore.c
 */
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/module.h>

/*
 * Initialize an rwsem:
 */
void __init_rwsem(struct rw_semaphore *sem, const char *name,
		  struct lock_class_key *key)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	/*
	 * Make sure we are not reinitializing a held semaphore:
	 */
	debug_check_no_locks_freed((void *)sem, sizeof(*sem));
	lockdep_init_map(&sem->dep_map, name, key, 0);
#endif
	sem->count = RWSEM_UNLOCKED_VALUE;
	spin_lock_init(&sem->wait_lock);
	INIT_LIST_HEAD(&sem->wait_list);
}

EXPORT_SYMBOL(__init_rwsem);

struct rwsem_waiter {
	struct list_head list;
	struct task_struct *task;
	unsigned int flags;
#define RWSEM_WAITING_FOR_READ	0x00000001
#define RWSEM_WAITING_FOR_WRITE	0x00000002
};

/* Wake types for __rwsem_do_wake().  Note that RWSEM_WAKE_NO_ACTIVE and
 * RWSEM_WAKE_READ_OWNED imply that the spinlock must have been kept held
 * since the rwsem value was observed.
 */
#define RWSEM_WAKE_ANY        0 /* Wake whatever's at head of wait list */
#define RWSEM_WAKE_NO_ACTIVE  1 /* rwsem was observed with no active thread */
#define RWSEM_WAKE_READ_OWNED 2 /* rwsem was observed to be read owned */
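
/* A sketch of the count layout this file assumes (values as in the 32-bit
 * asm/rwsem.h; other architectures use a wider but analogous encoding):
 *
 *	RWSEM_UNLOCKED_VALUE	0x00000000
 *	RWSEM_ACTIVE_BIAS	0x00000001
 *	RWSEM_ACTIVE_MASK	0x0000ffff
 *	RWSEM_WAITING_BIAS	(-0x00010000)
 *	RWSEM_ACTIVE_READ_BIAS	RWSEM_ACTIVE_BIAS
 *	RWSEM_ACTIVE_WRITE_BIAS	(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
 *
 * The low half of count holds the number of active lockers; the negative
 * high half flags the presence of waiters.
 */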

/*
 * handle the lock release when processes blocked on it can now run
 * - if we come here from up_xxxx(), then:
 *   - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed)
 *   - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so)
 * - there must be someone on the queue
 * - the spinlock must be held by the caller
 * - woken process blocks are discarded from the list after having task zeroed
 * - writers are only woken if downgrading is false
 */
static struct rw_semaphore *
__rwsem_do_wake(struct rw_semaphore *sem, int wake_type)
{
	struct rwsem_waiter *waiter;
	struct task_struct *tsk;
	struct list_head *next;
	signed long oldcount, woken, loop, adjustment;

	waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
	if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE))
		goto readers_only;

	if (wake_type == RWSEM_WAKE_READ_OWNED)
		/* Another active reader was observed, so wakeup is not
		 * likely to succeed. Save the atomic op.
		 */
		goto out;

	/* There's a writer at the front of the queue - try to grant it the
	 * write lock.  However, we only wake this writer if we can transition
	 * the active part of the count from 0 -> 1
	 */
	adjustment = RWSEM_ACTIVE_WRITE_BIAS;
	if (waiter->list.next == &sem->wait_list)
		adjustment -= RWSEM_WAITING_BIAS;
 try_again_write:
	oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
	if (oldcount & RWSEM_ACTIVE_MASK)
		/* Someone grabbed the sem already */
		goto undo_write;
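
	/* To illustrate the transition just attempted (using the 32-bit
	 * values sketched above, purely as an example): granting the sole
	 * queued writer adds ACTIVE_WRITE_BIAS - WAITING_BIAS = 1, so a
	 * quiescent count of -0x10000 moves to -0xffff, i.e. one active
	 * writer and no remaining waiter flag.  rwsem_atomic_update()
	 * returns the new value, so subtracting 'adjustment' recovers the
	 * old count; any active bits in it mean another thread beat us to
	 * the lock.
	 */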

	/* We must be careful not to touch 'waiter' after we set ->task = NULL.
	 * It is allocated on the waiter's stack and may become invalid at
	 * any time after that point (due to a wakeup from another source).
	 */
	list_del(&waiter->list);
	tsk = waiter->task;
	smp_mb();
	waiter->task = NULL;
	wake_up_process(tsk);
	put_task_struct(tsk);
	goto out;
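
	/* Note the wakeup protocol as this file uses it: the smp_mb()
	 * orders the list_del() and the load of waiter->task before the
	 * NULL store becomes visible, and clearing ->task is what tells
	 * the sleeper in rwsem_down_failed_common() that it now owns the
	 * lock.  The woken task may pop its stack frame the instant ->task
	 * reads NULL, which is why 'tsk' was saved and referenced first.
	 */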

 readers_only:
	/* If we come here from up_xxxx(), another thread might have reached
	 * rwsem_down_failed_common() before we acquired the spinlock and
	 * woken up a waiter, making it now active.  We prefer to check for
	 * this first in order to not spend too much time with the spinlock
	 * held if we're not going to be able to wake up readers in the end.
	 *
	 * Note that we do not need to update the rwsem count: any writer
	 * trying to acquire rwsem will run rwsem_down_write_failed() due
	 * to the waiting threads and block trying to acquire the spinlock.
	 *
	 * We use a dummy atomic update in order to acquire the cache line
	 * exclusively since we expect to succeed and run the final rwsem
	 * count adjustment pretty soon.
	 */
	if (wake_type == RWSEM_WAKE_ANY &&
	    rwsem_atomic_update(0, sem) < RWSEM_WAITING_BIAS)
		/* Someone grabbed the sem for write already */
		goto out;
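
	/* With the 32-bit encoding sketched above (again an illustration,
	 * not normative): waiters but no active lockers gives -0x10000,
	 * and waiters plus n active readers gives -0x10000 + n, which is
	 * larger.  Only an active writer pushes count below
	 * RWSEM_WAITING_BIAS, so the comparison cheaply detects a stolen
	 * write lock.
	 */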

	/* Grant an infinite number of read locks to the readers at the front
	 * of the queue.  Note we increment the 'active part' of the count by
	 * the number of readers before waking any processes up.
	 */
	woken = 0;
	do {
		woken++;

		if (waiter->list.next == &sem->wait_list)
			break;

		waiter = list_entry(waiter->list.next,
					struct rwsem_waiter, list);

	} while (waiter->flags & RWSEM_WAITING_FOR_READ);

	adjustment = woken * RWSEM_ACTIVE_READ_BIAS;
	if (waiter->flags & RWSEM_WAITING_FOR_READ)
		/* hit end of list above */
		adjustment -= RWSEM_WAITING_BIAS;
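
	/* A worked example under the same illustrative encoding: waking
	 * three readers with a writer still queued behind them adds
	 * 3 * 0x00000001 and leaves RWSEM_WAITING_BIAS in place; if those
	 * three readers were the whole queue, the extra
	 * 'adjustment -= RWSEM_WAITING_BIAS' also cancels the waiter flag,
	 * taking a count of -0x10000 to 0x00000003.
	 */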

	rwsem_atomic_add(adjustment, sem);

	next = sem->wait_list.next;
	for (loop = woken; loop > 0; loop--) {
		waiter = list_entry(next, struct rwsem_waiter, list);
		next = waiter->list.next;
		tsk = waiter->task;
		smp_mb();
		waiter->task = NULL;
		wake_up_process(tsk);
		put_task_struct(tsk);
	}

	sem->wait_list.next = next;
	next->prev = &sem->wait_list;

 out:
	return sem;

	/* undo the change to the active count, but check for a transition
	 * 1->0 */
 undo_write:
	if (rwsem_atomic_update(-adjustment, sem) & RWSEM_ACTIVE_MASK)
		goto out;
	goto try_again_write;
}

/*
 * wait for a lock to be granted
 */
static struct rw_semaphore __sched *
rwsem_down_failed_common(struct rw_semaphore *sem,
			 unsigned int flags, signed long adjustment)
{
	struct rwsem_waiter waiter;
	struct task_struct *tsk = current;
	signed long count;

	set_task_state(tsk, TASK_UNINTERRUPTIBLE);

	/* set up my own style of waitqueue */
	spin_lock_irq(&sem->wait_lock);
	waiter.task = tsk;
	waiter.flags = flags;
	get_task_struct(tsk);

	if (list_empty(&sem->wait_list))
		adjustment += RWSEM_WAITING_BIAS;
	list_add_tail(&waiter.list, &sem->wait_list);

	/* we're now waiting on the lock, but no longer actively locking */
	count = rwsem_atomic_update(adjustment, sem);

	/* If there are no active locks, wake the front queued process(es) up.
	 *
	 * Alternatively, if we're called from a failed down_write(), there
	 * were already threads queued before us and there are no active
	 * writers, the lock must be read owned; so we try to wake any read
	 * locks that were queued ahead of us. */
	if (count == RWSEM_WAITING_BIAS)
		sem = __rwsem_do_wake(sem, RWSEM_WAKE_NO_ACTIVE);
	else if (count > RWSEM_WAITING_BIAS &&
		 adjustment == -RWSEM_ACTIVE_WRITE_BIAS)
		sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);
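
	/* Illustration with the 32-bit values: after queueing, a count of
	 * exactly RWSEM_WAITING_BIAS (-0x10000) means waiters exist but the
	 * active part is zero - every holder released between our fast-path
	 * failure and here - so we must issue the wakeup ourselves.  The
	 * writer case ('adjustment == -RWSEM_ACTIVE_WRITE_BIAS', without
	 * the waiting bias folded in) tells us other waiters were already
	 * queued; count > RWSEM_WAITING_BIAS then means the active holders
	 * are readers, so readers queued ahead of us can be granted the
	 * lock as well.
	 */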

	spin_unlock_irq(&sem->wait_lock);

	/* wait to be given the lock */
	for (;;) {
		if (!waiter.task)
			break;
		schedule();
		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
	}

	tsk->state = TASK_RUNNING;

	return sem;
}
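
/* The loop above re-checks waiter.task around schedule() rather than
 * sampling the sem itself: __rwsem_do_wake() transfers ownership before
 * clearing ->task, so a NULL read means the lock is already ours and no
 * further count manipulation is needed on this side.
 */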

/*
 * wait for the read lock to be granted
 */
asmregparm struct rw_semaphore __sched *
rwsem_down_read_failed(struct rw_semaphore *sem)
{
	return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_READ,
					-RWSEM_ACTIVE_READ_BIAS);
}

/*
 * wait for the write lock to be granted
 */
asmregparm struct rw_semaphore __sched *
rwsem_down_write_failed(struct rw_semaphore *sem)
{
	return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_WRITE,
					-RWSEM_ACTIVE_WRITE_BIAS);
}
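
/* The adjustments passed above undo the optimistic bias the fast path
 * (in the per-arch rwsem.h) already applied before it discovered
 * contention; RWSEM_WAITING_BIAS is folded in separately, and only by
 * the first thread to queue.
 */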

/*
 * handle waking up a waiter on the semaphore
 * - up_read/up_write has decremented the active part of count if we come here
 */
asmregparm struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
{
	unsigned long flags;

	spin_lock_irqsave(&sem->wait_lock, flags);

	/* do nothing if list empty */
	if (!list_empty(&sem->wait_list))
		sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);

	spin_unlock_irqrestore(&sem->wait_lock, flags);

	return sem;
}

/*
 * downgrade a write lock into a read lock
 * - caller incremented waiting part of count and discovered it still negative
 * - just wake up any readers at the front of the queue
 */
asmregparm struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
{
	unsigned long flags;

	spin_lock_irqsave(&sem->wait_lock, flags);

	/* do nothing if list empty */
	if (!list_empty(&sem->wait_list))
		sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);

	spin_unlock_irqrestore(&sem->wait_lock, flags);

	return sem;
}

EXPORT_SYMBOL(rwsem_down_read_failed);
EXPORT_SYMBOL(rwsem_down_write_failed);
EXPORT_SYMBOL(rwsem_wake);
EXPORT_SYMBOL(rwsem_downgrade_wake);
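
/* For orientation, a hedged sketch of how callers reach these slow paths
 * (down_read() and friends live in kernel/rwsem.c and asm/rwsem.h, not
 * here):
 *
 *	static DECLARE_RWSEM(my_sem);	// hypothetical example lock
 *
 *	down_read(&my_sem);	// fast path; on contention the arch stub
 *				// calls rwsem_down_read_failed() above
 *	...read-side critical section...
 *	up_read(&my_sem);	// rwsem_wake() runs only if waiters queued
 */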