lib/percpu-rwsem.c

  #include <linux/atomic.h>
  #include <linux/rwsem.h>
  #include <linux/percpu.h>
  #include <linux/wait.h>
  #include <linux/lockdep.h>
  #include <linux/percpu-rwsem.h>
  #include <linux/rcupdate.h>
  #include <linux/sched.h>
  #include <linux/errno.h>
  int __percpu_init_rwsem(struct percpu_rw_semaphore *brw,
  			const char *name, struct lock_class_key *rwsem_key)
  {
  	brw->fast_read_ctr = alloc_percpu(int);
  	if (unlikely(!brw->fast_read_ctr))
  		return -ENOMEM;
  	/* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */
  	__init_rwsem(&brw->rw_sem, name, rwsem_key);
  	atomic_set(&brw->write_ctr, 0);
  	atomic_set(&brw->slow_read_ctr, 0);
  	init_waitqueue_head(&brw->write_waitq);
  	return 0;
  }
  
  void percpu_free_rwsem(struct percpu_rw_semaphore *brw)
  {
  	free_percpu(brw->fast_read_ctr);
  	brw->fast_read_ctr = NULL; /* catch use-after-free bugs */
  }
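
  /*
   * Minimal usage sketch: a hypothetical user of this API. The example_rwsem
   * and example_data symbols below are illustrative only. Callers normally go
   * through the percpu_init_rwsem() wrapper in <linux/percpu-rwsem.h>, which
   * supplies the lockdep name and key for __percpu_init_rwsem() above, and
   * pair it with percpu_free_rwsem() on teardown.
   */
  static struct percpu_rw_semaphore example_rwsem;	/* hypothetical */
  static int example_data;				/* hypothetical state it protects */

  static int example_setup(void)
  {
  	/* allocates ->fast_read_ctr; may fail with -ENOMEM */
  	return percpu_init_rwsem(&example_rwsem);
  }

  static void example_teardown(void)
  {
  	percpu_free_rwsem(&example_rwsem);
  }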
  
  /*
   * This is the fast-path for down_read/up_read; it only needs to ensure
   * there is no pending writer (atomic_read(write_ctr) == 0) and inc/dec the
   * fast per-cpu counter. The writer uses synchronize_sched_expedited() to
   * serialize with the preempt-disabled section below.
   *
   * The nontrivial part is that we should guarantee acquire/release semantics
   * in the cases when
   *
   *	R_W: down_write() comes after up_read(), the writer should see all
   *	     changes done by the reader
   * or
   *	W_R: down_read() comes after up_write(), the reader should see all
   *	     changes done by the writer
   *
   * If this helper fails the callers rely on the normal rw_semaphore and
   * atomic_dec_and_test(), so in this case we have the necessary barriers.
   *
   * But if it succeeds we do not have any barriers; atomic_read(write_ctr) or
   * __this_cpu_add() below can be reordered with any LOAD/STORE done by the
   * reader inside the critical section. See the comments in down_write and
   * up_write below.
   */
  static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val)
  {
  	bool success = false;
  
  	preempt_disable();
  	if (likely(!atomic_read(&brw->write_ctr))) {
  		__this_cpu_add(*brw->fast_read_ctr, val);
  		success = true;
  	}
  	preempt_enable();
  
  	return success;
  }
  
  /*
   * Like the normal down_read(), this is not recursive; the writer can
   * come after the first percpu_down_read() and create a deadlock.
   *
   * Note: returns with lock_is_held(brw->rw_sem) == T for lockdep,
   * percpu_up_read() does rwsem_release(). This pairs with the usage
   * of ->rw_sem in percpu_down/up_write().
   */
  void percpu_down_read(struct percpu_rw_semaphore *brw)
  {
  	might_sleep();
  	if (likely(update_fast_ctr(brw, +1))) {
  		rwsem_acquire_read(&brw->rw_sem.dep_map, 0, 0, _RET_IP_);
  		return;
  	}
  
  	down_read(&brw->rw_sem);
  	atomic_inc(&brw->slow_read_ctr);
  	/* avoid up_read()->rwsem_release() */
  	__up_read(&brw->rw_sem);
  }
  
  void percpu_up_read(struct percpu_rw_semaphore *brw)
  {
  	rwsem_release(&brw->rw_sem.dep_map, 1, _RET_IP_);
  	if (likely(update_fast_ctr(brw, -1)))
  		return;
  
  	/* false-positive is possible but harmless */
  	if (atomic_dec_and_test(&brw->slow_read_ctr))
  		wake_up_all(&brw->write_waitq);
  }
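
  /*
   * Reader-side sketch over the hypothetical example_rwsem/example_data
   * declared above: the critical section may sleep, and in the common case
   * (no pending writer) it never touches ->rw_sem, only the per-cpu fast
   * counter via update_fast_ctr().
   */
  static int example_read(void)
  {
  	int val;

  	percpu_down_read(&example_rwsem);
  	val = example_data;
  	percpu_up_read(&example_rwsem);

  	return val;
  }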
  
  static int clear_fast_ctr(struct percpu_rw_semaphore *brw)
  {
  	unsigned int sum = 0;
  	int cpu;
  
  	for_each_possible_cpu(cpu) {
  		sum += per_cpu(*brw->fast_read_ctr, cpu);
  		per_cpu(*brw->fast_read_ctr, cpu) = 0;
  	}
  
  	return sum;
  }
  
  /*
   * A writer increments ->write_ctr to force the readers to switch to the
   * slow mode; note the atomic_read() check in update_fast_ctr().
   *
   * After that the readers can only inc/dec the slow ->slow_read_ctr counter,
   * ->fast_read_ctr is stable. Once the writer moves its sum into the slow
   * counter it represents the number of active readers.
   *
   * Finally the writer takes ->rw_sem for writing and blocks the new readers,
   * then waits until the slow counter becomes zero.
   */
  void percpu_down_write(struct percpu_rw_semaphore *brw)
  {
  	/* tell update_fast_ctr() there is a pending writer */
  	atomic_inc(&brw->write_ctr);
  	/*
  	 * 1. Ensures that write_ctr != 0 is visible to any down_read/up_read
  	 *    so that update_fast_ctr() can't succeed.
  	 *
  	 * 2. Ensures we see the result of every previous this_cpu_add() in
  	 *    update_fast_ctr().
  	 *
  	 * 3. Ensures that if any reader has exited its critical section via
  	 *    fast-path, it executes a full memory barrier before we return.
  	 *    See R_W case in the comment above update_fast_ctr().
  	 */
  	synchronize_sched_expedited();
  	/* exclude other writers, and block the new readers completely */
  	down_write(&brw->rw_sem);
  	/* nobody can use fast_read_ctr, move its sum into slow_read_ctr */
  	atomic_add(clear_fast_ctr(brw), &brw->slow_read_ctr);
  	/* wait for all readers to complete their percpu_up_read() */
  	wait_event(brw->write_waitq, !atomic_read(&brw->slow_read_ctr));
  }
  
  void percpu_up_write(struct percpu_rw_semaphore *brw)
  {
  	/* release the lock, but the readers can't use the fast-path */
  	up_write(&brw->rw_sem);
  	/*
  	 * Insert the barrier before the next fast-path in down_read,
  	 * see W_R case in the comment above update_fast_ctr().
  	 */
  	synchronize_sched_expedited();
  	/* the last writer unblocks update_fast_ctr() */
  	atomic_dec(&brw->write_ctr);
  }
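
  /*
   * Writer-side sketch over the same hypothetical example_rwsem: with this
   * implementation a full percpu_down_write()/percpu_up_write() cycle pays
   * for two synchronize_sched_expedited() calls, so it only makes sense when
   * writes are rare compared to reads.
   */
  static void example_write(int val)
  {
  	percpu_down_write(&example_rwsem);
  	example_data = val;
  	percpu_up_write(&example_rwsem);
  }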