Blame view

lib/sbitmap.c 16.3 KB
0fc479b1a   Thomas Gleixner   treewide: Replace...
1
  // SPDX-License-Identifier: GPL-2.0-only
88459642c   Omar Sandoval   blk-mq: abstract ...
2
3
4
  /*
   * Copyright (C) 2016 Facebook
   * Copyright (C) 2013-2014 Jens Axboe
88459642c   Omar Sandoval   blk-mq: abstract ...
5
   */
af8601ad4   Ingo Molnar   kasan, sched/head...
6
  #include <linux/sched.h>
98d95416d   Omar Sandoval   sbitmap: randomiz...
7
  #include <linux/random.h>
88459642c   Omar Sandoval   blk-mq: abstract ...
8
  #include <linux/sbitmap.h>
24af1ccfe   Omar Sandoval   sbitmap: add help...
9
  #include <linux/seq_file.h>
88459642c   Omar Sandoval   blk-mq: abstract ...
10

b2dbff1bb   Jens Axboe   sbitmap: flush de...
11
12
13
14
15
16
  /*
   * See if we have deferred clears that we can batch move
   */
  static inline bool sbitmap_deferred_clear(struct sbitmap *sb, int index)
  {
  	unsigned long mask, val;
b2dbff1bb   Jens Axboe   sbitmap: flush de...
17
  	bool ret = false;
fe76fc6aa   Ming Lei   sbitmap: Protect ...
18
  	unsigned long flags;
b2dbff1bb   Jens Axboe   sbitmap: flush de...
19

fe76fc6aa   Ming Lei   sbitmap: Protect ...
20
  	spin_lock_irqsave(&sb->map[index].swap_lock, flags);
b2dbff1bb   Jens Axboe   sbitmap: flush de...
21
22
23
24
25
26
27
  
  	if (!sb->map[index].cleared)
  		goto out_unlock;
  
  	/*
  	 * First get a stable cleared mask, setting the old mask to 0.
  	 */
417232880   Pavel Begunkov   sbitmap: Replace ...
28
  	mask = xchg(&sb->map[index].cleared, 0);
b2dbff1bb   Jens Axboe   sbitmap: flush de...
29
30
31
32
33
34
35
36
37
38
  
  	/*
  	 * Now clear the masked bits in our free word
  	 */
  	do {
  		val = sb->map[index].word;
  	} while (cmpxchg(&sb->map[index].word, val, val & ~mask) != val);
  
  	ret = true;
  out_unlock:
fe76fc6aa   Ming Lei   sbitmap: Protect ...
39
  	spin_unlock_irqrestore(&sb->map[index].swap_lock, flags);
b2dbff1bb   Jens Axboe   sbitmap: flush de...
40
41
  	return ret;
  }
88459642c   Omar Sandoval   blk-mq: abstract ...
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
  int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
  		      gfp_t flags, int node)
  {
  	unsigned int bits_per_word;
  	unsigned int i;
  
  	if (shift < 0) {
  		shift = ilog2(BITS_PER_LONG);
  		/*
  		 * If the bitmap is small, shrink the number of bits per word so
  		 * we spread over a few cachelines, at least. If less than 4
  		 * bits, just forget about it, it's not going to work optimally
  		 * anyway.
  		 */
  		if (depth >= 4) {
  			while ((4U << shift) > depth)
  				shift--;
  		}
  	}
  	bits_per_word = 1U << shift;
  	if (bits_per_word > BITS_PER_LONG)
  		return -EINVAL;
  
  	sb->shift = shift;
  	sb->depth = depth;
  	sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word);
  
  	if (depth == 0) {
  		sb->map = NULL;
  		return 0;
  	}
590b5b7d8   Kees Cook   treewide: kzalloc...
73
  	sb->map = kcalloc_node(sb->map_nr, sizeof(*sb->map), flags, node);
88459642c   Omar Sandoval   blk-mq: abstract ...
74
75
76
77
78
79
  	if (!sb->map)
  		return -ENOMEM;
  
  	for (i = 0; i < sb->map_nr; i++) {
  		sb->map[i].depth = min(depth, bits_per_word);
  		depth -= sb->map[i].depth;
ea86ea2cd   Jens Axboe   sbitmap: ammortiz...
80
  		spin_lock_init(&sb->map[i].swap_lock);
88459642c   Omar Sandoval   blk-mq: abstract ...
81
82
83
84
85
86
87
88
89
  	}
  	return 0;
  }
  EXPORT_SYMBOL_GPL(sbitmap_init_node);
  
  void sbitmap_resize(struct sbitmap *sb, unsigned int depth)
  {
  	unsigned int bits_per_word = 1U << sb->shift;
  	unsigned int i;
b2dbff1bb   Jens Axboe   sbitmap: flush de...
90
91
  	for (i = 0; i < sb->map_nr; i++)
  		sbitmap_deferred_clear(sb, i);
88459642c   Omar Sandoval   blk-mq: abstract ...
92
93
94
95
96
97
98
99
100
  	sb->depth = depth;
  	sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word);
  
  	for (i = 0; i < sb->map_nr; i++) {
  		sb->map[i].depth = min(depth, bits_per_word);
  		depth -= sb->map[i].depth;
  	}
  }
  EXPORT_SYMBOL_GPL(sbitmap_resize);
c05e66733   Omar Sandoval   sbitmap: add sbit...
101
102
  static int __sbitmap_get_word(unsigned long *word, unsigned long depth,
  			      unsigned int hint, bool wrap)
88459642c   Omar Sandoval   blk-mq: abstract ...
103
104
105
106
107
  {
  	unsigned int orig_hint = hint;
  	int nr;
  
  	while (1) {
c05e66733   Omar Sandoval   sbitmap: add sbit...
108
109
  		nr = find_next_zero_bit(word, depth, hint);
  		if (unlikely(nr >= depth)) {
88459642c   Omar Sandoval   blk-mq: abstract ...
110
111
112
113
114
115
116
117
118
119
120
  			/*
  			 * We started with an offset, and we didn't reset the
  			 * offset to 0 in a failure case, so start from 0 to
  			 * exhaust the map.
  			 */
  			if (orig_hint && hint && wrap) {
  				hint = orig_hint = 0;
  				continue;
  			}
  			return -1;
  		}
4ace53f1e   Omar Sandoval   sbitmap: use test...
121
  		if (!test_and_set_bit_lock(nr, word))
88459642c   Omar Sandoval   blk-mq: abstract ...
122
123
124
  			break;
  
  		hint = nr + 1;
c05e66733   Omar Sandoval   sbitmap: add sbit...
125
  		if (hint >= depth - 1)
88459642c   Omar Sandoval   blk-mq: abstract ...
126
127
128
129
130
  			hint = 0;
  	}
  
  	return nr;
  }
ea86ea2cd   Jens Axboe   sbitmap: ammortiz...
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
  static int sbitmap_find_bit_in_index(struct sbitmap *sb, int index,
  				     unsigned int alloc_hint, bool round_robin)
  {
  	int nr;
  
  	do {
  		nr = __sbitmap_get_word(&sb->map[index].word,
  					sb->map[index].depth, alloc_hint,
  					!round_robin);
  		if (nr != -1)
  			break;
  		if (!sbitmap_deferred_clear(sb, index))
  			break;
  	} while (1);
  
  	return nr;
  }
88459642c   Omar Sandoval   blk-mq: abstract ...
148
149
150
151
152
153
  int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin)
  {
  	unsigned int i, index;
  	int nr = -1;
  
  	index = SB_NR_TO_INDEX(sb, alloc_hint);
27fae429a   Jens Axboe   sbitmap: don't lo...
154
155
156
157
158
159
160
161
162
  	/*
  	 * Unless we're doing round robin tag allocation, just use the
  	 * alloc_hint to find the right word index. No point in looping
  	 * twice in find_next_zero_bit() for that case.
  	 */
  	if (round_robin)
  		alloc_hint = SB_NR_TO_BIT(sb, alloc_hint);
  	else
  		alloc_hint = 0;
88459642c   Omar Sandoval   blk-mq: abstract ...
163
  	for (i = 0; i < sb->map_nr; i++) {
ea86ea2cd   Jens Axboe   sbitmap: ammortiz...
164
165
  		nr = sbitmap_find_bit_in_index(sb, index, alloc_hint,
  						round_robin);
88459642c   Omar Sandoval   blk-mq: abstract ...
166
167
168
169
170
171
  		if (nr != -1) {
  			nr += index << sb->shift;
  			break;
  		}
  
  		/* Jump to next index. */
27fae429a   Jens Axboe   sbitmap: don't lo...
172
173
  		alloc_hint = 0;
  		if (++index >= sb->map_nr)
88459642c   Omar Sandoval   blk-mq: abstract ...
174
  			index = 0;
88459642c   Omar Sandoval   blk-mq: abstract ...
175
176
177
178
179
  	}
  
  	return nr;
  }
  EXPORT_SYMBOL_GPL(sbitmap_get);
c05e66733   Omar Sandoval   sbitmap: add sbit...
180
181
182
183
184
185
186
187
188
  int sbitmap_get_shallow(struct sbitmap *sb, unsigned int alloc_hint,
  			unsigned long shallow_depth)
  {
  	unsigned int i, index;
  	int nr = -1;
  
  	index = SB_NR_TO_INDEX(sb, alloc_hint);
  
  	for (i = 0; i < sb->map_nr; i++) {
b2dbff1bb   Jens Axboe   sbitmap: flush de...
189
  again:
c05e66733   Omar Sandoval   sbitmap: add sbit...
190
191
192
193
194
195
196
  		nr = __sbitmap_get_word(&sb->map[index].word,
  					min(sb->map[index].depth, shallow_depth),
  					SB_NR_TO_BIT(sb, alloc_hint), true);
  		if (nr != -1) {
  			nr += index << sb->shift;
  			break;
  		}
b2dbff1bb   Jens Axboe   sbitmap: flush de...
197
198
  		if (sbitmap_deferred_clear(sb, index))
  			goto again;
c05e66733   Omar Sandoval   sbitmap: add sbit...
199
200
201
202
203
204
205
206
207
208
209
210
211
  		/* Jump to next index. */
  		index++;
  		alloc_hint = index << sb->shift;
  
  		if (index >= sb->map_nr) {
  			index = 0;
  			alloc_hint = 0;
  		}
  	}
  
  	return nr;
  }
  EXPORT_SYMBOL_GPL(sbitmap_get_shallow);
88459642c   Omar Sandoval   blk-mq: abstract ...
212
213
214
215
216
  bool sbitmap_any_bit_set(const struct sbitmap *sb)
  {
  	unsigned int i;
  
  	for (i = 0; i < sb->map_nr; i++) {
b2dbff1bb   Jens Axboe   sbitmap: flush de...
217
  		if (sb->map[i].word & ~sb->map[i].cleared)
88459642c   Omar Sandoval   blk-mq: abstract ...
218
219
220
221
222
  			return true;
  	}
  	return false;
  }
  EXPORT_SYMBOL_GPL(sbitmap_any_bit_set);
ea86ea2cd   Jens Axboe   sbitmap: ammortiz...
223
  static unsigned int __sbitmap_weight(const struct sbitmap *sb, bool set)
88459642c   Omar Sandoval   blk-mq: abstract ...
224
  {
60658e0dc   Colin Ian King   sbitmap: initiali...
225
  	unsigned int i, weight = 0;
88459642c   Omar Sandoval   blk-mq: abstract ...
226
227
228
  
  	for (i = 0; i < sb->map_nr; i++) {
  		const struct sbitmap_word *word = &sb->map[i];
ea86ea2cd   Jens Axboe   sbitmap: ammortiz...
229
230
231
232
  		if (set)
  			weight += bitmap_weight(&word->word, word->depth);
  		else
  			weight += bitmap_weight(&word->cleared, word->depth);
88459642c   Omar Sandoval   blk-mq: abstract ...
233
234
235
  	}
  	return weight;
  }
ea86ea2cd   Jens Axboe   sbitmap: ammortiz...
236
237
238
239
240
241
242
243
244
245
  
  static unsigned int sbitmap_weight(const struct sbitmap *sb)
  {
  	return __sbitmap_weight(sb, true);
  }
  
  static unsigned int sbitmap_cleared(const struct sbitmap *sb)
  {
  	return __sbitmap_weight(sb, false);
  }
88459642c   Omar Sandoval   blk-mq: abstract ...
246

24af1ccfe   Omar Sandoval   sbitmap: add help...
247
248
249
250
  void sbitmap_show(struct sbitmap *sb, struct seq_file *m)
  {
  	seq_printf(m, "depth=%u
  ", sb->depth);
ea86ea2cd   Jens Axboe   sbitmap: ammortiz...
251
252
253
254
  	seq_printf(m, "busy=%u
  ", sbitmap_weight(sb) - sbitmap_cleared(sb));
  	seq_printf(m, "cleared=%u
  ", sbitmap_cleared(sb));
24af1ccfe   Omar Sandoval   sbitmap: add help...
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
  	seq_printf(m, "bits_per_word=%u
  ", 1U << sb->shift);
  	seq_printf(m, "map_nr=%u
  ", sb->map_nr);
  }
  EXPORT_SYMBOL_GPL(sbitmap_show);
  
  static inline void emit_byte(struct seq_file *m, unsigned int offset, u8 byte)
  {
  	if ((offset & 0xf) == 0) {
  		if (offset != 0)
  			seq_putc(m, '
  ');
  		seq_printf(m, "%08x:", offset);
  	}
  	if ((offset & 0x1) == 0)
  		seq_putc(m, ' ');
  	seq_printf(m, "%02x", byte);
  }
  
  void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m)
  {
  	u8 byte = 0;
  	unsigned int byte_bits = 0;
  	unsigned int offset = 0;
  	int i;
  
  	for (i = 0; i < sb->map_nr; i++) {
  		unsigned long word = READ_ONCE(sb->map[i].word);
6bf0eb550   John Garry   sbitmap: Consider...
284
  		unsigned long cleared = READ_ONCE(sb->map[i].cleared);
24af1ccfe   Omar Sandoval   sbitmap: add help...
285
  		unsigned int word_bits = READ_ONCE(sb->map[i].depth);
6bf0eb550   John Garry   sbitmap: Consider...
286
  		word &= ~cleared;
24af1ccfe   Omar Sandoval   sbitmap: add help...
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
  		while (word_bits > 0) {
  			unsigned int bits = min(8 - byte_bits, word_bits);
  
  			byte |= (word & (BIT(bits) - 1)) << byte_bits;
  			byte_bits += bits;
  			if (byte_bits == 8) {
  				emit_byte(m, offset, byte);
  				byte = 0;
  				byte_bits = 0;
  				offset++;
  			}
  			word >>= bits;
  			word_bits -= bits;
  		}
  	}
  	if (byte_bits) {
  		emit_byte(m, offset, byte);
  		offset++;
  	}
  	if (offset)
  		seq_putc(m, '
  ');
  }
  EXPORT_SYMBOL_GPL(sbitmap_bitmap_show);
a32755396   Omar Sandoval   sbitmap: fix miss...
311
312
  static unsigned int sbq_calc_wake_batch(struct sbitmap_queue *sbq,
  					unsigned int depth)
88459642c   Omar Sandoval   blk-mq: abstract ...
313
314
  {
  	unsigned int wake_batch;
a32755396   Omar Sandoval   sbitmap: fix miss...
315
  	unsigned int shallow_depth;
88459642c   Omar Sandoval   blk-mq: abstract ...
316
317
318
  
  	/*
  	 * For each batch, we wake up one queue. We need to make sure that our
a32755396   Omar Sandoval   sbitmap: fix miss...
319
320
321
322
323
324
325
326
327
328
329
330
331
  	 * batch size is small enough that the full depth of the bitmap,
  	 * potentially limited by a shallow depth, is enough to wake up all of
  	 * the queues.
  	 *
  	 * Each full word of the bitmap has bits_per_word bits, and there might
  	 * be a partial word. There are depth / bits_per_word full words and
  	 * depth % bits_per_word bits left over. In bitwise arithmetic:
  	 *
  	 * bits_per_word = 1 << shift
  	 * depth / bits_per_word = depth >> shift
  	 * depth % bits_per_word = depth & ((1 << shift) - 1)
  	 *
  	 * Each word can be limited to sbq->min_shallow_depth bits.
88459642c   Omar Sandoval   blk-mq: abstract ...
332
  	 */
a32755396   Omar Sandoval   sbitmap: fix miss...
333
334
335
336
337
  	shallow_depth = min(1U << sbq->sb.shift, sbq->min_shallow_depth);
  	depth = ((depth >> sbq->sb.shift) * shallow_depth +
  		 min(depth & ((1U << sbq->sb.shift) - 1), shallow_depth));
  	wake_batch = clamp_t(unsigned int, depth / SBQ_WAIT_QUEUES, 1,
  			     SBQ_WAKE_BATCH);
88459642c   Omar Sandoval   blk-mq: abstract ...
338
339
340
341
342
  
  	return wake_batch;
  }
  
  int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
f4a644db8   Omar Sandoval   sbitmap: push all...
343
  			    int shift, bool round_robin, gfp_t flags, int node)
88459642c   Omar Sandoval   blk-mq: abstract ...
344
345
346
347
348
349
350
  {
  	int ret;
  	int i;
  
  	ret = sbitmap_init_node(&sbq->sb, depth, shift, flags, node);
  	if (ret)
  		return ret;
40aabb674   Omar Sandoval   sbitmap: push per...
351
352
353
354
355
  	sbq->alloc_hint = alloc_percpu_gfp(unsigned int, flags);
  	if (!sbq->alloc_hint) {
  		sbitmap_free(&sbq->sb);
  		return -ENOMEM;
  	}
98d95416d   Omar Sandoval   sbitmap: randomiz...
356
357
358
359
  	if (depth && !round_robin) {
  		for_each_possible_cpu(i)
  			*per_cpu_ptr(sbq->alloc_hint, i) = prandom_u32() % depth;
  	}
a32755396   Omar Sandoval   sbitmap: fix miss...
360
361
  	sbq->min_shallow_depth = UINT_MAX;
  	sbq->wake_batch = sbq_calc_wake_batch(sbq, depth);
88459642c   Omar Sandoval   blk-mq: abstract ...
362
  	atomic_set(&sbq->wake_index, 0);
5d2ee7122   Jens Axboe   sbitmap: optimize...
363
  	atomic_set(&sbq->ws_active, 0);
88459642c   Omar Sandoval   blk-mq: abstract ...
364

48e28166a   Omar Sandoval   sbitmap: allocate...
365
  	sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node);
88459642c   Omar Sandoval   blk-mq: abstract ...
366
  	if (!sbq->ws) {
40aabb674   Omar Sandoval   sbitmap: push per...
367
  		free_percpu(sbq->alloc_hint);
88459642c   Omar Sandoval   blk-mq: abstract ...
368
369
370
371
372
373
374
375
  		sbitmap_free(&sbq->sb);
  		return -ENOMEM;
  	}
  
  	for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
  		init_waitqueue_head(&sbq->ws[i].wait);
  		atomic_set(&sbq->ws[i].wait_cnt, sbq->wake_batch);
  	}
f4a644db8   Omar Sandoval   sbitmap: push all...
376
377
  
  	sbq->round_robin = round_robin;
88459642c   Omar Sandoval   blk-mq: abstract ...
378
379
380
  	return 0;
  }
  EXPORT_SYMBOL_GPL(sbitmap_queue_init_node);
a32755396   Omar Sandoval   sbitmap: fix miss...
381
382
  static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
  					    unsigned int depth)
88459642c   Omar Sandoval   blk-mq: abstract ...
383
  {
a32755396   Omar Sandoval   sbitmap: fix miss...
384
  	unsigned int wake_batch = sbq_calc_wake_batch(sbq, depth);
6c0ca7ae2   Omar Sandoval   sbitmap: fix wake...
385
386
387
388
389
  	int i;
  
  	if (sbq->wake_batch != wake_batch) {
  		WRITE_ONCE(sbq->wake_batch, wake_batch);
  		/*
e6fc46498   Ming Lei   blk-mq: avoid sta...
390
391
392
  		 * Pairs with the memory barrier in sbitmap_queue_wake_up()
  		 * to ensure that the batch size is updated before the wait
  		 * counts.
6c0ca7ae2   Omar Sandoval   sbitmap: fix wake...
393
  		 */
a0934fd2b   Andrea Parri   sbitmap: fix impr...
394
  		smp_mb();
6c0ca7ae2   Omar Sandoval   sbitmap: fix wake...
395
396
397
  		for (i = 0; i < SBQ_WAIT_QUEUES; i++)
  			atomic_set(&sbq->ws[i].wait_cnt, 1);
  	}
a32755396   Omar Sandoval   sbitmap: fix miss...
398
399
400
401
402
  }
  
  void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth)
  {
  	sbitmap_queue_update_wake_batch(sbq, depth);
88459642c   Omar Sandoval   blk-mq: abstract ...
403
404
405
  	sbitmap_resize(&sbq->sb, depth);
  }
  EXPORT_SYMBOL_GPL(sbitmap_queue_resize);
f4a644db8   Omar Sandoval   sbitmap: push all...
406
  int __sbitmap_queue_get(struct sbitmap_queue *sbq)
40aabb674   Omar Sandoval   sbitmap: push per...
407
  {
05fd095d5   Omar Sandoval   sbitmap: re-initi...
408
  	unsigned int hint, depth;
40aabb674   Omar Sandoval   sbitmap: push per...
409
410
411
  	int nr;
  
  	hint = this_cpu_read(*sbq->alloc_hint);
05fd095d5   Omar Sandoval   sbitmap: re-initi...
412
413
414
415
416
  	depth = READ_ONCE(sbq->sb.depth);
  	if (unlikely(hint >= depth)) {
  		hint = depth ? prandom_u32() % depth : 0;
  		this_cpu_write(*sbq->alloc_hint, hint);
  	}
f4a644db8   Omar Sandoval   sbitmap: push all...
417
  	nr = sbitmap_get(&sbq->sb, hint, sbq->round_robin);
40aabb674   Omar Sandoval   sbitmap: push per...
418
419
420
421
  
  	if (nr == -1) {
  		/* If the map is full, a hint won't do us much good. */
  		this_cpu_write(*sbq->alloc_hint, 0);
f4a644db8   Omar Sandoval   sbitmap: push all...
422
  	} else if (nr == hint || unlikely(sbq->round_robin)) {
40aabb674   Omar Sandoval   sbitmap: push per...
423
424
  		/* Only update the hint if we used it. */
  		hint = nr + 1;
05fd095d5   Omar Sandoval   sbitmap: re-initi...
425
  		if (hint >= depth - 1)
40aabb674   Omar Sandoval   sbitmap: push per...
426
427
428
429
430
431
432
  			hint = 0;
  		this_cpu_write(*sbq->alloc_hint, hint);
  	}
  
  	return nr;
  }
  EXPORT_SYMBOL_GPL(__sbitmap_queue_get);
c05e66733   Omar Sandoval   sbitmap: add sbit...
433
434
435
436
437
  int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq,
  				unsigned int shallow_depth)
  {
  	unsigned int hint, depth;
  	int nr;
61445b56d   Omar Sandoval   sbitmap: warn if ...
438
  	WARN_ON_ONCE(shallow_depth < sbq->min_shallow_depth);
c05e66733   Omar Sandoval   sbitmap: add sbit...
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
  	hint = this_cpu_read(*sbq->alloc_hint);
  	depth = READ_ONCE(sbq->sb.depth);
  	if (unlikely(hint >= depth)) {
  		hint = depth ? prandom_u32() % depth : 0;
  		this_cpu_write(*sbq->alloc_hint, hint);
  	}
  	nr = sbitmap_get_shallow(&sbq->sb, hint, shallow_depth);
  
  	if (nr == -1) {
  		/* If the map is full, a hint won't do us much good. */
  		this_cpu_write(*sbq->alloc_hint, 0);
  	} else if (nr == hint || unlikely(sbq->round_robin)) {
  		/* Only update the hint if we used it. */
  		hint = nr + 1;
  		if (hint >= depth - 1)
  			hint = 0;
  		this_cpu_write(*sbq->alloc_hint, hint);
  	}
  
  	return nr;
  }
  EXPORT_SYMBOL_GPL(__sbitmap_queue_get_shallow);
a32755396   Omar Sandoval   sbitmap: fix miss...
461
462
463
464
465
466
467
  void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq,
  				     unsigned int min_shallow_depth)
  {
  	sbq->min_shallow_depth = min_shallow_depth;
  	sbitmap_queue_update_wake_batch(sbq, sbq->sb.depth);
  }
  EXPORT_SYMBOL_GPL(sbitmap_queue_min_shallow_depth);
88459642c   Omar Sandoval   blk-mq: abstract ...
468
469
470
  static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
  {
  	int i, wake_index;
5d2ee7122   Jens Axboe   sbitmap: optimize...
471
472
  	if (!atomic_read(&sbq->ws_active))
  		return NULL;
88459642c   Omar Sandoval   blk-mq: abstract ...
473
474
475
476
477
  	wake_index = atomic_read(&sbq->wake_index);
  	for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
  		struct sbq_wait_state *ws = &sbq->ws[wake_index];
  
  		if (waitqueue_active(&ws->wait)) {
417232880   Pavel Begunkov   sbitmap: Replace ...
478
479
  			if (wake_index != atomic_read(&sbq->wake_index))
  				atomic_set(&sbq->wake_index, wake_index);
88459642c   Omar Sandoval   blk-mq: abstract ...
480
481
482
483
484
485
486
487
  			return ws;
  		}
  
  		wake_index = sbq_index_inc(wake_index);
  	}
  
  	return NULL;
  }
c854ab577   Jens Axboe   sbitmap: fix race...
488
  static bool __sbq_wake_up(struct sbitmap_queue *sbq)
88459642c   Omar Sandoval   blk-mq: abstract ...
489
490
  {
  	struct sbq_wait_state *ws;
6c0ca7ae2   Omar Sandoval   sbitmap: fix wake...
491
  	unsigned int wake_batch;
88459642c   Omar Sandoval   blk-mq: abstract ...
492
  	int wait_cnt;
88459642c   Omar Sandoval   blk-mq: abstract ...
493
494
  	ws = sbq_wake_ptr(sbq);
  	if (!ws)
c854ab577   Jens Axboe   sbitmap: fix race...
495
  		return false;
88459642c   Omar Sandoval   blk-mq: abstract ...
496
497
  
  	wait_cnt = atomic_dec_return(&ws->wait_cnt);
6c0ca7ae2   Omar Sandoval   sbitmap: fix wake...
498
  	if (wait_cnt <= 0) {
c854ab577   Jens Axboe   sbitmap: fix race...
499
  		int ret;
6c0ca7ae2   Omar Sandoval   sbitmap: fix wake...
500
  		wake_batch = READ_ONCE(sbq->wake_batch);
c854ab577   Jens Axboe   sbitmap: fix race...
501

6c0ca7ae2   Omar Sandoval   sbitmap: fix wake...
502
503
504
505
506
507
  		/*
  		 * Pairs with the memory barrier in sbitmap_queue_resize() to
  		 * ensure that we see the batch size update before the wait
  		 * count is reset.
  		 */
  		smp_mb__before_atomic();
c854ab577   Jens Axboe   sbitmap: fix race...
508

6c0ca7ae2   Omar Sandoval   sbitmap: fix wake...
509
  		/*
c854ab577   Jens Axboe   sbitmap: fix race...
510
511
512
  		 * For concurrent callers of this, the one that failed the
  		 * atomic_cmpxhcg() race should call this function again
  		 * to wakeup a new batch on a different 'ws'.
6c0ca7ae2   Omar Sandoval   sbitmap: fix wake...
513
  		 */
c854ab577   Jens Axboe   sbitmap: fix race...
514
515
516
517
518
519
520
521
  		ret = atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wake_batch);
  		if (ret == wait_cnt) {
  			sbq_index_atomic_inc(&sbq->wake_index);
  			wake_up_nr(&ws->wait, wake_batch);
  			return false;
  		}
  
  		return true;
88459642c   Omar Sandoval   blk-mq: abstract ...
522
  	}
c854ab577   Jens Axboe   sbitmap: fix race...
523
524
525
  
  	return false;
  }
e6fc46498   Ming Lei   blk-mq: avoid sta...
526
  void sbitmap_queue_wake_up(struct sbitmap_queue *sbq)
c854ab577   Jens Axboe   sbitmap: fix race...
527
528
529
  {
  	while (__sbq_wake_up(sbq))
  		;
88459642c   Omar Sandoval   blk-mq: abstract ...
530
  }
e6fc46498   Ming Lei   blk-mq: avoid sta...
531
  EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up);
88459642c   Omar Sandoval   blk-mq: abstract ...
532

40aabb674   Omar Sandoval   sbitmap: push per...
533
  void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
f4a644db8   Omar Sandoval   sbitmap: push all...
534
  			 unsigned int cpu)
88459642c   Omar Sandoval   blk-mq: abstract ...
535
  {
e6d1fa584   Ming Lei   sbitmap: order RE...
536
537
538
539
540
541
542
543
544
545
546
  	/*
  	 * Once the clear bit is set, the bit may be allocated out.
  	 *
  	 * Orders READ/WRITE on the asssociated instance(such as request
  	 * of blk_mq) by this bit for avoiding race with re-allocation,
  	 * and its pair is the memory barrier implied in __sbitmap_get_word.
  	 *
  	 * One invariant is that the clear bit has to be zero when the bit
  	 * is in use.
  	 */
  	smp_mb__before_atomic();
ea86ea2cd   Jens Axboe   sbitmap: ammortiz...
547
  	sbitmap_deferred_clear_bit(&sbq->sb, nr);
e6fc46498   Ming Lei   blk-mq: avoid sta...
548
549
550
551
552
553
554
555
  	/*
  	 * Pairs with the memory barrier in set_current_state() to ensure the
  	 * proper ordering of clear_bit_unlock()/waitqueue_active() in the waker
  	 * and test_and_set_bit_lock()/prepare_to_wait()/finish_wait() in the
  	 * waiter. See the comment on waitqueue_active().
  	 */
  	smp_mb__after_atomic();
  	sbitmap_queue_wake_up(sbq);
5c64a8df0   Omar Sandoval   sbitmap: don't up...
556
  	if (likely(!sbq->round_robin && nr < sbq->sb.depth))
40aabb674   Omar Sandoval   sbitmap: push per...
557
  		*per_cpu_ptr(sbq->alloc_hint, cpu) = nr;
88459642c   Omar Sandoval   blk-mq: abstract ...
558
559
560
561
562
563
564
565
  }
  EXPORT_SYMBOL_GPL(sbitmap_queue_clear);
  
  void sbitmap_queue_wake_all(struct sbitmap_queue *sbq)
  {
  	int i, wake_index;
  
  	/*
f66227de5   Omar Sandoval   sbitmap: use smp_...
566
  	 * Pairs with the memory barrier in set_current_state() like in
e6fc46498   Ming Lei   blk-mq: avoid sta...
567
  	 * sbitmap_queue_wake_up().
88459642c   Omar Sandoval   blk-mq: abstract ...
568
569
570
571
572
573
574
575
576
577
578
579
580
  	 */
  	smp_mb();
  	wake_index = atomic_read(&sbq->wake_index);
  	for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
  		struct sbq_wait_state *ws = &sbq->ws[wake_index];
  
  		if (waitqueue_active(&ws->wait))
  			wake_up(&ws->wait);
  
  		wake_index = sbq_index_inc(wake_index);
  	}
  }
  EXPORT_SYMBOL_GPL(sbitmap_queue_wake_all);
24af1ccfe   Omar Sandoval   sbitmap: add help...
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
  
  void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m)
  {
  	bool first;
  	int i;
  
  	sbitmap_show(&sbq->sb, m);
  
  	seq_puts(m, "alloc_hint={");
  	first = true;
  	for_each_possible_cpu(i) {
  		if (!first)
  			seq_puts(m, ", ");
  		first = false;
  		seq_printf(m, "%u", *per_cpu_ptr(sbq->alloc_hint, i));
  	}
  	seq_puts(m, "}
  ");
  
  	seq_printf(m, "wake_batch=%u
  ", sbq->wake_batch);
  	seq_printf(m, "wake_index=%d
  ", atomic_read(&sbq->wake_index));
5d2ee7122   Jens Axboe   sbitmap: optimize...
604
605
  	seq_printf(m, "ws_active=%d
  ", atomic_read(&sbq->ws_active));
24af1ccfe   Omar Sandoval   sbitmap: add help...
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
  
  	seq_puts(m, "ws={
  ");
  	for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
  		struct sbq_wait_state *ws = &sbq->ws[i];
  
  		seq_printf(m, "\t{.wait_cnt=%d, .wait=%s},
  ",
  			   atomic_read(&ws->wait_cnt),
  			   waitqueue_active(&ws->wait) ? "active" : "inactive");
  	}
  	seq_puts(m, "}
  ");
  
  	seq_printf(m, "round_robin=%d
  ", sbq->round_robin);
a32755396   Omar Sandoval   sbitmap: fix miss...
622
623
  	seq_printf(m, "min_shallow_depth=%u
  ", sbq->min_shallow_depth);
24af1ccfe   Omar Sandoval   sbitmap: add help...
624
625
  }
  EXPORT_SYMBOL_GPL(sbitmap_queue_show);
5d2ee7122   Jens Axboe   sbitmap: optimize...
626

9f6b7ef6c   Jens Axboe   sbitmap: add help...
627
628
629
630
631
632
633
  void sbitmap_add_wait_queue(struct sbitmap_queue *sbq,
  			    struct sbq_wait_state *ws,
  			    struct sbq_wait *sbq_wait)
  {
  	if (!sbq_wait->sbq) {
  		sbq_wait->sbq = sbq;
  		atomic_inc(&sbq->ws_active);
df034c93f   David Jeffery   sbitmap: only que...
634
  		add_wait_queue(&ws->wait, &sbq_wait->wait);
9f6b7ef6c   Jens Axboe   sbitmap: add help...
635
  	}
9f6b7ef6c   Jens Axboe   sbitmap: add help...
636
637
638
639
640
641
642
643
644
645
646
647
  }
  EXPORT_SYMBOL_GPL(sbitmap_add_wait_queue);
  
  void sbitmap_del_wait_queue(struct sbq_wait *sbq_wait)
  {
  	list_del_init(&sbq_wait->wait.entry);
  	if (sbq_wait->sbq) {
  		atomic_dec(&sbq_wait->sbq->ws_active);
  		sbq_wait->sbq = NULL;
  	}
  }
  EXPORT_SYMBOL_GPL(sbitmap_del_wait_queue);
5d2ee7122   Jens Axboe   sbitmap: optimize...
648
649
650
651
  void sbitmap_prepare_to_wait(struct sbitmap_queue *sbq,
  			     struct sbq_wait_state *ws,
  			     struct sbq_wait *sbq_wait, int state)
  {
9f6b7ef6c   Jens Axboe   sbitmap: add help...
652
  	if (!sbq_wait->sbq) {
5d2ee7122   Jens Axboe   sbitmap: optimize...
653
  		atomic_inc(&sbq->ws_active);
9f6b7ef6c   Jens Axboe   sbitmap: add help...
654
  		sbq_wait->sbq = sbq;
5d2ee7122   Jens Axboe   sbitmap: optimize...
655
656
657
658
659
660
661
662
663
  	}
  	prepare_to_wait_exclusive(&ws->wait, &sbq_wait->wait, state);
  }
  EXPORT_SYMBOL_GPL(sbitmap_prepare_to_wait);
  
  void sbitmap_finish_wait(struct sbitmap_queue *sbq, struct sbq_wait_state *ws,
  			 struct sbq_wait *sbq_wait)
  {
  	finish_wait(&ws->wait, &sbq_wait->wait);
9f6b7ef6c   Jens Axboe   sbitmap: add help...
664
  	if (sbq_wait->sbq) {
5d2ee7122   Jens Axboe   sbitmap: optimize...
665
  		atomic_dec(&sbq->ws_active);
9f6b7ef6c   Jens Axboe   sbitmap: add help...
666
  		sbq_wait->sbq = NULL;
5d2ee7122   Jens Axboe   sbitmap: optimize...
667
668
669
  	}
  }
  EXPORT_SYMBOL_GPL(sbitmap_finish_wait);