Blame view

lib/sbitmap.c 16.6 KB
0fc479b1a   Thomas Gleixner   treewide: Replace...
1
  // SPDX-License-Identifier: GPL-2.0-only
88459642c   Omar Sandoval   blk-mq: abstract ...
2
3
4
  /*
   * Copyright (C) 2016 Facebook
   * Copyright (C) 2013-2014 Jens Axboe
88459642c   Omar Sandoval   blk-mq: abstract ...
5
   */
af8601ad4   Ingo Molnar   kasan, sched/head...
6
  #include <linux/sched.h>
98d95416d   Omar Sandoval   sbitmap: randomiz...
7
  #include <linux/random.h>
88459642c   Omar Sandoval   blk-mq: abstract ...
8
  #include <linux/sbitmap.h>
24af1ccfe   Omar Sandoval   sbitmap: add help...
9
  #include <linux/seq_file.h>
88459642c   Omar Sandoval   blk-mq: abstract ...
10

b2dbff1bb   Jens Axboe   sbitmap: flush de...
11
12
13
14
15
16
  /*
   * See if we have deferred clears that we can batch move
   */
  static inline bool sbitmap_deferred_clear(struct sbitmap *sb, int index)
  {
  	unsigned long mask, val;
b2dbff1bb   Jens Axboe   sbitmap: flush de...
17
  	bool ret = false;
fe76fc6aa   Ming Lei   sbitmap: Protect ...
18
  	unsigned long flags;
b2dbff1bb   Jens Axboe   sbitmap: flush de...
19

fe76fc6aa   Ming Lei   sbitmap: Protect ...
20
  	spin_lock_irqsave(&sb->map[index].swap_lock, flags);
b2dbff1bb   Jens Axboe   sbitmap: flush de...
21
22
23
24
25
26
27
  
  	if (!sb->map[index].cleared)
  		goto out_unlock;
  
  	/*
  	 * First get a stable cleared mask, setting the old mask to 0.
  	 */
417232880   Pavel Begunkov   sbitmap: Replace ...
28
  	mask = xchg(&sb->map[index].cleared, 0);
b2dbff1bb   Jens Axboe   sbitmap: flush de...
29
30
31
32
33
34
35
36
37
38
  
  	/*
  	 * Now clear the masked bits in our free word
  	 */
  	do {
  		val = sb->map[index].word;
  	} while (cmpxchg(&sb->map[index].word, val, val & ~mask) != val);
  
  	ret = true;
  out_unlock:
fe76fc6aa   Ming Lei   sbitmap: Protect ...
39
  	spin_unlock_irqrestore(&sb->map[index].swap_lock, flags);
b2dbff1bb   Jens Axboe   sbitmap: flush de...
40
41
  	return ret;
  }
88459642c   Omar Sandoval   blk-mq: abstract ...
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
  int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
  		      gfp_t flags, int node)
  {
  	unsigned int bits_per_word;
  	unsigned int i;
  
  	if (shift < 0) {
  		shift = ilog2(BITS_PER_LONG);
  		/*
  		 * If the bitmap is small, shrink the number of bits per word so
  		 * we spread over a few cachelines, at least. If less than 4
  		 * bits, just forget about it, it's not going to work optimally
  		 * anyway.
  		 */
  		if (depth >= 4) {
  			while ((4U << shift) > depth)
  				shift--;
  		}
  	}
  	bits_per_word = 1U << shift;
  	if (bits_per_word > BITS_PER_LONG)
  		return -EINVAL;
  
  	sb->shift = shift;
  	sb->depth = depth;
  	sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word);
  
  	if (depth == 0) {
  		sb->map = NULL;
  		return 0;
  	}
590b5b7d8   Kees Cook   treewide: kzalloc...
73
  	sb->map = kcalloc_node(sb->map_nr, sizeof(*sb->map), flags, node);
88459642c   Omar Sandoval   blk-mq: abstract ...
74
75
76
77
78
79
  	if (!sb->map)
  		return -ENOMEM;
  
  	for (i = 0; i < sb->map_nr; i++) {
  		sb->map[i].depth = min(depth, bits_per_word);
  		depth -= sb->map[i].depth;
ea86ea2cd   Jens Axboe   sbitmap: ammortiz...
80
  		spin_lock_init(&sb->map[i].swap_lock);
88459642c   Omar Sandoval   blk-mq: abstract ...
81
82
83
84
85
86
87
88
89
  	}
  	return 0;
  }
  EXPORT_SYMBOL_GPL(sbitmap_init_node);
  
  void sbitmap_resize(struct sbitmap *sb, unsigned int depth)
  {
  	unsigned int bits_per_word = 1U << sb->shift;
  	unsigned int i;
b2dbff1bb   Jens Axboe   sbitmap: flush de...
90
91
  	for (i = 0; i < sb->map_nr; i++)
  		sbitmap_deferred_clear(sb, i);
88459642c   Omar Sandoval   blk-mq: abstract ...
92
93
94
95
96
97
98
99
100
  	sb->depth = depth;
  	sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word);
  
  	for (i = 0; i < sb->map_nr; i++) {
  		sb->map[i].depth = min(depth, bits_per_word);
  		depth -= sb->map[i].depth;
  	}
  }
  EXPORT_SYMBOL_GPL(sbitmap_resize);
c05e66733   Omar Sandoval   sbitmap: add sbit...
101
102
  static int __sbitmap_get_word(unsigned long *word, unsigned long depth,
  			      unsigned int hint, bool wrap)
88459642c   Omar Sandoval   blk-mq: abstract ...
103
104
105
106
107
  {
  	unsigned int orig_hint = hint;
  	int nr;
  
  	while (1) {
c05e66733   Omar Sandoval   sbitmap: add sbit...
108
109
  		nr = find_next_zero_bit(word, depth, hint);
  		if (unlikely(nr >= depth)) {
88459642c   Omar Sandoval   blk-mq: abstract ...
110
111
112
113
114
115
116
117
118
119
120
  			/*
  			 * We started with an offset, and we didn't reset the
  			 * offset to 0 in a failure case, so start from 0 to
  			 * exhaust the map.
  			 */
  			if (orig_hint && hint && wrap) {
  				hint = orig_hint = 0;
  				continue;
  			}
  			return -1;
  		}
4ace53f1e   Omar Sandoval   sbitmap: use test...
121
  		if (!test_and_set_bit_lock(nr, word))
88459642c   Omar Sandoval   blk-mq: abstract ...
122
123
124
  			break;
  
  		hint = nr + 1;
c05e66733   Omar Sandoval   sbitmap: add sbit...
125
  		if (hint >= depth - 1)
88459642c   Omar Sandoval   blk-mq: abstract ...
126
127
128
129
130
  			hint = 0;
  	}
  
  	return nr;
  }
ea86ea2cd   Jens Axboe   sbitmap: ammortiz...
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
  static int sbitmap_find_bit_in_index(struct sbitmap *sb, int index,
  				     unsigned int alloc_hint, bool round_robin)
  {
  	int nr;
  
  	do {
  		nr = __sbitmap_get_word(&sb->map[index].word,
  					sb->map[index].depth, alloc_hint,
  					!round_robin);
  		if (nr != -1)
  			break;
  		if (!sbitmap_deferred_clear(sb, index))
  			break;
  	} while (1);
  
  	return nr;
  }
88459642c   Omar Sandoval   blk-mq: abstract ...
148
149
150
151
152
153
  int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin)
  {
  	unsigned int i, index;
  	int nr = -1;
  
  	index = SB_NR_TO_INDEX(sb, alloc_hint);
27fae429a   Jens Axboe   sbitmap: don't lo...
154
155
156
157
158
159
160
161
162
  	/*
  	 * Unless we're doing round robin tag allocation, just use the
  	 * alloc_hint to find the right word index. No point in looping
  	 * twice in find_next_zero_bit() for that case.
  	 */
  	if (round_robin)
  		alloc_hint = SB_NR_TO_BIT(sb, alloc_hint);
  	else
  		alloc_hint = 0;
88459642c   Omar Sandoval   blk-mq: abstract ...
163
  	for (i = 0; i < sb->map_nr; i++) {
ea86ea2cd   Jens Axboe   sbitmap: ammortiz...
164
165
  		nr = sbitmap_find_bit_in_index(sb, index, alloc_hint,
  						round_robin);
88459642c   Omar Sandoval   blk-mq: abstract ...
166
167
168
169
170
171
  		if (nr != -1) {
  			nr += index << sb->shift;
  			break;
  		}
  
  		/* Jump to next index. */
27fae429a   Jens Axboe   sbitmap: don't lo...
172
173
  		alloc_hint = 0;
  		if (++index >= sb->map_nr)
88459642c   Omar Sandoval   blk-mq: abstract ...
174
  			index = 0;
88459642c   Omar Sandoval   blk-mq: abstract ...
175
176
177
178
179
  	}
  
  	return nr;
  }
  EXPORT_SYMBOL_GPL(sbitmap_get);
c05e66733   Omar Sandoval   sbitmap: add sbit...
180
181
182
183
184
185
186
187
188
  int sbitmap_get_shallow(struct sbitmap *sb, unsigned int alloc_hint,
  			unsigned long shallow_depth)
  {
  	unsigned int i, index;
  	int nr = -1;
  
  	index = SB_NR_TO_INDEX(sb, alloc_hint);
  
  	for (i = 0; i < sb->map_nr; i++) {
b2dbff1bb   Jens Axboe   sbitmap: flush de...
189
  again:
c05e66733   Omar Sandoval   sbitmap: add sbit...
190
191
192
193
194
195
196
  		nr = __sbitmap_get_word(&sb->map[index].word,
  					min(sb->map[index].depth, shallow_depth),
  					SB_NR_TO_BIT(sb, alloc_hint), true);
  		if (nr != -1) {
  			nr += index << sb->shift;
  			break;
  		}
b2dbff1bb   Jens Axboe   sbitmap: flush de...
197
198
  		if (sbitmap_deferred_clear(sb, index))
  			goto again;
c05e66733   Omar Sandoval   sbitmap: add sbit...
199
200
201
202
203
204
205
206
207
208
209
210
211
  		/* Jump to next index. */
  		index++;
  		alloc_hint = index << sb->shift;
  
  		if (index >= sb->map_nr) {
  			index = 0;
  			alloc_hint = 0;
  		}
  	}
  
  	return nr;
  }
  EXPORT_SYMBOL_GPL(sbitmap_get_shallow);
88459642c   Omar Sandoval   blk-mq: abstract ...
212
213
214
215
216
  bool sbitmap_any_bit_set(const struct sbitmap *sb)
  {
  	unsigned int i;
  
  	for (i = 0; i < sb->map_nr; i++) {
b2dbff1bb   Jens Axboe   sbitmap: flush de...
217
  		if (sb->map[i].word & ~sb->map[i].cleared)
88459642c   Omar Sandoval   blk-mq: abstract ...
218
219
220
221
222
223
224
225
226
227
228
229
  			return true;
  	}
  	return false;
  }
  EXPORT_SYMBOL_GPL(sbitmap_any_bit_set);
  
  bool sbitmap_any_bit_clear(const struct sbitmap *sb)
  {
  	unsigned int i;
  
  	for (i = 0; i < sb->map_nr; i++) {
  		const struct sbitmap_word *word = &sb->map[i];
b2dbff1bb   Jens Axboe   sbitmap: flush de...
230
  		unsigned long mask = word->word & ~word->cleared;
88459642c   Omar Sandoval   blk-mq: abstract ...
231
  		unsigned long ret;
b2dbff1bb   Jens Axboe   sbitmap: flush de...
232
  		ret = find_first_zero_bit(&mask, word->depth);
88459642c   Omar Sandoval   blk-mq: abstract ...
233
234
235
236
237
238
  		if (ret < word->depth)
  			return true;
  	}
  	return false;
  }
  EXPORT_SYMBOL_GPL(sbitmap_any_bit_clear);
ea86ea2cd   Jens Axboe   sbitmap: ammortiz...
239
  static unsigned int __sbitmap_weight(const struct sbitmap *sb, bool set)
88459642c   Omar Sandoval   blk-mq: abstract ...
240
  {
60658e0dc   Colin Ian King   sbitmap: initiali...
241
  	unsigned int i, weight = 0;
88459642c   Omar Sandoval   blk-mq: abstract ...
242
243
244
  
  	for (i = 0; i < sb->map_nr; i++) {
  		const struct sbitmap_word *word = &sb->map[i];
ea86ea2cd   Jens Axboe   sbitmap: ammortiz...
245
246
247
248
  		if (set)
  			weight += bitmap_weight(&word->word, word->depth);
  		else
  			weight += bitmap_weight(&word->cleared, word->depth);
88459642c   Omar Sandoval   blk-mq: abstract ...
249
250
251
  	}
  	return weight;
  }
ea86ea2cd   Jens Axboe   sbitmap: ammortiz...
252
253
254
255
256
257
258
259
260
261
  
  static unsigned int sbitmap_weight(const struct sbitmap *sb)
  {
  	return __sbitmap_weight(sb, true);
  }
  
  static unsigned int sbitmap_cleared(const struct sbitmap *sb)
  {
  	return __sbitmap_weight(sb, false);
  }
88459642c   Omar Sandoval   blk-mq: abstract ...
262

24af1ccfe   Omar Sandoval   sbitmap: add help...
263
264
265
266
  void sbitmap_show(struct sbitmap *sb, struct seq_file *m)
  {
  	seq_printf(m, "depth=%u
  ", sb->depth);
ea86ea2cd   Jens Axboe   sbitmap: ammortiz...
267
268
269
270
  	seq_printf(m, "busy=%u
  ", sbitmap_weight(sb) - sbitmap_cleared(sb));
  	seq_printf(m, "cleared=%u
  ", sbitmap_cleared(sb));
24af1ccfe   Omar Sandoval   sbitmap: add help...
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
  	seq_printf(m, "bits_per_word=%u
  ", 1U << sb->shift);
  	seq_printf(m, "map_nr=%u
  ", sb->map_nr);
  }
  EXPORT_SYMBOL_GPL(sbitmap_show);
  
  static inline void emit_byte(struct seq_file *m, unsigned int offset, u8 byte)
  {
  	if ((offset & 0xf) == 0) {
  		if (offset != 0)
  			seq_putc(m, '
  ');
  		seq_printf(m, "%08x:", offset);
  	}
  	if ((offset & 0x1) == 0)
  		seq_putc(m, ' ');
  	seq_printf(m, "%02x", byte);
  }
  
  void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m)
  {
  	u8 byte = 0;
  	unsigned int byte_bits = 0;
  	unsigned int offset = 0;
  	int i;
  
  	for (i = 0; i < sb->map_nr; i++) {
  		unsigned long word = READ_ONCE(sb->map[i].word);
  		unsigned int word_bits = READ_ONCE(sb->map[i].depth);
  
  		while (word_bits > 0) {
  			unsigned int bits = min(8 - byte_bits, word_bits);
  
  			byte |= (word & (BIT(bits) - 1)) << byte_bits;
  			byte_bits += bits;
  			if (byte_bits == 8) {
  				emit_byte(m, offset, byte);
  				byte = 0;
  				byte_bits = 0;
  				offset++;
  			}
  			word >>= bits;
  			word_bits -= bits;
  		}
  	}
  	if (byte_bits) {
  		emit_byte(m, offset, byte);
  		offset++;
  	}
  	if (offset)
  		seq_putc(m, '
  ');
  }
  EXPORT_SYMBOL_GPL(sbitmap_bitmap_show);
a32755396   Omar Sandoval   sbitmap: fix miss...
326
327
  static unsigned int sbq_calc_wake_batch(struct sbitmap_queue *sbq,
  					unsigned int depth)
88459642c   Omar Sandoval   blk-mq: abstract ...
328
329
  {
  	unsigned int wake_batch;
a32755396   Omar Sandoval   sbitmap: fix miss...
330
  	unsigned int shallow_depth;
88459642c   Omar Sandoval   blk-mq: abstract ...
331
332
333
  
  	/*
  	 * For each batch, we wake up one queue. We need to make sure that our
a32755396   Omar Sandoval   sbitmap: fix miss...
334
335
336
337
338
339
340
341
342
343
344
345
346
  	 * batch size is small enough that the full depth of the bitmap,
  	 * potentially limited by a shallow depth, is enough to wake up all of
  	 * the queues.
  	 *
  	 * Each full word of the bitmap has bits_per_word bits, and there might
  	 * be a partial word. There are depth / bits_per_word full words and
  	 * depth % bits_per_word bits left over. In bitwise arithmetic:
  	 *
  	 * bits_per_word = 1 << shift
  	 * depth / bits_per_word = depth >> shift
  	 * depth % bits_per_word = depth & ((1 << shift) - 1)
  	 *
  	 * Each word can be limited to sbq->min_shallow_depth bits.
88459642c   Omar Sandoval   blk-mq: abstract ...
347
  	 */
a32755396   Omar Sandoval   sbitmap: fix miss...
348
349
350
351
352
  	shallow_depth = min(1U << sbq->sb.shift, sbq->min_shallow_depth);
  	depth = ((depth >> sbq->sb.shift) * shallow_depth +
  		 min(depth & ((1U << sbq->sb.shift) - 1), shallow_depth));
  	wake_batch = clamp_t(unsigned int, depth / SBQ_WAIT_QUEUES, 1,
  			     SBQ_WAKE_BATCH);
88459642c   Omar Sandoval   blk-mq: abstract ...
353
354
355
356
357
  
  	return wake_batch;
  }
  
  int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
f4a644db8   Omar Sandoval   sbitmap: push all...
358
  			    int shift, bool round_robin, gfp_t flags, int node)
88459642c   Omar Sandoval   blk-mq: abstract ...
359
360
361
362
363
364
365
  {
  	int ret;
  	int i;
  
  	ret = sbitmap_init_node(&sbq->sb, depth, shift, flags, node);
  	if (ret)
  		return ret;
40aabb674   Omar Sandoval   sbitmap: push per...
366
367
368
369
370
  	sbq->alloc_hint = alloc_percpu_gfp(unsigned int, flags);
  	if (!sbq->alloc_hint) {
  		sbitmap_free(&sbq->sb);
  		return -ENOMEM;
  	}
98d95416d   Omar Sandoval   sbitmap: randomiz...
371
372
373
374
  	if (depth && !round_robin) {
  		for_each_possible_cpu(i)
  			*per_cpu_ptr(sbq->alloc_hint, i) = prandom_u32() % depth;
  	}
a32755396   Omar Sandoval   sbitmap: fix miss...
375
376
  	sbq->min_shallow_depth = UINT_MAX;
  	sbq->wake_batch = sbq_calc_wake_batch(sbq, depth);
88459642c   Omar Sandoval   blk-mq: abstract ...
377
  	atomic_set(&sbq->wake_index, 0);
5d2ee7122   Jens Axboe   sbitmap: optimize...
378
  	atomic_set(&sbq->ws_active, 0);
88459642c   Omar Sandoval   blk-mq: abstract ...
379

48e28166a   Omar Sandoval   sbitmap: allocate...
380
  	sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node);
88459642c   Omar Sandoval   blk-mq: abstract ...
381
  	if (!sbq->ws) {
40aabb674   Omar Sandoval   sbitmap: push per...
382
  		free_percpu(sbq->alloc_hint);
88459642c   Omar Sandoval   blk-mq: abstract ...
383
384
385
386
387
388
389
390
  		sbitmap_free(&sbq->sb);
  		return -ENOMEM;
  	}
  
  	for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
  		init_waitqueue_head(&sbq->ws[i].wait);
  		atomic_set(&sbq->ws[i].wait_cnt, sbq->wake_batch);
  	}
f4a644db8   Omar Sandoval   sbitmap: push all...
391
392
  
  	sbq->round_robin = round_robin;
88459642c   Omar Sandoval   blk-mq: abstract ...
393
394
395
  	return 0;
  }
  EXPORT_SYMBOL_GPL(sbitmap_queue_init_node);
a32755396   Omar Sandoval   sbitmap: fix miss...
396
397
  static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
  					    unsigned int depth)
88459642c   Omar Sandoval   blk-mq: abstract ...
398
  {
a32755396   Omar Sandoval   sbitmap: fix miss...
399
  	unsigned int wake_batch = sbq_calc_wake_batch(sbq, depth);
6c0ca7ae2   Omar Sandoval   sbitmap: fix wake...
400
401
402
403
404
  	int i;
  
  	if (sbq->wake_batch != wake_batch) {
  		WRITE_ONCE(sbq->wake_batch, wake_batch);
  		/*
e6fc46498   Ming Lei   blk-mq: avoid sta...
405
406
407
  		 * Pairs with the memory barrier in sbitmap_queue_wake_up()
  		 * to ensure that the batch size is updated before the wait
  		 * counts.
6c0ca7ae2   Omar Sandoval   sbitmap: fix wake...
408
  		 */
a0934fd2b   Andrea Parri   sbitmap: fix impr...
409
  		smp_mb();
6c0ca7ae2   Omar Sandoval   sbitmap: fix wake...
410
411
412
  		for (i = 0; i < SBQ_WAIT_QUEUES; i++)
  			atomic_set(&sbq->ws[i].wait_cnt, 1);
  	}
a32755396   Omar Sandoval   sbitmap: fix miss...
413
414
415
416
417
  }
  
  void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth)
  {
  	sbitmap_queue_update_wake_batch(sbq, depth);
88459642c   Omar Sandoval   blk-mq: abstract ...
418
419
420
  	sbitmap_resize(&sbq->sb, depth);
  }
  EXPORT_SYMBOL_GPL(sbitmap_queue_resize);
f4a644db8   Omar Sandoval   sbitmap: push all...
421
  int __sbitmap_queue_get(struct sbitmap_queue *sbq)
40aabb674   Omar Sandoval   sbitmap: push per...
422
  {
05fd095d5   Omar Sandoval   sbitmap: re-initi...
423
  	unsigned int hint, depth;
40aabb674   Omar Sandoval   sbitmap: push per...
424
425
426
  	int nr;
  
  	hint = this_cpu_read(*sbq->alloc_hint);
05fd095d5   Omar Sandoval   sbitmap: re-initi...
427
428
429
430
431
  	depth = READ_ONCE(sbq->sb.depth);
  	if (unlikely(hint >= depth)) {
  		hint = depth ? prandom_u32() % depth : 0;
  		this_cpu_write(*sbq->alloc_hint, hint);
  	}
f4a644db8   Omar Sandoval   sbitmap: push all...
432
  	nr = sbitmap_get(&sbq->sb, hint, sbq->round_robin);
40aabb674   Omar Sandoval   sbitmap: push per...
433
434
435
436
  
  	if (nr == -1) {
  		/* If the map is full, a hint won't do us much good. */
  		this_cpu_write(*sbq->alloc_hint, 0);
f4a644db8   Omar Sandoval   sbitmap: push all...
437
  	} else if (nr == hint || unlikely(sbq->round_robin)) {
40aabb674   Omar Sandoval   sbitmap: push per...
438
439
  		/* Only update the hint if we used it. */
  		hint = nr + 1;
05fd095d5   Omar Sandoval   sbitmap: re-initi...
440
  		if (hint >= depth - 1)
40aabb674   Omar Sandoval   sbitmap: push per...
441
442
443
444
445
446
447
  			hint = 0;
  		this_cpu_write(*sbq->alloc_hint, hint);
  	}
  
  	return nr;
  }
  EXPORT_SYMBOL_GPL(__sbitmap_queue_get);
c05e66733   Omar Sandoval   sbitmap: add sbit...
448
449
450
451
452
  int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq,
  				unsigned int shallow_depth)
  {
  	unsigned int hint, depth;
  	int nr;
61445b56d   Omar Sandoval   sbitmap: warn if ...
453
  	WARN_ON_ONCE(shallow_depth < sbq->min_shallow_depth);
c05e66733   Omar Sandoval   sbitmap: add sbit...
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
  	hint = this_cpu_read(*sbq->alloc_hint);
  	depth = READ_ONCE(sbq->sb.depth);
  	if (unlikely(hint >= depth)) {
  		hint = depth ? prandom_u32() % depth : 0;
  		this_cpu_write(*sbq->alloc_hint, hint);
  	}
  	nr = sbitmap_get_shallow(&sbq->sb, hint, shallow_depth);
  
  	if (nr == -1) {
  		/* If the map is full, a hint won't do us much good. */
  		this_cpu_write(*sbq->alloc_hint, 0);
  	} else if (nr == hint || unlikely(sbq->round_robin)) {
  		/* Only update the hint if we used it. */
  		hint = nr + 1;
  		if (hint >= depth - 1)
  			hint = 0;
  		this_cpu_write(*sbq->alloc_hint, hint);
  	}
  
  	return nr;
  }
  EXPORT_SYMBOL_GPL(__sbitmap_queue_get_shallow);
a32755396   Omar Sandoval   sbitmap: fix miss...
476
477
478
479
480
481
482
  void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq,
  				     unsigned int min_shallow_depth)
  {
  	sbq->min_shallow_depth = min_shallow_depth;
  	sbitmap_queue_update_wake_batch(sbq, sbq->sb.depth);
  }
  EXPORT_SYMBOL_GPL(sbitmap_queue_min_shallow_depth);
88459642c   Omar Sandoval   blk-mq: abstract ...
483
484
485
  static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
  {
  	int i, wake_index;
5d2ee7122   Jens Axboe   sbitmap: optimize...
486
487
  	if (!atomic_read(&sbq->ws_active))
  		return NULL;
88459642c   Omar Sandoval   blk-mq: abstract ...
488
489
490
491
492
  	wake_index = atomic_read(&sbq->wake_index);
  	for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
  		struct sbq_wait_state *ws = &sbq->ws[wake_index];
  
  		if (waitqueue_active(&ws->wait)) {
417232880   Pavel Begunkov   sbitmap: Replace ...
493
494
  			if (wake_index != atomic_read(&sbq->wake_index))
  				atomic_set(&sbq->wake_index, wake_index);
88459642c   Omar Sandoval   blk-mq: abstract ...
495
496
497
498
499
500
501
502
  			return ws;
  		}
  
  		wake_index = sbq_index_inc(wake_index);
  	}
  
  	return NULL;
  }
c854ab577   Jens Axboe   sbitmap: fix race...
503
  static bool __sbq_wake_up(struct sbitmap_queue *sbq)
88459642c   Omar Sandoval   blk-mq: abstract ...
504
505
  {
  	struct sbq_wait_state *ws;
6c0ca7ae2   Omar Sandoval   sbitmap: fix wake...
506
  	unsigned int wake_batch;
88459642c   Omar Sandoval   blk-mq: abstract ...
507
  	int wait_cnt;
88459642c   Omar Sandoval   blk-mq: abstract ...
508
509
  	ws = sbq_wake_ptr(sbq);
  	if (!ws)
c854ab577   Jens Axboe   sbitmap: fix race...
510
  		return false;
88459642c   Omar Sandoval   blk-mq: abstract ...
511
512
  
  	wait_cnt = atomic_dec_return(&ws->wait_cnt);
6c0ca7ae2   Omar Sandoval   sbitmap: fix wake...
513
  	if (wait_cnt <= 0) {
c854ab577   Jens Axboe   sbitmap: fix race...
514
  		int ret;
6c0ca7ae2   Omar Sandoval   sbitmap: fix wake...
515
  		wake_batch = READ_ONCE(sbq->wake_batch);
c854ab577   Jens Axboe   sbitmap: fix race...
516

6c0ca7ae2   Omar Sandoval   sbitmap: fix wake...
517
518
519
520
521
522
  		/*
  		 * Pairs with the memory barrier in sbitmap_queue_resize() to
  		 * ensure that we see the batch size update before the wait
  		 * count is reset.
  		 */
  		smp_mb__before_atomic();
c854ab577   Jens Axboe   sbitmap: fix race...
523

6c0ca7ae2   Omar Sandoval   sbitmap: fix wake...
524
  		/*
c854ab577   Jens Axboe   sbitmap: fix race...
525
526
527
  		 * For concurrent callers of this, the one that failed the
  		 * atomic_cmpxhcg() race should call this function again
  		 * to wakeup a new batch on a different 'ws'.
6c0ca7ae2   Omar Sandoval   sbitmap: fix wake...
528
  		 */
c854ab577   Jens Axboe   sbitmap: fix race...
529
530
531
532
533
534
535
536
  		ret = atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wake_batch);
  		if (ret == wait_cnt) {
  			sbq_index_atomic_inc(&sbq->wake_index);
  			wake_up_nr(&ws->wait, wake_batch);
  			return false;
  		}
  
  		return true;
88459642c   Omar Sandoval   blk-mq: abstract ...
537
  	}
c854ab577   Jens Axboe   sbitmap: fix race...
538
539
540
  
  	return false;
  }
e6fc46498   Ming Lei   blk-mq: avoid sta...
541
  void sbitmap_queue_wake_up(struct sbitmap_queue *sbq)
c854ab577   Jens Axboe   sbitmap: fix race...
542
543
544
  {
  	while (__sbq_wake_up(sbq))
  		;
88459642c   Omar Sandoval   blk-mq: abstract ...
545
  }
e6fc46498   Ming Lei   blk-mq: avoid sta...
546
  EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up);
88459642c   Omar Sandoval   blk-mq: abstract ...
547

40aabb674   Omar Sandoval   sbitmap: push per...
548
  void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
f4a644db8   Omar Sandoval   sbitmap: push all...
549
  			 unsigned int cpu)
88459642c   Omar Sandoval   blk-mq: abstract ...
550
  {
e6d1fa584   Ming Lei   sbitmap: order RE...
551
552
553
554
555
556
557
558
559
560
561
  	/*
  	 * Once the clear bit is set, the bit may be allocated out.
  	 *
  	 * Orders READ/WRITE on the asssociated instance(such as request
  	 * of blk_mq) by this bit for avoiding race with re-allocation,
  	 * and its pair is the memory barrier implied in __sbitmap_get_word.
  	 *
  	 * One invariant is that the clear bit has to be zero when the bit
  	 * is in use.
  	 */
  	smp_mb__before_atomic();
ea86ea2cd   Jens Axboe   sbitmap: ammortiz...
562
  	sbitmap_deferred_clear_bit(&sbq->sb, nr);
e6fc46498   Ming Lei   blk-mq: avoid sta...
563
564
565
566
567
568
569
570
  	/*
  	 * Pairs with the memory barrier in set_current_state() to ensure the
  	 * proper ordering of clear_bit_unlock()/waitqueue_active() in the waker
  	 * and test_and_set_bit_lock()/prepare_to_wait()/finish_wait() in the
  	 * waiter. See the comment on waitqueue_active().
  	 */
  	smp_mb__after_atomic();
  	sbitmap_queue_wake_up(sbq);
5c64a8df0   Omar Sandoval   sbitmap: don't up...
571
  	if (likely(!sbq->round_robin && nr < sbq->sb.depth))
40aabb674   Omar Sandoval   sbitmap: push per...
572
  		*per_cpu_ptr(sbq->alloc_hint, cpu) = nr;
88459642c   Omar Sandoval   blk-mq: abstract ...
573
574
575
576
577
578
579
580
  }
  EXPORT_SYMBOL_GPL(sbitmap_queue_clear);
  
  void sbitmap_queue_wake_all(struct sbitmap_queue *sbq)
  {
  	int i, wake_index;
  
  	/*
f66227de5   Omar Sandoval   sbitmap: use smp_...
581
  	 * Pairs with the memory barrier in set_current_state() like in
e6fc46498   Ming Lei   blk-mq: avoid sta...
582
  	 * sbitmap_queue_wake_up().
88459642c   Omar Sandoval   blk-mq: abstract ...
583
584
585
586
587
588
589
590
591
592
593
594
595
  	 */
  	smp_mb();
  	wake_index = atomic_read(&sbq->wake_index);
  	for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
  		struct sbq_wait_state *ws = &sbq->ws[wake_index];
  
  		if (waitqueue_active(&ws->wait))
  			wake_up(&ws->wait);
  
  		wake_index = sbq_index_inc(wake_index);
  	}
  }
  EXPORT_SYMBOL_GPL(sbitmap_queue_wake_all);
24af1ccfe   Omar Sandoval   sbitmap: add help...
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
  
  void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m)
  {
  	bool first;
  	int i;
  
  	sbitmap_show(&sbq->sb, m);
  
  	seq_puts(m, "alloc_hint={");
  	first = true;
  	for_each_possible_cpu(i) {
  		if (!first)
  			seq_puts(m, ", ");
  		first = false;
  		seq_printf(m, "%u", *per_cpu_ptr(sbq->alloc_hint, i));
  	}
  	seq_puts(m, "}
  ");
  
  	seq_printf(m, "wake_batch=%u
  ", sbq->wake_batch);
  	seq_printf(m, "wake_index=%d
  ", atomic_read(&sbq->wake_index));
5d2ee7122   Jens Axboe   sbitmap: optimize...
619
620
  	seq_printf(m, "ws_active=%d
  ", atomic_read(&sbq->ws_active));
24af1ccfe   Omar Sandoval   sbitmap: add help...
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
  
  	seq_puts(m, "ws={
  ");
  	for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
  		struct sbq_wait_state *ws = &sbq->ws[i];
  
  		seq_printf(m, "\t{.wait_cnt=%d, .wait=%s},
  ",
  			   atomic_read(&ws->wait_cnt),
  			   waitqueue_active(&ws->wait) ? "active" : "inactive");
  	}
  	seq_puts(m, "}
  ");
  
  	seq_printf(m, "round_robin=%d
  ", sbq->round_robin);
a32755396   Omar Sandoval   sbitmap: fix miss...
637
638
  	seq_printf(m, "min_shallow_depth=%u
  ", sbq->min_shallow_depth);
24af1ccfe   Omar Sandoval   sbitmap: add help...
639
640
  }
  EXPORT_SYMBOL_GPL(sbitmap_queue_show);
5d2ee7122   Jens Axboe   sbitmap: optimize...
641

9f6b7ef6c   Jens Axboe   sbitmap: add help...
642
643
644
645
646
647
648
  void sbitmap_add_wait_queue(struct sbitmap_queue *sbq,
  			    struct sbq_wait_state *ws,
  			    struct sbq_wait *sbq_wait)
  {
  	if (!sbq_wait->sbq) {
  		sbq_wait->sbq = sbq;
  		atomic_inc(&sbq->ws_active);
dba0d9caa   David Jeffery   sbitmap: only que...
649
  		add_wait_queue(&ws->wait, &sbq_wait->wait);
9f6b7ef6c   Jens Axboe   sbitmap: add help...
650
  	}
9f6b7ef6c   Jens Axboe   sbitmap: add help...
651
652
653
654
655
656
657
658
659
660
661
662
  }
  EXPORT_SYMBOL_GPL(sbitmap_add_wait_queue);
  
  void sbitmap_del_wait_queue(struct sbq_wait *sbq_wait)
  {
  	list_del_init(&sbq_wait->wait.entry);
  	if (sbq_wait->sbq) {
  		atomic_dec(&sbq_wait->sbq->ws_active);
  		sbq_wait->sbq = NULL;
  	}
  }
  EXPORT_SYMBOL_GPL(sbitmap_del_wait_queue);
5d2ee7122   Jens Axboe   sbitmap: optimize...
663
664
665
666
  void sbitmap_prepare_to_wait(struct sbitmap_queue *sbq,
  			     struct sbq_wait_state *ws,
  			     struct sbq_wait *sbq_wait, int state)
  {
9f6b7ef6c   Jens Axboe   sbitmap: add help...
667
  	if (!sbq_wait->sbq) {
5d2ee7122   Jens Axboe   sbitmap: optimize...
668
  		atomic_inc(&sbq->ws_active);
9f6b7ef6c   Jens Axboe   sbitmap: add help...
669
  		sbq_wait->sbq = sbq;
5d2ee7122   Jens Axboe   sbitmap: optimize...
670
671
672
673
674
675
676
677
678
  	}
  	prepare_to_wait_exclusive(&ws->wait, &sbq_wait->wait, state);
  }
  EXPORT_SYMBOL_GPL(sbitmap_prepare_to_wait);
  
  void sbitmap_finish_wait(struct sbitmap_queue *sbq, struct sbq_wait_state *ws,
  			 struct sbq_wait *sbq_wait)
  {
  	finish_wait(&ws->wait, &sbq_wait->wait);
9f6b7ef6c   Jens Axboe   sbitmap: add help...
679
  	if (sbq_wait->sbq) {
5d2ee7122   Jens Axboe   sbitmap: optimize...
680
  		atomic_dec(&sbq->ws_active);
9f6b7ef6c   Jens Axboe   sbitmap: add help...
681
  		sbq_wait->sbq = NULL;
5d2ee7122   Jens Axboe   sbitmap: optimize...
682
683
684
  	}
  }
  EXPORT_SYMBOL_GPL(sbitmap_finish_wait);