Blame view

drivers/md/bcache/btree.c 56.7 KB
cafe56359   Kent Overstreet   bcache: A block l...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
  /*
   * Copyright (C) 2010 Kent Overstreet <kent.overstreet@gmail.com>
   *
   * Uses a block device as cache for other block devices; optimized for SSDs.
   * All allocation is done in buckets, which should match the erase block size
   * of the device.
   *
   * Buckets containing cached data are kept on a heap sorted by priority;
   * bucket priority is increased on cache hit, and periodically all the buckets
   * on the heap have their priority scaled down. This currently is just used as
   * an LRU but in the future should allow for more intelligent heuristics.
   *
   * Buckets have an 8 bit counter; freeing is accomplished by incrementing the
   * counter. Garbage collection is used to remove stale pointers.
   *
   * Indexing is done via a btree; nodes are not necessarily fully sorted, rather
   * as keys are inserted we only sort the pages that have not yet been written.
   * When garbage collection is run, we resort the entire node.
   *
   * All configuration is done via sysfs; see Documentation/bcache.txt.
   */
  
  #include "bcache.h"
  #include "btree.h"
  #include "debug.h"
65d45231b   Kent Overstreet   bcache: Abstract ...
26
  #include "extents.h"
cafe56359   Kent Overstreet   bcache: A block l...
27
28
29
30
  
  #include <linux/slab.h>
  #include <linux/bitops.h>
  #include <linux/hash.h>
72a44517f   Kent Overstreet   bcache: Convert g...
31
  #include <linux/kthread.h>
cd953ed03   Geert Uytterhoeven   bcache: Add missi...
32
  #include <linux/prefetch.h>
cafe56359   Kent Overstreet   bcache: A block l...
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
  #include <linux/random.h>
  #include <linux/rcupdate.h>
  #include <trace/events/bcache.h>
  
  /*
   * Todo:
   * register_bcache: Return errors out to userspace correctly
   *
   * Writeback: don't undirty key until after a cache flush
   *
   * Create an iterator for key pointers
   *
   * On btree write error, mark bucket such that it won't be freed from the cache
   *
   * Journalling:
   *   Check for bad keys in replay
   *   Propagate barriers
   *   Refcount journal entries in journal_replay
   *
   * Garbage collection:
   *   Finish incremental gc
   *   Gc should free old UUIDs, data for invalid UUIDs
   *
   * Provide a way to list backing device UUIDs we have data cached for, and
   * probably how long it's been since we've seen them, and a way to invalidate
   * dirty data for devices that will never be attached again
   *
   * Keep 1 min/5 min/15 min statistics of how busy a block device has been, so
   * that based on that and how much dirty data we have we can keep writeback
   * from being starved
   *
   * Add a tracepoint or somesuch to watch for writeback starvation
   *
   * When btree depth > 1 and splitting an interior node, we have to make sure
   * alloc_bucket() cannot fail. This should be true but is not completely
   * obvious.
   *
cafe56359   Kent Overstreet   bcache: A block l...
70
71
72
73
74
   * Plugging?
   *
   * If data write is less than hard sector size of ssd, round up offset in open
   * bucket to the next whole sector
   *
cafe56359   Kent Overstreet   bcache: A block l...
75
76
77
78
79
80
81
82
83
84
85
   * Superblock needs to be fleshed out for multiple cache devices
   *
   * Add a sysfs tunable for the number of writeback IOs in flight
   *
   * Add a sysfs tunable for the number of open data buckets
   *
   * IO tracking: Can we track when one process is doing io on behalf of another?
   * IO tracking: Don't use just an average, weigh more recent stuff higher
   *
   * Test module load/unload
   */
cafe56359   Kent Overstreet   bcache: A block l...
86
87
88
89
90
91
92
  #define MAX_NEED_GC		64
  #define MAX_SAVE_PRIO		72
  
  #define PTR_DIRTY_BIT		(((uint64_t) 1 << 36))
  
  #define PTR_HASH(c, k)							\
  	(((k)->ptr[0] >> c->bucket_bits) | PTR_GEN(k, 0))
df8e89701   Kent Overstreet   bcache: Move some...
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
  #define insert_lock(s, b)	((b)->level <= (s)->lock)
  
  /*
   * These macros are for recursing down the btree - they handle the details of
   * locking and looking up nodes in the cache for you. They're best treated as
   * mere syntax when reading code that uses them.
   *
   * op->lock determines whether we take a read or a write lock at a given depth.
   * If you've got a read lock and find that you need a write lock (i.e. you're
   * going to have to split), set op->lock and return -EINTR; btree_root() will
   * call you again and you'll have the correct lock.
   */
  
  /**
   * btree - recurse down the btree on a specified key
   * @fn:		function to call, which will be passed the child node
   * @key:	key to recurse on
   * @b:		parent btree node
   * @op:		pointer to struct btree_op
   */
  #define btree(fn, key, b, op, ...)					\
  ({									\
  	int _r, l = (b)->level - 1;					\
  	bool _w = l <= (op)->lock;					\
2452cc890   Slava Pestov   bcache: try to se...
117
118
  	struct btree *_child = bch_btree_node_get((b)->c, op, key, l,	\
  						  _w, b);		\
df8e89701   Kent Overstreet   bcache: Move some...
119
  	if (!IS_ERR(_child)) {						\
df8e89701   Kent Overstreet   bcache: Move some...
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
  		_r = bch_btree_ ## fn(_child, op, ##__VA_ARGS__);	\
  		rw_unlock(_w, _child);					\
  	} else								\
  		_r = PTR_ERR(_child);					\
  	_r;								\
  })
  
  /**
   * btree_root - call a function on the root of the btree
   * @fn:		function to call, which will be passed the child node
   * @c:		cache set
   * @op:		pointer to struct btree_op
   */
  #define btree_root(fn, c, op, ...)					\
  ({									\
  	int _r = -EINTR;						\
  	do {								\
  		struct btree *_b = (c)->root;				\
  		bool _w = insert_lock(op, _b);				\
  		rw_lock(_w, _b, _b->level);				\
  		if (_b == (c)->root &&					\
  		    _w == insert_lock(op, _b)) {			\
df8e89701   Kent Overstreet   bcache: Move some...
142
143
144
  			_r = bch_btree_ ## fn(_b, op, ##__VA_ARGS__);	\
  		}							\
  		rw_unlock(_w, _b);					\
0a63b66db   Kent Overstreet   bcache: Rework bt...
145
  		bch_cannibalize_unlock(c);				\
78365411b   Kent Overstreet   bcache: Rework al...
146
147
  		if (_r == -EINTR)					\
  			schedule();					\
df8e89701   Kent Overstreet   bcache: Move some...
148
149
  	} while (_r == -EINTR);						\
  									\
0a63b66db   Kent Overstreet   bcache: Rework bt...
150
  	finish_wait(&(c)->btree_cache_wait, &(op)->wait);		\
df8e89701   Kent Overstreet   bcache: Move some...
151
152
  	_r;								\
  })
a85e968e6   Kent Overstreet   bcache: Add struc...
153
154
155
156
  static inline struct bset *write_block(struct btree *b)
  {
  	return ((void *) btree_bset_first(b)) + b->written * block_bytes(b->c);
  }
2a285686c   Kent Overstreet   bcache: btree loc...
157
158
159
160
161
162
163
164
165
166
167
168
169
  static void bch_btree_init_next(struct btree *b)
  {
  	/* If not a leaf node, always sort */
  	if (b->level && b->keys.nsets)
  		bch_btree_sort(&b->keys, &b->c->sort);
  	else
  		bch_btree_sort_lazy(&b->keys, &b->c->sort);
  
  	if (b->written < btree_blocks(b))
  		bch_bset_init_next(&b->keys, write_block(b),
  				   bset_magic(&b->c->sb));
  
  }
cafe56359   Kent Overstreet   bcache: A block l...
170
  /* Btree key manipulation */
3a3b6a4e0   Kent Overstreet   bcache: Don't bot...
171
  void bkey_put(struct cache_set *c, struct bkey *k)
e7c590eb6   Kent Overstreet   bcache: Convert b...
172
173
174
175
176
177
178
  {
  	unsigned i;
  
  	for (i = 0; i < KEY_PTRS(k); i++)
  		if (ptr_available(c, k, i))
  			atomic_dec_bug(&PTR_BUCKET(c, k, i)->pin);
  }
cafe56359   Kent Overstreet   bcache: A block l...
179
180
181
182
183
  /* Btree IO */
  
  static uint64_t btree_csum_set(struct btree *b, struct bset *i)
  {
  	uint64_t crc = b->key.ptr[0];
fafff81ce   Kent Overstreet   bcache: Bkey inde...
184
  	void *data = (void *) i + 8, *end = bset_bkey_last(i);
cafe56359   Kent Overstreet   bcache: A block l...
185

169ef1cf6   Kent Overstreet   bcache: Don't exp...
186
  	crc = bch_crc64_update(crc, data, end - data);
c19ed23a0   Kent Overstreet   bcache: Sparse fixes
187
  	return crc ^ 0xffffffffffffffffULL;
cafe56359   Kent Overstreet   bcache: A block l...
188
  }
78b77bf8b   Kent Overstreet   bcache: Btree ver...
189
  void bch_btree_node_read_done(struct btree *b)
cafe56359   Kent Overstreet   bcache: A block l...
190
  {
cafe56359   Kent Overstreet   bcache: A block l...
191
  	const char *err = "bad btree header";
ee811287c   Kent Overstreet   bcache: Rename/sh...
192
  	struct bset *i = btree_bset_first(b);
579435114   Kent Overstreet   bcache: Refactor ...
193
  	struct btree_iter *iter;
cafe56359   Kent Overstreet   bcache: A block l...
194

bcf090e00   Kent Overstreet   bcache: Make sure...
195
  	iter = mempool_alloc(b->c->fill_iter, GFP_NOIO);
579435114   Kent Overstreet   bcache: Refactor ...
196
  	iter->size = b->c->sb.bucket_size / b->c->sb.block_size;
cafe56359   Kent Overstreet   bcache: A block l...
197
  	iter->used = 0;
280481d06   Kent Overstreet   bcache: Debug cod...
198
  #ifdef CONFIG_BCACHE_DEBUG
c052dd9a2   Kent Overstreet   bcache: Convert b...
199
  	iter->b = &b->keys;
280481d06   Kent Overstreet   bcache: Debug cod...
200
  #endif
579435114   Kent Overstreet   bcache: Refactor ...
201
  	if (!i->seq)
cafe56359   Kent Overstreet   bcache: A block l...
202
203
204
  		goto err;
  
  	for (;
a85e968e6   Kent Overstreet   bcache: Add struc...
205
  	     b->written < btree_blocks(b) && i->seq == b->keys.set[0].data->seq;
cafe56359   Kent Overstreet   bcache: A block l...
206
207
208
209
210
211
  	     i = write_block(b)) {
  		err = "unsupported bset version";
  		if (i->version > BCACHE_BSET_VERSION)
  			goto err;
  
  		err = "bad btree header";
ee811287c   Kent Overstreet   bcache: Rename/sh...
212
213
  		if (b->written + set_blocks(i, block_bytes(b->c)) >
  		    btree_blocks(b))
cafe56359   Kent Overstreet   bcache: A block l...
214
215
216
  			goto err;
  
  		err = "bad magic";
81ab4190a   Kent Overstreet   bcache: Pull on d...
217
  		if (i->magic != bset_magic(&b->c->sb))
cafe56359   Kent Overstreet   bcache: A block l...
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
  			goto err;
  
  		err = "bad checksum";
  		switch (i->version) {
  		case 0:
  			if (i->csum != csum_set(i))
  				goto err;
  			break;
  		case BCACHE_BSET_VERSION:
  			if (i->csum != btree_csum_set(b, i))
  				goto err;
  			break;
  		}
  
  		err = "empty set";
a85e968e6   Kent Overstreet   bcache: Add struc...
233
  		if (i != b->keys.set[0].data && !i->keys)
cafe56359   Kent Overstreet   bcache: A block l...
234
  			goto err;
fafff81ce   Kent Overstreet   bcache: Bkey inde...
235
  		bch_btree_iter_push(iter, i->start, bset_bkey_last(i));
cafe56359   Kent Overstreet   bcache: A block l...
236

ee811287c   Kent Overstreet   bcache: Rename/sh...
237
  		b->written += set_blocks(i, block_bytes(b->c));
cafe56359   Kent Overstreet   bcache: A block l...
238
239
240
241
  	}
  
  	err = "corrupted btree";
  	for (i = write_block(b);
a85e968e6   Kent Overstreet   bcache: Add struc...
242
  	     bset_sector_offset(&b->keys, i) < KEY_SIZE(&b->key);
cafe56359   Kent Overstreet   bcache: A block l...
243
  	     i = ((void *) i) + block_bytes(b->c))
a85e968e6   Kent Overstreet   bcache: Add struc...
244
  		if (i->seq == b->keys.set[0].data->seq)
cafe56359   Kent Overstreet   bcache: A block l...
245
  			goto err;
a85e968e6   Kent Overstreet   bcache: Add struc...
246
  	bch_btree_sort_and_fix_extents(&b->keys, iter, &b->c->sort);
cafe56359   Kent Overstreet   bcache: A block l...
247

a85e968e6   Kent Overstreet   bcache: Add struc...
248
  	i = b->keys.set[0].data;
cafe56359   Kent Overstreet   bcache: A block l...
249
  	err = "short btree key";
a85e968e6   Kent Overstreet   bcache: Add struc...
250
251
  	if (b->keys.set[0].size &&
  	    bkey_cmp(&b->key, &b->keys.set[0].end) < 0)
cafe56359   Kent Overstreet   bcache: A block l...
252
253
254
  		goto err;
  
  	if (b->written < btree_blocks(b))
a85e968e6   Kent Overstreet   bcache: Add struc...
255
256
  		bch_bset_init_next(&b->keys, write_block(b),
  				   bset_magic(&b->c->sb));
cafe56359   Kent Overstreet   bcache: A block l...
257
  out:
579435114   Kent Overstreet   bcache: Refactor ...
258
259
  	mempool_free(iter, b->c->fill_iter);
  	return;
cafe56359   Kent Overstreet   bcache: A block l...
260
261
  err:
  	set_btree_node_io_error(b);
88b9f8c42   Kent Overstreet   bcache: kill index()
262
  	bch_cache_set_error(b->c, "%s at bucket %zu, block %u, %u keys",
cafe56359   Kent Overstreet   bcache: A block l...
263
  			    err, PTR_BUCKET_NR(b->c, &b->key, 0),
88b9f8c42   Kent Overstreet   bcache: kill index()
264
  			    bset_block_offset(b, i), i->keys);
cafe56359   Kent Overstreet   bcache: A block l...
265
266
  	goto out;
  }
4246a0b63   Christoph Hellwig   block: add a bi_e...
267
  static void btree_node_read_endio(struct bio *bio)
cafe56359   Kent Overstreet   bcache: A block l...
268
  {
579435114   Kent Overstreet   bcache: Refactor ...
269
270
271
  	struct closure *cl = bio->bi_private;
  	closure_put(cl);
  }
cafe56359   Kent Overstreet   bcache: A block l...
272

78b77bf8b   Kent Overstreet   bcache: Btree ver...
273
  static void bch_btree_node_read(struct btree *b)
579435114   Kent Overstreet   bcache: Refactor ...
274
275
276
277
  {
  	uint64_t start_time = local_clock();
  	struct closure cl;
  	struct bio *bio;
cafe56359   Kent Overstreet   bcache: A block l...
278

c37511b86   Kent Overstreet   bcache: Fix/revam...
279
  	trace_bcache_btree_read(b);
579435114   Kent Overstreet   bcache: Refactor ...
280
  	closure_init_stack(&cl);
cafe56359   Kent Overstreet   bcache: A block l...
281

579435114   Kent Overstreet   bcache: Refactor ...
282
  	bio = bch_bbio_alloc(b->c);
4f024f379   Kent Overstreet   block: Abstract o...
283
  	bio->bi_iter.bi_size = KEY_SIZE(&b->key) << 9;
579435114   Kent Overstreet   bcache: Refactor ...
284
285
  	bio->bi_end_io	= btree_node_read_endio;
  	bio->bi_private	= &cl;
ad0d9e76a   Mike Christie   bcache: use bio o...
286
  	bio_set_op_attrs(bio, REQ_OP_READ, REQ_META|READ_SYNC);
cafe56359   Kent Overstreet   bcache: A block l...
287

a85e968e6   Kent Overstreet   bcache: Add struc...
288
  	bch_bio_map(bio, b->keys.set[0].data);
cafe56359   Kent Overstreet   bcache: A block l...
289

579435114   Kent Overstreet   bcache: Refactor ...
290
291
  	bch_submit_bbio(bio, b->c, &b->key, 0);
  	closure_sync(&cl);
cafe56359   Kent Overstreet   bcache: A block l...
292

4246a0b63   Christoph Hellwig   block: add a bi_e...
293
  	if (bio->bi_error)
579435114   Kent Overstreet   bcache: Refactor ...
294
295
296
297
298
299
300
301
  		set_btree_node_io_error(b);
  
  	bch_bbio_free(bio, b->c);
  
  	if (btree_node_io_error(b))
  		goto err;
  
  	bch_btree_node_read_done(b);
579435114   Kent Overstreet   bcache: Refactor ...
302
  	bch_time_stats_update(&b->c->btree_read_time, start_time);
579435114   Kent Overstreet   bcache: Refactor ...
303
304
305
  
  	return;
  err:
61cbd250f   Geert Uytterhoeven   bcache: Correct p...
306
  	bch_cache_set_error(b->c, "io error reading bucket %zu",
579435114   Kent Overstreet   bcache: Refactor ...
307
  			    PTR_BUCKET_NR(b->c, &b->key, 0));
cafe56359   Kent Overstreet   bcache: A block l...
308
309
310
311
312
313
  }
  
  static void btree_complete_write(struct btree *b, struct btree_write *w)
  {
  	if (w->prio_blocked &&
  	    !atomic_sub_return(w->prio_blocked, &b->c->prio_blocked))
119ba0f82   Kent Overstreet   bcache: Convert a...
314
  		wake_up_allocators(b->c);
cafe56359   Kent Overstreet   bcache: A block l...
315
316
317
318
319
  
  	if (w->journal) {
  		atomic_dec_bug(w->journal);
  		__closure_wake_up(&b->c->journal.wait);
  	}
cafe56359   Kent Overstreet   bcache: A block l...
320
321
  	w->prio_blocked	= 0;
  	w->journal	= NULL;
cafe56359   Kent Overstreet   bcache: A block l...
322
  }
cb7a583e6   Kent Overstreet   bcache: kill clos...
323
324
325
326
327
328
  static void btree_node_write_unlock(struct closure *cl)
  {
  	struct btree *b = container_of(cl, struct btree, io);
  
  	up(&b->io_mutex);
  }
579435114   Kent Overstreet   bcache: Refactor ...
329
  static void __btree_node_write_done(struct closure *cl)
cafe56359   Kent Overstreet   bcache: A block l...
330
  {
cb7a583e6   Kent Overstreet   bcache: kill clos...
331
  	struct btree *b = container_of(cl, struct btree, io);
cafe56359   Kent Overstreet   bcache: A block l...
332
333
334
335
336
337
338
  	struct btree_write *w = btree_prev_write(b);
  
  	bch_bbio_free(b->bio, b->c);
  	b->bio = NULL;
  	btree_complete_write(b, w);
  
  	if (btree_node_dirty(b))
56b30770b   Kent Overstreet   bcache: Kill btre...
339
  		schedule_delayed_work(&b->work, 30 * HZ);
cafe56359   Kent Overstreet   bcache: A block l...
340

cb7a583e6   Kent Overstreet   bcache: kill clos...
341
  	closure_return_with_destructor(cl, btree_node_write_unlock);
cafe56359   Kent Overstreet   bcache: A block l...
342
  }
579435114   Kent Overstreet   bcache: Refactor ...
343
  static void btree_node_write_done(struct closure *cl)
cafe56359   Kent Overstreet   bcache: A block l...
344
  {
cb7a583e6   Kent Overstreet   bcache: kill clos...
345
  	struct btree *b = container_of(cl, struct btree, io);
cafe56359   Kent Overstreet   bcache: A block l...
346

491221f88   Guoqing Jiang   block: export bio...
347
  	bio_free_pages(b->bio);
579435114   Kent Overstreet   bcache: Refactor ...
348
  	__btree_node_write_done(cl);
cafe56359   Kent Overstreet   bcache: A block l...
349
  }
4246a0b63   Christoph Hellwig   block: add a bi_e...
350
  static void btree_node_write_endio(struct bio *bio)
579435114   Kent Overstreet   bcache: Refactor ...
351
352
  {
  	struct closure *cl = bio->bi_private;
cb7a583e6   Kent Overstreet   bcache: kill clos...
353
  	struct btree *b = container_of(cl, struct btree, io);
579435114   Kent Overstreet   bcache: Refactor ...
354

4246a0b63   Christoph Hellwig   block: add a bi_e...
355
  	if (bio->bi_error)
579435114   Kent Overstreet   bcache: Refactor ...
356
  		set_btree_node_io_error(b);
4246a0b63   Christoph Hellwig   block: add a bi_e...
357
  	bch_bbio_count_io_errors(b->c, bio, bio->bi_error, "writing btree");
579435114   Kent Overstreet   bcache: Refactor ...
358
359
360
361
  	closure_put(cl);
  }
  
  static void do_btree_node_write(struct btree *b)
cafe56359   Kent Overstreet   bcache: A block l...
362
  {
cb7a583e6   Kent Overstreet   bcache: kill clos...
363
  	struct closure *cl = &b->io;
ee811287c   Kent Overstreet   bcache: Rename/sh...
364
  	struct bset *i = btree_bset_last(b);
cafe56359   Kent Overstreet   bcache: A block l...
365
366
367
368
  	BKEY_PADDED(key) k;
  
  	i->version	= BCACHE_BSET_VERSION;
  	i->csum		= btree_csum_set(b, i);
579435114   Kent Overstreet   bcache: Refactor ...
369
370
371
372
  	BUG_ON(b->bio);
  	b->bio = bch_bbio_alloc(b->c);
  
  	b->bio->bi_end_io	= btree_node_write_endio;
faadf0c96   Kent Overstreet   bcache: Drop some...
373
  	b->bio->bi_private	= cl;
ee811287c   Kent Overstreet   bcache: Rename/sh...
374
  	b->bio->bi_iter.bi_size	= roundup(set_bytes(i), block_bytes(b->c));
ad0d9e76a   Mike Christie   bcache: use bio o...
375
  	bio_set_op_attrs(b->bio, REQ_OP_WRITE, REQ_META|WRITE_SYNC|REQ_FUA);
169ef1cf6   Kent Overstreet   bcache: Don't exp...
376
  	bch_bio_map(b->bio, i);
cafe56359   Kent Overstreet   bcache: A block l...
377

e49c7c374   Kent Overstreet   bcache: FUA fixes
378
379
380
381
382
383
384
385
386
387
388
389
390
391
  	/*
  	 * If we're appending to a leaf node, we don't technically need FUA -
  	 * this write just needs to be persisted before the next journal write,
  	 * which will be marked FLUSH|FUA.
  	 *
  	 * Similarly if we're writing a new btree root - the pointer is going to
  	 * be in the next journal entry.
  	 *
  	 * But if we're writing a new btree node (that isn't a root) or
  	 * appending to a non leaf btree node, we need either FUA or a flush
  	 * when we write the parent with the new pointer. FUA is cheaper than a
  	 * flush, and writes appending to leaf nodes aren't blocking anything so
  	 * just make all btree node writes FUA to keep things sane.
  	 */
cafe56359   Kent Overstreet   bcache: A block l...
392
  	bkey_copy(&k.key, &b->key);
ee811287c   Kent Overstreet   bcache: Rename/sh...
393
  	SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) +
a85e968e6   Kent Overstreet   bcache: Add struc...
394
  		       bset_sector_offset(&b->keys, i));
cafe56359   Kent Overstreet   bcache: A block l...
395

501d52a90   Kent Overstreet   bcache: Allocate ...
396
  	if (!bio_alloc_pages(b->bio, __GFP_NOWARN|GFP_NOWAIT)) {
cafe56359   Kent Overstreet   bcache: A block l...
397
398
399
  		int j;
  		struct bio_vec *bv;
  		void *base = (void *) ((unsigned long) i & ~(PAGE_SIZE - 1));
7988613b0   Kent Overstreet   block: Convert bi...
400
  		bio_for_each_segment_all(bv, b->bio, j)
cafe56359   Kent Overstreet   bcache: A block l...
401
402
  			memcpy(page_address(bv->bv_page),
  			       base + j * PAGE_SIZE, PAGE_SIZE);
cafe56359   Kent Overstreet   bcache: A block l...
403
  		bch_submit_bbio(b->bio, b->c, &k.key, 0);
579435114   Kent Overstreet   bcache: Refactor ...
404
  		continue_at(cl, btree_node_write_done, NULL);
cafe56359   Kent Overstreet   bcache: A block l...
405
406
  	} else {
  		b->bio->bi_vcnt = 0;
169ef1cf6   Kent Overstreet   bcache: Don't exp...
407
  		bch_bio_map(b->bio, i);
cafe56359   Kent Overstreet   bcache: A block l...
408

cafe56359   Kent Overstreet   bcache: A block l...
409
410
411
  		bch_submit_bbio(b->bio, b->c, &k.key, 0);
  
  		closure_sync(cl);
cb7a583e6   Kent Overstreet   bcache: kill clos...
412
  		continue_at_nobarrier(cl, __btree_node_write_done, NULL);
cafe56359   Kent Overstreet   bcache: A block l...
413
414
  	}
  }
2a285686c   Kent Overstreet   bcache: btree loc...
415
  void __bch_btree_node_write(struct btree *b, struct closure *parent)
cafe56359   Kent Overstreet   bcache: A block l...
416
  {
ee811287c   Kent Overstreet   bcache: Rename/sh...
417
  	struct bset *i = btree_bset_last(b);
cafe56359   Kent Overstreet   bcache: A block l...
418

2a285686c   Kent Overstreet   bcache: btree loc...
419
  	lockdep_assert_held(&b->write_lock);
c37511b86   Kent Overstreet   bcache: Fix/revam...
420
  	trace_bcache_btree_write(b);
cafe56359   Kent Overstreet   bcache: A block l...
421
  	BUG_ON(current->bio_list);
579435114   Kent Overstreet   bcache: Refactor ...
422
423
  	BUG_ON(b->written >= btree_blocks(b));
  	BUG_ON(b->written && !i->keys);
ee811287c   Kent Overstreet   bcache: Rename/sh...
424
  	BUG_ON(btree_bset_first(b)->seq != i->seq);
dc9d98d62   Kent Overstreet   bcache: Convert d...
425
  	bch_check_keys(&b->keys, "writing");
cafe56359   Kent Overstreet   bcache: A block l...
426

cafe56359   Kent Overstreet   bcache: A block l...
427
  	cancel_delayed_work(&b->work);
579435114   Kent Overstreet   bcache: Refactor ...
428
  	/* If caller isn't waiting for write, parent refcount is cache set */
cb7a583e6   Kent Overstreet   bcache: kill clos...
429
430
  	down(&b->io_mutex);
  	closure_init(&b->io, parent ?: &b->c->cl);
579435114   Kent Overstreet   bcache: Refactor ...
431

cafe56359   Kent Overstreet   bcache: A block l...
432
433
  	clear_bit(BTREE_NODE_dirty,	 &b->flags);
  	change_bit(BTREE_NODE_write_idx, &b->flags);
579435114   Kent Overstreet   bcache: Refactor ...
434
  	do_btree_node_write(b);
cafe56359   Kent Overstreet   bcache: A block l...
435

ee811287c   Kent Overstreet   bcache: Rename/sh...
436
  	atomic_long_add(set_blocks(i, block_bytes(b->c)) * b->c->sb.block_size,
cafe56359   Kent Overstreet   bcache: A block l...
437
  			&PTR_CACHE(b->c, &b->key, 0)->btree_sectors_written);
a85e968e6   Kent Overstreet   bcache: Add struc...
438
  	b->written += set_blocks(i, block_bytes(b->c));
2a285686c   Kent Overstreet   bcache: btree loc...
439
  }
a85e968e6   Kent Overstreet   bcache: Add struc...
440

2a285686c   Kent Overstreet   bcache: btree loc...
441
442
443
444
445
446
447
  void bch_btree_node_write(struct btree *b, struct closure *parent)
  {
  	unsigned nsets = b->keys.nsets;
  
  	lockdep_assert_held(&b->lock);
  
  	__bch_btree_node_write(b, parent);
cafe56359   Kent Overstreet   bcache: A block l...
448

78b77bf8b   Kent Overstreet   bcache: Btree ver...
449
450
451
452
  	/*
  	 * do verify if there was more than one set initially (i.e. we did a
  	 * sort) and we sorted down to a single set:
  	 */
2a285686c   Kent Overstreet   bcache: btree loc...
453
  	if (nsets && !b->keys.nsets)
78b77bf8b   Kent Overstreet   bcache: Btree ver...
454
  		bch_btree_verify(b);
2a285686c   Kent Overstreet   bcache: btree loc...
455
  	bch_btree_init_next(b);
cafe56359   Kent Overstreet   bcache: A block l...
456
  }
f269af5a0   Kent Overstreet   bcache: Add btree...
457
458
459
460
461
  static void bch_btree_node_write_sync(struct btree *b)
  {
  	struct closure cl;
  
  	closure_init_stack(&cl);
2a285686c   Kent Overstreet   bcache: btree loc...
462
463
  
  	mutex_lock(&b->write_lock);
f269af5a0   Kent Overstreet   bcache: Add btree...
464
  	bch_btree_node_write(b, &cl);
2a285686c   Kent Overstreet   bcache: btree loc...
465
  	mutex_unlock(&b->write_lock);
f269af5a0   Kent Overstreet   bcache: Add btree...
466
467
  	closure_sync(&cl);
  }
579435114   Kent Overstreet   bcache: Refactor ...
468
  static void btree_node_write_work(struct work_struct *w)
cafe56359   Kent Overstreet   bcache: A block l...
469
470
  {
  	struct btree *b = container_of(to_delayed_work(w), struct btree, work);
2a285686c   Kent Overstreet   bcache: btree loc...
471
  	mutex_lock(&b->write_lock);
cafe56359   Kent Overstreet   bcache: A block l...
472
  	if (btree_node_dirty(b))
2a285686c   Kent Overstreet   bcache: btree loc...
473
474
  		__bch_btree_node_write(b, NULL);
  	mutex_unlock(&b->write_lock);
cafe56359   Kent Overstreet   bcache: A block l...
475
  }
c18536a72   Kent Overstreet   bcache: Prune str...
476
  static void bch_btree_leaf_dirty(struct btree *b, atomic_t *journal_ref)
cafe56359   Kent Overstreet   bcache: A block l...
477
  {
ee811287c   Kent Overstreet   bcache: Rename/sh...
478
  	struct bset *i = btree_bset_last(b);
cafe56359   Kent Overstreet   bcache: A block l...
479
  	struct btree_write *w = btree_current_write(b);
2a285686c   Kent Overstreet   bcache: btree loc...
480
  	lockdep_assert_held(&b->write_lock);
579435114   Kent Overstreet   bcache: Refactor ...
481
482
  	BUG_ON(!b->written);
  	BUG_ON(!i->keys);
cafe56359   Kent Overstreet   bcache: A block l...
483

579435114   Kent Overstreet   bcache: Refactor ...
484
  	if (!btree_node_dirty(b))
56b30770b   Kent Overstreet   bcache: Kill btre...
485
  		schedule_delayed_work(&b->work, 30 * HZ);
cafe56359   Kent Overstreet   bcache: A block l...
486

579435114   Kent Overstreet   bcache: Refactor ...
487
  	set_btree_node_dirty(b);
cafe56359   Kent Overstreet   bcache: A block l...
488

c18536a72   Kent Overstreet   bcache: Prune str...
489
  	if (journal_ref) {
cafe56359   Kent Overstreet   bcache: A block l...
490
  		if (w->journal &&
c18536a72   Kent Overstreet   bcache: Prune str...
491
  		    journal_pin_cmp(b->c, w->journal, journal_ref)) {
cafe56359   Kent Overstreet   bcache: A block l...
492
493
494
495
496
  			atomic_dec_bug(w->journal);
  			w->journal = NULL;
  		}
  
  		if (!w->journal) {
c18536a72   Kent Overstreet   bcache: Prune str...
497
  			w->journal = journal_ref;
cafe56359   Kent Overstreet   bcache: A block l...
498
499
500
  			atomic_inc(w->journal);
  		}
  	}
cafe56359   Kent Overstreet   bcache: A block l...
501
  	/* Force write if set is too big */
579435114   Kent Overstreet   bcache: Refactor ...
502
503
504
  	if (set_bytes(i) > PAGE_SIZE - 48 &&
  	    !current->bio_list)
  		bch_btree_node_write(b, NULL);
cafe56359   Kent Overstreet   bcache: A block l...
505
506
507
508
509
510
  }
  
  /*
   * Btree in memory cache - allocation/freeing
   * mca -> memory cache
   */
cafe56359   Kent Overstreet   bcache: A block l...
511
512
513
  #define mca_reserve(c)	(((c->root && c->root->level)		\
  			  ? c->root->level : 1) * 8 + 16)
  #define mca_can_free(c)						\
0a63b66db   Kent Overstreet   bcache: Rework bt...
514
  	max_t(int, 0, c->btree_cache_used - mca_reserve(c))
cafe56359   Kent Overstreet   bcache: A block l...
515
516
517
  
  static void mca_data_free(struct btree *b)
  {
cb7a583e6   Kent Overstreet   bcache: kill clos...
518
  	BUG_ON(b->io_mutex.count != 1);
cafe56359   Kent Overstreet   bcache: A block l...
519

a85e968e6   Kent Overstreet   bcache: Add struc...
520
  	bch_btree_keys_free(&b->keys);
cafe56359   Kent Overstreet   bcache: A block l...
521

0a63b66db   Kent Overstreet   bcache: Rework bt...
522
  	b->c->btree_cache_used--;
ee811287c   Kent Overstreet   bcache: Rename/sh...
523
  	list_move(&b->list, &b->c->btree_cache_freed);
cafe56359   Kent Overstreet   bcache: A block l...
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
  }
  
  static void mca_bucket_free(struct btree *b)
  {
  	BUG_ON(btree_node_dirty(b));
  
  	b->key.ptr[0] = 0;
  	hlist_del_init_rcu(&b->hash);
  	list_move(&b->list, &b->c->btree_cache_freeable);
  }
  
  static unsigned btree_order(struct bkey *k)
  {
  	return ilog2(KEY_SIZE(k) / PAGE_SECTORS ?: 1);
  }
  
  static void mca_data_alloc(struct btree *b, struct bkey *k, gfp_t gfp)
  {
a85e968e6   Kent Overstreet   bcache: Add struc...
542
  	if (!bch_btree_keys_alloc(&b->keys,
ee811287c   Kent Overstreet   bcache: Rename/sh...
543
544
545
546
  				  max_t(unsigned,
  					ilog2(b->c->btree_pages),
  					btree_order(k)),
  				  gfp)) {
0a63b66db   Kent Overstreet   bcache: Rework bt...
547
  		b->c->btree_cache_used++;
ee811287c   Kent Overstreet   bcache: Rename/sh...
548
549
550
551
  		list_move(&b->list, &b->c->btree_cache);
  	} else {
  		list_move(&b->list, &b->c->btree_cache_freed);
  	}
cafe56359   Kent Overstreet   bcache: A block l...
552
553
554
555
556
557
558
559
560
561
562
  }
  
  static struct btree *mca_bucket_alloc(struct cache_set *c,
  				      struct bkey *k, gfp_t gfp)
  {
  	struct btree *b = kzalloc(sizeof(struct btree), gfp);
  	if (!b)
  		return NULL;
  
  	init_rwsem(&b->lock);
  	lockdep_set_novalidate_class(&b->lock);
2a285686c   Kent Overstreet   bcache: btree loc...
563
564
  	mutex_init(&b->write_lock);
  	lockdep_set_novalidate_class(&b->write_lock);
cafe56359   Kent Overstreet   bcache: A block l...
565
  	INIT_LIST_HEAD(&b->list);
579435114   Kent Overstreet   bcache: Refactor ...
566
  	INIT_DELAYED_WORK(&b->work, btree_node_write_work);
cafe56359   Kent Overstreet   bcache: A block l...
567
  	b->c = c;
cb7a583e6   Kent Overstreet   bcache: kill clos...
568
  	sema_init(&b->io_mutex, 1);
cafe56359   Kent Overstreet   bcache: A block l...
569
570
571
572
  
  	mca_data_alloc(b, k, gfp);
  	return b;
  }
e8e1d4682   Kent Overstreet   bcache: Convert t...
573
  static int mca_reap(struct btree *b, unsigned min_order, bool flush)
cafe56359   Kent Overstreet   bcache: A block l...
574
  {
e8e1d4682   Kent Overstreet   bcache: Convert t...
575
576
577
  	struct closure cl;
  
  	closure_init_stack(&cl);
cafe56359   Kent Overstreet   bcache: A block l...
578
579
580
581
  	lockdep_assert_held(&b->c->bucket_lock);
  
  	if (!down_write_trylock(&b->lock))
  		return -ENOMEM;
a85e968e6   Kent Overstreet   bcache: Add struc...
582
  	BUG_ON(btree_node_dirty(b) && !b->keys.set[0].data);
e8e1d4682   Kent Overstreet   bcache: Convert t...
583

a85e968e6   Kent Overstreet   bcache: Add struc...
584
  	if (b->keys.page_order < min_order)
cb7a583e6   Kent Overstreet   bcache: kill clos...
585
586
587
588
589
590
591
592
593
  		goto out_unlock;
  
  	if (!flush) {
  		if (btree_node_dirty(b))
  			goto out_unlock;
  
  		if (down_trylock(&b->io_mutex))
  			goto out_unlock;
  		up(&b->io_mutex);
cafe56359   Kent Overstreet   bcache: A block l...
594
  	}
2a285686c   Kent Overstreet   bcache: btree loc...
595
  	mutex_lock(&b->write_lock);
f269af5a0   Kent Overstreet   bcache: Add btree...
596
  	if (btree_node_dirty(b))
2a285686c   Kent Overstreet   bcache: btree loc...
597
598
599
600
  		__bch_btree_node_write(b, &cl);
  	mutex_unlock(&b->write_lock);
  
  	closure_sync(&cl);
cafe56359   Kent Overstreet   bcache: A block l...
601

e8e1d4682   Kent Overstreet   bcache: Convert t...
602
  	/* wait for any in flight btree write */
cb7a583e6   Kent Overstreet   bcache: kill clos...
603
604
  	down(&b->io_mutex);
  	up(&b->io_mutex);
e8e1d4682   Kent Overstreet   bcache: Convert t...
605

cafe56359   Kent Overstreet   bcache: A block l...
606
  	return 0;
cb7a583e6   Kent Overstreet   bcache: kill clos...
607
608
609
  out_unlock:
  	rw_unlock(true, b);
  	return -ENOMEM;
cafe56359   Kent Overstreet   bcache: A block l...
610
  }
7dc19d5af   Dave Chinner   drivers: convert ...
611
612
  static unsigned long bch_mca_scan(struct shrinker *shrink,
  				  struct shrink_control *sc)
cafe56359   Kent Overstreet   bcache: A block l...
613
614
615
616
  {
  	struct cache_set *c = container_of(shrink, struct cache_set, shrink);
  	struct btree *b, *t;
  	unsigned long i, nr = sc->nr_to_scan;
7dc19d5af   Dave Chinner   drivers: convert ...
617
  	unsigned long freed = 0;
cafe56359   Kent Overstreet   bcache: A block l...
618
619
  
  	if (c->shrinker_disabled)
7dc19d5af   Dave Chinner   drivers: convert ...
620
  		return SHRINK_STOP;
cafe56359   Kent Overstreet   bcache: A block l...
621

0a63b66db   Kent Overstreet   bcache: Rework bt...
622
  	if (c->btree_cache_alloc_lock)
7dc19d5af   Dave Chinner   drivers: convert ...
623
  		return SHRINK_STOP;
cafe56359   Kent Overstreet   bcache: A block l...
624
625
  
  	/* Return -1 if we can't do anything right now */
a698e08c8   Kent Overstreet   bcache: Fix a shr...
626
  	if (sc->gfp_mask & __GFP_IO)
cafe56359   Kent Overstreet   bcache: A block l...
627
628
629
  		mutex_lock(&c->bucket_lock);
  	else if (!mutex_trylock(&c->bucket_lock))
  		return -1;
36c9ea983   Kent Overstreet   bcache: Document ...
630
631
632
633
634
635
636
  	/*
  	 * It's _really_ critical that we don't free too many btree nodes - we
  	 * have to always leave ourselves a reserve. The reserve is how we
  	 * guarantee that allocating memory for a new btree node can always
  	 * succeed, so that inserting keys into the btree can always succeed and
  	 * IO can always make forward progress:
  	 */
cafe56359   Kent Overstreet   bcache: A block l...
637
638
639
640
641
  	nr /= c->btree_pages;
  	nr = min_t(unsigned long, nr, mca_can_free(c));
  
  	i = 0;
  	list_for_each_entry_safe(b, t, &c->btree_cache_freeable, list) {
7dc19d5af   Dave Chinner   drivers: convert ...
642
  		if (freed >= nr)
cafe56359   Kent Overstreet   bcache: A block l...
643
644
645
  			break;
  
  		if (++i > 3 &&
e8e1d4682   Kent Overstreet   bcache: Convert t...
646
  		    !mca_reap(b, 0, false)) {
cafe56359   Kent Overstreet   bcache: A block l...
647
648
  			mca_data_free(b);
  			rw_unlock(true, b);
7dc19d5af   Dave Chinner   drivers: convert ...
649
  			freed++;
cafe56359   Kent Overstreet   bcache: A block l...
650
651
  		}
  	}
0a63b66db   Kent Overstreet   bcache: Rework bt...
652
  	for (i = 0; (nr--) && i < c->btree_cache_used; i++) {
b0f32a56f   Kent Overstreet   bcache: Minor btr...
653
654
  		if (list_empty(&c->btree_cache))
  			goto out;
cafe56359   Kent Overstreet   bcache: A block l...
655
656
657
658
  		b = list_first_entry(&c->btree_cache, struct btree, list);
  		list_rotate_left(&c->btree_cache);
  
  		if (!b->accessed &&
e8e1d4682   Kent Overstreet   bcache: Convert t...
659
  		    !mca_reap(b, 0, false)) {
cafe56359   Kent Overstreet   bcache: A block l...
660
661
662
  			mca_bucket_free(b);
  			mca_data_free(b);
  			rw_unlock(true, b);
7dc19d5af   Dave Chinner   drivers: convert ...
663
  			freed++;
cafe56359   Kent Overstreet   bcache: A block l...
664
665
666
667
  		} else
  			b->accessed = 0;
  	}
  out:
cafe56359   Kent Overstreet   bcache: A block l...
668
  	mutex_unlock(&c->bucket_lock);
7dc19d5af   Dave Chinner   drivers: convert ...
669
670
671
672
673
674
675
676
677
678
  	return freed;
  }
  
  static unsigned long bch_mca_count(struct shrinker *shrink,
  				   struct shrink_control *sc)
  {
  	struct cache_set *c = container_of(shrink, struct cache_set, shrink);
  
  	if (c->shrinker_disabled)
  		return 0;
0a63b66db   Kent Overstreet   bcache: Rework bt...
679
  	if (c->btree_cache_alloc_lock)
7dc19d5af   Dave Chinner   drivers: convert ...
680
681
682
  		return 0;
  
  	return mca_can_free(c) * c->btree_pages;
cafe56359   Kent Overstreet   bcache: A block l...
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
  }
  
  void bch_btree_cache_free(struct cache_set *c)
  {
  	struct btree *b;
  	struct closure cl;
  	closure_init_stack(&cl);
  
  	if (c->shrink.list.next)
  		unregister_shrinker(&c->shrink);
  
  	mutex_lock(&c->bucket_lock);
  
  #ifdef CONFIG_BCACHE_DEBUG
  	if (c->verify_data)
  		list_move(&c->verify_data->list, &c->btree_cache);
78b77bf8b   Kent Overstreet   bcache: Btree ver...
699
700
  
  	free_pages((unsigned long) c->verify_ondisk, ilog2(bucket_pages(c)));
cafe56359   Kent Overstreet   bcache: A block l...
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
  #endif
  
  	list_splice(&c->btree_cache_freeable,
  		    &c->btree_cache);
  
  	while (!list_empty(&c->btree_cache)) {
  		b = list_first_entry(&c->btree_cache, struct btree, list);
  
  		if (btree_node_dirty(b))
  			btree_complete_write(b, btree_current_write(b));
  		clear_bit(BTREE_NODE_dirty, &b->flags);
  
  		mca_data_free(b);
  	}
  
  	while (!list_empty(&c->btree_cache_freed)) {
  		b = list_first_entry(&c->btree_cache_freed,
  				     struct btree, list);
  		list_del(&b->list);
  		cancel_delayed_work_sync(&b->work);
  		kfree(b);
  	}
  
  	mutex_unlock(&c->bucket_lock);
  }
  
  int bch_btree_cache_alloc(struct cache_set *c)
  {
  	unsigned i;
cafe56359   Kent Overstreet   bcache: A block l...
730
  	for (i = 0; i < mca_reserve(c); i++)
72a44517f   Kent Overstreet   bcache: Convert g...
731
732
  		if (!mca_bucket_alloc(c, &ZERO_KEY, GFP_KERNEL))
  			return -ENOMEM;
cafe56359   Kent Overstreet   bcache: A block l...
733
734
735
736
737
738
  
  	list_splice_init(&c->btree_cache,
  			 &c->btree_cache_freeable);
  
  #ifdef CONFIG_BCACHE_DEBUG
  	mutex_init(&c->verify_lock);
78b77bf8b   Kent Overstreet   bcache: Btree ver...
739
740
  	c->verify_ondisk = (void *)
  		__get_free_pages(GFP_KERNEL, ilog2(bucket_pages(c)));
cafe56359   Kent Overstreet   bcache: A block l...
741
742
743
  	c->verify_data = mca_bucket_alloc(c, &ZERO_KEY, GFP_KERNEL);
  
  	if (c->verify_data &&
a85e968e6   Kent Overstreet   bcache: Add struc...
744
  	    c->verify_data->keys.set->data)
cafe56359   Kent Overstreet   bcache: A block l...
745
746
747
748
  		list_del_init(&c->verify_data->list);
  	else
  		c->verify_data = NULL;
  #endif
7dc19d5af   Dave Chinner   drivers: convert ...
749
750
  	c->shrink.count_objects = bch_mca_count;
  	c->shrink.scan_objects = bch_mca_scan;
cafe56359   Kent Overstreet   bcache: A block l...
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
  	c->shrink.seeks = 4;
  	c->shrink.batch = c->btree_pages * 2;
  	register_shrinker(&c->shrink);
  
  	return 0;
  }
  
  /* Btree in memory cache - hash table */
  
  static struct hlist_head *mca_hash(struct cache_set *c, struct bkey *k)
  {
  	return &c->bucket_hash[hash_32(PTR_HASH(c, k), BUCKET_HASH_BITS)];
  }
  
  static struct btree *mca_find(struct cache_set *c, struct bkey *k)
  {
  	struct btree *b;
  
  	rcu_read_lock();
  	hlist_for_each_entry_rcu(b, mca_hash(c, k), hash)
  		if (PTR_HASH(c, &b->key) == PTR_HASH(c, k))
  			goto out;
  	b = NULL;
  out:
  	rcu_read_unlock();
  	return b;
  }
0a63b66db   Kent Overstreet   bcache: Rework bt...
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
  static int mca_cannibalize_lock(struct cache_set *c, struct btree_op *op)
  {
  	struct task_struct *old;
  
  	old = cmpxchg(&c->btree_cache_alloc_lock, NULL, current);
  	if (old && old != current) {
  		if (op)
  			prepare_to_wait(&c->btree_cache_wait, &op->wait,
  					TASK_UNINTERRUPTIBLE);
  		return -EINTR;
  	}
  
  	return 0;
  }
  
  static struct btree *mca_cannibalize(struct cache_set *c, struct btree_op *op,
  				     struct bkey *k)
cafe56359   Kent Overstreet   bcache: A block l...
795
  {
e8e1d4682   Kent Overstreet   bcache: Convert t...
796
  	struct btree *b;
cafe56359   Kent Overstreet   bcache: A block l...
797

c37511b86   Kent Overstreet   bcache: Fix/revam...
798
  	trace_bcache_btree_cache_cannibalize(c);
0a63b66db   Kent Overstreet   bcache: Rework bt...
799
800
  	if (mca_cannibalize_lock(c, op))
  		return ERR_PTR(-EINTR);
cafe56359   Kent Overstreet   bcache: A block l...
801

e8e1d4682   Kent Overstreet   bcache: Convert t...
802
803
804
  	list_for_each_entry_reverse(b, &c->btree_cache, list)
  		if (!mca_reap(b, btree_order(k), false))
  			return b;
cafe56359   Kent Overstreet   bcache: A block l...
805

e8e1d4682   Kent Overstreet   bcache: Convert t...
806
807
808
  	list_for_each_entry_reverse(b, &c->btree_cache, list)
  		if (!mca_reap(b, btree_order(k), true))
  			return b;
cafe56359   Kent Overstreet   bcache: A block l...
809

0a63b66db   Kent Overstreet   bcache: Rework bt...
810
811
  	WARN(1, "btree cache cannibalize failed
  ");
e8e1d4682   Kent Overstreet   bcache: Convert t...
812
  	return ERR_PTR(-ENOMEM);
cafe56359   Kent Overstreet   bcache: A block l...
813
814
815
816
817
818
819
820
  }
  
  /*
   * We can only have one thread cannibalizing other cached btree nodes at a time,
   * or we'll deadlock. We use an open coded mutex to ensure that, which a
   * cannibalize_bucket() will take. This means every time we unlock the root of
   * the btree, we need to release this lock if we have it held.
   */
df8e89701   Kent Overstreet   bcache: Move some...
821
  static void bch_cannibalize_unlock(struct cache_set *c)
cafe56359   Kent Overstreet   bcache: A block l...
822
  {
0a63b66db   Kent Overstreet   bcache: Rework bt...
823
824
825
  	if (c->btree_cache_alloc_lock == current) {
  		c->btree_cache_alloc_lock = NULL;
  		wake_up(&c->btree_cache_wait);
cafe56359   Kent Overstreet   bcache: A block l...
826
827
  	}
  }
0a63b66db   Kent Overstreet   bcache: Rework bt...
828
829
  static struct btree *mca_alloc(struct cache_set *c, struct btree_op *op,
  			       struct bkey *k, int level)
cafe56359   Kent Overstreet   bcache: A block l...
830
831
  {
  	struct btree *b;
e8e1d4682   Kent Overstreet   bcache: Convert t...
832
  	BUG_ON(current->bio_list);
cafe56359   Kent Overstreet   bcache: A block l...
833
834
835
836
837
838
839
840
841
  	lockdep_assert_held(&c->bucket_lock);
  
  	if (mca_find(c, k))
  		return NULL;
  
  	/* btree_free() doesn't free memory; it sticks the node on the end of
  	 * the list. Check if there's any freed nodes there:
  	 */
  	list_for_each_entry(b, &c->btree_cache_freeable, list)
e8e1d4682   Kent Overstreet   bcache: Convert t...
842
  		if (!mca_reap(b, btree_order(k), false))
cafe56359   Kent Overstreet   bcache: A block l...
843
844
845
846
847
848
  			goto out;
  
  	/* We never free struct btree itself, just the memory that holds the on
  	 * disk node. Check the freed list before allocating a new one:
  	 */
  	list_for_each_entry(b, &c->btree_cache_freed, list)
e8e1d4682   Kent Overstreet   bcache: Convert t...
849
  		if (!mca_reap(b, 0, false)) {
cafe56359   Kent Overstreet   bcache: A block l...
850
  			mca_data_alloc(b, k, __GFP_NOWARN|GFP_NOIO);
a85e968e6   Kent Overstreet   bcache: Add struc...
851
  			if (!b->keys.set[0].data)
cafe56359   Kent Overstreet   bcache: A block l...
852
853
854
855
856
857
858
859
860
861
  				goto err;
  			else
  				goto out;
  		}
  
  	b = mca_bucket_alloc(c, k, __GFP_NOWARN|GFP_NOIO);
  	if (!b)
  		goto err;
  
  	BUG_ON(!down_write_trylock(&b->lock));
a85e968e6   Kent Overstreet   bcache: Add struc...
862
  	if (!b->keys.set->data)
cafe56359   Kent Overstreet   bcache: A block l...
863
864
  		goto err;
  out:
cb7a583e6   Kent Overstreet   bcache: kill clos...
865
  	BUG_ON(b->io_mutex.count != 1);
cafe56359   Kent Overstreet   bcache: A block l...
866
867
868
869
870
871
872
  
  	bkey_copy(&b->key, k);
  	list_move(&b->list, &c->btree_cache);
  	hlist_del_init_rcu(&b->hash);
  	hlist_add_head_rcu(&b->hash, mca_hash(c, k));
  
  	lock_set_subclass(&b->lock.dep_map, level + 1, _THIS_IP_);
d6fd3b11c   Kent Overstreet   bcache: Explicitl...
873
  	b->parent	= (void *) ~0UL;
a85e968e6   Kent Overstreet   bcache: Add struc...
874
875
876
  	b->flags	= 0;
  	b->written	= 0;
  	b->level	= level;
cafe56359   Kent Overstreet   bcache: A block l...
877

65d45231b   Kent Overstreet   bcache: Abstract ...
878
  	if (!b->level)
a85e968e6   Kent Overstreet   bcache: Add struc...
879
880
  		bch_btree_keys_init(&b->keys, &bch_extent_keys_ops,
  				    &b->c->expensive_debug_checks);
65d45231b   Kent Overstreet   bcache: Abstract ...
881
  	else
a85e968e6   Kent Overstreet   bcache: Add struc...
882
883
  		bch_btree_keys_init(&b->keys, &bch_btree_keys_ops,
  				    &b->c->expensive_debug_checks);
cafe56359   Kent Overstreet   bcache: A block l...
884
885
886
887
888
  
  	return b;
  err:
  	if (b)
  		rw_unlock(true, b);
0a63b66db   Kent Overstreet   bcache: Rework bt...
889
  	b = mca_cannibalize(c, op, k);
cafe56359   Kent Overstreet   bcache: A block l...
890
891
892
893
894
895
896
897
898
899
  	if (!IS_ERR(b))
  		goto out;
  
  	return b;
  }
  
  /**
   * bch_btree_node_get - find a btree node in the cache and lock it, reading it
   * in from disk if necessary.
   *
b54d6934d   Kent Overstreet   bcache: Kill op->cl
900
   * If IO is necessary and running under generic_make_request, returns -EAGAIN.
cafe56359   Kent Overstreet   bcache: A block l...
901
902
903
904
   *
   * The btree node will have either a read or a write lock held, depending on
   * level and op->lock.
   */
0a63b66db   Kent Overstreet   bcache: Rework bt...
905
  struct btree *bch_btree_node_get(struct cache_set *c, struct btree_op *op,
2452cc890   Slava Pestov   bcache: try to se...
906
907
  				 struct bkey *k, int level, bool write,
  				 struct btree *parent)
cafe56359   Kent Overstreet   bcache: A block l...
908
909
  {
  	int i = 0;
cafe56359   Kent Overstreet   bcache: A block l...
910
911
912
913
914
915
916
  	struct btree *b;
  
  	BUG_ON(level < 0);
  retry:
  	b = mca_find(c, k);
  
  	if (!b) {
579435114   Kent Overstreet   bcache: Refactor ...
917
918
  		if (current->bio_list)
  			return ERR_PTR(-EAGAIN);
cafe56359   Kent Overstreet   bcache: A block l...
919
  		mutex_lock(&c->bucket_lock);
0a63b66db   Kent Overstreet   bcache: Rework bt...
920
  		b = mca_alloc(c, op, k, level);
cafe56359   Kent Overstreet   bcache: A block l...
921
922
923
924
925
926
  		mutex_unlock(&c->bucket_lock);
  
  		if (!b)
  			goto retry;
  		if (IS_ERR(b))
  			return b;
579435114   Kent Overstreet   bcache: Refactor ...
927
  		bch_btree_node_read(b);
cafe56359   Kent Overstreet   bcache: A block l...
928
929
930
931
932
933
934
935
936
937
938
  
  		if (!write)
  			downgrade_write(&b->lock);
  	} else {
  		rw_lock(write, b, level);
  		if (PTR_HASH(c, &b->key) != PTR_HASH(c, k)) {
  			rw_unlock(write, b);
  			goto retry;
  		}
  		BUG_ON(b->level != level);
  	}
2452cc890   Slava Pestov   bcache: try to se...
939
  	b->parent = parent;
cafe56359   Kent Overstreet   bcache: A block l...
940
  	b->accessed = 1;
a85e968e6   Kent Overstreet   bcache: Add struc...
941
942
943
  	for (; i <= b->keys.nsets && b->keys.set[i].size; i++) {
  		prefetch(b->keys.set[i].tree);
  		prefetch(b->keys.set[i].data);
cafe56359   Kent Overstreet   bcache: A block l...
944
  	}
a85e968e6   Kent Overstreet   bcache: Add struc...
945
946
  	for (; i <= b->keys.nsets; i++)
  		prefetch(b->keys.set[i].data);
cafe56359   Kent Overstreet   bcache: A block l...
947

579435114   Kent Overstreet   bcache: Refactor ...
948
  	if (btree_node_io_error(b)) {
cafe56359   Kent Overstreet   bcache: A block l...
949
  		rw_unlock(write, b);
579435114   Kent Overstreet   bcache: Refactor ...
950
951
952
953
  		return ERR_PTR(-EIO);
  	}
  
  	BUG_ON(!b->written);
cafe56359   Kent Overstreet   bcache: A block l...
954
955
956
  
  	return b;
  }
2452cc890   Slava Pestov   bcache: try to se...
957
  static void btree_node_prefetch(struct btree *parent, struct bkey *k)
cafe56359   Kent Overstreet   bcache: A block l...
958
959
  {
  	struct btree *b;
2452cc890   Slava Pestov   bcache: try to se...
960
961
962
  	mutex_lock(&parent->c->bucket_lock);
  	b = mca_alloc(parent->c, NULL, k, parent->level - 1);
  	mutex_unlock(&parent->c->bucket_lock);
cafe56359   Kent Overstreet   bcache: A block l...
963
964
  
  	if (!IS_ERR_OR_NULL(b)) {
2452cc890   Slava Pestov   bcache: try to se...
965
  		b->parent = parent;
579435114   Kent Overstreet   bcache: Refactor ...
966
  		bch_btree_node_read(b);
cafe56359   Kent Overstreet   bcache: A block l...
967
968
969
970
971
  		rw_unlock(true, b);
  	}
  }
  
  /* Btree alloc */
e8e1d4682   Kent Overstreet   bcache: Convert t...
972
  static void btree_node_free(struct btree *b)
cafe56359   Kent Overstreet   bcache: A block l...
973
  {
c37511b86   Kent Overstreet   bcache: Fix/revam...
974
  	trace_bcache_btree_node_free(b);
cafe56359   Kent Overstreet   bcache: A block l...
975
  	BUG_ON(b == b->c->root);
cafe56359   Kent Overstreet   bcache: A block l...
976

2a285686c   Kent Overstreet   bcache: btree loc...
977
  	mutex_lock(&b->write_lock);
cafe56359   Kent Overstreet   bcache: A block l...
978
979
980
  	if (btree_node_dirty(b))
  		btree_complete_write(b, btree_current_write(b));
  	clear_bit(BTREE_NODE_dirty, &b->flags);
2a285686c   Kent Overstreet   bcache: btree loc...
981
  	mutex_unlock(&b->write_lock);
cafe56359   Kent Overstreet   bcache: A block l...
982
983
984
  	cancel_delayed_work(&b->work);
  
  	mutex_lock(&b->c->bucket_lock);
cafe56359   Kent Overstreet   bcache: A block l...
985
986
987
988
  	bch_bucket_free(b->c, &b->key);
  	mca_bucket_free(b);
  	mutex_unlock(&b->c->bucket_lock);
  }
c5aa4a315   Slava Pestov   bcache: wait for ...
989
  struct btree *__bch_btree_node_alloc(struct cache_set *c, struct btree_op *op,
2452cc890   Slava Pestov   bcache: try to se...
990
991
  				     int level, bool wait,
  				     struct btree *parent)
cafe56359   Kent Overstreet   bcache: A block l...
992
993
994
995
996
997
  {
  	BKEY_PADDED(key) k;
  	struct btree *b = ERR_PTR(-EAGAIN);
  
  	mutex_lock(&c->bucket_lock);
  retry:
c5aa4a315   Slava Pestov   bcache: wait for ...
998
  	if (__bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, 1, wait))
cafe56359   Kent Overstreet   bcache: A block l...
999
  		goto err;
3a3b6a4e0   Kent Overstreet   bcache: Don't bot...
1000
  	bkey_put(c, &k.key);
cafe56359   Kent Overstreet   bcache: A block l...
1001
  	SET_KEY_SIZE(&k.key, c->btree_pages * PAGE_SECTORS);
0a63b66db   Kent Overstreet   bcache: Rework bt...
1002
  	b = mca_alloc(c, op, &k.key, level);
cafe56359   Kent Overstreet   bcache: A block l...
1003
1004
1005
1006
  	if (IS_ERR(b))
  		goto err_free;
  
  	if (!b) {
b1a67b0f4   Kent Overstreet   bcache: Style/che...
1007
1008
  		cache_bug(c,
  			"Tried to allocate bucket that was in btree cache");
cafe56359   Kent Overstreet   bcache: A block l...
1009
1010
  		goto retry;
  	}
cafe56359   Kent Overstreet   bcache: A block l...
1011
  	b->accessed = 1;
2452cc890   Slava Pestov   bcache: try to se...
1012
  	b->parent = parent;
a85e968e6   Kent Overstreet   bcache: Add struc...
1013
  	bch_bset_init_next(&b->keys, b->keys.set->data, bset_magic(&b->c->sb));
cafe56359   Kent Overstreet   bcache: A block l...
1014
1015
  
  	mutex_unlock(&c->bucket_lock);
c37511b86   Kent Overstreet   bcache: Fix/revam...
1016
1017
  
  	trace_bcache_btree_node_alloc(b);
cafe56359   Kent Overstreet   bcache: A block l...
1018
1019
1020
  	return b;
  err_free:
  	bch_bucket_free(c, &k.key);
cafe56359   Kent Overstreet   bcache: A block l...
1021
1022
  err:
  	mutex_unlock(&c->bucket_lock);
c37511b86   Kent Overstreet   bcache: Fix/revam...
1023

913dc33fb   Slava Pestov   bcache: fix crash...
1024
  	trace_bcache_btree_node_alloc_fail(c);
cafe56359   Kent Overstreet   bcache: A block l...
1025
1026
  	return b;
  }
c5aa4a315   Slava Pestov   bcache: wait for ...
1027
  static struct btree *bch_btree_node_alloc(struct cache_set *c,
2452cc890   Slava Pestov   bcache: try to se...
1028
1029
  					  struct btree_op *op, int level,
  					  struct btree *parent)
c5aa4a315   Slava Pestov   bcache: wait for ...
1030
  {
2452cc890   Slava Pestov   bcache: try to se...
1031
  	return __bch_btree_node_alloc(c, op, level, op != NULL, parent);
c5aa4a315   Slava Pestov   bcache: wait for ...
1032
  }
0a63b66db   Kent Overstreet   bcache: Rework bt...
1033
1034
  static struct btree *btree_node_alloc_replacement(struct btree *b,
  						  struct btree_op *op)
cafe56359   Kent Overstreet   bcache: A block l...
1035
  {
2452cc890   Slava Pestov   bcache: try to se...
1036
  	struct btree *n = bch_btree_node_alloc(b->c, op, b->level, b->parent);
67539e852   Kent Overstreet   bcache: Add struc...
1037
  	if (!IS_ERR_OR_NULL(n)) {
2a285686c   Kent Overstreet   bcache: btree loc...
1038
  		mutex_lock(&n->write_lock);
89ebb4a28   Kent Overstreet   bcache: Convert s...
1039
  		bch_btree_sort_into(&b->keys, &n->keys, &b->c->sort);
67539e852   Kent Overstreet   bcache: Add struc...
1040
  		bkey_copy_key(&n->key, &b->key);
2a285686c   Kent Overstreet   bcache: btree loc...
1041
  		mutex_unlock(&n->write_lock);
67539e852   Kent Overstreet   bcache: Add struc...
1042
  	}
cafe56359   Kent Overstreet   bcache: A block l...
1043
1044
1045
  
  	return n;
  }
8835c1234   Kent Overstreet   bcache: Add make_...
1046
1047
1048
  static void make_btree_freeing_key(struct btree *b, struct bkey *k)
  {
  	unsigned i;
05335cff9   Kent Overstreet   bcache: Fix a rac...
1049
1050
1051
  	mutex_lock(&b->c->bucket_lock);
  
  	atomic_inc(&b->c->prio_blocked);
8835c1234   Kent Overstreet   bcache: Add make_...
1052
1053
  	bkey_copy(k, &b->key);
  	bkey_copy_key(k, &ZERO_KEY);
05335cff9   Kent Overstreet   bcache: Fix a rac...
1054
1055
1056
1057
  	for (i = 0; i < KEY_PTRS(k); i++)
  		SET_PTR_GEN(k, i,
  			    bch_inc_gen(PTR_CACHE(b->c, &b->key, i),
  					PTR_BUCKET(b->c, &b->key, i)));
8835c1234   Kent Overstreet   bcache: Add make_...
1058

05335cff9   Kent Overstreet   bcache: Fix a rac...
1059
  	mutex_unlock(&b->c->bucket_lock);
8835c1234   Kent Overstreet   bcache: Add make_...
1060
  }
78365411b   Kent Overstreet   bcache: Rework al...
1061
1062
1063
1064
  static int btree_check_reserve(struct btree *b, struct btree_op *op)
  {
  	struct cache_set *c = b->c;
  	struct cache *ca;
0a63b66db   Kent Overstreet   bcache: Rework bt...
1065
  	unsigned i, reserve = (c->root->level - b->level) * 2 + 1;
78365411b   Kent Overstreet   bcache: Rework al...
1066
1067
1068
1069
1070
1071
  
  	mutex_lock(&c->bucket_lock);
  
  	for_each_cache(ca, c, i)
  		if (fifo_used(&ca->free[RESERVE_BTREE]) < reserve) {
  			if (op)
0a63b66db   Kent Overstreet   bcache: Rework bt...
1072
  				prepare_to_wait(&c->btree_cache_wait, &op->wait,
78365411b   Kent Overstreet   bcache: Rework al...
1073
  						TASK_UNINTERRUPTIBLE);
0a63b66db   Kent Overstreet   bcache: Rework bt...
1074
1075
  			mutex_unlock(&c->bucket_lock);
  			return -EINTR;
78365411b   Kent Overstreet   bcache: Rework al...
1076
1077
1078
  		}
  
  	mutex_unlock(&c->bucket_lock);
0a63b66db   Kent Overstreet   bcache: Rework bt...
1079
1080
  
  	return mca_cannibalize_lock(b->c, op);
78365411b   Kent Overstreet   bcache: Rework al...
1081
  }
cafe56359   Kent Overstreet   bcache: A block l...
1082
  /* Garbage collection */
487dded86   Kent Overstreet   bcache: Fix anoth...
1083
1084
  static uint8_t __bch_btree_mark_key(struct cache_set *c, int level,
  				    struct bkey *k)
cafe56359   Kent Overstreet   bcache: A block l...
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
  {
  	uint8_t stale = 0;
  	unsigned i;
  	struct bucket *g;
  
  	/*
  	 * ptr_invalid() can't return true for the keys that mark btree nodes as
  	 * freed, but since ptr_bad() returns true we'll never actually use them
  	 * for anything and thus we don't want mark their pointers here
  	 */
  	if (!bkey_cmp(k, &ZERO_KEY))
  		return stale;
  
  	for (i = 0; i < KEY_PTRS(k); i++) {
  		if (!ptr_available(c, k, i))
  			continue;
  
  		g = PTR_BUCKET(c, k, i);
3a2fd9d50   Kent Overstreet   bcache: Kill buck...
1103
1104
  		if (gen_after(g->last_gc, PTR_GEN(k, i)))
  			g->last_gc = PTR_GEN(k, i);
cafe56359   Kent Overstreet   bcache: A block l...
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
  
  		if (ptr_stale(c, k, i)) {
  			stale = max(stale, ptr_stale(c, k, i));
  			continue;
  		}
  
  		cache_bug_on(GC_MARK(g) &&
  			     (GC_MARK(g) == GC_MARK_METADATA) != (level != 0),
  			     c, "inconsistent ptrs: mark = %llu, level = %i",
  			     GC_MARK(g), level);
  
  		if (level)
  			SET_GC_MARK(g, GC_MARK_METADATA);
  		else if (KEY_DIRTY(k))
  			SET_GC_MARK(g, GC_MARK_DIRTY);
4fe6a8167   Kent Overstreet   bcache: Add a rea...
1120
1121
  		else if (!GC_MARK(g))
  			SET_GC_MARK(g, GC_MARK_RECLAIMABLE);
cafe56359   Kent Overstreet   bcache: A block l...
1122
1123
1124
1125
  
  		/* guard against overflow */
  		SET_GC_SECTORS_USED(g, min_t(unsigned,
  					     GC_SECTORS_USED(g) + KEY_SIZE(k),
947174476   Darrick J. Wong   bcache: fix BUG_O...
1126
  					     MAX_GC_SECTORS_USED));
cafe56359   Kent Overstreet   bcache: A block l...
1127
1128
1129
1130
1131
1132
1133
1134
  
  		BUG_ON(!GC_SECTORS_USED(g));
  	}
  
  	return stale;
  }
  
  #define btree_mark_key(b, k)	__bch_btree_mark_key(b->c, b->level, k)
487dded86   Kent Overstreet   bcache: Fix anoth...
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
  void bch_initial_mark_key(struct cache_set *c, int level, struct bkey *k)
  {
  	unsigned i;
  
  	for (i = 0; i < KEY_PTRS(k); i++)
  		if (ptr_available(c, k, i) &&
  		    !ptr_stale(c, k, i)) {
  			struct bucket *b = PTR_BUCKET(c, k, i);
  
  			b->gen = PTR_GEN(k, i);
  
  			if (level && bkey_cmp(k, &ZERO_KEY))
  				b->prio = BTREE_PRIO;
  			else if (!level && b->prio == BTREE_PRIO)
  				b->prio = INITIAL_PRIO;
  		}
  
  	__bch_btree_mark_key(c, level, k);
  }
a1f0358b2   Kent Overstreet   bcache: Increment...
1154
  static bool btree_gc_mark_node(struct btree *b, struct gc_stat *gc)
cafe56359   Kent Overstreet   bcache: A block l...
1155
1156
  {
  	uint8_t stale = 0;
a1f0358b2   Kent Overstreet   bcache: Increment...
1157
  	unsigned keys = 0, good_keys = 0;
cafe56359   Kent Overstreet   bcache: A block l...
1158
1159
1160
1161
1162
  	struct bkey *k;
  	struct btree_iter iter;
  	struct bset_tree *t;
  
  	gc->nodes++;
c052dd9a2   Kent Overstreet   bcache: Convert b...
1163
  	for_each_key_filter(&b->keys, k, &iter, bch_ptr_invalid) {
cafe56359   Kent Overstreet   bcache: A block l...
1164
  		stale = max(stale, btree_mark_key(b, k));
a1f0358b2   Kent Overstreet   bcache: Increment...
1165
  		keys++;
cafe56359   Kent Overstreet   bcache: A block l...
1166

a85e968e6   Kent Overstreet   bcache: Add struc...
1167
  		if (bch_ptr_bad(&b->keys, k))
cafe56359   Kent Overstreet   bcache: A block l...
1168
  			continue;
cafe56359   Kent Overstreet   bcache: A block l...
1169
1170
  		gc->key_bytes += bkey_u64s(k);
  		gc->nkeys++;
a1f0358b2   Kent Overstreet   bcache: Increment...
1171
  		good_keys++;
cafe56359   Kent Overstreet   bcache: A block l...
1172
1173
  
  		gc->data += KEY_SIZE(k);
cafe56359   Kent Overstreet   bcache: A block l...
1174
  	}
a85e968e6   Kent Overstreet   bcache: Add struc...
1175
  	for (t = b->keys.set; t <= &b->keys.set[b->keys.nsets]; t++)
cafe56359   Kent Overstreet   bcache: A block l...
1176
  		btree_bug_on(t->size &&
a85e968e6   Kent Overstreet   bcache: Add struc...
1177
  			     bset_written(&b->keys, t) &&
cafe56359   Kent Overstreet   bcache: A block l...
1178
1179
  			     bkey_cmp(&b->key, &t->end) < 0,
  			     b, "found short btree key in gc");
a1f0358b2   Kent Overstreet   bcache: Increment...
1180
1181
  	if (b->c->gc_always_rewrite)
  		return true;
cafe56359   Kent Overstreet   bcache: A block l...
1182

a1f0358b2   Kent Overstreet   bcache: Increment...
1183
1184
  	if (stale > 10)
  		return true;
cafe56359   Kent Overstreet   bcache: A block l...
1185

a1f0358b2   Kent Overstreet   bcache: Increment...
1186
1187
  	if ((keys - good_keys) * 2 > keys)
  		return true;
cafe56359   Kent Overstreet   bcache: A block l...
1188

a1f0358b2   Kent Overstreet   bcache: Increment...
1189
  	return false;
cafe56359   Kent Overstreet   bcache: A block l...
1190
  }
a1f0358b2   Kent Overstreet   bcache: Increment...
1191
  #define GC_MERGE_NODES	4U
cafe56359   Kent Overstreet   bcache: A block l...
1192
1193
1194
  
  struct gc_merge_info {
  	struct btree	*b;
cafe56359   Kent Overstreet   bcache: A block l...
1195
1196
  	unsigned	keys;
  };
a1f0358b2   Kent Overstreet   bcache: Increment...
1197
1198
1199
1200
  static int bch_btree_insert_node(struct btree *, struct btree_op *,
  				 struct keylist *, atomic_t *, struct bkey *);
  
  static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
0a63b66db   Kent Overstreet   bcache: Rework bt...
1201
  			     struct gc_stat *gc, struct gc_merge_info *r)
cafe56359   Kent Overstreet   bcache: A block l...
1202
  {
a1f0358b2   Kent Overstreet   bcache: Increment...
1203
1204
  	unsigned i, nodes = 0, keys = 0, blocks;
  	struct btree *new_nodes[GC_MERGE_NODES];
0a63b66db   Kent Overstreet   bcache: Rework bt...
1205
  	struct keylist keylist;
b54d6934d   Kent Overstreet   bcache: Kill op->cl
1206
  	struct closure cl;
a1f0358b2   Kent Overstreet   bcache: Increment...
1207
  	struct bkey *k;
b54d6934d   Kent Overstreet   bcache: Kill op->cl
1208

0a63b66db   Kent Overstreet   bcache: Rework bt...
1209
1210
1211
1212
  	bch_keylist_init(&keylist);
  
  	if (btree_check_reserve(b, NULL))
  		return 0;
a1f0358b2   Kent Overstreet   bcache: Increment...
1213
  	memset(new_nodes, 0, sizeof(new_nodes));
b54d6934d   Kent Overstreet   bcache: Kill op->cl
1214
  	closure_init_stack(&cl);
cafe56359   Kent Overstreet   bcache: A block l...
1215

a1f0358b2   Kent Overstreet   bcache: Increment...
1216
  	while (nodes < GC_MERGE_NODES && !IS_ERR_OR_NULL(r[nodes].b))
cafe56359   Kent Overstreet   bcache: A block l...
1217
1218
1219
1220
1221
  		keys += r[nodes++].keys;
  
  	blocks = btree_default_blocks(b->c) * 2 / 3;
  
  	if (nodes < 2 ||
a85e968e6   Kent Overstreet   bcache: Add struc...
1222
  	    __set_blocks(b->keys.set[0].data, keys,
ee811287c   Kent Overstreet   bcache: Rename/sh...
1223
  			 block_bytes(b->c)) > blocks * (nodes - 1))
a1f0358b2   Kent Overstreet   bcache: Increment...
1224
  		return 0;
cafe56359   Kent Overstreet   bcache: A block l...
1225

a1f0358b2   Kent Overstreet   bcache: Increment...
1226
  	for (i = 0; i < nodes; i++) {
0a63b66db   Kent Overstreet   bcache: Rework bt...
1227
  		new_nodes[i] = btree_node_alloc_replacement(r[i].b, NULL);
a1f0358b2   Kent Overstreet   bcache: Increment...
1228
1229
  		if (IS_ERR_OR_NULL(new_nodes[i]))
  			goto out_nocoalesce;
cafe56359   Kent Overstreet   bcache: A block l...
1230
  	}
0a63b66db   Kent Overstreet   bcache: Rework bt...
1231
1232
1233
1234
1235
1236
1237
1238
  	/*
  	 * We have to check the reserve here, after we've allocated our new
  	 * nodes, to make sure the insert below will succeed - we also check
  	 * before as an optimization to potentially avoid a bunch of expensive
  	 * allocs/sorts
  	 */
  	if (btree_check_reserve(b, NULL))
  		goto out_nocoalesce;
2a285686c   Kent Overstreet   bcache: btree loc...
1239
1240
  	for (i = 0; i < nodes; i++)
  		mutex_lock(&new_nodes[i]->write_lock);
cafe56359   Kent Overstreet   bcache: A block l...
1241
  	for (i = nodes - 1; i > 0; --i) {
ee811287c   Kent Overstreet   bcache: Rename/sh...
1242
1243
  		struct bset *n1 = btree_bset_first(new_nodes[i]);
  		struct bset *n2 = btree_bset_first(new_nodes[i - 1]);
cafe56359   Kent Overstreet   bcache: A block l...
1244
1245
1246
  		struct bkey *k, *last = NULL;
  
  		keys = 0;
a1f0358b2   Kent Overstreet   bcache: Increment...
1247
1248
  		if (i > 1) {
  			for (k = n2->start;
fafff81ce   Kent Overstreet   bcache: Bkey inde...
1249
  			     k < bset_bkey_last(n2);
a1f0358b2   Kent Overstreet   bcache: Increment...
1250
1251
  			     k = bkey_next(k)) {
  				if (__set_blocks(n1, n1->keys + keys +
ee811287c   Kent Overstreet   bcache: Rename/sh...
1252
1253
  						 bkey_u64s(k),
  						 block_bytes(b->c)) > blocks)
a1f0358b2   Kent Overstreet   bcache: Increment...
1254
1255
1256
1257
1258
1259
  					break;
  
  				last = k;
  				keys += bkey_u64s(k);
  			}
  		} else {
cafe56359   Kent Overstreet   bcache: A block l...
1260
1261
1262
1263
1264
1265
1266
1267
  			/*
  			 * Last node we're not getting rid of - we're getting
  			 * rid of the node at r[0]. Have to try and fit all of
  			 * the remaining keys into this node; we can't ensure
  			 * they will always fit due to rounding and variable
  			 * length keys (shouldn't be possible in practice,
  			 * though)
  			 */
a1f0358b2   Kent Overstreet   bcache: Increment...
1268
  			if (__set_blocks(n1, n1->keys + n2->keys,
ee811287c   Kent Overstreet   bcache: Rename/sh...
1269
1270
  					 block_bytes(b->c)) >
  			    btree_blocks(new_nodes[i]))
a1f0358b2   Kent Overstreet   bcache: Increment...
1271
  				goto out_nocoalesce;
cafe56359   Kent Overstreet   bcache: A block l...
1272
1273
  
  			keys = n2->keys;
a1f0358b2   Kent Overstreet   bcache: Increment...
1274
  			/* Take the key of the node we're getting rid of */
cafe56359   Kent Overstreet   bcache: A block l...
1275
  			last = &r->b->key;
a1f0358b2   Kent Overstreet   bcache: Increment...
1276
  		}
cafe56359   Kent Overstreet   bcache: A block l...
1277

ee811287c   Kent Overstreet   bcache: Rename/sh...
1278
1279
  		BUG_ON(__set_blocks(n1, n1->keys + keys, block_bytes(b->c)) >
  		       btree_blocks(new_nodes[i]));
cafe56359   Kent Overstreet   bcache: A block l...
1280

a1f0358b2   Kent Overstreet   bcache: Increment...
1281
1282
  		if (last)
  			bkey_copy_key(&new_nodes[i]->key, last);
cafe56359   Kent Overstreet   bcache: A block l...
1283

fafff81ce   Kent Overstreet   bcache: Bkey inde...
1284
  		memcpy(bset_bkey_last(n1),
cafe56359   Kent Overstreet   bcache: A block l...
1285
  		       n2->start,
fafff81ce   Kent Overstreet   bcache: Bkey inde...
1286
  		       (void *) bset_bkey_idx(n2, keys) - (void *) n2->start);
cafe56359   Kent Overstreet   bcache: A block l...
1287
1288
  
  		n1->keys += keys;
a1f0358b2   Kent Overstreet   bcache: Increment...
1289
  		r[i].keys = n1->keys;
cafe56359   Kent Overstreet   bcache: A block l...
1290
1291
  
  		memmove(n2->start,
fafff81ce   Kent Overstreet   bcache: Bkey inde...
1292
1293
1294
  			bset_bkey_idx(n2, keys),
  			(void *) bset_bkey_last(n2) -
  			(void *) bset_bkey_idx(n2, keys));
cafe56359   Kent Overstreet   bcache: A block l...
1295
1296
  
  		n2->keys -= keys;
0a63b66db   Kent Overstreet   bcache: Rework bt...
1297
  		if (__bch_keylist_realloc(&keylist,
085d2a3dd   Kent Overstreet   bcache: Make bch_...
1298
  					  bkey_u64s(&new_nodes[i]->key)))
a1f0358b2   Kent Overstreet   bcache: Increment...
1299
1300
1301
  			goto out_nocoalesce;
  
  		bch_btree_node_write(new_nodes[i], &cl);
0a63b66db   Kent Overstreet   bcache: Rework bt...
1302
  		bch_keylist_add(&keylist, &new_nodes[i]->key);
cafe56359   Kent Overstreet   bcache: A block l...
1303
  	}
2a285686c   Kent Overstreet   bcache: btree loc...
1304
1305
  	for (i = 0; i < nodes; i++)
  		mutex_unlock(&new_nodes[i]->write_lock);
05335cff9   Kent Overstreet   bcache: Fix a rac...
1306
1307
1308
1309
1310
1311
  	closure_sync(&cl);
  
  	/* We emptied out this node */
  	BUG_ON(btree_bset_first(new_nodes[0])->keys);
  	btree_node_free(new_nodes[0]);
  	rw_unlock(true, new_nodes[0]);
400ffaa2a   Slava Pestov   bcache: fix use-a...
1312
  	new_nodes[0] = NULL;
05335cff9   Kent Overstreet   bcache: Fix a rac...
1313

a1f0358b2   Kent Overstreet   bcache: Increment...
1314
  	for (i = 0; i < nodes; i++) {
0a63b66db   Kent Overstreet   bcache: Rework bt...
1315
  		if (__bch_keylist_realloc(&keylist, bkey_u64s(&r[i].b->key)))
a1f0358b2   Kent Overstreet   bcache: Increment...
1316
  			goto out_nocoalesce;
cafe56359   Kent Overstreet   bcache: A block l...
1317

0a63b66db   Kent Overstreet   bcache: Rework bt...
1318
1319
  		make_btree_freeing_key(r[i].b, keylist.top);
  		bch_keylist_push(&keylist);
a1f0358b2   Kent Overstreet   bcache: Increment...
1320
  	}
cafe56359   Kent Overstreet   bcache: A block l...
1321

0a63b66db   Kent Overstreet   bcache: Rework bt...
1322
1323
  	bch_btree_insert_node(b, op, &keylist, NULL, NULL);
  	BUG_ON(!bch_keylist_empty(&keylist));
a1f0358b2   Kent Overstreet   bcache: Increment...
1324
1325
1326
1327
1328
1329
1330
  
  	for (i = 0; i < nodes; i++) {
  		btree_node_free(r[i].b);
  		rw_unlock(true, r[i].b);
  
  		r[i].b = new_nodes[i];
  	}
a1f0358b2   Kent Overstreet   bcache: Increment...
1331
1332
1333
1334
  	memmove(r, r + 1, sizeof(r[0]) * (nodes - 1));
  	r[nodes - 1].b = ERR_PTR(-EINTR);
  
  	trace_bcache_btree_gc_coalesce(nodes);
cafe56359   Kent Overstreet   bcache: A block l...
1335
  	gc->nodes--;
cafe56359   Kent Overstreet   bcache: A block l...
1336

0a63b66db   Kent Overstreet   bcache: Rework bt...
1337
  	bch_keylist_free(&keylist);
a1f0358b2   Kent Overstreet   bcache: Increment...
1338
1339
1340
1341
1342
  	/* Invalidated our iterator */
  	return -EINTR;
  
  out_nocoalesce:
  	closure_sync(&cl);
0a63b66db   Kent Overstreet   bcache: Rework bt...
1343
  	bch_keylist_free(&keylist);
a1f0358b2   Kent Overstreet   bcache: Increment...
1344

0a63b66db   Kent Overstreet   bcache: Rework bt...
1345
  	while ((k = bch_keylist_pop(&keylist)))
a1f0358b2   Kent Overstreet   bcache: Increment...
1346
1347
1348
1349
1350
1351
1352
1353
1354
  		if (!bkey_cmp(k, &ZERO_KEY))
  			atomic_dec(&b->c->prio_blocked);
  
  	for (i = 0; i < nodes; i++)
  		if (!IS_ERR_OR_NULL(new_nodes[i])) {
  			btree_node_free(new_nodes[i]);
  			rw_unlock(true, new_nodes[i]);
  		}
  	return 0;
cafe56359   Kent Overstreet   bcache: A block l...
1355
  }
0a63b66db   Kent Overstreet   bcache: Rework bt...
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
  static int btree_gc_rewrite_node(struct btree *b, struct btree_op *op,
  				 struct btree *replace)
  {
  	struct keylist keys;
  	struct btree *n;
  
  	if (btree_check_reserve(b, NULL))
  		return 0;
  
  	n = btree_node_alloc_replacement(replace, NULL);
  
  	/* recheck reserve after allocating replacement node */
  	if (btree_check_reserve(b, NULL)) {
  		btree_node_free(n);
  		rw_unlock(true, n);
  		return 0;
  	}
  
  	bch_btree_node_write_sync(n);
  
  	bch_keylist_init(&keys);
  	bch_keylist_add(&keys, &n->key);
  
  	make_btree_freeing_key(replace, keys.top);
  	bch_keylist_push(&keys);
  
  	bch_btree_insert_node(b, op, &keys, NULL, NULL);
  	BUG_ON(!bch_keylist_empty(&keys));
  
  	btree_node_free(replace);
  	rw_unlock(true, n);
  
  	/* Invalidated our iterator */
  	return -EINTR;
  }
a1f0358b2   Kent Overstreet   bcache: Increment...
1391
  static unsigned btree_gc_count_keys(struct btree *b)
cafe56359   Kent Overstreet   bcache: A block l...
1392
  {
a1f0358b2   Kent Overstreet   bcache: Increment...
1393
1394
1395
  	struct bkey *k;
  	struct btree_iter iter;
  	unsigned ret = 0;
cafe56359   Kent Overstreet   bcache: A block l...
1396

c052dd9a2   Kent Overstreet   bcache: Convert b...
1397
  	for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad)
a1f0358b2   Kent Overstreet   bcache: Increment...
1398
1399
1400
1401
  		ret += bkey_u64s(k);
  
  	return ret;
  }
cafe56359   Kent Overstreet   bcache: A block l...
1402

a1f0358b2   Kent Overstreet   bcache: Increment...
1403
1404
1405
  static int btree_gc_recurse(struct btree *b, struct btree_op *op,
  			    struct closure *writes, struct gc_stat *gc)
  {
a1f0358b2   Kent Overstreet   bcache: Increment...
1406
1407
  	int ret = 0;
  	bool should_rewrite;
a1f0358b2   Kent Overstreet   bcache: Increment...
1408
  	struct bkey *k;
a1f0358b2   Kent Overstreet   bcache: Increment...
1409
  	struct btree_iter iter;
cafe56359   Kent Overstreet   bcache: A block l...
1410
  	struct gc_merge_info r[GC_MERGE_NODES];
2a285686c   Kent Overstreet   bcache: btree loc...
1411
  	struct gc_merge_info *i, *last = r + ARRAY_SIZE(r) - 1;
cafe56359   Kent Overstreet   bcache: A block l...
1412

c052dd9a2   Kent Overstreet   bcache: Convert b...
1413
  	bch_btree_iter_init(&b->keys, &iter, &b->c->gc_done);
cafe56359   Kent Overstreet   bcache: A block l...
1414

2a285686c   Kent Overstreet   bcache: btree loc...
1415
1416
  	for (i = r; i < r + ARRAY_SIZE(r); i++)
  		i->b = ERR_PTR(-EINTR);
cafe56359   Kent Overstreet   bcache: A block l...
1417

a1f0358b2   Kent Overstreet   bcache: Increment...
1418
  	while (1) {
a85e968e6   Kent Overstreet   bcache: Add struc...
1419
  		k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad);
a1f0358b2   Kent Overstreet   bcache: Increment...
1420
  		if (k) {
0a63b66db   Kent Overstreet   bcache: Rework bt...
1421
  			r->b = bch_btree_node_get(b->c, op, k, b->level - 1,
2452cc890   Slava Pestov   bcache: try to se...
1422
  						  true, b);
a1f0358b2   Kent Overstreet   bcache: Increment...
1423
1424
1425
1426
1427
1428
  			if (IS_ERR(r->b)) {
  				ret = PTR_ERR(r->b);
  				break;
  			}
  
  			r->keys = btree_gc_count_keys(r->b);
0a63b66db   Kent Overstreet   bcache: Rework bt...
1429
  			ret = btree_gc_coalesce(b, op, gc, r);
a1f0358b2   Kent Overstreet   bcache: Increment...
1430
1431
  			if (ret)
  				break;
cafe56359   Kent Overstreet   bcache: A block l...
1432
  		}
a1f0358b2   Kent Overstreet   bcache: Increment...
1433
1434
  		if (!last->b)
  			break;
cafe56359   Kent Overstreet   bcache: A block l...
1435

a1f0358b2   Kent Overstreet   bcache: Increment...
1436
1437
  		if (!IS_ERR(last->b)) {
  			should_rewrite = btree_gc_mark_node(last->b, gc);
0a63b66db   Kent Overstreet   bcache: Rework bt...
1438
1439
1440
  			if (should_rewrite) {
  				ret = btree_gc_rewrite_node(b, op, last->b);
  				if (ret)
a1f0358b2   Kent Overstreet   bcache: Increment...
1441
  					break;
a1f0358b2   Kent Overstreet   bcache: Increment...
1442
1443
1444
1445
1446
1447
1448
  			}
  
  			if (last->b->level) {
  				ret = btree_gc_recurse(last->b, op, writes, gc);
  				if (ret)
  					break;
  			}
cafe56359   Kent Overstreet   bcache: A block l...
1449

a1f0358b2   Kent Overstreet   bcache: Increment...
1450
1451
1452
1453
1454
1455
  			bkey_copy_key(&b->c->gc_done, &last->b->key);
  
  			/*
  			 * Must flush leaf nodes before gc ends, since replace
  			 * operations aren't journalled
  			 */
2a285686c   Kent Overstreet   bcache: btree loc...
1456
  			mutex_lock(&last->b->write_lock);
a1f0358b2   Kent Overstreet   bcache: Increment...
1457
1458
  			if (btree_node_dirty(last->b))
  				bch_btree_node_write(last->b, writes);
2a285686c   Kent Overstreet   bcache: btree loc...
1459
  			mutex_unlock(&last->b->write_lock);
a1f0358b2   Kent Overstreet   bcache: Increment...
1460
1461
1462
1463
1464
  			rw_unlock(true, last->b);
  		}
  
  		memmove(r + 1, r, sizeof(r[0]) * (GC_MERGE_NODES - 1));
  		r->b = NULL;
cafe56359   Kent Overstreet   bcache: A block l...
1465

cafe56359   Kent Overstreet   bcache: A block l...
1466
1467
1468
1469
  		if (need_resched()) {
  			ret = -EAGAIN;
  			break;
  		}
cafe56359   Kent Overstreet   bcache: A block l...
1470
  	}
2a285686c   Kent Overstreet   bcache: btree loc...
1471
1472
1473
1474
1475
1476
1477
  	for (i = r; i < r + ARRAY_SIZE(r); i++)
  		if (!IS_ERR_OR_NULL(i->b)) {
  			mutex_lock(&i->b->write_lock);
  			if (btree_node_dirty(i->b))
  				bch_btree_node_write(i->b, writes);
  			mutex_unlock(&i->b->write_lock);
  			rw_unlock(true, i->b);
a1f0358b2   Kent Overstreet   bcache: Increment...
1478
  		}
cafe56359   Kent Overstreet   bcache: A block l...
1479

cafe56359   Kent Overstreet   bcache: A block l...
1480
1481
1482
1483
1484
1485
1486
  	return ret;
  }
  
  static int bch_btree_gc_root(struct btree *b, struct btree_op *op,
  			     struct closure *writes, struct gc_stat *gc)
  {
  	struct btree *n = NULL;
a1f0358b2   Kent Overstreet   bcache: Increment...
1487
1488
  	int ret = 0;
  	bool should_rewrite;
cafe56359   Kent Overstreet   bcache: A block l...
1489

a1f0358b2   Kent Overstreet   bcache: Increment...
1490
1491
  	should_rewrite = btree_gc_mark_node(b, gc);
  	if (should_rewrite) {
0a63b66db   Kent Overstreet   bcache: Rework bt...
1492
  		n = btree_node_alloc_replacement(b, NULL);
cafe56359   Kent Overstreet   bcache: A block l...
1493

a1f0358b2   Kent Overstreet   bcache: Increment...
1494
1495
  		if (!IS_ERR_OR_NULL(n)) {
  			bch_btree_node_write_sync(n);
2a285686c   Kent Overstreet   bcache: btree loc...
1496

a1f0358b2   Kent Overstreet   bcache: Increment...
1497
1498
1499
  			bch_btree_set_root(n);
  			btree_node_free(b);
  			rw_unlock(true, n);
cafe56359   Kent Overstreet   bcache: A block l...
1500

a1f0358b2   Kent Overstreet   bcache: Increment...
1501
1502
1503
  			return -EINTR;
  		}
  	}
cafe56359   Kent Overstreet   bcache: A block l...
1504

487dded86   Kent Overstreet   bcache: Fix anoth...
1505
  	__bch_btree_mark_key(b->c, b->level + 1, &b->key);
a1f0358b2   Kent Overstreet   bcache: Increment...
1506
1507
1508
1509
  	if (b->level) {
  		ret = btree_gc_recurse(b, op, writes, gc);
  		if (ret)
  			return ret;
cafe56359   Kent Overstreet   bcache: A block l...
1510
  	}
a1f0358b2   Kent Overstreet   bcache: Increment...
1511
  	bkey_copy_key(&b->c->gc_done, &b->key);
cafe56359   Kent Overstreet   bcache: A block l...
1512
1513
1514
1515
1516
1517
1518
  	return ret;
  }
  
  static void btree_gc_start(struct cache_set *c)
  {
  	struct cache *ca;
  	struct bucket *b;
cafe56359   Kent Overstreet   bcache: A block l...
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
  	unsigned i;
  
  	if (!c->gc_mark_valid)
  		return;
  
  	mutex_lock(&c->bucket_lock);
  
  	c->gc_mark_valid = 0;
  	c->gc_done = ZERO_KEY;
  
  	for_each_cache(ca, c, i)
  		for_each_bucket(b, ca) {
3a2fd9d50   Kent Overstreet   bcache: Kill buck...
1531
  			b->last_gc = b->gen;
29ebf465b   Kent Overstreet   bcache: Fix GC_SE...
1532
  			if (!atomic_read(&b->pin)) {
4fe6a8167   Kent Overstreet   bcache: Add a rea...
1533
  				SET_GC_MARK(b, 0);
29ebf465b   Kent Overstreet   bcache: Fix GC_SE...
1534
1535
  				SET_GC_SECTORS_USED(b, 0);
  			}
cafe56359   Kent Overstreet   bcache: A block l...
1536
  		}
cafe56359   Kent Overstreet   bcache: A block l...
1537
1538
  	mutex_unlock(&c->bucket_lock);
  }
2531d9ee6   Kent Overstreet   bcache: Kill unus...
1539
  static size_t bch_btree_gc_finish(struct cache_set *c)
cafe56359   Kent Overstreet   bcache: A block l...
1540
1541
1542
1543
  {
  	size_t available = 0;
  	struct bucket *b;
  	struct cache *ca;
cafe56359   Kent Overstreet   bcache: A block l...
1544
1545
1546
1547
1548
1549
1550
  	unsigned i;
  
  	mutex_lock(&c->bucket_lock);
  
  	set_gc_sectors(c);
  	c->gc_mark_valid = 1;
  	c->need_gc	= 0;
cafe56359   Kent Overstreet   bcache: A block l...
1551
1552
1553
  	for (i = 0; i < KEY_PTRS(&c->uuid_bucket); i++)
  		SET_GC_MARK(PTR_BUCKET(c, &c->uuid_bucket, i),
  			    GC_MARK_METADATA);
bf0a628a9   Nicholas Swenson   bcache: fix for g...
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
  	/* don't reclaim buckets to which writeback keys point */
  	rcu_read_lock();
  	for (i = 0; i < c->nr_uuids; i++) {
  		struct bcache_device *d = c->devices[i];
  		struct cached_dev *dc;
  		struct keybuf_key *w, *n;
  		unsigned j;
  
  		if (!d || UUID_FLASH_ONLY(&c->uuids[i]))
  			continue;
  		dc = container_of(d, struct cached_dev, disk);
  
  		spin_lock(&dc->writeback_keys.lock);
  		rbtree_postorder_for_each_entry_safe(w, n,
  					&dc->writeback_keys.keys, node)
  			for (j = 0; j < KEY_PTRS(&w->key); j++)
  				SET_GC_MARK(PTR_BUCKET(c, &w->key, j),
  					    GC_MARK_DIRTY);
  		spin_unlock(&dc->writeback_keys.lock);
  	}
  	rcu_read_unlock();
cafe56359   Kent Overstreet   bcache: A block l...
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
  	for_each_cache(ca, c, i) {
  		uint64_t *i;
  
  		ca->invalidate_needs_gc = 0;
  
  		for (i = ca->sb.d; i < ca->sb.d + ca->sb.keys; i++)
  			SET_GC_MARK(ca->buckets + *i, GC_MARK_METADATA);
  
  		for (i = ca->prio_buckets;
  		     i < ca->prio_buckets + prio_buckets(ca) * 2; i++)
  			SET_GC_MARK(ca->buckets + *i, GC_MARK_METADATA);
  
  		for_each_bucket(b, ca) {
cafe56359   Kent Overstreet   bcache: A block l...
1588
  			c->need_gc	= max(c->need_gc, bucket_gc_gen(b));
4fe6a8167   Kent Overstreet   bcache: Add a rea...
1589
1590
1591
1592
1593
1594
  			if (atomic_read(&b->pin))
  				continue;
  
  			BUG_ON(!GC_MARK(b) && GC_SECTORS_USED(b));
  
  			if (!GC_MARK(b) || GC_MARK(b) == GC_MARK_RECLAIMABLE)
cafe56359   Kent Overstreet   bcache: A block l...
1595
  				available++;
cafe56359   Kent Overstreet   bcache: A block l...
1596
1597
  		}
  	}
cafe56359   Kent Overstreet   bcache: A block l...
1598
1599
1600
  	mutex_unlock(&c->bucket_lock);
  	return available;
  }
72a44517f   Kent Overstreet   bcache: Convert g...
1601
  static void bch_btree_gc(struct cache_set *c)
cafe56359   Kent Overstreet   bcache: A block l...
1602
  {
cafe56359   Kent Overstreet   bcache: A block l...
1603
1604
1605
1606
1607
  	int ret;
  	unsigned long available;
  	struct gc_stat stats;
  	struct closure writes;
  	struct btree_op op;
cafe56359   Kent Overstreet   bcache: A block l...
1608
  	uint64_t start_time = local_clock();
579435114   Kent Overstreet   bcache: Refactor ...
1609

c37511b86   Kent Overstreet   bcache: Fix/revam...
1610
  	trace_bcache_gc_start(c);
cafe56359   Kent Overstreet   bcache: A block l...
1611
1612
1613
  
  	memset(&stats, 0, sizeof(struct gc_stat));
  	closure_init_stack(&writes);
b54d6934d   Kent Overstreet   bcache: Kill op->cl
1614
  	bch_btree_op_init(&op, SHRT_MAX);
cafe56359   Kent Overstreet   bcache: A block l...
1615
1616
  
  	btree_gc_start(c);
a1f0358b2   Kent Overstreet   bcache: Increment...
1617
1618
1619
  	do {
  		ret = btree_root(gc_root, c, &op, &writes, &stats);
  		closure_sync(&writes);
c5f1e5adf   Kent Overstreet   bcache: Add a con...
1620
  		cond_resched();
cafe56359   Kent Overstreet   bcache: A block l...
1621

a1f0358b2   Kent Overstreet   bcache: Increment...
1622
1623
1624
  		if (ret && ret != -EAGAIN)
  			pr_warn("gc failed!");
  	} while (ret);
cafe56359   Kent Overstreet   bcache: A block l...
1625
1626
  
  	available = bch_btree_gc_finish(c);
579435114   Kent Overstreet   bcache: Refactor ...
1627
  	wake_up_allocators(c);
169ef1cf6   Kent Overstreet   bcache: Don't exp...
1628
  	bch_time_stats_update(&c->btree_gc_time, start_time);
cafe56359   Kent Overstreet   bcache: A block l...
1629
1630
  
  	stats.key_bytes *= sizeof(uint64_t);
cafe56359   Kent Overstreet   bcache: A block l...
1631
1632
1633
  	stats.data	<<= 9;
  	stats.in_use	= (c->nbuckets - available) * 100 / c->nbuckets;
  	memcpy(&c->gc_stats, &stats, sizeof(struct gc_stat));
cafe56359   Kent Overstreet   bcache: A block l...
1634

c37511b86   Kent Overstreet   bcache: Fix/revam...
1635
  	trace_bcache_gc_end(c);
cafe56359   Kent Overstreet   bcache: A block l...
1636

72a44517f   Kent Overstreet   bcache: Convert g...
1637
1638
1639
1640
1641
1642
  	bch_moving_gc(c);
  }
  
  static int bch_gc_thread(void *arg)
  {
  	struct cache_set *c = arg;
a1f0358b2   Kent Overstreet   bcache: Increment...
1643
1644
  	struct cache *ca;
  	unsigned i;
72a44517f   Kent Overstreet   bcache: Convert g...
1645
1646
  
  	while (1) {
a1f0358b2   Kent Overstreet   bcache: Increment...
1647
  again:
72a44517f   Kent Overstreet   bcache: Convert g...
1648
1649
1650
1651
1652
  		bch_btree_gc(c);
  
  		set_current_state(TASK_INTERRUPTIBLE);
  		if (kthread_should_stop())
  			break;
a1f0358b2   Kent Overstreet   bcache: Increment...
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
  		mutex_lock(&c->bucket_lock);
  
  		for_each_cache(ca, c, i)
  			if (ca->invalidate_needs_gc) {
  				mutex_unlock(&c->bucket_lock);
  				set_current_state(TASK_RUNNING);
  				goto again;
  			}
  
  		mutex_unlock(&c->bucket_lock);
72a44517f   Kent Overstreet   bcache: Convert g...
1663
1664
1665
1666
  		schedule();
  	}
  
  	return 0;
cafe56359   Kent Overstreet   bcache: A block l...
1667
  }
72a44517f   Kent Overstreet   bcache: Convert g...
1668
  int bch_gc_thread_start(struct cache_set *c)
cafe56359   Kent Overstreet   bcache: A block l...
1669
  {
72a44517f   Kent Overstreet   bcache: Convert g...
1670
1671
1672
1673
1674
1675
  	c->gc_thread = kthread_create(bch_gc_thread, c, "bcache_gc");
  	if (IS_ERR(c->gc_thread))
  		return PTR_ERR(c->gc_thread);
  
  	set_task_state(c->gc_thread, TASK_INTERRUPTIBLE);
  	return 0;
cafe56359   Kent Overstreet   bcache: A block l...
1676
1677
1678
  }
  
  /* Initial partial gc */
487dded86   Kent Overstreet   bcache: Fix anoth...
1679
  static int bch_btree_check_recurse(struct btree *b, struct btree_op *op)
cafe56359   Kent Overstreet   bcache: A block l...
1680
  {
50310164b   Kent Overstreet   bcache: Kill bch_...
1681
  	int ret = 0;
50310164b   Kent Overstreet   bcache: Kill bch_...
1682
  	struct bkey *k, *p = NULL;
cafe56359   Kent Overstreet   bcache: A block l...
1683
  	struct btree_iter iter;
487dded86   Kent Overstreet   bcache: Fix anoth...
1684
1685
  	for_each_key_filter(&b->keys, k, &iter, bch_ptr_invalid)
  		bch_initial_mark_key(b->c, b->level, k);
cafe56359   Kent Overstreet   bcache: A block l...
1686

487dded86   Kent Overstreet   bcache: Fix anoth...
1687
  	bch_initial_mark_key(b->c, b->level + 1, &b->key);
cafe56359   Kent Overstreet   bcache: A block l...
1688
1689
  
  	if (b->level) {
c052dd9a2   Kent Overstreet   bcache: Convert b...
1690
  		bch_btree_iter_init(&b->keys, &iter, NULL);
cafe56359   Kent Overstreet   bcache: A block l...
1691

50310164b   Kent Overstreet   bcache: Kill bch_...
1692
  		do {
a85e968e6   Kent Overstreet   bcache: Add struc...
1693
1694
  			k = bch_btree_iter_next_filter(&iter, &b->keys,
  						       bch_ptr_bad);
50310164b   Kent Overstreet   bcache: Kill bch_...
1695
  			if (k)
2452cc890   Slava Pestov   bcache: try to se...
1696
  				btree_node_prefetch(b, k);
cafe56359   Kent Overstreet   bcache: A block l...
1697

50310164b   Kent Overstreet   bcache: Kill bch_...
1698
  			if (p)
487dded86   Kent Overstreet   bcache: Fix anoth...
1699
  				ret = btree(check_recurse, p, b, op);
cafe56359   Kent Overstreet   bcache: A block l...
1700

50310164b   Kent Overstreet   bcache: Kill bch_...
1701
1702
  			p = k;
  		} while (p && !ret);
cafe56359   Kent Overstreet   bcache: A block l...
1703
  	}
487dded86   Kent Overstreet   bcache: Fix anoth...
1704
  	return ret;
cafe56359   Kent Overstreet   bcache: A block l...
1705
  }
c18536a72   Kent Overstreet   bcache: Prune str...
1706
  int bch_btree_check(struct cache_set *c)
cafe56359   Kent Overstreet   bcache: A block l...
1707
  {
c18536a72   Kent Overstreet   bcache: Prune str...
1708
  	struct btree_op op;
cafe56359   Kent Overstreet   bcache: A block l...
1709

b54d6934d   Kent Overstreet   bcache: Kill op->cl
1710
  	bch_btree_op_init(&op, SHRT_MAX);
cafe56359   Kent Overstreet   bcache: A block l...
1711

487dded86   Kent Overstreet   bcache: Fix anoth...
1712
  	return btree_root(check_recurse, c, &op);
cafe56359   Kent Overstreet   bcache: A block l...
1713
  }
2531d9ee6   Kent Overstreet   bcache: Kill unus...
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
  void bch_initial_gc_finish(struct cache_set *c)
  {
  	struct cache *ca;
  	struct bucket *b;
  	unsigned i;
  
  	bch_btree_gc_finish(c);
  
  	mutex_lock(&c->bucket_lock);
  
  	/*
  	 * We need to put some unused buckets directly on the prio freelist in
  	 * order to get the allocator thread started - it needs freed buckets in
  	 * order to rewrite the prios and gens, and it needs to rewrite prios
  	 * and gens in order to free buckets.
  	 *
  	 * This is only safe for buckets that have no live data in them, which
  	 * there should always be some of.
  	 */
  	for_each_cache(ca, c, i) {
  		for_each_bucket(b, ca) {
  			if (fifo_full(&ca->free[RESERVE_PRIO]))
  				break;
  
  			if (bch_can_invalidate_bucket(ca, b) &&
  			    !GC_MARK(b)) {
  				__bch_invalidate_one_bucket(ca, b);
  				fifo_push(&ca->free[RESERVE_PRIO],
  					  b - ca->buckets);
  			}
  		}
  	}
  
  	mutex_unlock(&c->bucket_lock);
  }
cafe56359   Kent Overstreet   bcache: A block l...
1749
  /* Btree insertion */
829a60b90   Kent Overstreet   bcache: Move inse...
1750
1751
  static bool btree_insert_key(struct btree *b, struct bkey *k,
  			     struct bkey *replace_key)
cafe56359   Kent Overstreet   bcache: A block l...
1752
  {
829a60b90   Kent Overstreet   bcache: Move inse...
1753
  	unsigned status;
cafe56359   Kent Overstreet   bcache: A block l...
1754
1755
  
  	BUG_ON(bkey_cmp(k, &b->key) > 0);
1fa8455de   Kent Overstreet   bcache: Fix dirty...
1756

829a60b90   Kent Overstreet   bcache: Move inse...
1757
1758
1759
1760
  	status = bch_btree_insert_key(&b->keys, k, replace_key);
  	if (status != BTREE_INSERT_STATUS_NO_INSERT) {
  		bch_check_keys(&b->keys, "%u for %s", status,
  			       replace_key ? "replace" : "insert");
cafe56359   Kent Overstreet   bcache: A block l...
1761

829a60b90   Kent Overstreet   bcache: Move inse...
1762
1763
1764
1765
1766
  		trace_bcache_btree_insert_key(b, k, replace_key != NULL,
  					      status);
  		return true;
  	} else
  		return false;
cafe56359   Kent Overstreet   bcache: A block l...
1767
  }
59158fde4   Kent Overstreet   bcache: Add bch_b...
1768
1769
  static size_t insert_u64s_remaining(struct btree *b)
  {
3572324af   Kent Overstreet   bcache: Minor fix...
1770
  	long ret = bch_btree_keys_u64s_remaining(&b->keys);
59158fde4   Kent Overstreet   bcache: Add bch_b...
1771
1772
1773
1774
1775
1776
1777
1778
1779
  
  	/*
  	 * Might land in the middle of an existing extent and have to split it
  	 */
  	if (b->keys.ops->is_extents)
  		ret -= KEY_MAX_U64S;
  
  	return max(ret, 0L);
  }
26c949f80   Kent Overstreet   bcache: Add btree...
1780
  static bool bch_btree_insert_keys(struct btree *b, struct btree_op *op,
1b207d80d   Kent Overstreet   bcache: Kill op->...
1781
1782
  				  struct keylist *insert_keys,
  				  struct bkey *replace_key)
cafe56359   Kent Overstreet   bcache: A block l...
1783
1784
  {
  	bool ret = false;
dc9d98d62   Kent Overstreet   bcache: Convert d...
1785
  	int oldsize = bch_count_data(&b->keys);
cafe56359   Kent Overstreet   bcache: A block l...
1786

26c949f80   Kent Overstreet   bcache: Add btree...
1787
  	while (!bch_keylist_empty(insert_keys)) {
c2f95ae2e   Kent Overstreet   bcache: Clean up ...
1788
  		struct bkey *k = insert_keys->keys;
26c949f80   Kent Overstreet   bcache: Add btree...
1789

59158fde4   Kent Overstreet   bcache: Add bch_b...
1790
  		if (bkey_u64s(k) > insert_u64s_remaining(b))
403b6cdeb   Kent Overstreet   bcache: Insert mu...
1791
1792
1793
  			break;
  
  		if (bkey_cmp(k, &b->key) <= 0) {
3a3b6a4e0   Kent Overstreet   bcache: Don't bot...
1794
1795
  			if (!b->level)
  				bkey_put(b->c, k);
26c949f80   Kent Overstreet   bcache: Add btree...
1796

829a60b90   Kent Overstreet   bcache: Move inse...
1797
  			ret |= btree_insert_key(b, k, replace_key);
26c949f80   Kent Overstreet   bcache: Add btree...
1798
1799
  			bch_keylist_pop_front(insert_keys);
  		} else if (bkey_cmp(&START_KEY(k), &b->key) < 0) {
26c949f80   Kent Overstreet   bcache: Add btree...
1800
  			BKEY_PADDED(key) temp;
c2f95ae2e   Kent Overstreet   bcache: Clean up ...
1801
  			bkey_copy(&temp.key, insert_keys->keys);
26c949f80   Kent Overstreet   bcache: Add btree...
1802
1803
  
  			bch_cut_back(&b->key, &temp.key);
c2f95ae2e   Kent Overstreet   bcache: Clean up ...
1804
  			bch_cut_front(&b->key, insert_keys->keys);
26c949f80   Kent Overstreet   bcache: Add btree...
1805

829a60b90   Kent Overstreet   bcache: Move inse...
1806
  			ret |= btree_insert_key(b, &temp.key, replace_key);
26c949f80   Kent Overstreet   bcache: Add btree...
1807
1808
1809
1810
  			break;
  		} else {
  			break;
  		}
cafe56359   Kent Overstreet   bcache: A block l...
1811
  	}
829a60b90   Kent Overstreet   bcache: Move inse...
1812
1813
  	if (!ret)
  		op->insert_collision = true;
403b6cdeb   Kent Overstreet   bcache: Insert mu...
1814
  	BUG_ON(!bch_keylist_empty(insert_keys) && b->level);
dc9d98d62   Kent Overstreet   bcache: Convert d...
1815
  	BUG_ON(bch_count_data(&b->keys) < oldsize);
cafe56359   Kent Overstreet   bcache: A block l...
1816
1817
  	return ret;
  }
26c949f80   Kent Overstreet   bcache: Add btree...
1818
1819
  static int btree_split(struct btree *b, struct btree_op *op,
  		       struct keylist *insert_keys,
1b207d80d   Kent Overstreet   bcache: Kill op->...
1820
  		       struct bkey *replace_key)
cafe56359   Kent Overstreet   bcache: A block l...
1821
  {
d6fd3b11c   Kent Overstreet   bcache: Explicitl...
1822
  	bool split;
cafe56359   Kent Overstreet   bcache: A block l...
1823
1824
  	struct btree *n1, *n2 = NULL, *n3 = NULL;
  	uint64_t start_time = local_clock();
b54d6934d   Kent Overstreet   bcache: Kill op->cl
1825
  	struct closure cl;
17e21a9f2   Kent Overstreet   bcache: Have btre...
1826
  	struct keylist parent_keys;
b54d6934d   Kent Overstreet   bcache: Kill op->cl
1827
1828
  
  	closure_init_stack(&cl);
17e21a9f2   Kent Overstreet   bcache: Have btre...
1829
  	bch_keylist_init(&parent_keys);
cafe56359   Kent Overstreet   bcache: A block l...
1830

0a63b66db   Kent Overstreet   bcache: Rework bt...
1831
1832
1833
1834
1835
1836
1837
  	if (btree_check_reserve(b, op)) {
  		if (!b->level)
  			return -EINTR;
  		else
  			WARN(1, "insufficient reserve for split
  ");
  	}
78365411b   Kent Overstreet   bcache: Rework al...
1838

0a63b66db   Kent Overstreet   bcache: Rework bt...
1839
  	n1 = btree_node_alloc_replacement(b, op);
cafe56359   Kent Overstreet   bcache: A block l...
1840
1841
  	if (IS_ERR(n1))
  		goto err;
ee811287c   Kent Overstreet   bcache: Rename/sh...
1842
1843
  	split = set_blocks(btree_bset_first(n1),
  			   block_bytes(n1->c)) > (btree_blocks(b) * 4) / 5;
cafe56359   Kent Overstreet   bcache: A block l...
1844

cafe56359   Kent Overstreet   bcache: A block l...
1845
1846
  	if (split) {
  		unsigned keys = 0;
ee811287c   Kent Overstreet   bcache: Rename/sh...
1847
  		trace_bcache_btree_node_split(b, btree_bset_first(n1)->keys);
c37511b86   Kent Overstreet   bcache: Fix/revam...
1848

2452cc890   Slava Pestov   bcache: try to se...
1849
  		n2 = bch_btree_node_alloc(b->c, op, b->level, b->parent);
cafe56359   Kent Overstreet   bcache: A block l...
1850
1851
  		if (IS_ERR(n2))
  			goto err_free1;
d6fd3b11c   Kent Overstreet   bcache: Explicitl...
1852
  		if (!b->parent) {
2452cc890   Slava Pestov   bcache: try to se...
1853
  			n3 = bch_btree_node_alloc(b->c, op, b->level + 1, NULL);
cafe56359   Kent Overstreet   bcache: A block l...
1854
1855
1856
  			if (IS_ERR(n3))
  				goto err_free2;
  		}
2a285686c   Kent Overstreet   bcache: btree loc...
1857
1858
  		mutex_lock(&n1->write_lock);
  		mutex_lock(&n2->write_lock);
1b207d80d   Kent Overstreet   bcache: Kill op->...
1859
  		bch_btree_insert_keys(n1, op, insert_keys, replace_key);
cafe56359   Kent Overstreet   bcache: A block l...
1860

d6fd3b11c   Kent Overstreet   bcache: Explicitl...
1861
1862
  		/*
  		 * Has to be a linear search because we don't have an auxiliary
cafe56359   Kent Overstreet   bcache: A block l...
1863
1864
  		 * search tree yet
  		 */
ee811287c   Kent Overstreet   bcache: Rename/sh...
1865
1866
  		while (keys < (btree_bset_first(n1)->keys * 3) / 5)
  			keys += bkey_u64s(bset_bkey_idx(btree_bset_first(n1),
fafff81ce   Kent Overstreet   bcache: Bkey inde...
1867
  							keys));
cafe56359   Kent Overstreet   bcache: A block l...
1868

fafff81ce   Kent Overstreet   bcache: Bkey inde...
1869
  		bkey_copy_key(&n1->key,
ee811287c   Kent Overstreet   bcache: Rename/sh...
1870
1871
  			      bset_bkey_idx(btree_bset_first(n1), keys));
  		keys += bkey_u64s(bset_bkey_idx(btree_bset_first(n1), keys));
cafe56359   Kent Overstreet   bcache: A block l...
1872

ee811287c   Kent Overstreet   bcache: Rename/sh...
1873
1874
  		btree_bset_first(n2)->keys = btree_bset_first(n1)->keys - keys;
  		btree_bset_first(n1)->keys = keys;
cafe56359   Kent Overstreet   bcache: A block l...
1875

ee811287c   Kent Overstreet   bcache: Rename/sh...
1876
1877
1878
  		memcpy(btree_bset_first(n2)->start,
  		       bset_bkey_last(btree_bset_first(n1)),
  		       btree_bset_first(n2)->keys * sizeof(uint64_t));
cafe56359   Kent Overstreet   bcache: A block l...
1879
1880
  
  		bkey_copy_key(&n2->key, &b->key);
17e21a9f2   Kent Overstreet   bcache: Have btre...
1881
  		bch_keylist_add(&parent_keys, &n2->key);
b54d6934d   Kent Overstreet   bcache: Kill op->cl
1882
  		bch_btree_node_write(n2, &cl);
2a285686c   Kent Overstreet   bcache: btree loc...
1883
  		mutex_unlock(&n2->write_lock);
cafe56359   Kent Overstreet   bcache: A block l...
1884
  		rw_unlock(true, n2);
c37511b86   Kent Overstreet   bcache: Fix/revam...
1885
  	} else {
ee811287c   Kent Overstreet   bcache: Rename/sh...
1886
  		trace_bcache_btree_node_compact(b, btree_bset_first(n1)->keys);
c37511b86   Kent Overstreet   bcache: Fix/revam...
1887

2a285686c   Kent Overstreet   bcache: btree loc...
1888
  		mutex_lock(&n1->write_lock);
1b207d80d   Kent Overstreet   bcache: Kill op->...
1889
  		bch_btree_insert_keys(n1, op, insert_keys, replace_key);
c37511b86   Kent Overstreet   bcache: Fix/revam...
1890
  	}
cafe56359   Kent Overstreet   bcache: A block l...
1891

17e21a9f2   Kent Overstreet   bcache: Have btre...
1892
  	bch_keylist_add(&parent_keys, &n1->key);
b54d6934d   Kent Overstreet   bcache: Kill op->cl
1893
  	bch_btree_node_write(n1, &cl);
2a285686c   Kent Overstreet   bcache: btree loc...
1894
  	mutex_unlock(&n1->write_lock);
cafe56359   Kent Overstreet   bcache: A block l...
1895
1896
  
  	if (n3) {
d6fd3b11c   Kent Overstreet   bcache: Explicitl...
1897
  		/* Depth increases, make a new root */
2a285686c   Kent Overstreet   bcache: btree loc...
1898
  		mutex_lock(&n3->write_lock);
cafe56359   Kent Overstreet   bcache: A block l...
1899
  		bkey_copy_key(&n3->key, &MAX_KEY);
17e21a9f2   Kent Overstreet   bcache: Have btre...
1900
  		bch_btree_insert_keys(n3, op, &parent_keys, NULL);
b54d6934d   Kent Overstreet   bcache: Kill op->cl
1901
  		bch_btree_node_write(n3, &cl);
2a285686c   Kent Overstreet   bcache: btree loc...
1902
  		mutex_unlock(&n3->write_lock);
cafe56359   Kent Overstreet   bcache: A block l...
1903

b54d6934d   Kent Overstreet   bcache: Kill op->cl
1904
  		closure_sync(&cl);
cafe56359   Kent Overstreet   bcache: A block l...
1905
1906
  		bch_btree_set_root(n3);
  		rw_unlock(true, n3);
d6fd3b11c   Kent Overstreet   bcache: Explicitl...
1907
1908
  	} else if (!b->parent) {
  		/* Root filled up but didn't need to be split */
b54d6934d   Kent Overstreet   bcache: Kill op->cl
1909
  		closure_sync(&cl);
cafe56359   Kent Overstreet   bcache: A block l...
1910
1911
  		bch_btree_set_root(n1);
  	} else {
17e21a9f2   Kent Overstreet   bcache: Have btre...
1912
  		/* Split a non root node */
b54d6934d   Kent Overstreet   bcache: Kill op->cl
1913
  		closure_sync(&cl);
17e21a9f2   Kent Overstreet   bcache: Have btre...
1914
1915
  		make_btree_freeing_key(b, parent_keys.top);
  		bch_keylist_push(&parent_keys);
17e21a9f2   Kent Overstreet   bcache: Have btre...
1916
1917
  		bch_btree_insert_node(b->parent, op, &parent_keys, NULL, NULL);
  		BUG_ON(!bch_keylist_empty(&parent_keys));
cafe56359   Kent Overstreet   bcache: A block l...
1918
  	}
05335cff9   Kent Overstreet   bcache: Fix a rac...
1919
  	btree_node_free(b);
cafe56359   Kent Overstreet   bcache: A block l...
1920
  	rw_unlock(true, n1);
cafe56359   Kent Overstreet   bcache: A block l...
1921

169ef1cf6   Kent Overstreet   bcache: Don't exp...
1922
  	bch_time_stats_update(&b->c->btree_split_time, start_time);
cafe56359   Kent Overstreet   bcache: A block l...
1923
1924
1925
  
  	return 0;
  err_free2:
5f5837d2d   Kent Overstreet   bcache: Do bkey_p...
1926
  	bkey_put(b->c, &n2->key);
e8e1d4682   Kent Overstreet   bcache: Convert t...
1927
  	btree_node_free(n2);
cafe56359   Kent Overstreet   bcache: A block l...
1928
1929
  	rw_unlock(true, n2);
  err_free1:
5f5837d2d   Kent Overstreet   bcache: Do bkey_p...
1930
  	bkey_put(b->c, &n1->key);
e8e1d4682   Kent Overstreet   bcache: Convert t...
1931
  	btree_node_free(n1);
cafe56359   Kent Overstreet   bcache: A block l...
1932
1933
  	rw_unlock(true, n1);
  err:
0a63b66db   Kent Overstreet   bcache: Rework bt...
1934
  	WARN(1, "bcache: btree split failed (level %u)", b->level);
5f5837d2d   Kent Overstreet   bcache: Do bkey_p...
1935

cafe56359   Kent Overstreet   bcache: A block l...
1936
1937
1938
1939
  	if (n3 == ERR_PTR(-EAGAIN) ||
  	    n2 == ERR_PTR(-EAGAIN) ||
  	    n1 == ERR_PTR(-EAGAIN))
  		return -EAGAIN;
cafe56359   Kent Overstreet   bcache: A block l...
1940
1941
  	return -ENOMEM;
  }
26c949f80   Kent Overstreet   bcache: Add btree...
1942
  static int bch_btree_insert_node(struct btree *b, struct btree_op *op,
c18536a72   Kent Overstreet   bcache: Prune str...
1943
  				 struct keylist *insert_keys,
1b207d80d   Kent Overstreet   bcache: Kill op->...
1944
1945
  				 atomic_t *journal_ref,
  				 struct bkey *replace_key)
cafe56359   Kent Overstreet   bcache: A block l...
1946
  {
2a285686c   Kent Overstreet   bcache: btree loc...
1947
  	struct closure cl;
17e21a9f2   Kent Overstreet   bcache: Have btre...
1948
  	BUG_ON(b->level && replace_key);
2a285686c   Kent Overstreet   bcache: btree loc...
1949
1950
1951
1952
1953
1954
1955
  	closure_init_stack(&cl);
  
  	mutex_lock(&b->write_lock);
  
  	if (write_block(b) != btree_bset_last(b) &&
  	    b->keys.last_set_unwritten)
  		bch_btree_init_next(b); /* just wrote a set */
59158fde4   Kent Overstreet   bcache: Add bch_b...
1956
  	if (bch_keylist_nkeys(insert_keys) > insert_u64s_remaining(b)) {
2a285686c   Kent Overstreet   bcache: btree loc...
1957
1958
1959
  		mutex_unlock(&b->write_lock);
  		goto split;
  	}
3b3e9e50d   Kent Overstreet   bcache: Don't ret...
1960

2a285686c   Kent Overstreet   bcache: btree loc...
1961
  	BUG_ON(write_block(b) != btree_bset_last(b));
cafe56359   Kent Overstreet   bcache: A block l...
1962

2a285686c   Kent Overstreet   bcache: btree loc...
1963
1964
1965
1966
1967
1968
  	if (bch_btree_insert_keys(b, op, insert_keys, replace_key)) {
  		if (!b->level)
  			bch_btree_leaf_dirty(b, journal_ref);
  		else
  			bch_btree_node_write(b, &cl);
  	}
17e21a9f2   Kent Overstreet   bcache: Have btre...
1969

2a285686c   Kent Overstreet   bcache: btree loc...
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
  	mutex_unlock(&b->write_lock);
  
  	/* wait for btree node write if necessary, after unlock */
  	closure_sync(&cl);
  
  	return 0;
  split:
  	if (current->bio_list) {
  		op->lock = b->c->root->level + 1;
  		return -EAGAIN;
  	} else if (op->lock <= b->c->root->level) {
  		op->lock = b->c->root->level + 1;
  		return -EINTR;
  	} else {
  		/* Invalidated all iterators */
  		int ret = btree_split(b, op, insert_keys, replace_key);
  
  		if (bch_keylist_empty(insert_keys))
  			return 0;
  		else if (!ret)
  			return -EINTR;
  		return ret;
17e21a9f2   Kent Overstreet   bcache: Have btre...
1992
  	}
26c949f80   Kent Overstreet   bcache: Add btree...
1993
  }
cafe56359   Kent Overstreet   bcache: A block l...
1994

e7c590eb6   Kent Overstreet   bcache: Convert b...
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
  int bch_btree_insert_check_key(struct btree *b, struct btree_op *op,
  			       struct bkey *check_key)
  {
  	int ret = -EINTR;
  	uint64_t btree_ptr = b->key.ptr[0];
  	unsigned long seq = b->seq;
  	struct keylist insert;
  	bool upgrade = op->lock == -1;
  
  	bch_keylist_init(&insert);
  
  	if (upgrade) {
  		rw_unlock(false, b);
  		rw_lock(true, b, b->level);
  
  		if (b->key.ptr[0] != btree_ptr ||
2ef9ccbfc   Zheng Liu   bcache: fix a liv...
2011
2012
                     b->seq != seq + 1) {
                         op->lock = b->level;
e7c590eb6   Kent Overstreet   bcache: Convert b...
2013
  			goto out;
2ef9ccbfc   Zheng Liu   bcache: fix a liv...
2014
                 }
e7c590eb6   Kent Overstreet   bcache: Convert b...
2015
2016
2017
2018
2019
2020
2021
2022
  	}
  
  	SET_KEY_PTRS(check_key, 1);
  	get_random_bytes(&check_key->ptr[0], sizeof(uint64_t));
  
  	SET_PTR_DEV(check_key, 0, PTR_CHECK_DEV);
  
  	bch_keylist_add(&insert, check_key);
1b207d80d   Kent Overstreet   bcache: Kill op->...
2023
  	ret = bch_btree_insert_node(b, op, &insert, NULL, NULL);
e7c590eb6   Kent Overstreet   bcache: Convert b...
2024
2025
2026
2027
2028
2029
2030
  
  	BUG_ON(!ret && !bch_keylist_empty(&insert));
  out:
  	if (upgrade)
  		downgrade_write(&b->lock);
  	return ret;
  }
cc7b88192   Kent Overstreet   bcache: Convert b...
2031
2032
2033
2034
2035
2036
  struct btree_insert_op {
  	struct btree_op	op;
  	struct keylist	*keys;
  	atomic_t	*journal_ref;
  	struct bkey	*replace_key;
  };
cafe56359   Kent Overstreet   bcache: A block l...
2037

08239ca2a   Wei Yongjun   bcache: fix spars...
2038
  static int btree_insert_fn(struct btree_op *b_op, struct btree *b)
cc7b88192   Kent Overstreet   bcache: Convert b...
2039
2040
2041
  {
  	struct btree_insert_op *op = container_of(b_op,
  					struct btree_insert_op, op);
cafe56359   Kent Overstreet   bcache: A block l...
2042

cc7b88192   Kent Overstreet   bcache: Convert b...
2043
2044
2045
2046
2047
2048
  	int ret = bch_btree_insert_node(b, &op->op, op->keys,
  					op->journal_ref, op->replace_key);
  	if (ret && !bch_keylist_empty(op->keys))
  		return ret;
  	else
  		return MAP_DONE;
cafe56359   Kent Overstreet   bcache: A block l...
2049
  }
cc7b88192   Kent Overstreet   bcache: Convert b...
2050
2051
  int bch_btree_insert(struct cache_set *c, struct keylist *keys,
  		     atomic_t *journal_ref, struct bkey *replace_key)
cafe56359   Kent Overstreet   bcache: A block l...
2052
  {
cc7b88192   Kent Overstreet   bcache: Convert b...
2053
  	struct btree_insert_op op;
cafe56359   Kent Overstreet   bcache: A block l...
2054
  	int ret = 0;
cafe56359   Kent Overstreet   bcache: A block l...
2055

cc7b88192   Kent Overstreet   bcache: Convert b...
2056
  	BUG_ON(current->bio_list);
4f3d40147   Kent Overstreet   bcache: Add expli...
2057
  	BUG_ON(bch_keylist_empty(keys));
cafe56359   Kent Overstreet   bcache: A block l...
2058

cc7b88192   Kent Overstreet   bcache: Convert b...
2059
2060
2061
2062
  	bch_btree_op_init(&op.op, 0);
  	op.keys		= keys;
  	op.journal_ref	= journal_ref;
  	op.replace_key	= replace_key;
cafe56359   Kent Overstreet   bcache: A block l...
2063

cc7b88192   Kent Overstreet   bcache: Convert b...
2064
2065
2066
2067
2068
2069
  	while (!ret && !bch_keylist_empty(keys)) {
  		op.op.lock = 0;
  		ret = bch_btree_map_leaf_nodes(&op.op, c,
  					       &START_KEY(keys->keys),
  					       btree_insert_fn);
  	}
cafe56359   Kent Overstreet   bcache: A block l...
2070

cc7b88192   Kent Overstreet   bcache: Convert b...
2071
2072
  	if (ret) {
  		struct bkey *k;
cafe56359   Kent Overstreet   bcache: A block l...
2073

cc7b88192   Kent Overstreet   bcache: Convert b...
2074
  		pr_err("error %i", ret);
cafe56359   Kent Overstreet   bcache: A block l...
2075

cc7b88192   Kent Overstreet   bcache: Convert b...
2076
  		while ((k = bch_keylist_pop(keys)))
3a3b6a4e0   Kent Overstreet   bcache: Don't bot...
2077
  			bkey_put(c, k);
cc7b88192   Kent Overstreet   bcache: Convert b...
2078
2079
  	} else if (op.op.insert_collision)
  		ret = -ESRCH;
6054c6d4d   Kent Overstreet   bcache: Don't use...
2080

cafe56359   Kent Overstreet   bcache: A block l...
2081
2082
2083
2084
2085
2086
  	return ret;
  }
  
  void bch_btree_set_root(struct btree *b)
  {
  	unsigned i;
e49c7c374   Kent Overstreet   bcache: FUA fixes
2087
2088
2089
  	struct closure cl;
  
  	closure_init_stack(&cl);
cafe56359   Kent Overstreet   bcache: A block l...
2090

c37511b86   Kent Overstreet   bcache: Fix/revam...
2091
  	trace_bcache_btree_set_root(b);
cafe56359   Kent Overstreet   bcache: A block l...
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
  	BUG_ON(!b->written);
  
  	for (i = 0; i < KEY_PTRS(&b->key); i++)
  		BUG_ON(PTR_BUCKET(b->c, &b->key, i)->prio != BTREE_PRIO);
  
  	mutex_lock(&b->c->bucket_lock);
  	list_del_init(&b->list);
  	mutex_unlock(&b->c->bucket_lock);
  
  	b->c->root = b;
cafe56359   Kent Overstreet   bcache: A block l...
2102

e49c7c374   Kent Overstreet   bcache: FUA fixes
2103
2104
  	bch_journal_meta(b->c, &cl);
  	closure_sync(&cl);
cafe56359   Kent Overstreet   bcache: A block l...
2105
  }
48dad8baf   Kent Overstreet   bcache: Add btree...
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
  /* Map across nodes or keys */
  
  static int bch_btree_map_nodes_recurse(struct btree *b, struct btree_op *op,
  				       struct bkey *from,
  				       btree_map_nodes_fn *fn, int flags)
  {
  	int ret = MAP_CONTINUE;
  
  	if (b->level) {
  		struct bkey *k;
  		struct btree_iter iter;
c052dd9a2   Kent Overstreet   bcache: Convert b...
2117
  		bch_btree_iter_init(&b->keys, &iter, from);
48dad8baf   Kent Overstreet   bcache: Add btree...
2118

a85e968e6   Kent Overstreet   bcache: Add struc...
2119
  		while ((k = bch_btree_iter_next_filter(&iter, &b->keys,
48dad8baf   Kent Overstreet   bcache: Add btree...
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
  						       bch_ptr_bad))) {
  			ret = btree(map_nodes_recurse, k, b,
  				    op, from, fn, flags);
  			from = NULL;
  
  			if (ret != MAP_CONTINUE)
  				return ret;
  		}
  	}
  
  	if (!b->level || flags == MAP_ALL_NODES)
  		ret = fn(op, b);
  
  	return ret;
  }
  
  int __bch_btree_map_nodes(struct btree_op *op, struct cache_set *c,
  			  struct bkey *from, btree_map_nodes_fn *fn, int flags)
  {
b54d6934d   Kent Overstreet   bcache: Kill op->cl
2139
  	return btree_root(map_nodes_recurse, c, op, from, fn, flags);
48dad8baf   Kent Overstreet   bcache: Add btree...
2140
2141
2142
2143
2144
2145
2146
2147
2148
  }
  
  static int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op,
  				      struct bkey *from, btree_map_keys_fn *fn,
  				      int flags)
  {
  	int ret = MAP_CONTINUE;
  	struct bkey *k;
  	struct btree_iter iter;
c052dd9a2   Kent Overstreet   bcache: Convert b...
2149
  	bch_btree_iter_init(&b->keys, &iter, from);
48dad8baf   Kent Overstreet   bcache: Add btree...
2150

a85e968e6   Kent Overstreet   bcache: Add struc...
2151
  	while ((k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad))) {
48dad8baf   Kent Overstreet   bcache: Add btree...
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
  		ret = !b->level
  			? fn(op, b, k)
  			: btree(map_keys_recurse, k, b, op, from, fn, flags);
  		from = NULL;
  
  		if (ret != MAP_CONTINUE)
  			return ret;
  	}
  
  	if (!b->level && (flags & MAP_END_KEY))
  		ret = fn(op, b, &KEY(KEY_INODE(&b->key),
  				     KEY_OFFSET(&b->key), 0));
  
  	return ret;
  }
  
  int bch_btree_map_keys(struct btree_op *op, struct cache_set *c,
  		       struct bkey *from, btree_map_keys_fn *fn, int flags)
  {
b54d6934d   Kent Overstreet   bcache: Kill op->cl
2171
  	return btree_root(map_keys_recurse, c, op, from, fn, flags);
48dad8baf   Kent Overstreet   bcache: Add btree...
2172
  }
cafe56359   Kent Overstreet   bcache: A block l...
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
  /* Keybuf code */
  
  static inline int keybuf_cmp(struct keybuf_key *l, struct keybuf_key *r)
  {
  	/* Overlapping keys compare equal */
  	if (bkey_cmp(&l->key, &START_KEY(&r->key)) <= 0)
  		return -1;
  	if (bkey_cmp(&START_KEY(&l->key), &r->key) >= 0)
  		return 1;
  	return 0;
  }
  
  static inline int keybuf_nonoverlapping_cmp(struct keybuf_key *l,
  					    struct keybuf_key *r)
  {
  	return clamp_t(int64_t, bkey_cmp(&l->key, &r->key), -1, 1);
  }
48dad8baf   Kent Overstreet   bcache: Add btree...
2190
2191
  struct refill {
  	struct btree_op	op;
48a915a87   Kent Overstreet   bcache: Better fu...
2192
  	unsigned	nr_found;
48dad8baf   Kent Overstreet   bcache: Add btree...
2193
2194
2195
2196
  	struct keybuf	*buf;
  	struct bkey	*end;
  	keybuf_pred_fn	*pred;
  };
cafe56359   Kent Overstreet   bcache: A block l...
2197

48dad8baf   Kent Overstreet   bcache: Add btree...
2198
2199
2200
2201
2202
2203
  static int refill_keybuf_fn(struct btree_op *op, struct btree *b,
  			    struct bkey *k)
  {
  	struct refill *refill = container_of(op, struct refill, op);
  	struct keybuf *buf = refill->buf;
  	int ret = MAP_CONTINUE;
cafe56359   Kent Overstreet   bcache: A block l...
2204

48dad8baf   Kent Overstreet   bcache: Add btree...
2205
2206
2207
2208
  	if (bkey_cmp(k, refill->end) >= 0) {
  		ret = MAP_DONE;
  		goto out;
  	}
cafe56359   Kent Overstreet   bcache: A block l...
2209

48dad8baf   Kent Overstreet   bcache: Add btree...
2210
2211
  	if (!KEY_SIZE(k)) /* end key */
  		goto out;
cafe56359   Kent Overstreet   bcache: A block l...
2212

48dad8baf   Kent Overstreet   bcache: Add btree...
2213
2214
  	if (refill->pred(buf, k)) {
  		struct keybuf_key *w;
cafe56359   Kent Overstreet   bcache: A block l...
2215

48dad8baf   Kent Overstreet   bcache: Add btree...
2216
  		spin_lock(&buf->lock);
cafe56359   Kent Overstreet   bcache: A block l...
2217

48dad8baf   Kent Overstreet   bcache: Add btree...
2218
2219
2220
2221
2222
  		w = array_alloc(&buf->freelist);
  		if (!w) {
  			spin_unlock(&buf->lock);
  			return MAP_DONE;
  		}
cafe56359   Kent Overstreet   bcache: A block l...
2223

48dad8baf   Kent Overstreet   bcache: Add btree...
2224
2225
  		w->private = NULL;
  		bkey_copy(&w->key, k);
cafe56359   Kent Overstreet   bcache: A block l...
2226

48dad8baf   Kent Overstreet   bcache: Add btree...
2227
2228
  		if (RB_INSERT(&buf->keys, w, node, keybuf_cmp))
  			array_free(&buf->freelist, w);
48a915a87   Kent Overstreet   bcache: Better fu...
2229
2230
  		else
  			refill->nr_found++;
cafe56359   Kent Overstreet   bcache: A block l...
2231

48dad8baf   Kent Overstreet   bcache: Add btree...
2232
2233
  		if (array_freelist_empty(&buf->freelist))
  			ret = MAP_DONE;
cafe56359   Kent Overstreet   bcache: A block l...
2234

48dad8baf   Kent Overstreet   bcache: Add btree...
2235
  		spin_unlock(&buf->lock);
cafe56359   Kent Overstreet   bcache: A block l...
2236
  	}
48dad8baf   Kent Overstreet   bcache: Add btree...
2237
2238
2239
  out:
  	buf->last_scanned = *k;
  	return ret;
cafe56359   Kent Overstreet   bcache: A block l...
2240
2241
2242
  }
  
  void bch_refill_keybuf(struct cache_set *c, struct keybuf *buf,
72c270612   Kent Overstreet   bcache: Write out...
2243
  		       struct bkey *end, keybuf_pred_fn *pred)
cafe56359   Kent Overstreet   bcache: A block l...
2244
2245
  {
  	struct bkey start = buf->last_scanned;
48dad8baf   Kent Overstreet   bcache: Add btree...
2246
  	struct refill refill;
cafe56359   Kent Overstreet   bcache: A block l...
2247
2248
  
  	cond_resched();
b54d6934d   Kent Overstreet   bcache: Kill op->cl
2249
  	bch_btree_op_init(&refill.op, -1);
48a915a87   Kent Overstreet   bcache: Better fu...
2250
2251
2252
2253
  	refill.nr_found	= 0;
  	refill.buf	= buf;
  	refill.end	= end;
  	refill.pred	= pred;
48dad8baf   Kent Overstreet   bcache: Add btree...
2254
2255
2256
  
  	bch_btree_map_keys(&refill.op, c, &buf->last_scanned,
  			   refill_keybuf_fn, MAP_END_KEY);
cafe56359   Kent Overstreet   bcache: A block l...
2257

48a915a87   Kent Overstreet   bcache: Better fu...
2258
2259
2260
2261
  	trace_bcache_keyscan(refill.nr_found,
  			     KEY_INODE(&start), KEY_OFFSET(&start),
  			     KEY_INODE(&buf->last_scanned),
  			     KEY_OFFSET(&buf->last_scanned));
cafe56359   Kent Overstreet   bcache: A block l...
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
  
  	spin_lock(&buf->lock);
  
  	if (!RB_EMPTY_ROOT(&buf->keys)) {
  		struct keybuf_key *w;
  		w = RB_FIRST(&buf->keys, struct keybuf_key, node);
  		buf->start	= START_KEY(&w->key);
  
  		w = RB_LAST(&buf->keys, struct keybuf_key, node);
  		buf->end	= w->key;
  	} else {
  		buf->start	= MAX_KEY;
  		buf->end	= MAX_KEY;
  	}
  
  	spin_unlock(&buf->lock);
  }
  
  static void __bch_keybuf_del(struct keybuf *buf, struct keybuf_key *w)
  {
  	rb_erase(&w->node, &buf->keys);
  	array_free(&buf->freelist, w);
  }
  
  void bch_keybuf_del(struct keybuf *buf, struct keybuf_key *w)
  {
  	spin_lock(&buf->lock);
  	__bch_keybuf_del(buf, w);
  	spin_unlock(&buf->lock);
  }
  
  bool bch_keybuf_check_overlapping(struct keybuf *buf, struct bkey *start,
  				  struct bkey *end)
  {
  	bool ret = false;
  	struct keybuf_key *p, *w, s;
  	s.key = *start;
  
  	if (bkey_cmp(end, &buf->start) <= 0 ||
  	    bkey_cmp(start, &buf->end) >= 0)
  		return false;
  
  	spin_lock(&buf->lock);
  	w = RB_GREATER(&buf->keys, s, node, keybuf_nonoverlapping_cmp);
  
  	while (w && bkey_cmp(&START_KEY(&w->key), end) < 0) {
  		p = w;
  		w = RB_NEXT(w, node);
  
  		if (p->private)
  			ret = true;
  		else
  			__bch_keybuf_del(buf, p);
  	}
  
  	spin_unlock(&buf->lock);
  	return ret;
  }
  
  struct keybuf_key *bch_keybuf_next(struct keybuf *buf)
  {
  	struct keybuf_key *w;
  	spin_lock(&buf->lock);
  
  	w = RB_FIRST(&buf->keys, struct keybuf_key, node);
  
  	while (w && w->private)
  		w = RB_NEXT(w, node);
  
  	if (w)
  		w->private = ERR_PTR(-EINTR);
  
  	spin_unlock(&buf->lock);
  	return w;
  }
  
  struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *c,
48dad8baf   Kent Overstreet   bcache: Add btree...
2339
2340
2341
  					  struct keybuf *buf,
  					  struct bkey *end,
  					  keybuf_pred_fn *pred)
cafe56359   Kent Overstreet   bcache: A block l...
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
  {
  	struct keybuf_key *ret;
  
  	while (1) {
  		ret = bch_keybuf_next(buf);
  		if (ret)
  			break;
  
  		if (bkey_cmp(&buf->last_scanned, end) >= 0) {
  			pr_debug("scan finished");
  			break;
  		}
72c270612   Kent Overstreet   bcache: Write out...
2354
  		bch_refill_keybuf(c, buf, end, pred);
cafe56359   Kent Overstreet   bcache: A block l...
2355
2356
2357
2358
  	}
  
  	return ret;
  }
72c270612   Kent Overstreet   bcache: Write out...
2359
  void bch_keybuf_init(struct keybuf *buf)
cafe56359   Kent Overstreet   bcache: A block l...
2360
  {
cafe56359   Kent Overstreet   bcache: A block l...
2361
2362
2363
2364
2365
2366
  	buf->last_scanned	= MAX_KEY;
  	buf->keys		= RB_ROOT;
  
  	spin_lock_init(&buf->lock);
  	array_allocator_init(&buf->freelist);
  }