Commit 4f81a4176297db57c7ef3b2893092dd837c1e2a8
Committed by
Alasdair G Kergon
1 parent
44feb387f6
Exists in
master
and in
20 other branches
dm thin: move bio_prison code to separate module
The bio prison code will be useful to other future DM targets so move it to a separate module. Signed-off-by: Mike Snitzer <snitzer@redhat.com> Signed-off-by: Joe Thornber <ejt@redhat.com> Signed-off-by: Alasdair G Kergon <agk@redhat.com>
Showing 5 changed files with 499 additions and 404 deletions Side-by-side Diff
drivers/md/Kconfig
... | ... | @@ -216,6 +216,13 @@ |
216 | 216 | as a cache, holding recently-read blocks in memory and performing |
217 | 217 | delayed writes. |
218 | 218 | |
219 | +config DM_BIO_PRISON | |
220 | + tristate | |
221 | + depends on BLK_DEV_DM && EXPERIMENTAL | |
222 | + ---help--- | |
223 | + Some bio locking schemes used by other device-mapper targets | |
224 | + including thin provisioning. | |
225 | + | |
219 | 226 | source "drivers/md/persistent-data/Kconfig" |
220 | 227 | |
221 | 228 | config DM_CRYPT |
... | ... | @@ -247,6 +254,7 @@ |
247 | 254 | tristate "Thin provisioning target (EXPERIMENTAL)" |
248 | 255 | depends on BLK_DEV_DM && EXPERIMENTAL |
249 | 256 | select DM_PERSISTENT_DATA |
257 | + select DM_BIO_PRISON | |
250 | 258 | ---help--- |
251 | 259 | Provides thin provisioning and snapshots that share a data store. |
252 | 260 |
drivers/md/Makefile
... | ... | @@ -29,6 +29,7 @@ |
29 | 29 | obj-$(CONFIG_BLK_DEV_MD) += md-mod.o |
30 | 30 | obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o |
31 | 31 | obj-$(CONFIG_DM_BUFIO) += dm-bufio.o |
32 | +obj-$(CONFIG_DM_BIO_PRISON) += dm-bio-prison.o | |
32 | 33 | obj-$(CONFIG_DM_CRYPT) += dm-crypt.o |
33 | 34 | obj-$(CONFIG_DM_DELAY) += dm-delay.o |
34 | 35 | obj-$(CONFIG_DM_FLAKEY) += dm-flakey.o |
drivers/md/dm-bio-prison.c
1 | +/* | |
2 | + * Copyright (C) 2012 Red Hat, Inc. | |
3 | + * | |
4 | + * This file is released under the GPL. | |
5 | + */ | |
6 | + | |
7 | +#include "dm.h" | |
8 | +#include "dm-bio-prison.h" | |
9 | + | |
10 | +#include <linux/spinlock.h> | |
11 | +#include <linux/mempool.h> | |
12 | +#include <linux/module.h> | |
13 | +#include <linux/slab.h> | |
14 | + | |
15 | +/*----------------------------------------------------------------*/ | |
16 | + | |
/*
 * A cell holds all bios that were detained with the same key.  The first
 * bio to claim the key becomes the holder; later bios queue behind it.
 */
struct dm_bio_prison_cell {
	struct hlist_node list;		/* chain in prison->cells[] bucket */
	struct dm_bio_prison *prison;	/* owning prison, needed on release */
	struct dm_cell_key key;		/* identity of this cell */
	struct bio *holder;		/* first bio detained with this key */
	struct bio_list bios;		/* subsequent bios waiting on the key */
};
24 | + | |
/*
 * A prison is a hash table of cells plus a mempool to allocate cells
 * from.  A single lock serialises all insertion and removal.
 */
struct dm_bio_prison {
	spinlock_t lock;		/* protects the hash table below */
	mempool_t *cell_pool;		/* slab-backed pool of cells */

	unsigned nr_buckets;		/* power of two, see calc_nr_buckets() */
	unsigned hash_mask;		/* nr_buckets - 1, used by hash_key() */
	struct hlist_head *cells;	/* bucket array, allocated with the struct */
};
33 | + | |
34 | +/*----------------------------------------------------------------*/ | |
35 | + | |
/*
 * Pick a power-of-two bucket count for the cell hash table: roughly one
 * bucket per four concurrent cells, clamped to [128, 8192] and rounded
 * up to the next power of two so it can be used with a simple mask.
 */
static uint32_t calc_nr_buckets(unsigned nr_cells)
{
	uint32_t nr_buckets = 128;
	unsigned target = nr_cells / 4;

	if (target > 8192)
		target = 8192;

	while (nr_buckets < target)
		nr_buckets *= 2;

	return nr_buckets;
}
48 | + | |
49 | +static struct kmem_cache *_cell_cache; | |
50 | + | |
51 | +/* | |
52 | + * @nr_cells should be the number of cells you want in use _concurrently_. | |
53 | + * Don't confuse it with the number of distinct keys. | |
54 | + */ | |
55 | +struct dm_bio_prison *dm_bio_prison_create(unsigned nr_cells) | |
56 | +{ | |
57 | + unsigned i; | |
58 | + uint32_t nr_buckets = calc_nr_buckets(nr_cells); | |
59 | + size_t len = sizeof(struct dm_bio_prison) + | |
60 | + (sizeof(struct hlist_head) * nr_buckets); | |
61 | + struct dm_bio_prison *prison = kmalloc(len, GFP_KERNEL); | |
62 | + | |
63 | + if (!prison) | |
64 | + return NULL; | |
65 | + | |
66 | + spin_lock_init(&prison->lock); | |
67 | + prison->cell_pool = mempool_create_slab_pool(nr_cells, _cell_cache); | |
68 | + if (!prison->cell_pool) { | |
69 | + kfree(prison); | |
70 | + return NULL; | |
71 | + } | |
72 | + | |
73 | + prison->nr_buckets = nr_buckets; | |
74 | + prison->hash_mask = nr_buckets - 1; | |
75 | + prison->cells = (struct hlist_head *) (prison + 1); | |
76 | + for (i = 0; i < nr_buckets; i++) | |
77 | + INIT_HLIST_HEAD(prison->cells + i); | |
78 | + | |
79 | + return prison; | |
80 | +} | |
81 | +EXPORT_SYMBOL_GPL(dm_bio_prison_create); | |
82 | + | |
/*
 * Free a prison created with dm_bio_prison_create().
 */
void dm_bio_prison_destroy(struct dm_bio_prison *prison)
{
	mempool_destroy(prison->cell_pool);
	kfree(prison);	/* the bucket array is part of this allocation */
}
EXPORT_SYMBOL_GPL(dm_bio_prison_destroy);
89 | + | |
/*
 * Map a key to a bucket index.  Note that only key->block feeds the hash;
 * dev and virtual are only compared in keys_equal().
 */
static uint32_t hash_key(struct dm_bio_prison *prison, struct dm_cell_key *key)
{
	const unsigned long BIG_PRIME = 4294967291UL;	/* largest prime below 2^32 */
	uint64_t hash = key->block * BIG_PRIME;

	/* hash_mask is nr_buckets - 1, so masking is a cheap modulo */
	return (uint32_t) (hash & prison->hash_mask);
}
97 | + | |
98 | +static int keys_equal(struct dm_cell_key *lhs, struct dm_cell_key *rhs) | |
99 | +{ | |
100 | + return (lhs->virtual == rhs->virtual) && | |
101 | + (lhs->dev == rhs->dev) && | |
102 | + (lhs->block == rhs->block); | |
103 | +} | |
104 | + | |
/*
 * Walk one hash bucket looking for a cell with a matching key.
 * Caller must hold prison->lock.  Returns NULL if no cell matches.
 */
static struct dm_bio_prison_cell *__search_bucket(struct hlist_head *bucket,
						  struct dm_cell_key *key)
{
	struct dm_bio_prison_cell *cell;
	struct hlist_node *tmp;

	hlist_for_each_entry(cell, tmp, bucket, list)
		if (keys_equal(&cell->key, key))
			return cell;

	return NULL;
}
117 | + | |
118 | +/* | |
119 | + * This may block if a new cell needs allocating. You must ensure that | |
120 | + * cells will be unlocked even if the calling thread is blocked. | |
121 | + * | |
122 | + * Returns 1 if the cell was already held, 0 if @inmate is the new holder. | |
123 | + */ | |
124 | +int dm_bio_detain(struct dm_bio_prison *prison, struct dm_cell_key *key, | |
125 | + struct bio *inmate, struct dm_bio_prison_cell **ref) | |
126 | +{ | |
127 | + int r = 1; | |
128 | + unsigned long flags; | |
129 | + uint32_t hash = hash_key(prison, key); | |
130 | + struct dm_bio_prison_cell *cell, *cell2; | |
131 | + | |
132 | + BUG_ON(hash > prison->nr_buckets); | |
133 | + | |
134 | + spin_lock_irqsave(&prison->lock, flags); | |
135 | + | |
136 | + cell = __search_bucket(prison->cells + hash, key); | |
137 | + if (cell) { | |
138 | + bio_list_add(&cell->bios, inmate); | |
139 | + goto out; | |
140 | + } | |
141 | + | |
142 | + /* | |
143 | + * Allocate a new cell | |
144 | + */ | |
145 | + spin_unlock_irqrestore(&prison->lock, flags); | |
146 | + cell2 = mempool_alloc(prison->cell_pool, GFP_NOIO); | |
147 | + spin_lock_irqsave(&prison->lock, flags); | |
148 | + | |
149 | + /* | |
150 | + * We've been unlocked, so we have to double check that | |
151 | + * nobody else has inserted this cell in the meantime. | |
152 | + */ | |
153 | + cell = __search_bucket(prison->cells + hash, key); | |
154 | + if (cell) { | |
155 | + mempool_free(cell2, prison->cell_pool); | |
156 | + bio_list_add(&cell->bios, inmate); | |
157 | + goto out; | |
158 | + } | |
159 | + | |
160 | + /* | |
161 | + * Use new cell. | |
162 | + */ | |
163 | + cell = cell2; | |
164 | + | |
165 | + cell->prison = prison; | |
166 | + memcpy(&cell->key, key, sizeof(cell->key)); | |
167 | + cell->holder = inmate; | |
168 | + bio_list_init(&cell->bios); | |
169 | + hlist_add_head(&cell->list, prison->cells + hash); | |
170 | + | |
171 | + r = 0; | |
172 | + | |
173 | +out: | |
174 | + spin_unlock_irqrestore(&prison->lock, flags); | |
175 | + | |
176 | + *ref = cell; | |
177 | + | |
178 | + return r; | |
179 | +} | |
180 | +EXPORT_SYMBOL_GPL(dm_bio_detain); | |
181 | + | |
/*
 * Unlink @cell from its bucket and return it to the mempool.
 *
 * @inmates must have been initialised prior to this call; the holder and
 * all waiting bios are appended to it.  A NULL @inmates discards them,
 * which is only safe when the caller knows the cell holds nothing it
 * still needs (see __cell_release_singleton()).
 *
 * Caller must hold prison->lock.
 */
static void __cell_release(struct dm_bio_prison_cell *cell, struct bio_list *inmates)
{
	struct dm_bio_prison *prison = cell->prison;

	hlist_del(&cell->list);

	if (inmates) {
		bio_list_add(inmates, cell->holder);
		bio_list_merge(inmates, &cell->bios);
	}

	mempool_free(cell, prison->cell_pool);
}
198 | + | |
/*
 * Unlock @cell, transferring the holder and all waiting bios onto @bios.
 * The cell is freed.
 */
void dm_cell_release(struct dm_bio_prison_cell *cell, struct bio_list *bios)
{
	unsigned long flags;
	struct dm_bio_prison *prison = cell->prison;

	spin_lock_irqsave(&prison->lock, flags);
	__cell_release(cell, bios);
	spin_unlock_irqrestore(&prison->lock, flags);
}
EXPORT_SYMBOL_GPL(dm_cell_release);
209 | + | |
/*
 * There are a couple of places where we put a bio into a cell briefly
 * before taking it out again. In these situations we know that no other
 * bio may be in the cell. This function releases the cell, and also does
 * a sanity check.
 *
 * Caller must hold prison->lock.  @bio must be the cell's holder.
 */
static void __cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio)
{
	BUG_ON(cell->holder != bio);
	BUG_ON(!bio_list_empty(&cell->bios));

	__cell_release(cell, NULL);
}
223 | + | |
/*
 * Locked wrapper around __cell_release_singleton().
 */
void dm_cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio)
{
	unsigned long flags;
	struct dm_bio_prison *prison = cell->prison;

	spin_lock_irqsave(&prison->lock, flags);
	__cell_release_singleton(cell, bio);
	spin_unlock_irqrestore(&prison->lock, flags);
}
EXPORT_SYMBOL_GPL(dm_cell_release_singleton);
234 | + | |
/*
 * Sometimes we don't want the holder, just the additional bios.
 *
 * Caller must hold prison->lock.  The holder is NOT added to @inmates;
 * the caller remains responsible for it.
 */
static void __cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list *inmates)
{
	struct dm_bio_prison *prison = cell->prison;

	hlist_del(&cell->list);
	bio_list_merge(inmates, &cell->bios);

	mempool_free(cell, prison->cell_pool);
}
247 | + | |
/*
 * Locked wrapper around __cell_release_no_holder().
 */
void dm_cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list *inmates)
{
	unsigned long flags;
	struct dm_bio_prison *prison = cell->prison;

	spin_lock_irqsave(&prison->lock, flags);
	__cell_release_no_holder(cell, inmates);
	spin_unlock_irqrestore(&prison->lock, flags);
}
EXPORT_SYMBOL_GPL(dm_cell_release_no_holder);
258 | + | |
259 | +void dm_cell_error(struct dm_bio_prison_cell *cell) | |
260 | +{ | |
261 | + struct dm_bio_prison *prison = cell->prison; | |
262 | + struct bio_list bios; | |
263 | + struct bio *bio; | |
264 | + unsigned long flags; | |
265 | + | |
266 | + bio_list_init(&bios); | |
267 | + | |
268 | + spin_lock_irqsave(&prison->lock, flags); | |
269 | + __cell_release(cell, &bios); | |
270 | + spin_unlock_irqrestore(&prison->lock, flags); | |
271 | + | |
272 | + while ((bio = bio_list_pop(&bios))) | |
273 | + bio_io_error(bio); | |
274 | +} | |
275 | +EXPORT_SYMBOL_GPL(dm_cell_error); | |
276 | + | |
277 | +/*----------------------------------------------------------------*/ | |
278 | + | |
/* Number of slots in a deferred set's circular buffer of entries. */
#define DEFERRED_SET_SIZE 64

/*
 * One slot in the circular buffer: a count of in-flight operations plus
 * the work items queued behind them.
 */
struct dm_deferred_entry {
	struct dm_deferred_set *ds;	/* owning set, for dm_deferred_entry_dec() */
	unsigned count;			/* operations charged to this slot */
	struct list_head work_items;	/* work deferred until count reaches zero */
};
286 | + | |
/*
 * A deferred set: a fixed-size ring of entries tracking in-flight
 * operations, protected by a single lock.
 */
struct dm_deferred_set {
	spinlock_t lock;		/* protects all fields below */
	unsigned current_entry;		/* slot that new operations are charged to */
	unsigned sweeper;		/* oldest slot not yet swept for work */
	struct dm_deferred_entry entries[DEFERRED_SET_SIZE];
};
293 | + | |
294 | +struct dm_deferred_set *dm_deferred_set_create(void) | |
295 | +{ | |
296 | + int i; | |
297 | + struct dm_deferred_set *ds; | |
298 | + | |
299 | + ds = kmalloc(sizeof(*ds), GFP_KERNEL); | |
300 | + if (!ds) | |
301 | + return NULL; | |
302 | + | |
303 | + spin_lock_init(&ds->lock); | |
304 | + ds->current_entry = 0; | |
305 | + ds->sweeper = 0; | |
306 | + for (i = 0; i < DEFERRED_SET_SIZE; i++) { | |
307 | + ds->entries[i].ds = ds; | |
308 | + ds->entries[i].count = 0; | |
309 | + INIT_LIST_HEAD(&ds->entries[i].work_items); | |
310 | + } | |
311 | + | |
312 | + return ds; | |
313 | +} | |
314 | +EXPORT_SYMBOL_GPL(dm_deferred_set_create); | |
315 | + | |
/*
 * Free a deferred set.  Any work items still queued are simply dropped,
 * so callers should have dispatched all work first.
 */
void dm_deferred_set_destroy(struct dm_deferred_set *ds)
{
	kfree(ds);
}
EXPORT_SYMBOL_GPL(dm_deferred_set_destroy);
321 | + | |
/*
 * Charge a new in-flight operation to the current entry and return that
 * entry.  Pair with dm_deferred_entry_dec() when the operation completes.
 */
struct dm_deferred_entry *dm_deferred_entry_inc(struct dm_deferred_set *ds)
{
	unsigned long flags;
	struct dm_deferred_entry *entry;

	spin_lock_irqsave(&ds->lock, flags);
	entry = ds->entries + ds->current_entry;
	entry->count++;
	spin_unlock_irqrestore(&ds->lock, flags);

	return entry;
}
EXPORT_SYMBOL_GPL(dm_deferred_entry_inc);
335 | + | |
/* Advance an index around the circular entry buffer. */
static unsigned ds_next(unsigned index)
{
	return (index + 1) % DEFERRED_SET_SIZE;
}
340 | + | |
/*
 * Move work items from quiesced entries (count == 0), starting at the
 * sweeper, onto @head, stopping at the first entry still in use.
 * Caller must hold ds->lock.
 */
static void __sweep(struct dm_deferred_set *ds, struct list_head *head)
{
	while ((ds->sweeper != ds->current_entry) &&
	       !ds->entries[ds->sweeper].count) {
		list_splice_init(&ds->entries[ds->sweeper].work_items, head);
		ds->sweeper = ds_next(ds->sweeper);
	}

	/* The current entry is swept too once it has fully quiesced. */
	if ((ds->sweeper == ds->current_entry) && !ds->entries[ds->sweeper].count)
		list_splice_init(&ds->entries[ds->sweeper].work_items, head);
}
352 | + | |
/*
 * Drop a reference taken with dm_deferred_entry_inc().  Any work items
 * that become dispatchable as a result are moved onto @head for the
 * caller to run.
 */
void dm_deferred_entry_dec(struct dm_deferred_entry *entry, struct list_head *head)
{
	unsigned long flags;

	spin_lock_irqsave(&entry->ds->lock, flags);
	BUG_ON(!entry->count);	/* unbalanced inc/dec */
	--entry->count;
	__sweep(entry->ds, head);
	spin_unlock_irqrestore(&entry->ds->lock, flags);
}
EXPORT_SYMBOL_GPL(dm_deferred_entry_dec);
364 | + | |
/*
 * Queue @work to run once all currently in-flight operations complete.
 *
 * Returns 1 if deferred or 0 if no pending items to delay job.  When 0
 * is returned the work has NOT been queued; the caller should run it
 * immediately.
 */
int dm_deferred_set_add_work(struct dm_deferred_set *ds, struct list_head *work)
{
	int r = 1;
	unsigned long flags;
	unsigned next_entry;

	spin_lock_irqsave(&ds->lock, flags);
	if ((ds->sweeper == ds->current_entry) &&
	    !ds->entries[ds->current_entry].count)
		r = 0;	/* set is quiescent: nothing to wait for */
	else {
		list_add(work, &ds->entries[ds->current_entry].work_items);
		/* Only roll on to the next slot if it has fully quiesced. */
		next_entry = ds_next(ds->current_entry);
		if (!ds->entries[next_entry].count)
			ds->current_entry = next_entry;
	}
	spin_unlock_irqrestore(&ds->lock, flags);

	return r;
}
EXPORT_SYMBOL_GPL(dm_deferred_set_add_work);
389 | + | |
390 | +/*----------------------------------------------------------------*/ | |
391 | + | |
/*
 * Module init: create the slab cache that backs every prison's cells.
 */
static int __init dm_bio_prison_init(void)
{
	_cell_cache = KMEM_CACHE(dm_bio_prison_cell, 0);
	if (!_cell_cache)
		return -ENOMEM;

	return 0;
}
400 | + | |
/*
 * Module exit: tear down the cell slab cache.
 */
static void __exit dm_bio_prison_exit(void)
{
	kmem_cache_destroy(_cell_cache);
	_cell_cache = NULL;	/* defensive: no allocations can follow */
}
406 | + | |
407 | +/* | |
408 | + * module hooks | |
409 | + */ | |
410 | +module_init(dm_bio_prison_init); | |
411 | +module_exit(dm_bio_prison_exit); | |
412 | + | |
413 | +MODULE_DESCRIPTION(DM_NAME " bio prison"); | |
414 | +MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>"); | |
415 | +MODULE_LICENSE("GPL"); |
drivers/md/dm-bio-prison.h
1 | +/* | |
2 | + * Copyright (C) 2011-2012 Red Hat, Inc. | |
3 | + * | |
4 | + * This file is released under the GPL. | |
5 | + */ | |
6 | + | |
7 | +#ifndef DM_BIO_PRISON_H | |
8 | +#define DM_BIO_PRISON_H | |
9 | + | |
10 | +#include "persistent-data/dm-block-manager.h" /* FIXME: for dm_block_t */ | |
11 | +#include "dm-thin-metadata.h" /* FIXME: for dm_thin_id */ | |
12 | + | |
13 | +#include <linux/list.h> | |
14 | +#include <linux/bio.h> | |
15 | + | |
16 | +/*----------------------------------------------------------------*/ | |
17 | + | |
18 | +/* | |
19 | + * Sometimes we can't deal with a bio straight away. We put them in prison | |
20 | + * where they can't cause any mischief. Bios are put in a cell identified | |
21 | + * by a key, multiple bios can be in the same cell. When the cell is | |
22 | + * subsequently unlocked the bios become available. | |
23 | + */ | |
24 | +struct dm_bio_prison; | |
25 | +struct dm_bio_prison_cell; | |
26 | + | |
/* FIXME: this needs to be more abstract */
struct dm_cell_key {
	int virtual;	/* presumably distinguishes virtual- from data-block keys — confirm against dm-thin callers */
	dm_thin_id dev;		/* thin device id */
	dm_block_t block;	/* block number; the only field hashed by the prison */
};
33 | + | |
34 | +struct dm_bio_prison *dm_bio_prison_create(unsigned nr_cells); | |
35 | +void dm_bio_prison_destroy(struct dm_bio_prison *prison); | |
36 | + | |
37 | +/* | |
38 | + * This may block if a new cell needs allocating. You must ensure that | |
39 | + * cells will be unlocked even if the calling thread is blocked. | |
40 | + * | |
41 | + * Returns 1 if the cell was already held, 0 if @inmate is the new holder. | |
42 | + */ | |
43 | +int dm_bio_detain(struct dm_bio_prison *prison, struct dm_cell_key *key, | |
44 | + struct bio *inmate, struct dm_bio_prison_cell **ref); | |
45 | + | |
46 | +void dm_cell_release(struct dm_bio_prison_cell *cell, struct bio_list *bios); | |
47 | +void dm_cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio); // FIXME: bio arg not needed | |
48 | +void dm_cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list *inmates); | |
49 | +void dm_cell_error(struct dm_bio_prison_cell *cell); | |
50 | + | |
51 | +/*----------------------------------------------------------------*/ | |
52 | + | |
53 | +/* | |
54 | + * We use the deferred set to keep track of pending reads to shared blocks. | |
55 | + * We do this to ensure the new mapping caused by a write isn't performed | |
56 | + * until these prior reads have completed. Otherwise the insertion of the | |
57 | + * new mapping could free the old block that the read bios are mapped to. | |
58 | + */ | |
59 | + | |
60 | +struct dm_deferred_set; | |
61 | +struct dm_deferred_entry; | |
62 | + | |
63 | +struct dm_deferred_set *dm_deferred_set_create(void); | |
64 | +void dm_deferred_set_destroy(struct dm_deferred_set *ds); | |
65 | + | |
66 | +struct dm_deferred_entry *dm_deferred_entry_inc(struct dm_deferred_set *ds); | |
67 | +void dm_deferred_entry_dec(struct dm_deferred_entry *entry, struct list_head *head); | |
68 | +int dm_deferred_set_add_work(struct dm_deferred_set *ds, struct list_head *work); | |
69 | + | |
70 | +/*----------------------------------------------------------------*/ | |
71 | + | |
72 | +#endif |
drivers/md/dm-thin.c
... | ... | @@ -5,6 +5,7 @@ |
5 | 5 | */ |
6 | 6 | |
7 | 7 | #include "dm-thin-metadata.h" |
8 | +#include "dm-bio-prison.h" | |
8 | 9 | #include "dm.h" |
9 | 10 | |
10 | 11 | #include <linux/device-mapper.h> |
... | ... | @@ -21,7 +22,6 @@ |
21 | 22 | * Tunable constants |
22 | 23 | */ |
23 | 24 | #define ENDIO_HOOK_POOL_SIZE 1024 |
24 | -#define DEFERRED_SET_SIZE 64 | |
25 | 25 | #define MAPPING_POOL_SIZE 1024 |
26 | 26 | #define PRISON_CELLS 1024 |
27 | 27 | #define COMMIT_PERIOD HZ |
... | ... | @@ -99,404 +99,6 @@ |
99 | 99 | /*----------------------------------------------------------------*/ |
100 | 100 | |
101 | 101 | /* |
102 | - * Sometimes we can't deal with a bio straight away. We put them in prison | |
103 | - * where they can't cause any mischief. Bios are put in a cell identified | |
104 | - * by a key, multiple bios can be in the same cell. When the cell is | |
105 | - * subsequently unlocked the bios become available. | |
106 | - */ | |
107 | -struct dm_bio_prison; | |
108 | - | |
109 | -struct dm_cell_key { | |
110 | - int virtual; | |
111 | - dm_thin_id dev; | |
112 | - dm_block_t block; | |
113 | -}; | |
114 | - | |
115 | -struct dm_bio_prison_cell { | |
116 | - struct hlist_node list; | |
117 | - struct dm_bio_prison *prison; | |
118 | - struct dm_cell_key key; | |
119 | - struct bio *holder; | |
120 | - struct bio_list bios; | |
121 | -}; | |
122 | - | |
123 | -struct dm_bio_prison { | |
124 | - spinlock_t lock; | |
125 | - mempool_t *cell_pool; | |
126 | - | |
127 | - unsigned nr_buckets; | |
128 | - unsigned hash_mask; | |
129 | - struct hlist_head *cells; | |
130 | -}; | |
131 | - | |
132 | -static uint32_t calc_nr_buckets(unsigned nr_cells) | |
133 | -{ | |
134 | - uint32_t n = 128; | |
135 | - | |
136 | - nr_cells /= 4; | |
137 | - nr_cells = min(nr_cells, 8192u); | |
138 | - | |
139 | - while (n < nr_cells) | |
140 | - n <<= 1; | |
141 | - | |
142 | - return n; | |
143 | -} | |
144 | - | |
145 | -static struct kmem_cache *_cell_cache; | |
146 | - | |
147 | -/* | |
148 | - * @nr_cells should be the number of cells you want in use _concurrently_. | |
149 | - * Don't confuse it with the number of distinct keys. | |
150 | - */ | |
151 | -static struct dm_bio_prison *dm_bio_prison_create(unsigned nr_cells) | |
152 | -{ | |
153 | - unsigned i; | |
154 | - uint32_t nr_buckets = calc_nr_buckets(nr_cells); | |
155 | - size_t len = sizeof(struct dm_bio_prison) + | |
156 | - (sizeof(struct hlist_head) * nr_buckets); | |
157 | - struct dm_bio_prison *prison = kmalloc(len, GFP_KERNEL); | |
158 | - | |
159 | - if (!prison) | |
160 | - return NULL; | |
161 | - | |
162 | - spin_lock_init(&prison->lock); | |
163 | - prison->cell_pool = mempool_create_slab_pool(nr_cells, _cell_cache); | |
164 | - if (!prison->cell_pool) { | |
165 | - kfree(prison); | |
166 | - return NULL; | |
167 | - } | |
168 | - | |
169 | - prison->nr_buckets = nr_buckets; | |
170 | - prison->hash_mask = nr_buckets - 1; | |
171 | - prison->cells = (struct hlist_head *) (prison + 1); | |
172 | - for (i = 0; i < nr_buckets; i++) | |
173 | - INIT_HLIST_HEAD(prison->cells + i); | |
174 | - | |
175 | - return prison; | |
176 | -} | |
177 | - | |
178 | -static void dm_bio_prison_destroy(struct dm_bio_prison *prison) | |
179 | -{ | |
180 | - mempool_destroy(prison->cell_pool); | |
181 | - kfree(prison); | |
182 | -} | |
183 | - | |
184 | -static uint32_t hash_key(struct dm_bio_prison *prison, struct dm_cell_key *key) | |
185 | -{ | |
186 | - const unsigned long BIG_PRIME = 4294967291UL; | |
187 | - uint64_t hash = key->block * BIG_PRIME; | |
188 | - | |
189 | - return (uint32_t) (hash & prison->hash_mask); | |
190 | -} | |
191 | - | |
192 | -static int keys_equal(struct dm_cell_key *lhs, struct dm_cell_key *rhs) | |
193 | -{ | |
194 | - return (lhs->virtual == rhs->virtual) && | |
195 | - (lhs->dev == rhs->dev) && | |
196 | - (lhs->block == rhs->block); | |
197 | -} | |
198 | - | |
199 | -static struct dm_bio_prison_cell *__search_bucket(struct hlist_head *bucket, | |
200 | - struct dm_cell_key *key) | |
201 | -{ | |
202 | - struct dm_bio_prison_cell *cell; | |
203 | - struct hlist_node *tmp; | |
204 | - | |
205 | - hlist_for_each_entry(cell, tmp, bucket, list) | |
206 | - if (keys_equal(&cell->key, key)) | |
207 | - return cell; | |
208 | - | |
209 | - return NULL; | |
210 | -} | |
211 | - | |
212 | -/* | |
213 | - * This may block if a new cell needs allocating. You must ensure that | |
214 | - * cells will be unlocked even if the calling thread is blocked. | |
215 | - * | |
216 | - * Returns 1 if the cell was already held, 0 if @inmate is the new holder. | |
217 | - */ | |
218 | -static int dm_bio_detain(struct dm_bio_prison *prison, struct dm_cell_key *key, | |
219 | - struct bio *inmate, struct dm_bio_prison_cell **ref) | |
220 | -{ | |
221 | - int r = 1; | |
222 | - unsigned long flags; | |
223 | - uint32_t hash = hash_key(prison, key); | |
224 | - struct dm_bio_prison_cell *cell, *cell2; | |
225 | - | |
226 | - BUG_ON(hash > prison->nr_buckets); | |
227 | - | |
228 | - spin_lock_irqsave(&prison->lock, flags); | |
229 | - | |
230 | - cell = __search_bucket(prison->cells + hash, key); | |
231 | - if (cell) { | |
232 | - bio_list_add(&cell->bios, inmate); | |
233 | - goto out; | |
234 | - } | |
235 | - | |
236 | - /* | |
237 | - * Allocate a new cell | |
238 | - */ | |
239 | - spin_unlock_irqrestore(&prison->lock, flags); | |
240 | - cell2 = mempool_alloc(prison->cell_pool, GFP_NOIO); | |
241 | - spin_lock_irqsave(&prison->lock, flags); | |
242 | - | |
243 | - /* | |
244 | - * We've been unlocked, so we have to double check that | |
245 | - * nobody else has inserted this cell in the meantime. | |
246 | - */ | |
247 | - cell = __search_bucket(prison->cells + hash, key); | |
248 | - if (cell) { | |
249 | - mempool_free(cell2, prison->cell_pool); | |
250 | - bio_list_add(&cell->bios, inmate); | |
251 | - goto out; | |
252 | - } | |
253 | - | |
254 | - /* | |
255 | - * Use new cell. | |
256 | - */ | |
257 | - cell = cell2; | |
258 | - | |
259 | - cell->prison = prison; | |
260 | - memcpy(&cell->key, key, sizeof(cell->key)); | |
261 | - cell->holder = inmate; | |
262 | - bio_list_init(&cell->bios); | |
263 | - hlist_add_head(&cell->list, prison->cells + hash); | |
264 | - | |
265 | - r = 0; | |
266 | - | |
267 | -out: | |
268 | - spin_unlock_irqrestore(&prison->lock, flags); | |
269 | - | |
270 | - *ref = cell; | |
271 | - | |
272 | - return r; | |
273 | -} | |
274 | - | |
275 | -/* | |
276 | - * @inmates must have been initialised prior to this call | |
277 | - */ | |
278 | -static void __cell_release(struct dm_bio_prison_cell *cell, struct bio_list *inmates) | |
279 | -{ | |
280 | - struct dm_bio_prison *prison = cell->prison; | |
281 | - | |
282 | - hlist_del(&cell->list); | |
283 | - | |
284 | - if (inmates) { | |
285 | - bio_list_add(inmates, cell->holder); | |
286 | - bio_list_merge(inmates, &cell->bios); | |
287 | - } | |
288 | - | |
289 | - mempool_free(cell, prison->cell_pool); | |
290 | -} | |
291 | - | |
292 | -static void dm_cell_release(struct dm_bio_prison_cell *cell, struct bio_list *bios) | |
293 | -{ | |
294 | - unsigned long flags; | |
295 | - struct dm_bio_prison *prison = cell->prison; | |
296 | - | |
297 | - spin_lock_irqsave(&prison->lock, flags); | |
298 | - __cell_release(cell, bios); | |
299 | - spin_unlock_irqrestore(&prison->lock, flags); | |
300 | -} | |
301 | - | |
302 | -/* | |
303 | - * There are a couple of places where we put a bio into a cell briefly | |
304 | - * before taking it out again. In these situations we know that no other | |
305 | - * bio may be in the cell. This function releases the cell, and also does | |
306 | - * a sanity check. | |
307 | - */ | |
308 | -static void __cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio) | |
309 | -{ | |
310 | - BUG_ON(cell->holder != bio); | |
311 | - BUG_ON(!bio_list_empty(&cell->bios)); | |
312 | - | |
313 | - __cell_release(cell, NULL); | |
314 | -} | |
315 | - | |
316 | -static void dm_cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio) | |
317 | -{ | |
318 | - unsigned long flags; | |
319 | - struct dm_bio_prison *prison = cell->prison; | |
320 | - | |
321 | - spin_lock_irqsave(&prison->lock, flags); | |
322 | - __cell_release_singleton(cell, bio); | |
323 | - spin_unlock_irqrestore(&prison->lock, flags); | |
324 | -} | |
325 | - | |
326 | -/* | |
327 | - * Sometimes we don't want the holder, just the additional bios. | |
328 | - */ | |
329 | -static void __cell_release_no_holder(struct dm_bio_prison_cell *cell, | |
330 | - struct bio_list *inmates) | |
331 | -{ | |
332 | - struct dm_bio_prison *prison = cell->prison; | |
333 | - | |
334 | - hlist_del(&cell->list); | |
335 | - bio_list_merge(inmates, &cell->bios); | |
336 | - | |
337 | - mempool_free(cell, prison->cell_pool); | |
338 | -} | |
339 | - | |
340 | -static void dm_cell_release_no_holder(struct dm_bio_prison_cell *cell, | |
341 | - struct bio_list *inmates) | |
342 | -{ | |
343 | - unsigned long flags; | |
344 | - struct dm_bio_prison *prison = cell->prison; | |
345 | - | |
346 | - spin_lock_irqsave(&prison->lock, flags); | |
347 | - __cell_release_no_holder(cell, inmates); | |
348 | - spin_unlock_irqrestore(&prison->lock, flags); | |
349 | -} | |
350 | - | |
351 | -static void dm_cell_error(struct dm_bio_prison_cell *cell) | |
352 | -{ | |
353 | - struct dm_bio_prison *prison = cell->prison; | |
354 | - struct bio_list bios; | |
355 | - struct bio *bio; | |
356 | - unsigned long flags; | |
357 | - | |
358 | - bio_list_init(&bios); | |
359 | - | |
360 | - spin_lock_irqsave(&prison->lock, flags); | |
361 | - __cell_release(cell, &bios); | |
362 | - spin_unlock_irqrestore(&prison->lock, flags); | |
363 | - | |
364 | - while ((bio = bio_list_pop(&bios))) | |
365 | - bio_io_error(bio); | |
366 | -} | |
367 | - | |
368 | -/*----------------------------------------------------------------*/ | |
369 | - | |
370 | -/* | |
371 | - * We use the deferred set to keep track of pending reads to shared blocks. | |
372 | - * We do this to ensure the new mapping caused by a write isn't performed | |
373 | - * until these prior reads have completed. Otherwise the insertion of the | |
374 | - * new mapping could free the old block that the read bios are mapped to. | |
375 | - */ | |
376 | - | |
377 | -struct dm_deferred_set; | |
378 | -struct dm_deferred_entry { | |
379 | - struct dm_deferred_set *ds; | |
380 | - unsigned count; | |
381 | - struct list_head work_items; | |
382 | -}; | |
383 | - | |
384 | -struct dm_deferred_set { | |
385 | - spinlock_t lock; | |
386 | - unsigned current_entry; | |
387 | - unsigned sweeper; | |
388 | - struct dm_deferred_entry entries[DEFERRED_SET_SIZE]; | |
389 | -}; | |
390 | - | |
391 | -static struct dm_deferred_set *dm_deferred_set_create(void) | |
392 | -{ | |
393 | - int i; | |
394 | - struct dm_deferred_set *ds; | |
395 | - | |
396 | - ds = kmalloc(sizeof(*ds), GFP_KERNEL); | |
397 | - if (!ds) | |
398 | - return NULL; | |
399 | - | |
400 | - spin_lock_init(&ds->lock); | |
401 | - ds->current_entry = 0; | |
402 | - ds->sweeper = 0; | |
403 | - for (i = 0; i < DEFERRED_SET_SIZE; i++) { | |
404 | - ds->entries[i].ds = ds; | |
405 | - ds->entries[i].count = 0; | |
406 | - INIT_LIST_HEAD(&ds->entries[i].work_items); | |
407 | - } | |
408 | - | |
409 | - return ds; | |
410 | -} | |
411 | - | |
412 | -static void dm_deferred_set_destroy(struct dm_deferred_set *ds) | |
413 | -{ | |
414 | - kfree(ds); | |
415 | -} | |
416 | - | |
417 | -static struct dm_deferred_entry *dm_deferred_entry_inc(struct dm_deferred_set *ds) | |
418 | -{ | |
419 | - unsigned long flags; | |
420 | - struct dm_deferred_entry *entry; | |
421 | - | |
422 | - spin_lock_irqsave(&ds->lock, flags); | |
423 | - entry = ds->entries + ds->current_entry; | |
424 | - entry->count++; | |
425 | - spin_unlock_irqrestore(&ds->lock, flags); | |
426 | - | |
427 | - return entry; | |
428 | -} | |
429 | - | |
430 | -static unsigned ds_next(unsigned index) | |
431 | -{ | |
432 | - return (index + 1) % DEFERRED_SET_SIZE; | |
433 | -} | |
434 | - | |
435 | -static void __sweep(struct dm_deferred_set *ds, struct list_head *head) | |
436 | -{ | |
437 | - while ((ds->sweeper != ds->current_entry) && | |
438 | - !ds->entries[ds->sweeper].count) { | |
439 | - list_splice_init(&ds->entries[ds->sweeper].work_items, head); | |
440 | - ds->sweeper = ds_next(ds->sweeper); | |
441 | - } | |
442 | - | |
443 | - if ((ds->sweeper == ds->current_entry) && !ds->entries[ds->sweeper].count) | |
444 | - list_splice_init(&ds->entries[ds->sweeper].work_items, head); | |
445 | -} | |
446 | - | |
447 | -static void dm_deferred_entry_dec(struct dm_deferred_entry *entry, struct list_head *head) | |
448 | -{ | |
449 | - unsigned long flags; | |
450 | - | |
451 | - spin_lock_irqsave(&entry->ds->lock, flags); | |
452 | - BUG_ON(!entry->count); | |
453 | - --entry->count; | |
454 | - __sweep(entry->ds, head); | |
455 | - spin_unlock_irqrestore(&entry->ds->lock, flags); | |
456 | -} | |
457 | - | |
458 | -/* | |
459 | - * Returns 1 if deferred or 0 if no pending items to delay job. | |
460 | - */ | |
461 | -static int dm_deferred_set_add_work(struct dm_deferred_set *ds, struct list_head *work) | |
462 | -{ | |
463 | - int r = 1; | |
464 | - unsigned long flags; | |
465 | - unsigned next_entry; | |
466 | - | |
467 | - spin_lock_irqsave(&ds->lock, flags); | |
468 | - if ((ds->sweeper == ds->current_entry) && | |
469 | - !ds->entries[ds->current_entry].count) | |
470 | - r = 0; | |
471 | - else { | |
472 | - list_add(work, &ds->entries[ds->current_entry].work_items); | |
473 | - next_entry = ds_next(ds->current_entry); | |
474 | - if (!ds->entries[next_entry].count) | |
475 | - ds->current_entry = next_entry; | |
476 | - } | |
477 | - spin_unlock_irqrestore(&ds->lock, flags); | |
478 | - | |
479 | - return r; | |
480 | -} | |
481 | - | |
482 | -static int __init dm_bio_prison_init(void) | |
483 | -{ | |
484 | - _cell_cache = KMEM_CACHE(dm_bio_prison_cell, 0); | |
485 | - if (!_cell_cache) | |
486 | - return -ENOMEM; | |
487 | - | |
488 | - return 0; | |
489 | -} | |
490 | - | |
491 | -static void __exit dm_bio_prison_exit(void) | |
492 | -{ | |
493 | - kmem_cache_destroy(_cell_cache); | |
494 | - _cell_cache = NULL; | |
495 | -} | |
496 | - | |
497 | -/*----------------------------------------------------------------*/ | |
498 | - | |
499 | -/* | |
500 | 102 | * Key building. |
501 | 103 | */ |
502 | 104 | static void build_data_key(struct dm_thin_device *td, |
... | ... | @@ -2852,7 +2454,7 @@ |
2852 | 2454 | .name = "thin-pool", |
2853 | 2455 | .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | |
2854 | 2456 | DM_TARGET_IMMUTABLE, |
2855 | - .version = {1, 4, 0}, | |
2457 | + .version = {1, 5, 0}, | |
2856 | 2458 | .module = THIS_MODULE, |
2857 | 2459 | .ctr = pool_ctr, |
2858 | 2460 | .dtr = pool_dtr, |
... | ... | @@ -3143,7 +2745,7 @@ |
3143 | 2745 | |
3144 | 2746 | static struct target_type thin_target = { |
3145 | 2747 | .name = "thin", |
3146 | - .version = {1, 4, 0}, | |
2748 | + .version = {1, 5, 0}, | |
3147 | 2749 | .module = THIS_MODULE, |
3148 | 2750 | .ctr = thin_ctr, |
3149 | 2751 | .dtr = thin_dtr, |
... | ... | @@ -3173,8 +2775,6 @@ |
3173 | 2775 | |
3174 | 2776 | r = -ENOMEM; |
3175 | 2777 | |
3176 | - dm_bio_prison_init(); | |
3177 | - | |
3178 | 2778 | _new_mapping_cache = KMEM_CACHE(dm_thin_new_mapping, 0); |
3179 | 2779 | if (!_new_mapping_cache) |
3180 | 2780 | goto bad_new_mapping_cache; |
... | ... | @@ -3200,7 +2800,6 @@ |
3200 | 2800 | dm_unregister_target(&thin_target); |
3201 | 2801 | dm_unregister_target(&pool_target); |
3202 | 2802 | |
3203 | - dm_bio_prison_exit(); | |
3204 | 2803 | kmem_cache_destroy(_new_mapping_cache); |
3205 | 2804 | kmem_cache_destroy(_endio_hook_cache); |
3206 | 2805 | } |