Commit ed8b752bccf2560e305e25125721d2f0ac759e88
Committed by Alasdair G Kergon
1 parent: 772ae5f54d
Exists in master and in 6 other branches
dm table: set flush capability based on underlying devices
DM has always advertised both REQ_FLUSH and REQ_FUA flush capabilities regardless of whether or not a given DM device's underlying devices also advertised a need for them.

Block's flush-merge changes from 2.6.39 have proven to be more costly for DM devices. Performance regressions have been reported even when DM's underlying devices do not advertise that they have a write cache.

Fix the performance regressions by configuring a DM device's flush capabilities based on the capabilities of its underlying devices.

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
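The fix the message describes can be sketched with the iterate_devices pattern already used elsewhere in this file: walk each target's underlying devices and advertise a flush flag only if at least one of those devices reports it in its request queue. The helper names below (device_flush_capable, dm_table_supports_flush) are illustrative; the diff excerpt below does not reach the hunk that implements this, so treat this as a sketch of the idea rather than the patch itself.

/* Sketch: does this underlying device's queue advertise the requested flush flag? */
static int device_flush_capable(struct dm_target *ti, struct dm_dev *dev,
				sector_t start, sector_t len, void *data)
{
	unsigned flush = *(unsigned *) data;
	struct request_queue *q = bdev_get_queue(dev->bdev);

	return q && (q->flush_flags & flush);
}

/* Sketch: a table supports a flush flag if any flush-capable target has a device that does. */
static bool dm_table_supports_flush(struct dm_table *t, unsigned flush)
{
	struct dm_target *ti;
	unsigned i = 0;

	while (i < dm_table_get_num_targets(t)) {
		ti = dm_table_get_target(t, i++);

		if (!ti->num_flush_requests)
			continue;

		if (ti->type->iterate_devices &&
		    ti->type->iterate_devices(ti, device_flush_capable, &flush))
			return true;
	}

	return false;
}

When queue restrictions are set for the table, blk_queue_flush() would then be passed only the REQ_FLUSH/REQ_FUA bits these checks confirm, instead of both bits being advertised unconditionally.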
Showing 2 changed files with 43 additions and 1 deletion (inline diff)
drivers/md/dm-table.c
1 | /* | 1 | /* |
2 | * Copyright (C) 2001 Sistina Software (UK) Limited. | 2 | * Copyright (C) 2001 Sistina Software (UK) Limited. |
3 | * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. | 3 | * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. |
4 | * | 4 | * |
5 | * This file is released under the GPL. | 5 | * This file is released under the GPL. |
6 | */ | 6 | */ |
7 | 7 | ||
8 | #include "dm.h" | 8 | #include "dm.h" |
9 | 9 | ||
10 | #include <linux/module.h> | 10 | #include <linux/module.h> |
11 | #include <linux/vmalloc.h> | 11 | #include <linux/vmalloc.h> |
12 | #include <linux/blkdev.h> | 12 | #include <linux/blkdev.h> |
13 | #include <linux/namei.h> | 13 | #include <linux/namei.h> |
14 | #include <linux/ctype.h> | 14 | #include <linux/ctype.h> |
15 | #include <linux/string.h> | 15 | #include <linux/string.h> |
16 | #include <linux/slab.h> | 16 | #include <linux/slab.h> |
17 | #include <linux/interrupt.h> | 17 | #include <linux/interrupt.h> |
18 | #include <linux/mutex.h> | 18 | #include <linux/mutex.h> |
19 | #include <linux/delay.h> | 19 | #include <linux/delay.h> |
20 | #include <linux/atomic.h> | 20 | #include <linux/atomic.h> |
21 | 21 | ||
22 | #define DM_MSG_PREFIX "table" | 22 | #define DM_MSG_PREFIX "table" |
23 | 23 | ||
24 | #define MAX_DEPTH 16 | 24 | #define MAX_DEPTH 16 |
25 | #define NODE_SIZE L1_CACHE_BYTES | 25 | #define NODE_SIZE L1_CACHE_BYTES |
26 | #define KEYS_PER_NODE (NODE_SIZE / sizeof(sector_t)) | 26 | #define KEYS_PER_NODE (NODE_SIZE / sizeof(sector_t)) |
27 | #define CHILDREN_PER_NODE (KEYS_PER_NODE + 1) | 27 | #define CHILDREN_PER_NODE (KEYS_PER_NODE + 1) |
28 | 28 | ||
29 | /* | 29 | /* |
30 | * The table has always exactly one reference from either mapped_device->map | 30 | * The table has always exactly one reference from either mapped_device->map |
31 | * or hash_cell->new_map. This reference is not counted in table->holders. | 31 | * or hash_cell->new_map. This reference is not counted in table->holders. |
32 | * A pair of dm_create_table/dm_destroy_table functions is used for table | 32 | * A pair of dm_create_table/dm_destroy_table functions is used for table |
33 | * creation/destruction. | 33 | * creation/destruction. |
34 | * | 34 | * |
35 | * Temporary references from the other code increase table->holders. A pair | 35 | * Temporary references from the other code increase table->holders. A pair |
36 | * of dm_table_get/dm_table_put functions is used to manipulate it. | 36 | * of dm_table_get/dm_table_put functions is used to manipulate it. |
37 | * | 37 | * |
38 | * When the table is about to be destroyed, we wait for table->holders to | 38 | * When the table is about to be destroyed, we wait for table->holders to |
39 | * drop to zero. | 39 | * drop to zero. |
40 | */ | 40 | */ |
41 | 41 | ||
42 | struct dm_table { | 42 | struct dm_table { |
43 | struct mapped_device *md; | 43 | struct mapped_device *md; |
44 | atomic_t holders; | 44 | atomic_t holders; |
45 | unsigned type; | 45 | unsigned type; |
46 | 46 | ||
47 | /* btree table */ | 47 | /* btree table */ |
48 | unsigned int depth; | 48 | unsigned int depth; |
49 | unsigned int counts[MAX_DEPTH]; /* in nodes */ | 49 | unsigned int counts[MAX_DEPTH]; /* in nodes */ |
50 | sector_t *index[MAX_DEPTH]; | 50 | sector_t *index[MAX_DEPTH]; |
51 | 51 | ||
52 | unsigned int num_targets; | 52 | unsigned int num_targets; |
53 | unsigned int num_allocated; | 53 | unsigned int num_allocated; |
54 | sector_t *highs; | 54 | sector_t *highs; |
55 | struct dm_target *targets; | 55 | struct dm_target *targets; |
56 | 56 | ||
57 | unsigned integrity_supported:1; | 57 | unsigned integrity_supported:1; |
58 | 58 | ||
59 | /* | 59 | /* |
60 | * Indicates the rw permissions for the new logical | 60 | * Indicates the rw permissions for the new logical |
61 | * device. This should be a combination of FMODE_READ | 61 | * device. This should be a combination of FMODE_READ |
62 | * and FMODE_WRITE. | 62 | * and FMODE_WRITE. |
63 | */ | 63 | */ |
64 | fmode_t mode; | 64 | fmode_t mode; |
65 | 65 | ||
66 | /* a list of devices used by this table */ | 66 | /* a list of devices used by this table */ |
67 | struct list_head devices; | 67 | struct list_head devices; |
68 | 68 | ||
69 | /* events get handed up using this callback */ | 69 | /* events get handed up using this callback */ |
70 | void (*event_fn)(void *); | 70 | void (*event_fn)(void *); |
71 | void *event_context; | 71 | void *event_context; |
72 | 72 | ||
73 | struct dm_md_mempools *mempools; | 73 | struct dm_md_mempools *mempools; |
74 | 74 | ||
75 | struct list_head target_callbacks; | 75 | struct list_head target_callbacks; |
76 | }; | 76 | }; |
77 | 77 | ||
78 | /* | 78 | /* |
79 | * Similar to ceiling(log_size(n)) | 79 | * Similar to ceiling(log_size(n)) |
80 | */ | 80 | */ |
81 | static unsigned int int_log(unsigned int n, unsigned int base) | 81 | static unsigned int int_log(unsigned int n, unsigned int base) |
82 | { | 82 | { |
83 | int result = 0; | 83 | int result = 0; |
84 | 84 | ||
85 | while (n > 1) { | 85 | while (n > 1) { |
86 | n = dm_div_up(n, base); | 86 | n = dm_div_up(n, base); |
87 | result++; | 87 | result++; |
88 | } | 88 | } |
89 | 89 | ||
90 | return result; | 90 | return result; |
91 | } | 91 | } |
92 | 92 | ||
93 | /* | 93 | /* |
94 | * Calculate the index of the child node of the n'th node k'th key. | 94 | * Calculate the index of the child node of the n'th node k'th key. |
95 | */ | 95 | */ |
96 | static inline unsigned int get_child(unsigned int n, unsigned int k) | 96 | static inline unsigned int get_child(unsigned int n, unsigned int k) |
97 | { | 97 | { |
98 | return (n * CHILDREN_PER_NODE) + k; | 98 | return (n * CHILDREN_PER_NODE) + k; |
99 | } | 99 | } |
100 | 100 | ||
101 | /* | 101 | /* |
102 | * Return the n'th node of level l from table t. | 102 | * Return the n'th node of level l from table t. |
103 | */ | 103 | */ |
104 | static inline sector_t *get_node(struct dm_table *t, | 104 | static inline sector_t *get_node(struct dm_table *t, |
105 | unsigned int l, unsigned int n) | 105 | unsigned int l, unsigned int n) |
106 | { | 106 | { |
107 | return t->index[l] + (n * KEYS_PER_NODE); | 107 | return t->index[l] + (n * KEYS_PER_NODE); |
108 | } | 108 | } |
109 | 109 | ||
110 | /* | 110 | /* |
111 | * Return the highest key that you could lookup from the n'th | 111 | * Return the highest key that you could lookup from the n'th |
112 | * node on level l of the btree. | 112 | * node on level l of the btree. |
113 | */ | 113 | */ |
114 | static sector_t high(struct dm_table *t, unsigned int l, unsigned int n) | 114 | static sector_t high(struct dm_table *t, unsigned int l, unsigned int n) |
115 | { | 115 | { |
116 | for (; l < t->depth - 1; l++) | 116 | for (; l < t->depth - 1; l++) |
117 | n = get_child(n, CHILDREN_PER_NODE - 1); | 117 | n = get_child(n, CHILDREN_PER_NODE - 1); |
118 | 118 | ||
119 | if (n >= t->counts[l]) | 119 | if (n >= t->counts[l]) |
120 | return (sector_t) - 1; | 120 | return (sector_t) - 1; |
121 | 121 | ||
122 | return get_node(t, l, n)[KEYS_PER_NODE - 1]; | 122 | return get_node(t, l, n)[KEYS_PER_NODE - 1]; |
123 | } | 123 | } |
124 | 124 | ||
125 | /* | 125 | /* |
126 | * Fills in a level of the btree based on the highs of the level | 126 | * Fills in a level of the btree based on the highs of the level |
127 | * below it. | 127 | * below it. |
128 | */ | 128 | */ |
129 | static int setup_btree_index(unsigned int l, struct dm_table *t) | 129 | static int setup_btree_index(unsigned int l, struct dm_table *t) |
130 | { | 130 | { |
131 | unsigned int n, k; | 131 | unsigned int n, k; |
132 | sector_t *node; | 132 | sector_t *node; |
133 | 133 | ||
134 | for (n = 0U; n < t->counts[l]; n++) { | 134 | for (n = 0U; n < t->counts[l]; n++) { |
135 | node = get_node(t, l, n); | 135 | node = get_node(t, l, n); |
136 | 136 | ||
137 | for (k = 0U; k < KEYS_PER_NODE; k++) | 137 | for (k = 0U; k < KEYS_PER_NODE; k++) |
138 | node[k] = high(t, l + 1, get_child(n, k)); | 138 | node[k] = high(t, l + 1, get_child(n, k)); |
139 | } | 139 | } |
140 | 140 | ||
141 | return 0; | 141 | return 0; |
142 | } | 142 | } |
143 | 143 | ||
144 | void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size) | 144 | void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size) |
145 | { | 145 | { |
146 | unsigned long size; | 146 | unsigned long size; |
147 | void *addr; | 147 | void *addr; |
148 | 148 | ||
149 | /* | 149 | /* |
150 | * Check that we're not going to overflow. | 150 | * Check that we're not going to overflow. |
151 | */ | 151 | */ |
152 | if (nmemb > (ULONG_MAX / elem_size)) | 152 | if (nmemb > (ULONG_MAX / elem_size)) |
153 | return NULL; | 153 | return NULL; |
154 | 154 | ||
155 | size = nmemb * elem_size; | 155 | size = nmemb * elem_size; |
156 | addr = vzalloc(size); | 156 | addr = vzalloc(size); |
157 | 157 | ||
158 | return addr; | 158 | return addr; |
159 | } | 159 | } |
160 | EXPORT_SYMBOL(dm_vcalloc); | 160 | EXPORT_SYMBOL(dm_vcalloc); |
161 | 161 | ||
162 | /* | 162 | /* |
163 | * highs, and targets are managed as dynamic arrays during a | 163 | * highs, and targets are managed as dynamic arrays during a |
164 | * table load. | 164 | * table load. |
165 | */ | 165 | */ |
166 | static int alloc_targets(struct dm_table *t, unsigned int num) | 166 | static int alloc_targets(struct dm_table *t, unsigned int num) |
167 | { | 167 | { |
168 | sector_t *n_highs; | 168 | sector_t *n_highs; |
169 | struct dm_target *n_targets; | 169 | struct dm_target *n_targets; |
170 | int n = t->num_targets; | 170 | int n = t->num_targets; |
171 | 171 | ||
172 | /* | 172 | /* |
173 | * Allocate both the target array and offset array at once. | 173 | * Allocate both the target array and offset array at once. |
174 | * Append an empty entry to catch sectors beyond the end of | 174 | * Append an empty entry to catch sectors beyond the end of |
175 | * the device. | 175 | * the device. |
176 | */ | 176 | */ |
177 | n_highs = (sector_t *) dm_vcalloc(num + 1, sizeof(struct dm_target) + | 177 | n_highs = (sector_t *) dm_vcalloc(num + 1, sizeof(struct dm_target) + |
178 | sizeof(sector_t)); | 178 | sizeof(sector_t)); |
179 | if (!n_highs) | 179 | if (!n_highs) |
180 | return -ENOMEM; | 180 | return -ENOMEM; |
181 | 181 | ||
182 | n_targets = (struct dm_target *) (n_highs + num); | 182 | n_targets = (struct dm_target *) (n_highs + num); |
183 | 183 | ||
184 | if (n) { | 184 | if (n) { |
185 | memcpy(n_highs, t->highs, sizeof(*n_highs) * n); | 185 | memcpy(n_highs, t->highs, sizeof(*n_highs) * n); |
186 | memcpy(n_targets, t->targets, sizeof(*n_targets) * n); | 186 | memcpy(n_targets, t->targets, sizeof(*n_targets) * n); |
187 | } | 187 | } |
188 | 188 | ||
189 | memset(n_highs + n, -1, sizeof(*n_highs) * (num - n)); | 189 | memset(n_highs + n, -1, sizeof(*n_highs) * (num - n)); |
190 | vfree(t->highs); | 190 | vfree(t->highs); |
191 | 191 | ||
192 | t->num_allocated = num; | 192 | t->num_allocated = num; |
193 | t->highs = n_highs; | 193 | t->highs = n_highs; |
194 | t->targets = n_targets; | 194 | t->targets = n_targets; |
195 | 195 | ||
196 | return 0; | 196 | return 0; |
197 | } | 197 | } |
198 | 198 | ||
199 | int dm_table_create(struct dm_table **result, fmode_t mode, | 199 | int dm_table_create(struct dm_table **result, fmode_t mode, |
200 | unsigned num_targets, struct mapped_device *md) | 200 | unsigned num_targets, struct mapped_device *md) |
201 | { | 201 | { |
202 | struct dm_table *t = kzalloc(sizeof(*t), GFP_KERNEL); | 202 | struct dm_table *t = kzalloc(sizeof(*t), GFP_KERNEL); |
203 | 203 | ||
204 | if (!t) | 204 | if (!t) |
205 | return -ENOMEM; | 205 | return -ENOMEM; |
206 | 206 | ||
207 | INIT_LIST_HEAD(&t->devices); | 207 | INIT_LIST_HEAD(&t->devices); |
208 | INIT_LIST_HEAD(&t->target_callbacks); | 208 | INIT_LIST_HEAD(&t->target_callbacks); |
209 | atomic_set(&t->holders, 0); | 209 | atomic_set(&t->holders, 0); |
210 | 210 | ||
211 | if (!num_targets) | 211 | if (!num_targets) |
212 | num_targets = KEYS_PER_NODE; | 212 | num_targets = KEYS_PER_NODE; |
213 | 213 | ||
214 | num_targets = dm_round_up(num_targets, KEYS_PER_NODE); | 214 | num_targets = dm_round_up(num_targets, KEYS_PER_NODE); |
215 | 215 | ||
216 | if (alloc_targets(t, num_targets)) { | 216 | if (alloc_targets(t, num_targets)) { |
217 | kfree(t); | 217 | kfree(t); |
218 | t = NULL; | 218 | t = NULL; |
219 | return -ENOMEM; | 219 | return -ENOMEM; |
220 | } | 220 | } |
221 | 221 | ||
222 | t->mode = mode; | 222 | t->mode = mode; |
223 | t->md = md; | 223 | t->md = md; |
224 | *result = t; | 224 | *result = t; |
225 | return 0; | 225 | return 0; |
226 | } | 226 | } |
227 | 227 | ||
228 | static void free_devices(struct list_head *devices) | 228 | static void free_devices(struct list_head *devices) |
229 | { | 229 | { |
230 | struct list_head *tmp, *next; | 230 | struct list_head *tmp, *next; |
231 | 231 | ||
232 | list_for_each_safe(tmp, next, devices) { | 232 | list_for_each_safe(tmp, next, devices) { |
233 | struct dm_dev_internal *dd = | 233 | struct dm_dev_internal *dd = |
234 | list_entry(tmp, struct dm_dev_internal, list); | 234 | list_entry(tmp, struct dm_dev_internal, list); |
235 | DMWARN("dm_table_destroy: dm_put_device call missing for %s", | 235 | DMWARN("dm_table_destroy: dm_put_device call missing for %s", |
236 | dd->dm_dev.name); | 236 | dd->dm_dev.name); |
237 | kfree(dd); | 237 | kfree(dd); |
238 | } | 238 | } |
239 | } | 239 | } |
240 | 240 | ||
241 | void dm_table_destroy(struct dm_table *t) | 241 | void dm_table_destroy(struct dm_table *t) |
242 | { | 242 | { |
243 | unsigned int i; | 243 | unsigned int i; |
244 | 244 | ||
245 | if (!t) | 245 | if (!t) |
246 | return; | 246 | return; |
247 | 247 | ||
248 | while (atomic_read(&t->holders)) | 248 | while (atomic_read(&t->holders)) |
249 | msleep(1); | 249 | msleep(1); |
250 | smp_mb(); | 250 | smp_mb(); |
251 | 251 | ||
252 | /* free the indexes */ | 252 | /* free the indexes */ |
253 | if (t->depth >= 2) | 253 | if (t->depth >= 2) |
254 | vfree(t->index[t->depth - 2]); | 254 | vfree(t->index[t->depth - 2]); |
255 | 255 | ||
256 | /* free the targets */ | 256 | /* free the targets */ |
257 | for (i = 0; i < t->num_targets; i++) { | 257 | for (i = 0; i < t->num_targets; i++) { |
258 | struct dm_target *tgt = t->targets + i; | 258 | struct dm_target *tgt = t->targets + i; |
259 | 259 | ||
260 | if (tgt->type->dtr) | 260 | if (tgt->type->dtr) |
261 | tgt->type->dtr(tgt); | 261 | tgt->type->dtr(tgt); |
262 | 262 | ||
263 | dm_put_target_type(tgt->type); | 263 | dm_put_target_type(tgt->type); |
264 | } | 264 | } |
265 | 265 | ||
266 | vfree(t->highs); | 266 | vfree(t->highs); |
267 | 267 | ||
268 | /* free the device list */ | 268 | /* free the device list */ |
269 | if (t->devices.next != &t->devices) | 269 | if (t->devices.next != &t->devices) |
270 | free_devices(&t->devices); | 270 | free_devices(&t->devices); |
271 | 271 | ||
272 | dm_free_md_mempools(t->mempools); | 272 | dm_free_md_mempools(t->mempools); |
273 | 273 | ||
274 | kfree(t); | 274 | kfree(t); |
275 | } | 275 | } |
276 | 276 | ||
277 | void dm_table_get(struct dm_table *t) | 277 | void dm_table_get(struct dm_table *t) |
278 | { | 278 | { |
279 | atomic_inc(&t->holders); | 279 | atomic_inc(&t->holders); |
280 | } | 280 | } |
281 | EXPORT_SYMBOL(dm_table_get); | 281 | EXPORT_SYMBOL(dm_table_get); |
282 | 282 | ||
283 | void dm_table_put(struct dm_table *t) | 283 | void dm_table_put(struct dm_table *t) |
284 | { | 284 | { |
285 | if (!t) | 285 | if (!t) |
286 | return; | 286 | return; |
287 | 287 | ||
288 | smp_mb__before_atomic_dec(); | 288 | smp_mb__before_atomic_dec(); |
289 | atomic_dec(&t->holders); | 289 | atomic_dec(&t->holders); |
290 | } | 290 | } |
291 | EXPORT_SYMBOL(dm_table_put); | 291 | EXPORT_SYMBOL(dm_table_put); |
292 | 292 | ||
293 | /* | 293 | /* |
294 | * Checks to see if we need to extend highs or targets. | 294 | * Checks to see if we need to extend highs or targets. |
295 | */ | 295 | */ |
296 | static inline int check_space(struct dm_table *t) | 296 | static inline int check_space(struct dm_table *t) |
297 | { | 297 | { |
298 | if (t->num_targets >= t->num_allocated) | 298 | if (t->num_targets >= t->num_allocated) |
299 | return alloc_targets(t, t->num_allocated * 2); | 299 | return alloc_targets(t, t->num_allocated * 2); |
300 | 300 | ||
301 | return 0; | 301 | return 0; |
302 | } | 302 | } |
303 | 303 | ||
304 | /* | 304 | /* |
305 | * See if we've already got a device in the list. | 305 | * See if we've already got a device in the list. |
306 | */ | 306 | */ |
307 | static struct dm_dev_internal *find_device(struct list_head *l, dev_t dev) | 307 | static struct dm_dev_internal *find_device(struct list_head *l, dev_t dev) |
308 | { | 308 | { |
309 | struct dm_dev_internal *dd; | 309 | struct dm_dev_internal *dd; |
310 | 310 | ||
311 | list_for_each_entry (dd, l, list) | 311 | list_for_each_entry (dd, l, list) |
312 | if (dd->dm_dev.bdev->bd_dev == dev) | 312 | if (dd->dm_dev.bdev->bd_dev == dev) |
313 | return dd; | 313 | return dd; |
314 | 314 | ||
315 | return NULL; | 315 | return NULL; |
316 | } | 316 | } |
317 | 317 | ||
318 | /* | 318 | /* |
319 | * Open a device so we can use it as a map destination. | 319 | * Open a device so we can use it as a map destination. |
320 | */ | 320 | */ |
321 | static int open_dev(struct dm_dev_internal *d, dev_t dev, | 321 | static int open_dev(struct dm_dev_internal *d, dev_t dev, |
322 | struct mapped_device *md) | 322 | struct mapped_device *md) |
323 | { | 323 | { |
324 | static char *_claim_ptr = "I belong to device-mapper"; | 324 | static char *_claim_ptr = "I belong to device-mapper"; |
325 | struct block_device *bdev; | 325 | struct block_device *bdev; |
326 | 326 | ||
327 | int r; | 327 | int r; |
328 | 328 | ||
329 | BUG_ON(d->dm_dev.bdev); | 329 | BUG_ON(d->dm_dev.bdev); |
330 | 330 | ||
331 | bdev = blkdev_get_by_dev(dev, d->dm_dev.mode | FMODE_EXCL, _claim_ptr); | 331 | bdev = blkdev_get_by_dev(dev, d->dm_dev.mode | FMODE_EXCL, _claim_ptr); |
332 | if (IS_ERR(bdev)) | 332 | if (IS_ERR(bdev)) |
333 | return PTR_ERR(bdev); | 333 | return PTR_ERR(bdev); |
334 | 334 | ||
335 | r = bd_link_disk_holder(bdev, dm_disk(md)); | 335 | r = bd_link_disk_holder(bdev, dm_disk(md)); |
336 | if (r) { | 336 | if (r) { |
337 | blkdev_put(bdev, d->dm_dev.mode | FMODE_EXCL); | 337 | blkdev_put(bdev, d->dm_dev.mode | FMODE_EXCL); |
338 | return r; | 338 | return r; |
339 | } | 339 | } |
340 | 340 | ||
341 | d->dm_dev.bdev = bdev; | 341 | d->dm_dev.bdev = bdev; |
342 | return 0; | 342 | return 0; |
343 | } | 343 | } |
344 | 344 | ||
345 | /* | 345 | /* |
346 | * Close a device that we've been using. | 346 | * Close a device that we've been using. |
347 | */ | 347 | */ |
348 | static void close_dev(struct dm_dev_internal *d, struct mapped_device *md) | 348 | static void close_dev(struct dm_dev_internal *d, struct mapped_device *md) |
349 | { | 349 | { |
350 | if (!d->dm_dev.bdev) | 350 | if (!d->dm_dev.bdev) |
351 | return; | 351 | return; |
352 | 352 | ||
353 | bd_unlink_disk_holder(d->dm_dev.bdev, dm_disk(md)); | 353 | bd_unlink_disk_holder(d->dm_dev.bdev, dm_disk(md)); |
354 | blkdev_put(d->dm_dev.bdev, d->dm_dev.mode | FMODE_EXCL); | 354 | blkdev_put(d->dm_dev.bdev, d->dm_dev.mode | FMODE_EXCL); |
355 | d->dm_dev.bdev = NULL; | 355 | d->dm_dev.bdev = NULL; |
356 | } | 356 | } |
357 | 357 | ||
358 | /* | 358 | /* |
359 | * If possible, this checks an area of a destination device is invalid. | 359 | * If possible, this checks an area of a destination device is invalid. |
360 | */ | 360 | */ |
361 | static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev, | 361 | static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev, |
362 | sector_t start, sector_t len, void *data) | 362 | sector_t start, sector_t len, void *data) |
363 | { | 363 | { |
364 | struct request_queue *q; | 364 | struct request_queue *q; |
365 | struct queue_limits *limits = data; | 365 | struct queue_limits *limits = data; |
366 | struct block_device *bdev = dev->bdev; | 366 | struct block_device *bdev = dev->bdev; |
367 | sector_t dev_size = | 367 | sector_t dev_size = |
368 | i_size_read(bdev->bd_inode) >> SECTOR_SHIFT; | 368 | i_size_read(bdev->bd_inode) >> SECTOR_SHIFT; |
369 | unsigned short logical_block_size_sectors = | 369 | unsigned short logical_block_size_sectors = |
370 | limits->logical_block_size >> SECTOR_SHIFT; | 370 | limits->logical_block_size >> SECTOR_SHIFT; |
371 | char b[BDEVNAME_SIZE]; | 371 | char b[BDEVNAME_SIZE]; |
372 | 372 | ||
373 | /* | 373 | /* |
374 | * Some devices exist without request functions, | 374 | * Some devices exist without request functions, |
375 | * such as loop devices not yet bound to backing files. | 375 | * such as loop devices not yet bound to backing files. |
376 | * Forbid the use of such devices. | 376 | * Forbid the use of such devices. |
377 | */ | 377 | */ |
378 | q = bdev_get_queue(bdev); | 378 | q = bdev_get_queue(bdev); |
379 | if (!q || !q->make_request_fn) { | 379 | if (!q || !q->make_request_fn) { |
380 | DMWARN("%s: %s is not yet initialised: " | 380 | DMWARN("%s: %s is not yet initialised: " |
381 | "start=%llu, len=%llu, dev_size=%llu", | 381 | "start=%llu, len=%llu, dev_size=%llu", |
382 | dm_device_name(ti->table->md), bdevname(bdev, b), | 382 | dm_device_name(ti->table->md), bdevname(bdev, b), |
383 | (unsigned long long)start, | 383 | (unsigned long long)start, |
384 | (unsigned long long)len, | 384 | (unsigned long long)len, |
385 | (unsigned long long)dev_size); | 385 | (unsigned long long)dev_size); |
386 | return 1; | 386 | return 1; |
387 | } | 387 | } |
388 | 388 | ||
389 | if (!dev_size) | 389 | if (!dev_size) |
390 | return 0; | 390 | return 0; |
391 | 391 | ||
392 | if ((start >= dev_size) || (start + len > dev_size)) { | 392 | if ((start >= dev_size) || (start + len > dev_size)) { |
393 | DMWARN("%s: %s too small for target: " | 393 | DMWARN("%s: %s too small for target: " |
394 | "start=%llu, len=%llu, dev_size=%llu", | 394 | "start=%llu, len=%llu, dev_size=%llu", |
395 | dm_device_name(ti->table->md), bdevname(bdev, b), | 395 | dm_device_name(ti->table->md), bdevname(bdev, b), |
396 | (unsigned long long)start, | 396 | (unsigned long long)start, |
397 | (unsigned long long)len, | 397 | (unsigned long long)len, |
398 | (unsigned long long)dev_size); | 398 | (unsigned long long)dev_size); |
399 | return 1; | 399 | return 1; |
400 | } | 400 | } |
401 | 401 | ||
402 | if (logical_block_size_sectors <= 1) | 402 | if (logical_block_size_sectors <= 1) |
403 | return 0; | 403 | return 0; |
404 | 404 | ||
405 | if (start & (logical_block_size_sectors - 1)) { | 405 | if (start & (logical_block_size_sectors - 1)) { |
406 | DMWARN("%s: start=%llu not aligned to h/w " | 406 | DMWARN("%s: start=%llu not aligned to h/w " |
407 | "logical block size %u of %s", | 407 | "logical block size %u of %s", |
408 | dm_device_name(ti->table->md), | 408 | dm_device_name(ti->table->md), |
409 | (unsigned long long)start, | 409 | (unsigned long long)start, |
410 | limits->logical_block_size, bdevname(bdev, b)); | 410 | limits->logical_block_size, bdevname(bdev, b)); |
411 | return 1; | 411 | return 1; |
412 | } | 412 | } |
413 | 413 | ||
414 | if (len & (logical_block_size_sectors - 1)) { | 414 | if (len & (logical_block_size_sectors - 1)) { |
415 | DMWARN("%s: len=%llu not aligned to h/w " | 415 | DMWARN("%s: len=%llu not aligned to h/w " |
416 | "logical block size %u of %s", | 416 | "logical block size %u of %s", |
417 | dm_device_name(ti->table->md), | 417 | dm_device_name(ti->table->md), |
418 | (unsigned long long)len, | 418 | (unsigned long long)len, |
419 | limits->logical_block_size, bdevname(bdev, b)); | 419 | limits->logical_block_size, bdevname(bdev, b)); |
420 | return 1; | 420 | return 1; |
421 | } | 421 | } |
422 | 422 | ||
423 | return 0; | 423 | return 0; |
424 | } | 424 | } |
425 | 425 | ||
426 | /* | 426 | /* |
427 | * This upgrades the mode on an already open dm_dev, being | 427 | * This upgrades the mode on an already open dm_dev, being |
428 | * careful to leave things as they were if we fail to reopen the | 428 | * careful to leave things as they were if we fail to reopen the |
429 | * device and not to touch the existing bdev field in case | 429 | * device and not to touch the existing bdev field in case |
430 | * it is accessed concurrently inside dm_table_any_congested(). | 430 | * it is accessed concurrently inside dm_table_any_congested(). |
431 | */ | 431 | */ |
432 | static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode, | 432 | static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode, |
433 | struct mapped_device *md) | 433 | struct mapped_device *md) |
434 | { | 434 | { |
435 | int r; | 435 | int r; |
436 | struct dm_dev_internal dd_new, dd_old; | 436 | struct dm_dev_internal dd_new, dd_old; |
437 | 437 | ||
438 | dd_new = dd_old = *dd; | 438 | dd_new = dd_old = *dd; |
439 | 439 | ||
440 | dd_new.dm_dev.mode |= new_mode; | 440 | dd_new.dm_dev.mode |= new_mode; |
441 | dd_new.dm_dev.bdev = NULL; | 441 | dd_new.dm_dev.bdev = NULL; |
442 | 442 | ||
443 | r = open_dev(&dd_new, dd->dm_dev.bdev->bd_dev, md); | 443 | r = open_dev(&dd_new, dd->dm_dev.bdev->bd_dev, md); |
444 | if (r) | 444 | if (r) |
445 | return r; | 445 | return r; |
446 | 446 | ||
447 | dd->dm_dev.mode |= new_mode; | 447 | dd->dm_dev.mode |= new_mode; |
448 | close_dev(&dd_old, md); | 448 | close_dev(&dd_old, md); |
449 | 449 | ||
450 | return 0; | 450 | return 0; |
451 | } | 451 | } |
452 | 452 | ||
453 | /* | 453 | /* |
454 | * Add a device to the list, or just increment the usage count if | 454 | * Add a device to the list, or just increment the usage count if |
455 | * it's already present. | 455 | * it's already present. |
456 | */ | 456 | */ |
457 | int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode, | 457 | int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode, |
458 | struct dm_dev **result) | 458 | struct dm_dev **result) |
459 | { | 459 | { |
460 | int r; | 460 | int r; |
461 | dev_t uninitialized_var(dev); | 461 | dev_t uninitialized_var(dev); |
462 | struct dm_dev_internal *dd; | 462 | struct dm_dev_internal *dd; |
463 | unsigned int major, minor; | 463 | unsigned int major, minor; |
464 | struct dm_table *t = ti->table; | 464 | struct dm_table *t = ti->table; |
465 | 465 | ||
466 | BUG_ON(!t); | 466 | BUG_ON(!t); |
467 | 467 | ||
468 | if (sscanf(path, "%u:%u", &major, &minor) == 2) { | 468 | if (sscanf(path, "%u:%u", &major, &minor) == 2) { |
469 | /* Extract the major/minor numbers */ | 469 | /* Extract the major/minor numbers */ |
470 | dev = MKDEV(major, minor); | 470 | dev = MKDEV(major, minor); |
471 | if (MAJOR(dev) != major || MINOR(dev) != minor) | 471 | if (MAJOR(dev) != major || MINOR(dev) != minor) |
472 | return -EOVERFLOW; | 472 | return -EOVERFLOW; |
473 | } else { | 473 | } else { |
474 | /* convert the path to a device */ | 474 | /* convert the path to a device */ |
475 | struct block_device *bdev = lookup_bdev(path); | 475 | struct block_device *bdev = lookup_bdev(path); |
476 | 476 | ||
477 | if (IS_ERR(bdev)) | 477 | if (IS_ERR(bdev)) |
478 | return PTR_ERR(bdev); | 478 | return PTR_ERR(bdev); |
479 | dev = bdev->bd_dev; | 479 | dev = bdev->bd_dev; |
480 | bdput(bdev); | 480 | bdput(bdev); |
481 | } | 481 | } |
482 | 482 | ||
483 | dd = find_device(&t->devices, dev); | 483 | dd = find_device(&t->devices, dev); |
484 | if (!dd) { | 484 | if (!dd) { |
485 | dd = kmalloc(sizeof(*dd), GFP_KERNEL); | 485 | dd = kmalloc(sizeof(*dd), GFP_KERNEL); |
486 | if (!dd) | 486 | if (!dd) |
487 | return -ENOMEM; | 487 | return -ENOMEM; |
488 | 488 | ||
489 | dd->dm_dev.mode = mode; | 489 | dd->dm_dev.mode = mode; |
490 | dd->dm_dev.bdev = NULL; | 490 | dd->dm_dev.bdev = NULL; |
491 | 491 | ||
492 | if ((r = open_dev(dd, dev, t->md))) { | 492 | if ((r = open_dev(dd, dev, t->md))) { |
493 | kfree(dd); | 493 | kfree(dd); |
494 | return r; | 494 | return r; |
495 | } | 495 | } |
496 | 496 | ||
497 | format_dev_t(dd->dm_dev.name, dev); | 497 | format_dev_t(dd->dm_dev.name, dev); |
498 | 498 | ||
499 | atomic_set(&dd->count, 0); | 499 | atomic_set(&dd->count, 0); |
500 | list_add(&dd->list, &t->devices); | 500 | list_add(&dd->list, &t->devices); |
501 | 501 | ||
502 | } else if (dd->dm_dev.mode != (mode | dd->dm_dev.mode)) { | 502 | } else if (dd->dm_dev.mode != (mode | dd->dm_dev.mode)) { |
503 | r = upgrade_mode(dd, mode, t->md); | 503 | r = upgrade_mode(dd, mode, t->md); |
504 | if (r) | 504 | if (r) |
505 | return r; | 505 | return r; |
506 | } | 506 | } |
507 | atomic_inc(&dd->count); | 507 | atomic_inc(&dd->count); |
508 | 508 | ||
509 | *result = &dd->dm_dev; | 509 | *result = &dd->dm_dev; |
510 | return 0; | 510 | return 0; |
511 | } | 511 | } |
512 | EXPORT_SYMBOL(dm_get_device); | 512 | EXPORT_SYMBOL(dm_get_device); |
513 | 513 | ||
514 | int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, | 514 | int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, |
515 | sector_t start, sector_t len, void *data) | 515 | sector_t start, sector_t len, void *data) |
516 | { | 516 | { |
517 | struct queue_limits *limits = data; | 517 | struct queue_limits *limits = data; |
518 | struct block_device *bdev = dev->bdev; | 518 | struct block_device *bdev = dev->bdev; |
519 | struct request_queue *q = bdev_get_queue(bdev); | 519 | struct request_queue *q = bdev_get_queue(bdev); |
520 | char b[BDEVNAME_SIZE]; | 520 | char b[BDEVNAME_SIZE]; |
521 | 521 | ||
522 | if (unlikely(!q)) { | 522 | if (unlikely(!q)) { |
523 | DMWARN("%s: Cannot set limits for nonexistent device %s", | 523 | DMWARN("%s: Cannot set limits for nonexistent device %s", |
524 | dm_device_name(ti->table->md), bdevname(bdev, b)); | 524 | dm_device_name(ti->table->md), bdevname(bdev, b)); |
525 | return 0; | 525 | return 0; |
526 | } | 526 | } |
527 | 527 | ||
528 | if (bdev_stack_limits(limits, bdev, start) < 0) | 528 | if (bdev_stack_limits(limits, bdev, start) < 0) |
529 | DMWARN("%s: adding target device %s caused an alignment inconsistency: " | 529 | DMWARN("%s: adding target device %s caused an alignment inconsistency: " |
530 | "physical_block_size=%u, logical_block_size=%u, " | 530 | "physical_block_size=%u, logical_block_size=%u, " |
531 | "alignment_offset=%u, start=%llu", | 531 | "alignment_offset=%u, start=%llu", |
532 | dm_device_name(ti->table->md), bdevname(bdev, b), | 532 | dm_device_name(ti->table->md), bdevname(bdev, b), |
533 | q->limits.physical_block_size, | 533 | q->limits.physical_block_size, |
534 | q->limits.logical_block_size, | 534 | q->limits.logical_block_size, |
535 | q->limits.alignment_offset, | 535 | q->limits.alignment_offset, |
536 | (unsigned long long) start << SECTOR_SHIFT); | 536 | (unsigned long long) start << SECTOR_SHIFT); |
537 | 537 | ||
538 | /* | 538 | /* |
539 | * Check if merge fn is supported. | 539 | * Check if merge fn is supported. |
540 | * If not we'll force DM to use PAGE_SIZE or | 540 | * If not we'll force DM to use PAGE_SIZE or |
541 | * smaller I/O, just to be safe. | 541 | * smaller I/O, just to be safe. |
542 | */ | 542 | */ |
543 | if (dm_queue_merge_is_compulsory(q) && !ti->type->merge) | 543 | if (dm_queue_merge_is_compulsory(q) && !ti->type->merge) |
544 | blk_limits_max_hw_sectors(limits, | 544 | blk_limits_max_hw_sectors(limits, |
545 | (unsigned int) (PAGE_SIZE >> 9)); | 545 | (unsigned int) (PAGE_SIZE >> 9)); |
546 | return 0; | 546 | return 0; |
547 | } | 547 | } |
548 | EXPORT_SYMBOL_GPL(dm_set_device_limits); | 548 | EXPORT_SYMBOL_GPL(dm_set_device_limits); |
549 | 549 | ||
550 | /* | 550 | /* |
551 | * Decrement a device's use count and remove it if necessary. | 551 | * Decrement a device's use count and remove it if necessary. |
552 | */ | 552 | */ |
553 | void dm_put_device(struct dm_target *ti, struct dm_dev *d) | 553 | void dm_put_device(struct dm_target *ti, struct dm_dev *d) |
554 | { | 554 | { |
555 | struct dm_dev_internal *dd = container_of(d, struct dm_dev_internal, | 555 | struct dm_dev_internal *dd = container_of(d, struct dm_dev_internal, |
556 | dm_dev); | 556 | dm_dev); |
557 | 557 | ||
558 | if (atomic_dec_and_test(&dd->count)) { | 558 | if (atomic_dec_and_test(&dd->count)) { |
559 | close_dev(dd, ti->table->md); | 559 | close_dev(dd, ti->table->md); |
560 | list_del(&dd->list); | 560 | list_del(&dd->list); |
561 | kfree(dd); | 561 | kfree(dd); |
562 | } | 562 | } |
563 | } | 563 | } |
564 | EXPORT_SYMBOL(dm_put_device); | 564 | EXPORT_SYMBOL(dm_put_device); |
565 | 565 | ||
566 | /* | 566 | /* |
567 | * Checks to see if the target joins onto the end of the table. | 567 | * Checks to see if the target joins onto the end of the table. |
568 | */ | 568 | */ |
569 | static int adjoin(struct dm_table *table, struct dm_target *ti) | 569 | static int adjoin(struct dm_table *table, struct dm_target *ti) |
570 | { | 570 | { |
571 | struct dm_target *prev; | 571 | struct dm_target *prev; |
572 | 572 | ||
573 | if (!table->num_targets) | 573 | if (!table->num_targets) |
574 | return !ti->begin; | 574 | return !ti->begin; |
575 | 575 | ||
576 | prev = &table->targets[table->num_targets - 1]; | 576 | prev = &table->targets[table->num_targets - 1]; |
577 | return (ti->begin == (prev->begin + prev->len)); | 577 | return (ti->begin == (prev->begin + prev->len)); |
578 | } | 578 | } |
579 | 579 | ||
580 | /* | 580 | /* |
581 | * Used to dynamically allocate the arg array. | 581 | * Used to dynamically allocate the arg array. |
582 | */ | 582 | */ |
583 | static char **realloc_argv(unsigned *array_size, char **old_argv) | 583 | static char **realloc_argv(unsigned *array_size, char **old_argv) |
584 | { | 584 | { |
585 | char **argv; | 585 | char **argv; |
586 | unsigned new_size; | 586 | unsigned new_size; |
587 | 587 | ||
588 | new_size = *array_size ? *array_size * 2 : 64; | 588 | new_size = *array_size ? *array_size * 2 : 64; |
589 | argv = kmalloc(new_size * sizeof(*argv), GFP_KERNEL); | 589 | argv = kmalloc(new_size * sizeof(*argv), GFP_KERNEL); |
590 | if (argv) { | 590 | if (argv) { |
591 | memcpy(argv, old_argv, *array_size * sizeof(*argv)); | 591 | memcpy(argv, old_argv, *array_size * sizeof(*argv)); |
592 | *array_size = new_size; | 592 | *array_size = new_size; |
593 | } | 593 | } |
594 | 594 | ||
595 | kfree(old_argv); | 595 | kfree(old_argv); |
596 | return argv; | 596 | return argv; |
597 | } | 597 | } |
598 | 598 | ||
599 | /* | 599 | /* |
600 | * Destructively splits up the argument list to pass to ctr. | 600 | * Destructively splits up the argument list to pass to ctr. |
601 | */ | 601 | */ |
602 | int dm_split_args(int *argc, char ***argvp, char *input) | 602 | int dm_split_args(int *argc, char ***argvp, char *input) |
603 | { | 603 | { |
604 | char *start, *end = input, *out, **argv = NULL; | 604 | char *start, *end = input, *out, **argv = NULL; |
605 | unsigned array_size = 0; | 605 | unsigned array_size = 0; |
606 | 606 | ||
607 | *argc = 0; | 607 | *argc = 0; |
608 | 608 | ||
609 | if (!input) { | 609 | if (!input) { |
610 | *argvp = NULL; | 610 | *argvp = NULL; |
611 | return 0; | 611 | return 0; |
612 | } | 612 | } |
613 | 613 | ||
614 | argv = realloc_argv(&array_size, argv); | 614 | argv = realloc_argv(&array_size, argv); |
615 | if (!argv) | 615 | if (!argv) |
616 | return -ENOMEM; | 616 | return -ENOMEM; |
617 | 617 | ||
618 | while (1) { | 618 | while (1) { |
619 | /* Skip whitespace */ | 619 | /* Skip whitespace */ |
620 | start = skip_spaces(end); | 620 | start = skip_spaces(end); |
621 | 621 | ||
622 | if (!*start) | 622 | if (!*start) |
623 | break; /* success, we hit the end */ | 623 | break; /* success, we hit the end */ |
624 | 624 | ||
625 | /* 'out' is used to remove any back-quotes */ | 625 | /* 'out' is used to remove any back-quotes */ |
626 | end = out = start; | 626 | end = out = start; |
627 | while (*end) { | 627 | while (*end) { |
628 | /* Everything apart from '\0' can be quoted */ | 628 | /* Everything apart from '\0' can be quoted */ |
629 | if (*end == '\\' && *(end + 1)) { | 629 | if (*end == '\\' && *(end + 1)) { |
630 | *out++ = *(end + 1); | 630 | *out++ = *(end + 1); |
631 | end += 2; | 631 | end += 2; |
632 | continue; | 632 | continue; |
633 | } | 633 | } |
634 | 634 | ||
635 | if (isspace(*end)) | 635 | if (isspace(*end)) |
636 | break; /* end of token */ | 636 | break; /* end of token */ |
637 | 637 | ||
638 | *out++ = *end++; | 638 | *out++ = *end++; |
639 | } | 639 | } |
640 | 640 | ||
641 | /* have we already filled the array ? */ | 641 | /* have we already filled the array ? */ |
642 | if ((*argc + 1) > array_size) { | 642 | if ((*argc + 1) > array_size) { |
643 | argv = realloc_argv(&array_size, argv); | 643 | argv = realloc_argv(&array_size, argv); |
644 | if (!argv) | 644 | if (!argv) |
645 | return -ENOMEM; | 645 | return -ENOMEM; |
646 | } | 646 | } |
647 | 647 | ||
648 | /* we know this is whitespace */ | 648 | /* we know this is whitespace */ |
649 | if (*end) | 649 | if (*end) |
650 | end++; | 650 | end++; |
651 | 651 | ||
652 | /* terminate the string and put it in the array */ | 652 | /* terminate the string and put it in the array */ |
653 | *out = '\0'; | 653 | *out = '\0'; |
654 | argv[*argc] = start; | 654 | argv[*argc] = start; |
655 | (*argc)++; | 655 | (*argc)++; |
656 | } | 656 | } |
657 | 657 | ||
658 | *argvp = argv; | 658 | *argvp = argv; |
659 | return 0; | 659 | return 0; |
660 | } | 660 | } |
661 | 661 | ||
662 | /* | 662 | /* |
663 | * Impose necessary and sufficient conditions on a devices's table such | 663 | * Impose necessary and sufficient conditions on a devices's table such |
664 | * that any incoming bio which respects its logical_block_size can be | 664 | * that any incoming bio which respects its logical_block_size can be |
665 | * processed successfully. If it falls across the boundary between | 665 | * processed successfully. If it falls across the boundary between |
666 | * two or more targets, the size of each piece it gets split into must | 666 | * two or more targets, the size of each piece it gets split into must |
667 | * be compatible with the logical_block_size of the target processing it. | 667 | * be compatible with the logical_block_size of the target processing it. |
668 | */ | 668 | */ |
669 | static int validate_hardware_logical_block_alignment(struct dm_table *table, | 669 | static int validate_hardware_logical_block_alignment(struct dm_table *table, |
670 | struct queue_limits *limits) | 670 | struct queue_limits *limits) |
671 | { | 671 | { |
672 | /* | 672 | /* |
673 | * This function uses arithmetic modulo the logical_block_size | 673 | * This function uses arithmetic modulo the logical_block_size |
674 | * (in units of 512-byte sectors). | 674 | * (in units of 512-byte sectors). |
675 | */ | 675 | */ |
676 | unsigned short device_logical_block_size_sects = | 676 | unsigned short device_logical_block_size_sects = |
677 | limits->logical_block_size >> SECTOR_SHIFT; | 677 | limits->logical_block_size >> SECTOR_SHIFT; |
678 | 678 | ||
679 | /* | 679 | /* |
680 | * Offset of the start of the next table entry, mod logical_block_size. | 680 | * Offset of the start of the next table entry, mod logical_block_size. |
681 | */ | 681 | */ |
682 | unsigned short next_target_start = 0; | 682 | unsigned short next_target_start = 0; |
683 | 683 | ||
684 | /* | 684 | /* |
685 | * Given an aligned bio that extends beyond the end of a | 685 | * Given an aligned bio that extends beyond the end of a |
686 | * target, how many sectors must the next target handle? | 686 | * target, how many sectors must the next target handle? |
687 | */ | 687 | */ |
688 | unsigned short remaining = 0; | 688 | unsigned short remaining = 0; |
689 | 689 | ||
690 | struct dm_target *uninitialized_var(ti); | 690 | struct dm_target *uninitialized_var(ti); |
691 | struct queue_limits ti_limits; | 691 | struct queue_limits ti_limits; |
692 | unsigned i = 0; | 692 | unsigned i = 0; |
693 | 693 | ||
694 | /* | 694 | /* |
695 | * Check each entry in the table in turn. | 695 | * Check each entry in the table in turn. |
696 | */ | 696 | */ |
697 | while (i < dm_table_get_num_targets(table)) { | 697 | while (i < dm_table_get_num_targets(table)) { |
698 | ti = dm_table_get_target(table, i++); | 698 | ti = dm_table_get_target(table, i++); |
699 | 699 | ||
700 | blk_set_default_limits(&ti_limits); | 700 | blk_set_default_limits(&ti_limits); |
701 | 701 | ||
702 | /* combine all target devices' limits */ | 702 | /* combine all target devices' limits */ |
703 | if (ti->type->iterate_devices) | 703 | if (ti->type->iterate_devices) |
704 | ti->type->iterate_devices(ti, dm_set_device_limits, | 704 | ti->type->iterate_devices(ti, dm_set_device_limits, |
705 | &ti_limits); | 705 | &ti_limits); |
706 | 706 | ||
707 | /* | 707 | /* |
708 | * If the remaining sectors fall entirely within this | 708 | * If the remaining sectors fall entirely within this |
709 | * table entry are they compatible with its logical_block_size? | 709 | * table entry are they compatible with its logical_block_size? |
710 | */ | 710 | */ |
711 | if (remaining < ti->len && | 711 | if (remaining < ti->len && |
712 | remaining & ((ti_limits.logical_block_size >> | 712 | remaining & ((ti_limits.logical_block_size >> |
713 | SECTOR_SHIFT) - 1)) | 713 | SECTOR_SHIFT) - 1)) |
714 | break; /* Error */ | 714 | break; /* Error */ |
715 | 715 | ||
716 | next_target_start = | 716 | next_target_start = |
717 | (unsigned short) ((next_target_start + ti->len) & | 717 | (unsigned short) ((next_target_start + ti->len) & |
718 | (device_logical_block_size_sects - 1)); | 718 | (device_logical_block_size_sects - 1)); |
719 | remaining = next_target_start ? | 719 | remaining = next_target_start ? |
720 | device_logical_block_size_sects - next_target_start : 0; | 720 | device_logical_block_size_sects - next_target_start : 0; |
721 | } | 721 | } |
722 | 722 | ||
723 | if (remaining) { | 723 | if (remaining) { |
724 | DMWARN("%s: table line %u (start sect %llu len %llu) " | 724 | DMWARN("%s: table line %u (start sect %llu len %llu) " |
725 | "not aligned to h/w logical block size %u", | 725 | "not aligned to h/w logical block size %u", |
726 | dm_device_name(table->md), i, | 726 | dm_device_name(table->md), i, |
727 | (unsigned long long) ti->begin, | 727 | (unsigned long long) ti->begin, |
728 | (unsigned long long) ti->len, | 728 | (unsigned long long) ti->len, |
729 | limits->logical_block_size); | 729 | limits->logical_block_size); |
730 | return -EINVAL; | 730 | return -EINVAL; |
731 | } | 731 | } |
732 | 732 | ||
733 | return 0; | 733 | return 0; |
734 | } | 734 | } |
735 | 735 | ||
736 | int dm_table_add_target(struct dm_table *t, const char *type, | 736 | int dm_table_add_target(struct dm_table *t, const char *type, |
737 | sector_t start, sector_t len, char *params) | 737 | sector_t start, sector_t len, char *params) |
738 | { | 738 | { |
739 | int r = -EINVAL, argc; | 739 | int r = -EINVAL, argc; |
740 | char **argv; | 740 | char **argv; |
741 | struct dm_target *tgt; | 741 | struct dm_target *tgt; |
742 | 742 | ||
743 | if ((r = check_space(t))) | 743 | if ((r = check_space(t))) |
744 | return r; | 744 | return r; |
745 | 745 | ||
746 | tgt = t->targets + t->num_targets; | 746 | tgt = t->targets + t->num_targets; |
747 | memset(tgt, 0, sizeof(*tgt)); | 747 | memset(tgt, 0, sizeof(*tgt)); |
748 | 748 | ||
749 | if (!len) { | 749 | if (!len) { |
750 | DMERR("%s: zero-length target", dm_device_name(t->md)); | 750 | DMERR("%s: zero-length target", dm_device_name(t->md)); |
751 | return -EINVAL; | 751 | return -EINVAL; |
752 | } | 752 | } |
753 | 753 | ||
754 | tgt->type = dm_get_target_type(type); | 754 | tgt->type = dm_get_target_type(type); |
755 | if (!tgt->type) { | 755 | if (!tgt->type) { |
756 | DMERR("%s: %s: unknown target type", dm_device_name(t->md), | 756 | DMERR("%s: %s: unknown target type", dm_device_name(t->md), |
757 | type); | 757 | type); |
758 | return -EINVAL; | 758 | return -EINVAL; |
759 | } | 759 | } |
760 | 760 | ||
761 | tgt->table = t; | 761 | tgt->table = t; |
762 | tgt->begin = start; | 762 | tgt->begin = start; |
763 | tgt->len = len; | 763 | tgt->len = len; |
764 | tgt->error = "Unknown error"; | 764 | tgt->error = "Unknown error"; |
765 | 765 | ||
766 | /* | 766 | /* |
767 | * Does this target adjoin the previous one ? | 767 | * Does this target adjoin the previous one ? |
768 | */ | 768 | */ |
769 | if (!adjoin(t, tgt)) { | 769 | if (!adjoin(t, tgt)) { |
770 | tgt->error = "Gap in table"; | 770 | tgt->error = "Gap in table"; |
771 | r = -EINVAL; | 771 | r = -EINVAL; |
772 | goto bad; | 772 | goto bad; |
773 | } | 773 | } |
774 | 774 | ||
775 | r = dm_split_args(&argc, &argv, params); | 775 | r = dm_split_args(&argc, &argv, params); |
776 | if (r) { | 776 | if (r) { |
777 | tgt->error = "couldn't split parameters (insufficient memory)"; | 777 | tgt->error = "couldn't split parameters (insufficient memory)"; |
778 | goto bad; | 778 | goto bad; |
779 | } | 779 | } |
780 | 780 | ||
781 | r = tgt->type->ctr(tgt, argc, argv); | 781 | r = tgt->type->ctr(tgt, argc, argv); |
782 | kfree(argv); | 782 | kfree(argv); |
783 | if (r) | 783 | if (r) |
784 | goto bad; | 784 | goto bad; |
785 | 785 | ||
786 | t->highs[t->num_targets++] = tgt->begin + tgt->len - 1; | 786 | t->highs[t->num_targets++] = tgt->begin + tgt->len - 1; |
787 | 787 | ||
788 | if (!tgt->num_discard_requests && tgt->discards_supported) | 788 | if (!tgt->num_discard_requests && tgt->discards_supported) |
789 | DMWARN("%s: %s: ignoring discards_supported because num_discard_requests is zero.", | 789 | DMWARN("%s: %s: ignoring discards_supported because num_discard_requests is zero.", |
790 | dm_device_name(t->md), type); | 790 | dm_device_name(t->md), type); |
791 | 791 | ||
792 | return 0; | 792 | return 0; |
793 | 793 | ||
794 | bad: | 794 | bad: |
795 | DMERR("%s: %s: %s", dm_device_name(t->md), type, tgt->error); | 795 | DMERR("%s: %s: %s", dm_device_name(t->md), type, tgt->error); |
796 | dm_put_target_type(tgt->type); | 796 | dm_put_target_type(tgt->type); |
797 | return r; | 797 | return r; |
798 | } | 798 | } |
799 | 799 | ||
800 | /* | 800 | /* |
801 | * Target argument parsing helpers. | 801 | * Target argument parsing helpers. |
802 | */ | 802 | */ |
803 | static int validate_next_arg(struct dm_arg *arg, struct dm_arg_set *arg_set, | 803 | static int validate_next_arg(struct dm_arg *arg, struct dm_arg_set *arg_set, |
804 | unsigned *value, char **error, unsigned grouped) | 804 | unsigned *value, char **error, unsigned grouped) |
805 | { | 805 | { |
806 | const char *arg_str = dm_shift_arg(arg_set); | 806 | const char *arg_str = dm_shift_arg(arg_set); |
807 | 807 | ||
808 | if (!arg_str || | 808 | if (!arg_str || |
809 | (sscanf(arg_str, "%u", value) != 1) || | 809 | (sscanf(arg_str, "%u", value) != 1) || |
810 | (*value < arg->min) || | 810 | (*value < arg->min) || |
811 | (*value > arg->max) || | 811 | (*value > arg->max) || |
812 | (grouped && arg_set->argc < *value)) { | 812 | (grouped && arg_set->argc < *value)) { |
813 | *error = arg->error; | 813 | *error = arg->error; |
814 | return -EINVAL; | 814 | return -EINVAL; |
815 | } | 815 | } |
816 | 816 | ||
817 | return 0; | 817 | return 0; |
818 | } | 818 | } |
819 | 819 | ||
820 | int dm_read_arg(struct dm_arg *arg, struct dm_arg_set *arg_set, | 820 | int dm_read_arg(struct dm_arg *arg, struct dm_arg_set *arg_set, |
821 | unsigned *value, char **error) | 821 | unsigned *value, char **error) |
822 | { | 822 | { |
823 | return validate_next_arg(arg, arg_set, value, error, 0); | 823 | return validate_next_arg(arg, arg_set, value, error, 0); |
824 | } | 824 | } |
825 | EXPORT_SYMBOL(dm_read_arg); | 825 | EXPORT_SYMBOL(dm_read_arg); |
826 | 826 | ||
827 | int dm_read_arg_group(struct dm_arg *arg, struct dm_arg_set *arg_set, | 827 | int dm_read_arg_group(struct dm_arg *arg, struct dm_arg_set *arg_set, |
828 | unsigned *value, char **error) | 828 | unsigned *value, char **error) |
829 | { | 829 | { |
830 | return validate_next_arg(arg, arg_set, value, error, 1); | 830 | return validate_next_arg(arg, arg_set, value, error, 1); |
831 | } | 831 | } |
832 | EXPORT_SYMBOL(dm_read_arg_group); | 832 | EXPORT_SYMBOL(dm_read_arg_group); |
833 | 833 | ||
834 | const char *dm_shift_arg(struct dm_arg_set *as) | 834 | const char *dm_shift_arg(struct dm_arg_set *as) |
835 | { | 835 | { |
836 | char *r; | 836 | char *r; |
837 | 837 | ||
838 | if (as->argc) { | 838 | if (as->argc) { |
839 | as->argc--; | 839 | as->argc--; |
840 | r = *as->argv; | 840 | r = *as->argv; |
841 | as->argv++; | 841 | as->argv++; |
842 | return r; | 842 | return r; |
843 | } | 843 | } |
844 | 844 | ||
845 | return NULL; | 845 | return NULL; |
846 | } | 846 | } |
847 | EXPORT_SYMBOL(dm_shift_arg); | 847 | EXPORT_SYMBOL(dm_shift_arg); |
848 | 848 | ||
849 | void dm_consume_args(struct dm_arg_set *as, unsigned num_args) | 849 | void dm_consume_args(struct dm_arg_set *as, unsigned num_args) |
850 | { | 850 | { |
851 | BUG_ON(as->argc < num_args); | 851 | BUG_ON(as->argc < num_args); |
852 | as->argc -= num_args; | 852 | as->argc -= num_args; |
853 | as->argv += num_args; | 853 | as->argv += num_args; |
854 | } | 854 | } |
855 | EXPORT_SYMBOL(dm_consume_args); | 855 | EXPORT_SYMBOL(dm_consume_args); |
856 | 856 | ||
857 | static int dm_table_set_type(struct dm_table *t) | 857 | static int dm_table_set_type(struct dm_table *t) |
858 | { | 858 | { |
859 | unsigned i; | 859 | unsigned i; |
860 | unsigned bio_based = 0, request_based = 0; | 860 | unsigned bio_based = 0, request_based = 0; |
861 | struct dm_target *tgt; | 861 | struct dm_target *tgt; |
862 | struct dm_dev_internal *dd; | 862 | struct dm_dev_internal *dd; |
863 | struct list_head *devices; | 863 | struct list_head *devices; |
864 | 864 | ||
865 | for (i = 0; i < t->num_targets; i++) { | 865 | for (i = 0; i < t->num_targets; i++) { |
866 | tgt = t->targets + i; | 866 | tgt = t->targets + i; |
867 | if (dm_target_request_based(tgt)) | 867 | if (dm_target_request_based(tgt)) |
868 | request_based = 1; | 868 | request_based = 1; |
869 | else | 869 | else |
870 | bio_based = 1; | 870 | bio_based = 1; |
871 | 871 | ||
872 | if (bio_based && request_based) { | 872 | if (bio_based && request_based) { |
873 | DMWARN("Inconsistent table: different target types" | 873 | DMWARN("Inconsistent table: different target types" |
874 | " can't be mixed up"); | 874 | " can't be mixed up"); |
875 | return -EINVAL; | 875 | return -EINVAL; |
876 | } | 876 | } |
877 | } | 877 | } |
878 | 878 | ||
879 | if (bio_based) { | 879 | if (bio_based) { |
880 | /* We must use this table as bio-based */ | 880 | /* We must use this table as bio-based */ |
881 | t->type = DM_TYPE_BIO_BASED; | 881 | t->type = DM_TYPE_BIO_BASED; |
882 | return 0; | 882 | return 0; |
883 | } | 883 | } |
884 | 884 | ||
885 | BUG_ON(!request_based); /* No targets in this table */ | 885 | BUG_ON(!request_based); /* No targets in this table */ |
886 | 886 | ||
887 | /* Non-request-stackable devices can't be used for request-based dm */ | 887 | /* Non-request-stackable devices can't be used for request-based dm */ |
888 | devices = dm_table_get_devices(t); | 888 | devices = dm_table_get_devices(t); |
889 | list_for_each_entry(dd, devices, list) { | 889 | list_for_each_entry(dd, devices, list) { |
890 | if (!blk_queue_stackable(bdev_get_queue(dd->dm_dev.bdev))) { | 890 | if (!blk_queue_stackable(bdev_get_queue(dd->dm_dev.bdev))) { |
891 | DMWARN("table load rejected: including" | 891 | DMWARN("table load rejected: including" |
892 | " non-request-stackable devices"); | 892 | " non-request-stackable devices"); |
893 | return -EINVAL; | 893 | return -EINVAL; |
894 | } | 894 | } |
895 | } | 895 | } |
896 | 896 | ||
897 | /* | 897 | /* |
898 | * Request-based dm supports only tables that have a single target now. | 898 | * Request-based dm supports only tables that have a single target now. |
899 | * To support multiple targets, request splitting support is needed, | 899 | * To support multiple targets, request splitting support is needed, |
900 | * and that needs lots of changes in the block-layer. | 900 | * and that needs lots of changes in the block-layer. |
901 | * (e.g. request completion process for partial completion.) | 901 | * (e.g. request completion process for partial completion.) |
902 | */ | 902 | */ |
903 | if (t->num_targets > 1) { | 903 | if (t->num_targets > 1) { |
904 | DMWARN("Request-based dm doesn't support multiple targets yet"); | 904 | DMWARN("Request-based dm doesn't support multiple targets yet"); |
905 | return -EINVAL; | 905 | return -EINVAL; |
906 | } | 906 | } |
907 | 907 | ||
908 | t->type = DM_TYPE_REQUEST_BASED; | 908 | t->type = DM_TYPE_REQUEST_BASED; |
909 | 909 | ||
910 | return 0; | 910 | return 0; |
911 | } | 911 | } |
912 | 912 | ||
913 | unsigned dm_table_get_type(struct dm_table *t) | 913 | unsigned dm_table_get_type(struct dm_table *t) |
914 | { | 914 | { |
915 | return t->type; | 915 | return t->type; |
916 | } | 916 | } |
917 | 917 | ||
918 | bool dm_table_request_based(struct dm_table *t) | 918 | bool dm_table_request_based(struct dm_table *t) |
919 | { | 919 | { |
920 | return dm_table_get_type(t) == DM_TYPE_REQUEST_BASED; | 920 | return dm_table_get_type(t) == DM_TYPE_REQUEST_BASED; |
921 | } | 921 | } |
922 | 922 | ||
923 | int dm_table_alloc_md_mempools(struct dm_table *t) | 923 | int dm_table_alloc_md_mempools(struct dm_table *t) |
924 | { | 924 | { |
925 | unsigned type = dm_table_get_type(t); | 925 | unsigned type = dm_table_get_type(t); |
926 | 926 | ||
927 | if (unlikely(type == DM_TYPE_NONE)) { | 927 | if (unlikely(type == DM_TYPE_NONE)) { |
928 | DMWARN("no table type is set, can't allocate mempools"); | 928 | DMWARN("no table type is set, can't allocate mempools"); |
929 | return -EINVAL; | 929 | return -EINVAL; |
930 | } | 930 | } |
931 | 931 | ||
932 | t->mempools = dm_alloc_md_mempools(type, t->integrity_supported); | 932 | t->mempools = dm_alloc_md_mempools(type, t->integrity_supported); |
933 | if (!t->mempools) | 933 | if (!t->mempools) |
934 | return -ENOMEM; | 934 | return -ENOMEM; |
935 | 935 | ||
936 | return 0; | 936 | return 0; |
937 | } | 937 | } |
938 | 938 | ||
939 | void dm_table_free_md_mempools(struct dm_table *t) | 939 | void dm_table_free_md_mempools(struct dm_table *t) |
940 | { | 940 | { |
941 | dm_free_md_mempools(t->mempools); | 941 | dm_free_md_mempools(t->mempools); |
942 | t->mempools = NULL; | 942 | t->mempools = NULL; |
943 | } | 943 | } |
944 | 944 | ||
945 | struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t) | 945 | struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t) |
946 | { | 946 | { |
947 | return t->mempools; | 947 | return t->mempools; |
948 | } | 948 | } |
949 | 949 | ||
950 | static int setup_indexes(struct dm_table *t) | 950 | static int setup_indexes(struct dm_table *t) |
951 | { | 951 | { |
952 | int i; | 952 | int i; |
953 | unsigned int total = 0; | 953 | unsigned int total = 0; |
954 | sector_t *indexes; | 954 | sector_t *indexes; |
955 | 955 | ||
956 | /* allocate the space for *all* the indexes */ | 956 | /* allocate the space for *all* the indexes */ |
957 | for (i = t->depth - 2; i >= 0; i--) { | 957 | for (i = t->depth - 2; i >= 0; i--) { |
958 | t->counts[i] = dm_div_up(t->counts[i + 1], CHILDREN_PER_NODE); | 958 | t->counts[i] = dm_div_up(t->counts[i + 1], CHILDREN_PER_NODE); |
959 | total += t->counts[i]; | 959 | total += t->counts[i]; |
960 | } | 960 | } |
961 | 961 | ||
962 | indexes = (sector_t *) dm_vcalloc(total, (unsigned long) NODE_SIZE); | 962 | indexes = (sector_t *) dm_vcalloc(total, (unsigned long) NODE_SIZE); |
963 | if (!indexes) | 963 | if (!indexes) |
964 | return -ENOMEM; | 964 | return -ENOMEM; |
965 | 965 | ||
966 | /* set up internal nodes, bottom-up */ | 966 | /* set up internal nodes, bottom-up */ |
967 | for (i = t->depth - 2; i >= 0; i--) { | 967 | for (i = t->depth - 2; i >= 0; i--) { |
968 | t->index[i] = indexes; | 968 | t->index[i] = indexes; |
969 | indexes += (KEYS_PER_NODE * t->counts[i]); | 969 | indexes += (KEYS_PER_NODE * t->counts[i]); |
970 | setup_btree_index(i, t); | 970 | setup_btree_index(i, t); |
971 | } | 971 | } |
972 | 972 | ||
973 | return 0; | 973 | return 0; |
974 | } | 974 | } |
975 | 975 | ||
976 | /* | 976 | /* |
977 | * Builds the btree to index the map. | 977 | * Builds the btree to index the map. |
978 | */ | 978 | */ |
979 | static int dm_table_build_index(struct dm_table *t) | 979 | static int dm_table_build_index(struct dm_table *t) |
980 | { | 980 | { |
981 | int r = 0; | 981 | int r = 0; |
982 | unsigned int leaf_nodes; | 982 | unsigned int leaf_nodes; |
983 | 983 | ||
984 | /* how many indexes will the btree have? */ | 984 | /* how many indexes will the btree have? */ |
985 | leaf_nodes = dm_div_up(t->num_targets, KEYS_PER_NODE); | 985 | leaf_nodes = dm_div_up(t->num_targets, KEYS_PER_NODE); |
986 | t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE); | 986 | t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE); |
987 | 987 | ||
988 | /* leaf layer has already been set up */ | 988 | /* leaf layer has already been set up */ |
989 | t->counts[t->depth - 1] = leaf_nodes; | 989 | t->counts[t->depth - 1] = leaf_nodes; |
990 | t->index[t->depth - 1] = t->highs; | 990 | t->index[t->depth - 1] = t->highs; |
991 | 991 | ||
992 | if (t->depth >= 2) | 992 | if (t->depth >= 2) |
993 | r = setup_indexes(t); | 993 | r = setup_indexes(t); |
994 | 994 | ||
995 | return r; | 995 | return r; |
996 | } | 996 | } |
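
To make the sizing arithmetic concrete, here is a small standalone sketch (not kernel code) of how the depth and per-level node counts fall out. It assumes a 64-byte node and an 8-byte sector_t, i.e. 8 keys and 9 children per node, and a made-up target count; dm_div_up() and int_log() are re-modelled as plain helpers.

/* Standalone sketch: how dm_table_build_index()/setup_indexes() size the
 * lookup btree.  Node geometry and target count are assumptions. */
#include <stdio.h>

#define KEYS_PER_NODE 8
#define CHILDREN_PER_NODE (KEYS_PER_NODE + 1)

static unsigned div_up(unsigned n, unsigned d)
{
        return (n + d - 1) / d;                 /* like dm_div_up() */
}

static unsigned int_log(unsigned n, unsigned base)
{
        unsigned result = 0;                    /* ceil(log_base(n)) */

        while (n > 1) {
                n = div_up(n, base);
                result++;
        }
        return result;
}

int main(void)
{
        unsigned num_targets = 1000;            /* hypothetical table */
        unsigned leaf_nodes = div_up(num_targets, KEYS_PER_NODE);
        unsigned depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE);
        unsigned counts[16];
        int i;

        counts[depth - 1] = leaf_nodes;         /* leaf layer = t->highs */
        for (i = (int)depth - 2; i >= 0; i--)   /* internal nodes, bottom-up */
                counts[i] = div_up(counts[i + 1], CHILDREN_PER_NODE);

        printf("targets=%u leaves=%u depth=%u\n", num_targets, leaf_nodes, depth);
        for (i = 0; i < (int)depth; i++)
                printf("level %d: %u node(s)\n", i, counts[i]);
        return 0;
}

With 1000 targets this prints depth 4 and 1, 2, 14 and 125 nodes per level, which shows why a small fixed maximum depth is ample in practice.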
997 | 997 | ||
998 | /* | 998 | /* |
999 | * Get a disk whose integrity profile reflects the table's profile. | 999 | * Get a disk whose integrity profile reflects the table's profile. |
1000 | * If %match_all is true, all devices' profiles must match. | 1000 | * If %match_all is true, all devices' profiles must match. |
1001 | * If %match_all is false, all devices must at least have an | 1001 | * If %match_all is false, all devices must at least have an |
1002 | * allocated integrity profile; but uninitialized is ok. | 1002 | * allocated integrity profile; but uninitialized is ok. |
1003 | * Returns NULL if integrity support was inconsistent or unavailable. | 1003 | * Returns NULL if integrity support was inconsistent or unavailable. |
1004 | */ | 1004 | */ |
1005 | static struct gendisk * dm_table_get_integrity_disk(struct dm_table *t, | 1005 | static struct gendisk * dm_table_get_integrity_disk(struct dm_table *t, |
1006 | bool match_all) | 1006 | bool match_all) |
1007 | { | 1007 | { |
1008 | struct list_head *devices = dm_table_get_devices(t); | 1008 | struct list_head *devices = dm_table_get_devices(t); |
1009 | struct dm_dev_internal *dd = NULL; | 1009 | struct dm_dev_internal *dd = NULL; |
1010 | struct gendisk *prev_disk = NULL, *template_disk = NULL; | 1010 | struct gendisk *prev_disk = NULL, *template_disk = NULL; |
1011 | 1011 | ||
1012 | list_for_each_entry(dd, devices, list) { | 1012 | list_for_each_entry(dd, devices, list) { |
1013 | template_disk = dd->dm_dev.bdev->bd_disk; | 1013 | template_disk = dd->dm_dev.bdev->bd_disk; |
1014 | if (!blk_get_integrity(template_disk)) | 1014 | if (!blk_get_integrity(template_disk)) |
1015 | goto no_integrity; | 1015 | goto no_integrity; |
1016 | if (!match_all && !blk_integrity_is_initialized(template_disk)) | 1016 | if (!match_all && !blk_integrity_is_initialized(template_disk)) |
1017 | continue; /* skip uninitialized profiles */ | 1017 | continue; /* skip uninitialized profiles */ |
1018 | else if (prev_disk && | 1018 | else if (prev_disk && |
1019 | blk_integrity_compare(prev_disk, template_disk) < 0) | 1019 | blk_integrity_compare(prev_disk, template_disk) < 0) |
1020 | goto no_integrity; | 1020 | goto no_integrity; |
1021 | prev_disk = template_disk; | 1021 | prev_disk = template_disk; |
1022 | } | 1022 | } |
1023 | 1023 | ||
1024 | return template_disk; | 1024 | return template_disk; |
1025 | 1025 | ||
1026 | no_integrity: | 1026 | no_integrity: |
1027 | if (prev_disk) | 1027 | if (prev_disk) |
1028 | DMWARN("%s: integrity not set: %s and %s profile mismatch", | 1028 | DMWARN("%s: integrity not set: %s and %s profile mismatch", |
1029 | dm_device_name(t->md), | 1029 | dm_device_name(t->md), |
1030 | prev_disk->disk_name, | 1030 | prev_disk->disk_name, |
1031 | template_disk->disk_name); | 1031 | template_disk->disk_name); |
1032 | return NULL; | 1032 | return NULL; |
1033 | } | 1033 | } |
1034 | 1034 | ||
1035 | /* | 1035 | /* |
1036 | * Register the mapped device for blk_integrity support if | 1036 | * Register the mapped device for blk_integrity support if |
1037 | * the underlying devices have an integrity profile. But all devices | 1037 | * the underlying devices have an integrity profile. But all devices |
1038 | * may not have matching profiles (checking all devices isn't reliable | 1038 | * may not have matching profiles (checking all devices isn't reliable |
1039 | * during table load because this table may use other DM device(s) which | 1039 | * during table load because this table may use other DM device(s) which |
1040 | * must be resumed before they will have an initialized integrity profile). | 1040 | * must be resumed before they will have an initialized integrity profile). |
1041 | * Stacked DM devices force a 2 stage integrity profile validation: | 1041 | * Stacked DM devices force a 2 stage integrity profile validation: |
1042 | * 1 - during load, validate all initialized integrity profiles match | 1042 | * 1 - during load, validate all initialized integrity profiles match |
1043 | * 2 - during resume, validate all integrity profiles match | 1043 | * 2 - during resume, validate all integrity profiles match |
1044 | */ | 1044 | */ |
1045 | static int dm_table_prealloc_integrity(struct dm_table *t, struct mapped_device *md) | 1045 | static int dm_table_prealloc_integrity(struct dm_table *t, struct mapped_device *md) |
1046 | { | 1046 | { |
1047 | struct gendisk *template_disk = NULL; | 1047 | struct gendisk *template_disk = NULL; |
1048 | 1048 | ||
1049 | template_disk = dm_table_get_integrity_disk(t, false); | 1049 | template_disk = dm_table_get_integrity_disk(t, false); |
1050 | if (!template_disk) | 1050 | if (!template_disk) |
1051 | return 0; | 1051 | return 0; |
1052 | 1052 | ||
1053 | if (!blk_integrity_is_initialized(dm_disk(md))) { | 1053 | if (!blk_integrity_is_initialized(dm_disk(md))) { |
1054 | t->integrity_supported = 1; | 1054 | t->integrity_supported = 1; |
1055 | return blk_integrity_register(dm_disk(md), NULL); | 1055 | return blk_integrity_register(dm_disk(md), NULL); |
1056 | } | 1056 | } |
1057 | 1057 | ||
1058 | /* | 1058 | /* |
1059 | * If DM device already has an initialized integrity | 1059 | * If DM device already has an initialized integrity |
1060 | * profile the new profile should not conflict. | 1060 | * profile the new profile should not conflict. |
1061 | */ | 1061 | */ |
1062 | if (blk_integrity_is_initialized(template_disk) && | 1062 | if (blk_integrity_is_initialized(template_disk) && |
1063 | blk_integrity_compare(dm_disk(md), template_disk) < 0) { | 1063 | blk_integrity_compare(dm_disk(md), template_disk) < 0) { |
1064 | DMWARN("%s: conflict with existing integrity profile: " | 1064 | DMWARN("%s: conflict with existing integrity profile: " |
1065 | "%s profile mismatch", | 1065 | "%s profile mismatch", |
1066 | dm_device_name(t->md), | 1066 | dm_device_name(t->md), |
1067 | template_disk->disk_name); | 1067 | template_disk->disk_name); |
1068 | return 1; | 1068 | return 1; |
1069 | } | 1069 | } |
1070 | 1070 | ||
1071 | /* Preserve existing initialized integrity profile */ | 1071 | /* Preserve existing initialized integrity profile */ |
1072 | t->integrity_supported = 1; | 1072 | t->integrity_supported = 1; |
1073 | return 0; | 1073 | return 0; |
1074 | } | 1074 | } |
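
A userspace model of the two-pass rule described above may help. The integer encoding is invented purely for illustration: 0 stands for no integrity profile, -1 for a profile that is allocated but not yet initialized (e.g. a stacked DM device that has not been resumed), and a positive value for an initialized profile type. The load-time pass tolerates -1; the resume-time pass does not.

/* Toy model of the two-stage integrity validation; encoding is invented:
 * 0 = no profile, -1 = allocated but uninitialized, >0 = initialized type. */
#include <stdio.h>

/* Returns the template "type" if consistent, 0 if integrity must be off. */
static int pick_template(const int *profiles, int n, int match_all)
{
        int i, tmpl = 0;

        for (i = 0; i < n; i++) {
                if (profiles[i] == 0)
                        return 0;               /* a device has no profile */
                if (!match_all && profiles[i] == -1)
                        continue;               /* load time: skip uninitialized */
                if (tmpl > 0 && profiles[i] != tmpl)
                        return 0;               /* profiles disagree */
                tmpl = profiles[i];
        }
        return tmpl;
}

int main(void)
{
        int load_time[]  = { 7, -1, 7 };        /* stacked dev not resumed yet */
        int resumed_ok[] = { 7,  7, 7 };
        int mismatch[]   = { 7,  5, 7 };

        printf("%d %d %d\n",
               pick_template(load_time, 3, 0),  /* 7: accepted at load   */
               pick_template(resumed_ok, 3, 1), /* 7: accepted at resume */
               pick_template(mismatch, 3, 1));  /* 0: integrity dropped  */
        return 0;
}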
1075 | 1075 | ||
1076 | /* | 1076 | /* |
1077 | * Prepares the table for use by building the indices, | 1077 | * Prepares the table for use by building the indices, |
1078 | * setting the type, and allocating mempools. | 1078 | * setting the type, and allocating mempools. |
1079 | */ | 1079 | */ |
1080 | int dm_table_complete(struct dm_table *t) | 1080 | int dm_table_complete(struct dm_table *t) |
1081 | { | 1081 | { |
1082 | int r; | 1082 | int r; |
1083 | 1083 | ||
1084 | r = dm_table_set_type(t); | 1084 | r = dm_table_set_type(t); |
1085 | if (r) { | 1085 | if (r) { |
1086 | DMERR("unable to set table type"); | 1086 | DMERR("unable to set table type"); |
1087 | return r; | 1087 | return r; |
1088 | } | 1088 | } |
1089 | 1089 | ||
1090 | r = dm_table_build_index(t); | 1090 | r = dm_table_build_index(t); |
1091 | if (r) { | 1091 | if (r) { |
1092 | DMERR("unable to build btrees"); | 1092 | DMERR("unable to build btrees"); |
1093 | return r; | 1093 | return r; |
1094 | } | 1094 | } |
1095 | 1095 | ||
1096 | r = dm_table_prealloc_integrity(t, t->md); | 1096 | r = dm_table_prealloc_integrity(t, t->md); |
1097 | if (r) { | 1097 | if (r) { |
1098 | DMERR("could not register integrity profile."); | 1098 | DMERR("could not register integrity profile."); |
1099 | return r; | 1099 | return r; |
1100 | } | 1100 | } |
1101 | 1101 | ||
1102 | r = dm_table_alloc_md_mempools(t); | 1102 | r = dm_table_alloc_md_mempools(t); |
1103 | if (r) | 1103 | if (r) |
1104 | DMERR("unable to allocate mempools"); | 1104 | DMERR("unable to allocate mempools"); |
1105 | 1105 | ||
1106 | return r; | 1106 | return r; |
1107 | } | 1107 | } |
1108 | 1108 | ||
1109 | static DEFINE_MUTEX(_event_lock); | 1109 | static DEFINE_MUTEX(_event_lock); |
1110 | void dm_table_event_callback(struct dm_table *t, | 1110 | void dm_table_event_callback(struct dm_table *t, |
1111 | void (*fn)(void *), void *context) | 1111 | void (*fn)(void *), void *context) |
1112 | { | 1112 | { |
1113 | mutex_lock(&_event_lock); | 1113 | mutex_lock(&_event_lock); |
1114 | t->event_fn = fn; | 1114 | t->event_fn = fn; |
1115 | t->event_context = context; | 1115 | t->event_context = context; |
1116 | mutex_unlock(&_event_lock); | 1116 | mutex_unlock(&_event_lock); |
1117 | } | 1117 | } |
1118 | 1118 | ||
1119 | void dm_table_event(struct dm_table *t) | 1119 | void dm_table_event(struct dm_table *t) |
1120 | { | 1120 | { |
1121 | /* | 1121 | /* |
1122 | * You can no longer call dm_table_event() from interrupt | 1122 | * You can no longer call dm_table_event() from interrupt |
1123 | * context; use a bottom half instead. | 1123 | * context; use a bottom half instead. |
1124 | */ | 1124 | */ |
1125 | BUG_ON(in_interrupt()); | 1125 | BUG_ON(in_interrupt()); |
1126 | 1126 | ||
1127 | mutex_lock(&_event_lock); | 1127 | mutex_lock(&_event_lock); |
1128 | if (t->event_fn) | 1128 | if (t->event_fn) |
1129 | t->event_fn(t->event_context); | 1129 | t->event_fn(t->event_context); |
1130 | mutex_unlock(&_event_lock); | 1130 | mutex_unlock(&_event_lock); |
1131 | } | 1131 | } |
1132 | EXPORT_SYMBOL(dm_table_event); | 1132 | EXPORT_SYMBOL(dm_table_event); |
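
dm_table_event_callback() and dm_table_event() form a register-then-fire callback pair serialized by _event_lock, so an event can never observe a half-updated fn/context pair. The sketch below reproduces the pattern in userspace with a pthread mutex standing in for the kernel mutex (build with -pthread); all names here are illustrative.

/* Userspace sketch of the mutex-protected callback registration pattern. */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t event_lock = PTHREAD_MUTEX_INITIALIZER;
static void (*event_fn)(void *);
static void *event_context;

static void set_callback(void (*fn)(void *), void *ctx)
{
        pthread_mutex_lock(&event_lock);
        event_fn = fn;
        event_context = ctx;
        pthread_mutex_unlock(&event_lock);
}

static void fire_event(void)
{
        pthread_mutex_lock(&event_lock);
        if (event_fn)
                event_fn(event_context);        /* cannot race set_callback() */
        pthread_mutex_unlock(&event_lock);
}

static void on_event(void *ctx)
{
        printf("event for %s\n", (char *)ctx);
}

int main(void)
{
        fire_event();                   /* no callback yet: nothing happens */
        set_callback(on_event, "dm-0");
        fire_event();                   /* prints "event for dm-0" */
        return 0;
}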
1133 | 1133 | ||
1134 | sector_t dm_table_get_size(struct dm_table *t) | 1134 | sector_t dm_table_get_size(struct dm_table *t) |
1135 | { | 1135 | { |
1136 | return t->num_targets ? (t->highs[t->num_targets - 1] + 1) : 0; | 1136 | return t->num_targets ? (t->highs[t->num_targets - 1] + 1) : 0; |
1137 | } | 1137 | } |
1138 | EXPORT_SYMBOL(dm_table_get_size); | 1138 | EXPORT_SYMBOL(dm_table_get_size); |
1139 | 1139 | ||
1140 | struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index) | 1140 | struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index) |
1141 | { | 1141 | { |
1142 | if (index >= t->num_targets) | 1142 | if (index >= t->num_targets) |
1143 | return NULL; | 1143 | return NULL; |
1144 | 1144 | ||
1145 | return t->targets + index; | 1145 | return t->targets + index; |
1146 | } | 1146 | } |
1147 | 1147 | ||
1148 | /* | 1148 | /* |
1149 | * Search the btree for the correct target. | 1149 | * Search the btree for the correct target. |
1150 | * | 1150 | * |
1151 | * Caller should check returned pointer with dm_target_is_valid() | 1151 | * Caller should check returned pointer with dm_target_is_valid() |
1152 | * to trap I/O beyond end of device. | 1152 | * to trap I/O beyond end of device. |
1153 | */ | 1153 | */ |
1154 | struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector) | 1154 | struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector) |
1155 | { | 1155 | { |
1156 | unsigned int l, n = 0, k = 0; | 1156 | unsigned int l, n = 0, k = 0; |
1157 | sector_t *node; | 1157 | sector_t *node; |
1158 | 1158 | ||
1159 | for (l = 0; l < t->depth; l++) { | 1159 | for (l = 0; l < t->depth; l++) { |
1160 | n = get_child(n, k); | 1160 | n = get_child(n, k); |
1161 | node = get_node(t, l, n); | 1161 | node = get_node(t, l, n); |
1162 | 1162 | ||
1163 | for (k = 0; k < KEYS_PER_NODE; k++) | 1163 | for (k = 0; k < KEYS_PER_NODE; k++) |
1164 | if (node[k] >= sector) | 1164 | if (node[k] >= sector) |
1165 | break; | 1165 | break; |
1166 | } | 1166 | } |
1167 | 1167 | ||
1168 | return &t->targets[(KEYS_PER_NODE * n) + k]; | 1168 | return &t->targets[(KEYS_PER_NODE * n) + k]; |
1169 | } | 1169 | } |
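
What the index actually stores is each target's highest sector (t->highs), so a lookup is simply "first entry whose key is >= sector". The userspace sketch below shows only the flat, depth-one case with made-up sector ranges; the real walk above descends one level per iteration via get_child()/get_node().

/* Sketch of the lookup idea only (depth-one case); sector ranges are made up. */
#include <stdio.h>

static unsigned find_target(const unsigned long long *highs,
                            unsigned num_targets, unsigned long long sector)
{
        unsigned i;

        for (i = 0; i < num_targets; i++)
                if (highs[i] >= sector)         /* first key >= sector */
                        break;
        return i;       /* i == num_targets means "beyond end of device" */
}

int main(void)
{
        /* three targets covering sectors 0-99, 100-199 and 200-1023 */
        unsigned long long highs[] = { 99, 199, 1023 };

        printf("%u\n", find_target(highs, 3, 0));       /* 0 */
        printf("%u\n", find_target(highs, 3, 150));     /* 1 */
        printf("%u\n", find_target(highs, 3, 2048));    /* 3 -> invalid */
        return 0;
}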
1170 | 1170 | ||
1171 | /* | 1171 | /* |
1172 | * Establish the new table's queue_limits and validate them. | 1172 | * Establish the new table's queue_limits and validate them. |
1173 | */ | 1173 | */ |
1174 | int dm_calculate_queue_limits(struct dm_table *table, | 1174 | int dm_calculate_queue_limits(struct dm_table *table, |
1175 | struct queue_limits *limits) | 1175 | struct queue_limits *limits) |
1176 | { | 1176 | { |
1177 | struct dm_target *uninitialized_var(ti); | 1177 | struct dm_target *uninitialized_var(ti); |
1178 | struct queue_limits ti_limits; | 1178 | struct queue_limits ti_limits; |
1179 | unsigned i = 0; | 1179 | unsigned i = 0; |
1180 | 1180 | ||
1181 | blk_set_default_limits(limits); | 1181 | blk_set_default_limits(limits); |
1182 | 1182 | ||
1183 | while (i < dm_table_get_num_targets(table)) { | 1183 | while (i < dm_table_get_num_targets(table)) { |
1184 | blk_set_default_limits(&ti_limits); | 1184 | blk_set_default_limits(&ti_limits); |
1185 | 1185 | ||
1186 | ti = dm_table_get_target(table, i++); | 1186 | ti = dm_table_get_target(table, i++); |
1187 | 1187 | ||
1188 | if (!ti->type->iterate_devices) | 1188 | if (!ti->type->iterate_devices) |
1189 | goto combine_limits; | 1189 | goto combine_limits; |
1190 | 1190 | ||
1191 | /* | 1191 | /* |
1192 | * Combine queue limits of all the devices this target uses. | 1192 | * Combine queue limits of all the devices this target uses. |
1193 | */ | 1193 | */ |
1194 | ti->type->iterate_devices(ti, dm_set_device_limits, | 1194 | ti->type->iterate_devices(ti, dm_set_device_limits, |
1195 | &ti_limits); | 1195 | &ti_limits); |
1196 | 1196 | ||
1197 | /* Set I/O hints portion of queue limits */ | 1197 | /* Set I/O hints portion of queue limits */ |
1198 | if (ti->type->io_hints) | 1198 | if (ti->type->io_hints) |
1199 | ti->type->io_hints(ti, &ti_limits); | 1199 | ti->type->io_hints(ti, &ti_limits); |
1200 | 1200 | ||
1201 | /* | 1201 | /* |
1202 | * Check each device area is consistent with the target's | 1202 | * Check each device area is consistent with the target's |
1203 | * overall queue limits. | 1203 | * overall queue limits. |
1204 | */ | 1204 | */ |
1205 | if (ti->type->iterate_devices(ti, device_area_is_invalid, | 1205 | if (ti->type->iterate_devices(ti, device_area_is_invalid, |
1206 | &ti_limits)) | 1206 | &ti_limits)) |
1207 | return -EINVAL; | 1207 | return -EINVAL; |
1208 | 1208 | ||
1209 | combine_limits: | 1209 | combine_limits: |
1210 | /* | 1210 | /* |
1211 | * Merge this target's queue limits into the overall limits | 1211 | * Merge this target's queue limits into the overall limits |
1212 | * for the table. | 1212 | * for the table. |
1213 | */ | 1213 | */ |
1214 | if (blk_stack_limits(limits, &ti_limits, 0) < 0) | 1214 | if (blk_stack_limits(limits, &ti_limits, 0) < 0) |
1215 | DMWARN("%s: adding target device " | 1215 | DMWARN("%s: adding target device " |
1216 | "(start sect %llu len %llu) " | 1216 | "(start sect %llu len %llu) " |
1217 | "caused an alignment inconsistency", | 1217 | "caused an alignment inconsistency", |
1218 | dm_device_name(table->md), | 1218 | dm_device_name(table->md), |
1219 | (unsigned long long) ti->begin, | 1219 | (unsigned long long) ti->begin, |
1220 | (unsigned long long) ti->len); | 1220 | (unsigned long long) ti->len); |
1221 | } | 1221 | } |
1222 | 1222 | ||
1223 | return validate_hardware_logical_block_alignment(table, limits); | 1223 | return validate_hardware_logical_block_alignment(table, limits); |
1224 | } | 1224 | } |
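
The stacking step deserves a small illustration. As a rule of thumb, restrictive limits (such as the largest allowed I/O) take the minimum across devices while granularities (such as the logical block size) take the coarser value; the real blk_stack_limits() covers many more fields plus alignment offsets, so the two-field model below, with made-up field names, is only a sketch of the idea.

/* Toy two-field model of limit stacking; the fields and rules are
 * simplified stand-ins for struct queue_limits / blk_stack_limits(). */
#include <stdio.h>

struct limits {
        unsigned max_sectors;           /* most restrictive (minimum) wins */
        unsigned logical_block_size;    /* coarsest granularity wins,
                                           assuming power-of-two sizes */
};

static void stack(struct limits *t, const struct limits *dev)
{
        if (dev->max_sectors < t->max_sectors)
                t->max_sectors = dev->max_sectors;
        if (dev->logical_block_size > t->logical_block_size)
                t->logical_block_size = dev->logical_block_size;
}

int main(void)
{
        struct limits table = { 0xffffffffu, 512 };     /* "default" limits */
        struct limits ssd   = { 2048, 512 };
        struct limits raid  = { 1024, 4096 };

        stack(&table, &ssd);
        stack(&table, &raid);
        printf("max_sectors=%u logical_block_size=%u\n",
               table.max_sectors, table.logical_block_size);
        /* -> max_sectors=1024 logical_block_size=4096 */
        return 0;
}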
1225 | 1225 | ||
1226 | /* | 1226 | /* |
1227 | * Set the integrity profile for this device if all devices used have | 1227 | * Set the integrity profile for this device if all devices used have |
1228 | * matching profiles. We're quite deep in the resume path but still | 1228 | * matching profiles. We're quite deep in the resume path but still |
1229 | * don't know if all devices (particularly DM devices this device | 1229 | * don't know if all devices (particularly DM devices this device |
1230 | * may be stacked on) have matching profiles. Even if the profiles | 1230 | * may be stacked on) have matching profiles. Even if the profiles |
1231 | * don't match we have no way to fail (to resume) at this point. | 1231 | * don't match we have no way to fail (to resume) at this point. |
1232 | */ | 1232 | */ |
1233 | static void dm_table_set_integrity(struct dm_table *t) | 1233 | static void dm_table_set_integrity(struct dm_table *t) |
1234 | { | 1234 | { |
1235 | struct gendisk *template_disk = NULL; | 1235 | struct gendisk *template_disk = NULL; |
1236 | 1236 | ||
1237 | if (!blk_get_integrity(dm_disk(t->md))) | 1237 | if (!blk_get_integrity(dm_disk(t->md))) |
1238 | return; | 1238 | return; |
1239 | 1239 | ||
1240 | template_disk = dm_table_get_integrity_disk(t, true); | 1240 | template_disk = dm_table_get_integrity_disk(t, true); |
1241 | if (!template_disk && | 1241 | if (!template_disk && |
1242 | blk_integrity_is_initialized(dm_disk(t->md))) { | 1242 | blk_integrity_is_initialized(dm_disk(t->md))) { |
1243 | DMWARN("%s: device no longer has a valid integrity profile", | 1243 | DMWARN("%s: device no longer has a valid integrity profile", |
1244 | dm_device_name(t->md)); | 1244 | dm_device_name(t->md)); |
1245 | return; | 1245 | return; |
1246 | } | 1246 | } |
1247 | blk_integrity_register(dm_disk(t->md), | 1247 | blk_integrity_register(dm_disk(t->md), |
1248 | blk_get_integrity(template_disk)); | 1248 | blk_get_integrity(template_disk)); |
1249 | } | 1249 | } |
1250 | 1250 | ||
1251 | static int device_flush_capable(struct dm_target *ti, struct dm_dev *dev, | ||
1252 | sector_t start, sector_t len, void *data) | ||
1253 | { | ||
1254 | unsigned flush = (*(unsigned *)data); | ||
1255 | struct request_queue *q = bdev_get_queue(dev->bdev); | ||
1256 | |||
1257 | return q && (q->flush_flags & flush); | ||
1258 | } | ||
1259 | |||
1260 | static bool dm_table_supports_flush(struct dm_table *t, unsigned flush) | ||
1261 | { | ||
1262 | struct dm_target *ti; | ||
1263 | unsigned i = 0; | ||
1264 | |||
1265 | /* | ||
1266 | * Require at least one underlying device to support flushes. | ||
1267 | * t->devices includes internal dm devices such as mirror logs | ||
1268 | * so we need to use iterate_devices here, which targets | ||
1269 | * supporting flushes must provide. | ||
1270 | */ | ||
1271 | while (i < dm_table_get_num_targets(t)) { | ||
1272 | ti = dm_table_get_target(t, i++); | ||
1273 | |||
1274 | if (!ti->num_flush_requests) | ||
1275 | continue; | ||
1276 | |||
1277 | if (ti->type->iterate_devices && | ||
1278 | ti->type->iterate_devices(ti, device_flush_capable, &flush)) | ||
1279 | return 1; | ||
1280 | } | ||
1281 | |||
1282 | return 0; | ||
1283 | } | ||
1284 | |||
1251 | void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, | 1285 | void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, |
1252 | struct queue_limits *limits) | 1286 | struct queue_limits *limits) |
1253 | { | 1287 | { |
1288 | unsigned flush = 0; | ||
1289 | |||
1254 | /* | 1290 | /* |
1255 | * Copy table's limits to the DM device's request_queue | 1291 | * Copy table's limits to the DM device's request_queue |
1256 | */ | 1292 | */ |
1257 | q->limits = *limits; | 1293 | q->limits = *limits; |
1258 | 1294 | ||
1259 | if (!dm_table_supports_discards(t)) | 1295 | if (!dm_table_supports_discards(t)) |
1260 | queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q); | 1296 | queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q); |
1261 | else | 1297 | else |
1262 | queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); | 1298 | queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); |
1299 | |||
1300 | if (dm_table_supports_flush(t, REQ_FLUSH)) { | ||
1301 | flush |= REQ_FLUSH; | ||
1302 | if (dm_table_supports_flush(t, REQ_FUA)) | ||
1303 | flush |= REQ_FUA; | ||
1304 | } | ||
1305 | blk_queue_flush(q, flush); | ||
1263 | 1306 | ||
1264 | dm_table_set_integrity(t); | 1307 | dm_table_set_integrity(t); |
1265 | 1308 | ||
1266 | /* | 1309 | /* |
1267 | * QUEUE_FLAG_STACKABLE must be set after all queue settings are | 1310 | * QUEUE_FLAG_STACKABLE must be set after all queue settings are |
1268 | * visible to other CPUs because, once the flag is set, incoming bios | 1311 | * visible to other CPUs because, once the flag is set, incoming bios |
1269 | * are processed by request-based dm, which refers to the queue | 1312 | * are processed by request-based dm, which refers to the queue |
1270 | * settings. | 1313 | * settings. |
1271 | * Until the flag is set, bios are passed to bio-based dm and queued to | 1314 | * Until the flag is set, bios are passed to bio-based dm and queued to |
1272 | * md->deferred where queue settings are not needed yet. | 1315 | * md->deferred where queue settings are not needed yet. |
1273 | * Those bios are passed to request-based dm at the resume time. | 1316 | * Those bios are passed to request-based dm at the resume time. |
1274 | */ | 1317 | */ |
1275 | smp_mb(); | 1318 | smp_mb(); |
1276 | if (dm_table_request_based(t)) | 1319 | if (dm_table_request_based(t)) |
1277 | queue_flag_set_unlocked(QUEUE_FLAG_STACKABLE, q); | 1320 | queue_flag_set_unlocked(QUEUE_FLAG_STACKABLE, q); |
1278 | } | 1321 | } |
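
Putting the two new hunks together: the table advertises REQ_FLUSH only if at least one underlying device reports it (reached through iterate_devices, because t->devices also contains internal devices such as mirror logs), and REQ_FUA is only considered once REQ_FLUSH is known to be supported. The sketch below models that decision in plain userspace C; the flag values and the per-device flags array stand in for q->flush_flags and are not the kernel's definitions.

/* Userspace model of the flush-capability decision; flag encodings and
 * per-device flags are illustrative stand-ins. */
#include <stdio.h>

#define REQ_FLUSH (1u << 0)     /* illustrative values only */
#define REQ_FUA   (1u << 1)

static int any_device_capable(const unsigned *dev_flags, unsigned n,
                              unsigned flush)
{
        unsigned i;

        for (i = 0; i < n; i++)
                if (dev_flags[i] & flush)       /* like device_flush_capable() */
                        return 1;
        return 0;
}

static unsigned table_flush(const unsigned *dev_flags, unsigned n)
{
        unsigned flush = 0;

        if (any_device_capable(dev_flags, n, REQ_FLUSH)) {
                flush |= REQ_FLUSH;
                if (any_device_capable(dev_flags, n, REQ_FUA))
                        flush |= REQ_FUA;
        }
        return flush;   /* what would be handed to blk_queue_flush() */
}

int main(void)
{
        unsigned no_cache[]   = { 0, 0 };                /* no write caches  */
        unsigned flush_only[] = { REQ_FLUSH, 0 };        /* one cached device */
        unsigned fua_too[]    = { REQ_FLUSH | REQ_FUA, 0 };

        printf("%u %u %u\n",
               table_flush(no_cache, 2),        /* 0: no flushes advertised */
               table_flush(flush_only, 2),      /* 1: REQ_FLUSH only        */
               table_flush(fua_too, 2));        /* 3: REQ_FLUSH | REQ_FUA   */
        return 0;
}

The first case is exactly the performance fix described in the commit message: when no underlying device has a write cache, the DM device now stops advertising flush support instead of paying for flush merging it never needed.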
1279 | 1322 | ||
1280 | unsigned int dm_table_get_num_targets(struct dm_table *t) | 1323 | unsigned int dm_table_get_num_targets(struct dm_table *t) |
1281 | { | 1324 | { |
1282 | return t->num_targets; | 1325 | return t->num_targets; |
1283 | } | 1326 | } |
1284 | 1327 | ||
1285 | struct list_head *dm_table_get_devices(struct dm_table *t) | 1328 | struct list_head *dm_table_get_devices(struct dm_table *t) |
1286 | { | 1329 | { |
1287 | return &t->devices; | 1330 | return &t->devices; |
1288 | } | 1331 | } |
1289 | 1332 | ||
1290 | fmode_t dm_table_get_mode(struct dm_table *t) | 1333 | fmode_t dm_table_get_mode(struct dm_table *t) |
1291 | { | 1334 | { |
1292 | return t->mode; | 1335 | return t->mode; |
1293 | } | 1336 | } |
1294 | EXPORT_SYMBOL(dm_table_get_mode); | 1337 | EXPORT_SYMBOL(dm_table_get_mode); |
1295 | 1338 | ||
1296 | static void suspend_targets(struct dm_table *t, unsigned postsuspend) | 1339 | static void suspend_targets(struct dm_table *t, unsigned postsuspend) |
1297 | { | 1340 | { |
1298 | int i = t->num_targets; | 1341 | int i = t->num_targets; |
1299 | struct dm_target *ti = t->targets; | 1342 | struct dm_target *ti = t->targets; |
1300 | 1343 | ||
1301 | while (i--) { | 1344 | while (i--) { |
1302 | if (postsuspend) { | 1345 | if (postsuspend) { |
1303 | if (ti->type->postsuspend) | 1346 | if (ti->type->postsuspend) |
1304 | ti->type->postsuspend(ti); | 1347 | ti->type->postsuspend(ti); |
1305 | } else if (ti->type->presuspend) | 1348 | } else if (ti->type->presuspend) |
1306 | ti->type->presuspend(ti); | 1349 | ti->type->presuspend(ti); |
1307 | 1350 | ||
1308 | ti++; | 1351 | ti++; |
1309 | } | 1352 | } |
1310 | } | 1353 | } |
1311 | 1354 | ||
1312 | void dm_table_presuspend_targets(struct dm_table *t) | 1355 | void dm_table_presuspend_targets(struct dm_table *t) |
1313 | { | 1356 | { |
1314 | if (!t) | 1357 | if (!t) |
1315 | return; | 1358 | return; |
1316 | 1359 | ||
1317 | suspend_targets(t, 0); | 1360 | suspend_targets(t, 0); |
1318 | } | 1361 | } |
1319 | 1362 | ||
1320 | void dm_table_postsuspend_targets(struct dm_table *t) | 1363 | void dm_table_postsuspend_targets(struct dm_table *t) |
1321 | { | 1364 | { |
1322 | if (!t) | 1365 | if (!t) |
1323 | return; | 1366 | return; |
1324 | 1367 | ||
1325 | suspend_targets(t, 1); | 1368 | suspend_targets(t, 1); |
1326 | } | 1369 | } |
1327 | 1370 | ||
1328 | int dm_table_resume_targets(struct dm_table *t) | 1371 | int dm_table_resume_targets(struct dm_table *t) |
1329 | { | 1372 | { |
1330 | int i, r = 0; | 1373 | int i, r = 0; |
1331 | 1374 | ||
1332 | for (i = 0; i < t->num_targets; i++) { | 1375 | for (i = 0; i < t->num_targets; i++) { |
1333 | struct dm_target *ti = t->targets + i; | 1376 | struct dm_target *ti = t->targets + i; |
1334 | 1377 | ||
1335 | if (!ti->type->preresume) | 1378 | if (!ti->type->preresume) |
1336 | continue; | 1379 | continue; |
1337 | 1380 | ||
1338 | r = ti->type->preresume(ti); | 1381 | r = ti->type->preresume(ti); |
1339 | if (r) | 1382 | if (r) |
1340 | return r; | 1383 | return r; |
1341 | } | 1384 | } |
1342 | 1385 | ||
1343 | for (i = 0; i < t->num_targets; i++) { | 1386 | for (i = 0; i < t->num_targets; i++) { |
1344 | struct dm_target *ti = t->targets + i; | 1387 | struct dm_target *ti = t->targets + i; |
1345 | 1388 | ||
1346 | if (ti->type->resume) | 1389 | if (ti->type->resume) |
1347 | ti->type->resume(ti); | 1390 | ti->type->resume(ti); |
1348 | } | 1391 | } |
1349 | 1392 | ||
1350 | return 0; | 1393 | return 0; |
1351 | } | 1394 | } |
1352 | 1395 | ||
1353 | void dm_table_add_target_callbacks(struct dm_table *t, struct dm_target_callbacks *cb) | 1396 | void dm_table_add_target_callbacks(struct dm_table *t, struct dm_target_callbacks *cb) |
1354 | { | 1397 | { |
1355 | list_add(&cb->list, &t->target_callbacks); | 1398 | list_add(&cb->list, &t->target_callbacks); |
1356 | } | 1399 | } |
1357 | EXPORT_SYMBOL_GPL(dm_table_add_target_callbacks); | 1400 | EXPORT_SYMBOL_GPL(dm_table_add_target_callbacks); |
1358 | 1401 | ||
1359 | int dm_table_any_congested(struct dm_table *t, int bdi_bits) | 1402 | int dm_table_any_congested(struct dm_table *t, int bdi_bits) |
1360 | { | 1403 | { |
1361 | struct dm_dev_internal *dd; | 1404 | struct dm_dev_internal *dd; |
1362 | struct list_head *devices = dm_table_get_devices(t); | 1405 | struct list_head *devices = dm_table_get_devices(t); |
1363 | struct dm_target_callbacks *cb; | 1406 | struct dm_target_callbacks *cb; |
1364 | int r = 0; | 1407 | int r = 0; |
1365 | 1408 | ||
1366 | list_for_each_entry(dd, devices, list) { | 1409 | list_for_each_entry(dd, devices, list) { |
1367 | struct request_queue *q = bdev_get_queue(dd->dm_dev.bdev); | 1410 | struct request_queue *q = bdev_get_queue(dd->dm_dev.bdev); |
1368 | char b[BDEVNAME_SIZE]; | 1411 | char b[BDEVNAME_SIZE]; |
1369 | 1412 | ||
1370 | if (likely(q)) | 1413 | if (likely(q)) |
1371 | r |= bdi_congested(&q->backing_dev_info, bdi_bits); | 1414 | r |= bdi_congested(&q->backing_dev_info, bdi_bits); |
1372 | else | 1415 | else |
1373 | DMWARN_LIMIT("%s: any_congested: nonexistent device %s", | 1416 | DMWARN_LIMIT("%s: any_congested: nonexistent device %s", |
1374 | dm_device_name(t->md), | 1417 | dm_device_name(t->md), |
1375 | bdevname(dd->dm_dev.bdev, b)); | 1418 | bdevname(dd->dm_dev.bdev, b)); |
1376 | } | 1419 | } |
1377 | 1420 | ||
1378 | list_for_each_entry(cb, &t->target_callbacks, list) | 1421 | list_for_each_entry(cb, &t->target_callbacks, list) |
1379 | if (cb->congested_fn) | 1422 | if (cb->congested_fn) |
1380 | r |= cb->congested_fn(cb, bdi_bits); | 1423 | r |= cb->congested_fn(cb, bdi_bits); |
1381 | 1424 | ||
1382 | return r; | 1425 | return r; |
1383 | } | 1426 | } |
1384 | 1427 | ||
1385 | int dm_table_any_busy_target(struct dm_table *t) | 1428 | int dm_table_any_busy_target(struct dm_table *t) |
1386 | { | 1429 | { |
1387 | unsigned i; | 1430 | unsigned i; |
1388 | struct dm_target *ti; | 1431 | struct dm_target *ti; |
1389 | 1432 | ||
1390 | for (i = 0; i < t->num_targets; i++) { | 1433 | for (i = 0; i < t->num_targets; i++) { |
1391 | ti = t->targets + i; | 1434 | ti = t->targets + i; |
1392 | if (ti->type->busy && ti->type->busy(ti)) | 1435 | if (ti->type->busy && ti->type->busy(ti)) |
1393 | return 1; | 1436 | return 1; |
1394 | } | 1437 | } |
1395 | 1438 | ||
1396 | return 0; | 1439 | return 0; |
1397 | } | 1440 | } |
1398 | 1441 | ||
1399 | struct mapped_device *dm_table_get_md(struct dm_table *t) | 1442 | struct mapped_device *dm_table_get_md(struct dm_table *t) |
1400 | { | 1443 | { |
1401 | return t->md; | 1444 | return t->md; |
1402 | } | 1445 | } |
1403 | EXPORT_SYMBOL(dm_table_get_md); | 1446 | EXPORT_SYMBOL(dm_table_get_md); |
1404 | 1447 | ||
1405 | static int device_discard_capable(struct dm_target *ti, struct dm_dev *dev, | 1448 | static int device_discard_capable(struct dm_target *ti, struct dm_dev *dev, |
1406 | sector_t start, sector_t len, void *data) | 1449 | sector_t start, sector_t len, void *data) |
1407 | { | 1450 | { |
1408 | struct request_queue *q = bdev_get_queue(dev->bdev); | 1451 | struct request_queue *q = bdev_get_queue(dev->bdev); |
1409 | 1452 | ||
1410 | return q && blk_queue_discard(q); | 1453 | return q && blk_queue_discard(q); |
1411 | } | 1454 | } |
1412 | 1455 | ||
1413 | bool dm_table_supports_discards(struct dm_table *t) | 1456 | bool dm_table_supports_discards(struct dm_table *t) |
1414 | { | 1457 | { |
1415 | struct dm_target *ti; | 1458 | struct dm_target *ti; |
1416 | unsigned i = 0; | 1459 | unsigned i = 0; |
1417 | 1460 | ||
1418 | /* | 1461 | /* |
1419 | * Unless any target used by the table set discards_supported, | 1462 | * Unless any target used by the table set discards_supported, |
1420 | * require at least one underlying device to support discards. | 1463 | * require at least one underlying device to support discards. |
1421 | * t->devices includes internal dm devices such as mirror logs | 1464 | * t->devices includes internal dm devices such as mirror logs |
1422 | * so we need to use iterate_devices here, which targets | 1465 | * so we need to use iterate_devices here, which targets |
1423 | * supporting discard selectively must provide. | 1466 | * supporting discard selectively must provide. |
1424 | */ | 1467 | */ |
1425 | while (i < dm_table_get_num_targets(t)) { | 1468 | while (i < dm_table_get_num_targets(t)) { |
1426 | ti = dm_table_get_target(t, i++); | 1469 | ti = dm_table_get_target(t, i++); |
1427 | 1470 | ||
1428 | if (!ti->num_discard_requests) | 1471 | if (!ti->num_discard_requests) |
1429 | continue; | 1472 | continue; |
1430 | 1473 | ||
1431 | if (ti->discards_supported) | 1474 | if (ti->discards_supported) |
1432 | return 1; | 1475 | return 1; |
1433 | 1476 | ||
1434 | if (ti->type->iterate_devices && | 1477 | if (ti->type->iterate_devices && |
1435 | ti->type->iterate_devices(ti, device_discard_capable, NULL)) | 1478 | ti->type->iterate_devices(ti, device_discard_capable, NULL)) |
1436 | return 1; | 1479 | return 1; |
1437 | } | 1480 | } |
1438 | 1481 | ||
1439 | return 0; | 1482 | return 0; |
1440 | } | 1483 | } |
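
For comparison with the flush logic just added, discard support has one extra escape hatch: a target may assert discards_supported itself, otherwise at least one of its underlying devices must be discard-capable. A minimal sketch of that policy follows, with invented fields standing in for the target and for the result of its iterate_devices() scan.

/* Toy policy check mirroring the shape of dm_table_supports_discards();
 * all fields are invented for illustration. */
#include <stdio.h>

struct toy_target {
        int num_discard_requests;       /* 0: target never issues discards */
        int discards_supported;         /* target claims support itself    */
        int any_dev_discard_capable;    /* result of a device scan         */
};

static int table_supports_discards(const struct toy_target *t, int n)
{
        int i;

        for (i = 0; i < n; i++) {
                if (!t[i].num_discard_requests)
                        continue;
                if (t[i].discards_supported || t[i].any_dev_discard_capable)
                        return 1;
        }
        return 0;
}

int main(void)
{
        struct toy_target ts[] = {
                { 0, 0, 1 },    /* doesn't issue discards: ignored  */
                { 1, 0, 0 },    /* issues them, but no capable devs */
        };

        printf("%d\n", table_supports_discards(ts, 2));  /* 0 */
        ts[1].any_dev_discard_capable = 1;
        printf("%d\n", table_supports_discards(ts, 2));  /* 1 */
        return 0;
}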
1441 | 1484 |
drivers/md/dm.c
1 | /* | 1 | /* |
2 | * Copyright (C) 2001, 2002 Sistina Software (UK) Limited. | 2 | * Copyright (C) 2001, 2002 Sistina Software (UK) Limited. |
3 | * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. | 3 | * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. |
4 | * | 4 | * |
5 | * This file is released under the GPL. | 5 | * This file is released under the GPL. |
6 | */ | 6 | */ |
7 | 7 | ||
8 | #include "dm.h" | 8 | #include "dm.h" |
9 | #include "dm-uevent.h" | 9 | #include "dm-uevent.h" |
10 | 10 | ||
11 | #include <linux/init.h> | 11 | #include <linux/init.h> |
12 | #include <linux/module.h> | 12 | #include <linux/module.h> |
13 | #include <linux/mutex.h> | 13 | #include <linux/mutex.h> |
14 | #include <linux/moduleparam.h> | 14 | #include <linux/moduleparam.h> |
15 | #include <linux/blkpg.h> | 15 | #include <linux/blkpg.h> |
16 | #include <linux/bio.h> | 16 | #include <linux/bio.h> |
17 | #include <linux/buffer_head.h> | 17 | #include <linux/buffer_head.h> |
18 | #include <linux/mempool.h> | 18 | #include <linux/mempool.h> |
19 | #include <linux/slab.h> | 19 | #include <linux/slab.h> |
20 | #include <linux/idr.h> | 20 | #include <linux/idr.h> |
21 | #include <linux/hdreg.h> | 21 | #include <linux/hdreg.h> |
22 | #include <linux/delay.h> | 22 | #include <linux/delay.h> |
23 | 23 | ||
24 | #include <trace/events/block.h> | 24 | #include <trace/events/block.h> |
25 | 25 | ||
26 | #define DM_MSG_PREFIX "core" | 26 | #define DM_MSG_PREFIX "core" |
27 | 27 | ||
28 | /* | 28 | /* |
29 | * Cookies are numeric values sent with CHANGE and REMOVE | 29 | * Cookies are numeric values sent with CHANGE and REMOVE |
30 | * uevents while resuming, removing or renaming the device. | 30 | * uevents while resuming, removing or renaming the device. |
31 | */ | 31 | */ |
32 | #define DM_COOKIE_ENV_VAR_NAME "DM_COOKIE" | 32 | #define DM_COOKIE_ENV_VAR_NAME "DM_COOKIE" |
33 | #define DM_COOKIE_LENGTH 24 | 33 | #define DM_COOKIE_LENGTH 24 |
34 | 34 | ||
35 | static const char *_name = DM_NAME; | 35 | static const char *_name = DM_NAME; |
36 | 36 | ||
37 | static unsigned int major = 0; | 37 | static unsigned int major = 0; |
38 | static unsigned int _major = 0; | 38 | static unsigned int _major = 0; |
39 | 39 | ||
40 | static DEFINE_IDR(_minor_idr); | 40 | static DEFINE_IDR(_minor_idr); |
41 | 41 | ||
42 | static DEFINE_SPINLOCK(_minor_lock); | 42 | static DEFINE_SPINLOCK(_minor_lock); |
43 | /* | 43 | /* |
44 | * For bio-based dm. | 44 | * For bio-based dm. |
45 | * One of these is allocated per bio. | 45 | * One of these is allocated per bio. |
46 | */ | 46 | */ |
47 | struct dm_io { | 47 | struct dm_io { |
48 | struct mapped_device *md; | 48 | struct mapped_device *md; |
49 | int error; | 49 | int error; |
50 | atomic_t io_count; | 50 | atomic_t io_count; |
51 | struct bio *bio; | 51 | struct bio *bio; |
52 | unsigned long start_time; | 52 | unsigned long start_time; |
53 | spinlock_t endio_lock; | 53 | spinlock_t endio_lock; |
54 | }; | 54 | }; |
55 | 55 | ||
56 | /* | 56 | /* |
57 | * For bio-based dm. | 57 | * For bio-based dm. |
58 | * One of these is allocated per target within a bio. Hopefully | 58 | * One of these is allocated per target within a bio. Hopefully |
59 | * this will be simplified out one day. | 59 | * this will be simplified out one day. |
60 | */ | 60 | */ |
61 | struct dm_target_io { | 61 | struct dm_target_io { |
62 | struct dm_io *io; | 62 | struct dm_io *io; |
63 | struct dm_target *ti; | 63 | struct dm_target *ti; |
64 | union map_info info; | 64 | union map_info info; |
65 | }; | 65 | }; |
66 | 66 | ||
67 | /* | 67 | /* |
68 | * For request-based dm. | 68 | * For request-based dm. |
69 | * One of these is allocated per request. | 69 | * One of these is allocated per request. |
70 | */ | 70 | */ |
71 | struct dm_rq_target_io { | 71 | struct dm_rq_target_io { |
72 | struct mapped_device *md; | 72 | struct mapped_device *md; |
73 | struct dm_target *ti; | 73 | struct dm_target *ti; |
74 | struct request *orig, clone; | 74 | struct request *orig, clone; |
75 | int error; | 75 | int error; |
76 | union map_info info; | 76 | union map_info info; |
77 | }; | 77 | }; |
78 | 78 | ||
79 | /* | 79 | /* |
80 | * For request-based dm. | 80 | * For request-based dm. |
81 | * One of these is allocated per bio. | 81 | * One of these is allocated per bio. |
82 | */ | 82 | */ |
83 | struct dm_rq_clone_bio_info { | 83 | struct dm_rq_clone_bio_info { |
84 | struct bio *orig; | 84 | struct bio *orig; |
85 | struct dm_rq_target_io *tio; | 85 | struct dm_rq_target_io *tio; |
86 | }; | 86 | }; |
87 | 87 | ||
88 | union map_info *dm_get_mapinfo(struct bio *bio) | 88 | union map_info *dm_get_mapinfo(struct bio *bio) |
89 | { | 89 | { |
90 | if (bio && bio->bi_private) | 90 | if (bio && bio->bi_private) |
91 | return &((struct dm_target_io *)bio->bi_private)->info; | 91 | return &((struct dm_target_io *)bio->bi_private)->info; |
92 | return NULL; | 92 | return NULL; |
93 | } | 93 | } |
94 | 94 | ||
95 | union map_info *dm_get_rq_mapinfo(struct request *rq) | 95 | union map_info *dm_get_rq_mapinfo(struct request *rq) |
96 | { | 96 | { |
97 | if (rq && rq->end_io_data) | 97 | if (rq && rq->end_io_data) |
98 | return &((struct dm_rq_target_io *)rq->end_io_data)->info; | 98 | return &((struct dm_rq_target_io *)rq->end_io_data)->info; |
99 | return NULL; | 99 | return NULL; |
100 | } | 100 | } |
101 | EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo); | 101 | EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo); |
102 | 102 | ||
103 | #define MINOR_ALLOCED ((void *)-1) | 103 | #define MINOR_ALLOCED ((void *)-1) |
104 | 104 | ||
105 | /* | 105 | /* |
106 | * Bits for the md->flags field. | 106 | * Bits for the md->flags field. |
107 | */ | 107 | */ |
108 | #define DMF_BLOCK_IO_FOR_SUSPEND 0 | 108 | #define DMF_BLOCK_IO_FOR_SUSPEND 0 |
109 | #define DMF_SUSPENDED 1 | 109 | #define DMF_SUSPENDED 1 |
110 | #define DMF_FROZEN 2 | 110 | #define DMF_FROZEN 2 |
111 | #define DMF_FREEING 3 | 111 | #define DMF_FREEING 3 |
112 | #define DMF_DELETING 4 | 112 | #define DMF_DELETING 4 |
113 | #define DMF_NOFLUSH_SUSPENDING 5 | 113 | #define DMF_NOFLUSH_SUSPENDING 5 |
114 | #define DMF_MERGE_IS_OPTIONAL 6 | 114 | #define DMF_MERGE_IS_OPTIONAL 6 |
115 | 115 | ||
116 | /* | 116 | /* |
117 | * Work processed by per-device workqueue. | 117 | * Work processed by per-device workqueue. |
118 | */ | 118 | */ |
119 | struct mapped_device { | 119 | struct mapped_device { |
120 | struct rw_semaphore io_lock; | 120 | struct rw_semaphore io_lock; |
121 | struct mutex suspend_lock; | 121 | struct mutex suspend_lock; |
122 | rwlock_t map_lock; | 122 | rwlock_t map_lock; |
123 | atomic_t holders; | 123 | atomic_t holders; |
124 | atomic_t open_count; | 124 | atomic_t open_count; |
125 | 125 | ||
126 | unsigned long flags; | 126 | unsigned long flags; |
127 | 127 | ||
128 | struct request_queue *queue; | 128 | struct request_queue *queue; |
129 | unsigned type; | 129 | unsigned type; |
130 | /* Protect queue and type against concurrent access. */ | 130 | /* Protect queue and type against concurrent access. */ |
131 | struct mutex type_lock; | 131 | struct mutex type_lock; |
132 | 132 | ||
133 | struct gendisk *disk; | 133 | struct gendisk *disk; |
134 | char name[16]; | 134 | char name[16]; |
135 | 135 | ||
136 | void *interface_ptr; | 136 | void *interface_ptr; |
137 | 137 | ||
138 | /* | 138 | /* |
139 | * A list of ios that arrived while we were suspended. | 139 | * A list of ios that arrived while we were suspended. |
140 | */ | 140 | */ |
141 | atomic_t pending[2]; | 141 | atomic_t pending[2]; |
142 | wait_queue_head_t wait; | 142 | wait_queue_head_t wait; |
143 | struct work_struct work; | 143 | struct work_struct work; |
144 | struct bio_list deferred; | 144 | struct bio_list deferred; |
145 | spinlock_t deferred_lock; | 145 | spinlock_t deferred_lock; |
146 | 146 | ||
147 | /* | 147 | /* |
148 | * Processing queue (flush) | 148 | * Processing queue (flush) |
149 | */ | 149 | */ |
150 | struct workqueue_struct *wq; | 150 | struct workqueue_struct *wq; |
151 | 151 | ||
152 | /* | 152 | /* |
153 | * The current mapping. | 153 | * The current mapping. |
154 | */ | 154 | */ |
155 | struct dm_table *map; | 155 | struct dm_table *map; |
156 | 156 | ||
157 | /* | 157 | /* |
158 | * io objects are allocated from here. | 158 | * io objects are allocated from here. |
159 | */ | 159 | */ |
160 | mempool_t *io_pool; | 160 | mempool_t *io_pool; |
161 | mempool_t *tio_pool; | 161 | mempool_t *tio_pool; |
162 | 162 | ||
163 | struct bio_set *bs; | 163 | struct bio_set *bs; |
164 | 164 | ||
165 | /* | 165 | /* |
166 | * Event handling. | 166 | * Event handling. |
167 | */ | 167 | */ |
168 | atomic_t event_nr; | 168 | atomic_t event_nr; |
169 | wait_queue_head_t eventq; | 169 | wait_queue_head_t eventq; |
170 | atomic_t uevent_seq; | 170 | atomic_t uevent_seq; |
171 | struct list_head uevent_list; | 171 | struct list_head uevent_list; |
172 | spinlock_t uevent_lock; /* Protect access to uevent_list */ | 172 | spinlock_t uevent_lock; /* Protect access to uevent_list */ |
173 | 173 | ||
174 | /* | 174 | /* |
175 | * freeze/thaw support requires holding onto a super block | 175 | * freeze/thaw support requires holding onto a super block |
176 | */ | 176 | */ |
177 | struct super_block *frozen_sb; | 177 | struct super_block *frozen_sb; |
178 | struct block_device *bdev; | 178 | struct block_device *bdev; |
179 | 179 | ||
180 | /* forced geometry settings */ | 180 | /* forced geometry settings */ |
181 | struct hd_geometry geometry; | 181 | struct hd_geometry geometry; |
182 | 182 | ||
183 | /* For saving the address of __make_request for request based dm */ | 183 | /* For saving the address of __make_request for request based dm */ |
184 | make_request_fn *saved_make_request_fn; | 184 | make_request_fn *saved_make_request_fn; |
185 | 185 | ||
186 | /* sysfs handle */ | 186 | /* sysfs handle */ |
187 | struct kobject kobj; | 187 | struct kobject kobj; |
188 | 188 | ||
189 | /* zero-length flush that will be cloned and submitted to targets */ | 189 | /* zero-length flush that will be cloned and submitted to targets */ |
190 | struct bio flush_bio; | 190 | struct bio flush_bio; |
191 | }; | 191 | }; |
192 | 192 | ||
193 | /* | 193 | /* |
194 | * For mempools pre-allocation at the table loading time. | 194 | * For mempools pre-allocation at the table loading time. |
195 | */ | 195 | */ |
196 | struct dm_md_mempools { | 196 | struct dm_md_mempools { |
197 | mempool_t *io_pool; | 197 | mempool_t *io_pool; |
198 | mempool_t *tio_pool; | 198 | mempool_t *tio_pool; |
199 | struct bio_set *bs; | 199 | struct bio_set *bs; |
200 | }; | 200 | }; |
201 | 201 | ||
202 | #define MIN_IOS 256 | 202 | #define MIN_IOS 256 |
203 | static struct kmem_cache *_io_cache; | 203 | static struct kmem_cache *_io_cache; |
204 | static struct kmem_cache *_tio_cache; | 204 | static struct kmem_cache *_tio_cache; |
205 | static struct kmem_cache *_rq_tio_cache; | 205 | static struct kmem_cache *_rq_tio_cache; |
206 | static struct kmem_cache *_rq_bio_info_cache; | 206 | static struct kmem_cache *_rq_bio_info_cache; |
207 | 207 | ||
208 | static int __init local_init(void) | 208 | static int __init local_init(void) |
209 | { | 209 | { |
210 | int r = -ENOMEM; | 210 | int r = -ENOMEM; |
211 | 211 | ||
212 | /* allocate a slab for the dm_ios */ | 212 | /* allocate a slab for the dm_ios */ |
213 | _io_cache = KMEM_CACHE(dm_io, 0); | 213 | _io_cache = KMEM_CACHE(dm_io, 0); |
214 | if (!_io_cache) | 214 | if (!_io_cache) |
215 | return r; | 215 | return r; |
216 | 216 | ||
217 | /* allocate a slab for the target ios */ | 217 | /* allocate a slab for the target ios */ |
218 | _tio_cache = KMEM_CACHE(dm_target_io, 0); | 218 | _tio_cache = KMEM_CACHE(dm_target_io, 0); |
219 | if (!_tio_cache) | 219 | if (!_tio_cache) |
220 | goto out_free_io_cache; | 220 | goto out_free_io_cache; |
221 | 221 | ||
222 | _rq_tio_cache = KMEM_CACHE(dm_rq_target_io, 0); | 222 | _rq_tio_cache = KMEM_CACHE(dm_rq_target_io, 0); |
223 | if (!_rq_tio_cache) | 223 | if (!_rq_tio_cache) |
224 | goto out_free_tio_cache; | 224 | goto out_free_tio_cache; |
225 | 225 | ||
226 | _rq_bio_info_cache = KMEM_CACHE(dm_rq_clone_bio_info, 0); | 226 | _rq_bio_info_cache = KMEM_CACHE(dm_rq_clone_bio_info, 0); |
227 | if (!_rq_bio_info_cache) | 227 | if (!_rq_bio_info_cache) |
228 | goto out_free_rq_tio_cache; | 228 | goto out_free_rq_tio_cache; |
229 | 229 | ||
230 | r = dm_uevent_init(); | 230 | r = dm_uevent_init(); |
231 | if (r) | 231 | if (r) |
232 | goto out_free_rq_bio_info_cache; | 232 | goto out_free_rq_bio_info_cache; |
233 | 233 | ||
234 | _major = major; | 234 | _major = major; |
235 | r = register_blkdev(_major, _name); | 235 | r = register_blkdev(_major, _name); |
236 | if (r < 0) | 236 | if (r < 0) |
237 | goto out_uevent_exit; | 237 | goto out_uevent_exit; |
238 | 238 | ||
239 | if (!_major) | 239 | if (!_major) |
240 | _major = r; | 240 | _major = r; |
241 | 241 | ||
242 | return 0; | 242 | return 0; |
243 | 243 | ||
244 | out_uevent_exit: | 244 | out_uevent_exit: |
245 | dm_uevent_exit(); | 245 | dm_uevent_exit(); |
246 | out_free_rq_bio_info_cache: | 246 | out_free_rq_bio_info_cache: |
247 | kmem_cache_destroy(_rq_bio_info_cache); | 247 | kmem_cache_destroy(_rq_bio_info_cache); |
248 | out_free_rq_tio_cache: | 248 | out_free_rq_tio_cache: |
249 | kmem_cache_destroy(_rq_tio_cache); | 249 | kmem_cache_destroy(_rq_tio_cache); |
250 | out_free_tio_cache: | 250 | out_free_tio_cache: |
251 | kmem_cache_destroy(_tio_cache); | 251 | kmem_cache_destroy(_tio_cache); |
252 | out_free_io_cache: | 252 | out_free_io_cache: |
253 | kmem_cache_destroy(_io_cache); | 253 | kmem_cache_destroy(_io_cache); |
254 | 254 | ||
255 | return r; | 255 | return r; |
256 | } | 256 | } |
257 | 257 | ||
258 | static void local_exit(void) | 258 | static void local_exit(void) |
259 | { | 259 | { |
260 | kmem_cache_destroy(_rq_bio_info_cache); | 260 | kmem_cache_destroy(_rq_bio_info_cache); |
261 | kmem_cache_destroy(_rq_tio_cache); | 261 | kmem_cache_destroy(_rq_tio_cache); |
262 | kmem_cache_destroy(_tio_cache); | 262 | kmem_cache_destroy(_tio_cache); |
263 | kmem_cache_destroy(_io_cache); | 263 | kmem_cache_destroy(_io_cache); |
264 | unregister_blkdev(_major, _name); | 264 | unregister_blkdev(_major, _name); |
265 | dm_uevent_exit(); | 265 | dm_uevent_exit(); |
266 | 266 | ||
267 | _major = 0; | 267 | _major = 0; |
268 | 268 | ||
269 | DMINFO("cleaned up"); | 269 | DMINFO("cleaned up"); |
270 | } | 270 | } |
271 | 271 | ||
272 | static int (*_inits[])(void) __initdata = { | 272 | static int (*_inits[])(void) __initdata = { |
273 | local_init, | 273 | local_init, |
274 | dm_target_init, | 274 | dm_target_init, |
275 | dm_linear_init, | 275 | dm_linear_init, |
276 | dm_stripe_init, | 276 | dm_stripe_init, |
277 | dm_io_init, | 277 | dm_io_init, |
278 | dm_kcopyd_init, | 278 | dm_kcopyd_init, |
279 | dm_interface_init, | 279 | dm_interface_init, |
280 | }; | 280 | }; |
281 | 281 | ||
282 | static void (*_exits[])(void) = { | 282 | static void (*_exits[])(void) = { |
283 | local_exit, | 283 | local_exit, |
284 | dm_target_exit, | 284 | dm_target_exit, |
285 | dm_linear_exit, | 285 | dm_linear_exit, |
286 | dm_stripe_exit, | 286 | dm_stripe_exit, |
287 | dm_io_exit, | 287 | dm_io_exit, |
288 | dm_kcopyd_exit, | 288 | dm_kcopyd_exit, |
289 | dm_interface_exit, | 289 | dm_interface_exit, |
290 | }; | 290 | }; |
291 | 291 | ||
292 | static int __init dm_init(void) | 292 | static int __init dm_init(void) |
293 | { | 293 | { |
294 | const int count = ARRAY_SIZE(_inits); | 294 | const int count = ARRAY_SIZE(_inits); |
295 | 295 | ||
296 | int r, i; | 296 | int r, i; |
297 | 297 | ||
298 | for (i = 0; i < count; i++) { | 298 | for (i = 0; i < count; i++) { |
299 | r = _inits[i](); | 299 | r = _inits[i](); |
300 | if (r) | 300 | if (r) |
301 | goto bad; | 301 | goto bad; |
302 | } | 302 | } |
303 | 303 | ||
304 | return 0; | 304 | return 0; |
305 | 305 | ||
306 | bad: | 306 | bad: |
307 | while (i--) | 307 | while (i--) |
308 | _exits[i](); | 308 | _exits[i](); |
309 | 309 | ||
310 | return r; | 310 | return r; |
311 | } | 311 | } |
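
dm_init() and dm_exit() use a common "initialize in order, unwind in reverse on failure" idiom driven by the paired _inits[]/_exits[] arrays. A generic standalone sketch of the same pattern, with made-up subsystem initializers, is below; note how a failure at index i only unwinds the entries that already succeeded.

/* Generic sketch of the paired init/exit table pattern. */
#include <stdio.h>

static int init_a(void) { puts("init a"); return 0; }
static int init_b(void) { puts("init b"); return -1; }  /* simulated failure */
static int init_c(void) { puts("init c"); return 0; }

static void exit_a(void) { puts("exit a"); }
static void exit_b(void) { puts("exit b"); }
static void exit_c(void) { puts("exit c"); }

static int (*inits[])(void) = { init_a, init_b, init_c };
static void (*exits[])(void) = { exit_a, exit_b, exit_c };

#define COUNT (sizeof(inits) / sizeof(inits[0]))

int main(void)
{
        int i, r = 0;

        for (i = 0; i < (int)COUNT; i++) {
                r = inits[i]();
                if (r)
                        goto bad;
        }
        return 0;

bad:    /* undo only what succeeded, in reverse order */
        while (i--)
                exits[i]();
        return r;
}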
312 | 312 | ||
313 | static void __exit dm_exit(void) | 313 | static void __exit dm_exit(void) |
314 | { | 314 | { |
315 | int i = ARRAY_SIZE(_exits); | 315 | int i = ARRAY_SIZE(_exits); |
316 | 316 | ||
317 | while (i--) | 317 | while (i--) |
318 | _exits[i](); | 318 | _exits[i](); |
319 | 319 | ||
320 | /* | 320 | /* |
321 | * Should be empty by this point. | 321 | * Should be empty by this point. |
322 | */ | 322 | */ |
323 | idr_remove_all(&_minor_idr); | 323 | idr_remove_all(&_minor_idr); |
324 | idr_destroy(&_minor_idr); | 324 | idr_destroy(&_minor_idr); |
325 | } | 325 | } |
326 | 326 | ||
327 | /* | 327 | /* |
328 | * Block device functions | 328 | * Block device functions |
329 | */ | 329 | */ |
330 | int dm_deleting_md(struct mapped_device *md) | 330 | int dm_deleting_md(struct mapped_device *md) |
331 | { | 331 | { |
332 | return test_bit(DMF_DELETING, &md->flags); | 332 | return test_bit(DMF_DELETING, &md->flags); |
333 | } | 333 | } |
334 | 334 | ||
335 | static int dm_blk_open(struct block_device *bdev, fmode_t mode) | 335 | static int dm_blk_open(struct block_device *bdev, fmode_t mode) |
336 | { | 336 | { |
337 | struct mapped_device *md; | 337 | struct mapped_device *md; |
338 | 338 | ||
339 | spin_lock(&_minor_lock); | 339 | spin_lock(&_minor_lock); |
340 | 340 | ||
341 | md = bdev->bd_disk->private_data; | 341 | md = bdev->bd_disk->private_data; |
342 | if (!md) | 342 | if (!md) |
343 | goto out; | 343 | goto out; |
344 | 344 | ||
345 | if (test_bit(DMF_FREEING, &md->flags) || | 345 | if (test_bit(DMF_FREEING, &md->flags) || |
346 | dm_deleting_md(md)) { | 346 | dm_deleting_md(md)) { |
347 | md = NULL; | 347 | md = NULL; |
348 | goto out; | 348 | goto out; |
349 | } | 349 | } |
350 | 350 | ||
351 | dm_get(md); | 351 | dm_get(md); |
352 | atomic_inc(&md->open_count); | 352 | atomic_inc(&md->open_count); |
353 | 353 | ||
354 | out: | 354 | out: |
355 | spin_unlock(&_minor_lock); | 355 | spin_unlock(&_minor_lock); |
356 | 356 | ||
357 | return md ? 0 : -ENXIO; | 357 | return md ? 0 : -ENXIO; |
358 | } | 358 | } |
359 | 359 | ||
360 | static int dm_blk_close(struct gendisk *disk, fmode_t mode) | 360 | static int dm_blk_close(struct gendisk *disk, fmode_t mode) |
361 | { | 361 | { |
362 | struct mapped_device *md = disk->private_data; | 362 | struct mapped_device *md = disk->private_data; |
363 | 363 | ||
364 | spin_lock(&_minor_lock); | 364 | spin_lock(&_minor_lock); |
365 | 365 | ||
366 | atomic_dec(&md->open_count); | 366 | atomic_dec(&md->open_count); |
367 | dm_put(md); | 367 | dm_put(md); |
368 | 368 | ||
369 | spin_unlock(&_minor_lock); | 369 | spin_unlock(&_minor_lock); |
370 | 370 | ||
371 | return 0; | 371 | return 0; |
372 | } | 372 | } |
373 | 373 | ||
374 | int dm_open_count(struct mapped_device *md) | 374 | int dm_open_count(struct mapped_device *md) |
375 | { | 375 | { |
376 | return atomic_read(&md->open_count); | 376 | return atomic_read(&md->open_count); |
377 | } | 377 | } |
378 | 378 | ||
379 | /* | 379 | /* |
380 | * Guarantees nothing is using the device before it's deleted. | 380 | * Guarantees nothing is using the device before it's deleted. |
381 | */ | 381 | */ |
382 | int dm_lock_for_deletion(struct mapped_device *md) | 382 | int dm_lock_for_deletion(struct mapped_device *md) |
383 | { | 383 | { |
384 | int r = 0; | 384 | int r = 0; |
385 | 385 | ||
386 | spin_lock(&_minor_lock); | 386 | spin_lock(&_minor_lock); |
387 | 387 | ||
388 | if (dm_open_count(md)) | 388 | if (dm_open_count(md)) |
389 | r = -EBUSY; | 389 | r = -EBUSY; |
390 | else | 390 | else |
391 | set_bit(DMF_DELETING, &md->flags); | 391 | set_bit(DMF_DELETING, &md->flags); |
392 | 392 | ||
393 | spin_unlock(&_minor_lock); | 393 | spin_unlock(&_minor_lock); |
394 | 394 | ||
395 | return r; | 395 | return r; |
396 | } | 396 | } |
397 | 397 | ||
398 | static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo) | 398 | static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo) |
399 | { | 399 | { |
400 | struct mapped_device *md = bdev->bd_disk->private_data; | 400 | struct mapped_device *md = bdev->bd_disk->private_data; |
401 | 401 | ||
402 | return dm_get_geometry(md, geo); | 402 | return dm_get_geometry(md, geo); |
403 | } | 403 | } |
404 | 404 | ||
405 | static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode, | 405 | static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode, |
406 | unsigned int cmd, unsigned long arg) | 406 | unsigned int cmd, unsigned long arg) |
407 | { | 407 | { |
408 | struct mapped_device *md = bdev->bd_disk->private_data; | 408 | struct mapped_device *md = bdev->bd_disk->private_data; |
409 | struct dm_table *map = dm_get_live_table(md); | 409 | struct dm_table *map = dm_get_live_table(md); |
410 | struct dm_target *tgt; | 410 | struct dm_target *tgt; |
411 | int r = -ENOTTY; | 411 | int r = -ENOTTY; |
412 | 412 | ||
413 | if (!map || !dm_table_get_size(map)) | 413 | if (!map || !dm_table_get_size(map)) |
414 | goto out; | 414 | goto out; |
415 | 415 | ||
416 | /* We only support devices that have a single target */ | 416 | /* We only support devices that have a single target */ |
417 | if (dm_table_get_num_targets(map) != 1) | 417 | if (dm_table_get_num_targets(map) != 1) |
418 | goto out; | 418 | goto out; |
419 | 419 | ||
420 | tgt = dm_table_get_target(map, 0); | 420 | tgt = dm_table_get_target(map, 0); |
421 | 421 | ||
422 | if (dm_suspended_md(md)) { | 422 | if (dm_suspended_md(md)) { |
423 | r = -EAGAIN; | 423 | r = -EAGAIN; |
424 | goto out; | 424 | goto out; |
425 | } | 425 | } |
426 | 426 | ||
427 | if (tgt->type->ioctl) | 427 | if (tgt->type->ioctl) |
428 | r = tgt->type->ioctl(tgt, cmd, arg); | 428 | r = tgt->type->ioctl(tgt, cmd, arg); |
429 | 429 | ||
430 | out: | 430 | out: |
431 | dm_table_put(map); | 431 | dm_table_put(map); |
432 | 432 | ||
433 | return r; | 433 | return r; |
434 | } | 434 | } |
435 | 435 | ||
436 | static struct dm_io *alloc_io(struct mapped_device *md) | 436 | static struct dm_io *alloc_io(struct mapped_device *md) |
437 | { | 437 | { |
438 | return mempool_alloc(md->io_pool, GFP_NOIO); | 438 | return mempool_alloc(md->io_pool, GFP_NOIO); |
439 | } | 439 | } |
440 | 440 | ||
441 | static void free_io(struct mapped_device *md, struct dm_io *io) | 441 | static void free_io(struct mapped_device *md, struct dm_io *io) |
442 | { | 442 | { |
443 | mempool_free(io, md->io_pool); | 443 | mempool_free(io, md->io_pool); |
444 | } | 444 | } |
445 | 445 | ||
446 | static void free_tio(struct mapped_device *md, struct dm_target_io *tio) | 446 | static void free_tio(struct mapped_device *md, struct dm_target_io *tio) |
447 | { | 447 | { |
448 | mempool_free(tio, md->tio_pool); | 448 | mempool_free(tio, md->tio_pool); |
449 | } | 449 | } |
450 | 450 | ||
451 | static struct dm_rq_target_io *alloc_rq_tio(struct mapped_device *md, | 451 | static struct dm_rq_target_io *alloc_rq_tio(struct mapped_device *md, |
452 | gfp_t gfp_mask) | 452 | gfp_t gfp_mask) |
453 | { | 453 | { |
454 | return mempool_alloc(md->tio_pool, gfp_mask); | 454 | return mempool_alloc(md->tio_pool, gfp_mask); |
455 | } | 455 | } |
456 | 456 | ||
457 | static void free_rq_tio(struct dm_rq_target_io *tio) | 457 | static void free_rq_tio(struct dm_rq_target_io *tio) |
458 | { | 458 | { |
459 | mempool_free(tio, tio->md->tio_pool); | 459 | mempool_free(tio, tio->md->tio_pool); |
460 | } | 460 | } |
461 | 461 | ||
462 | static struct dm_rq_clone_bio_info *alloc_bio_info(struct mapped_device *md) | 462 | static struct dm_rq_clone_bio_info *alloc_bio_info(struct mapped_device *md) |
463 | { | 463 | { |
464 | return mempool_alloc(md->io_pool, GFP_ATOMIC); | 464 | return mempool_alloc(md->io_pool, GFP_ATOMIC); |
465 | } | 465 | } |
466 | 466 | ||
467 | static void free_bio_info(struct dm_rq_clone_bio_info *info) | 467 | static void free_bio_info(struct dm_rq_clone_bio_info *info) |
468 | { | 468 | { |
469 | mempool_free(info, info->tio->md->io_pool); | 469 | mempool_free(info, info->tio->md->io_pool); |
470 | } | 470 | } |
471 | 471 | ||
472 | static int md_in_flight(struct mapped_device *md) | 472 | static int md_in_flight(struct mapped_device *md) |
473 | { | 473 | { |
474 | return atomic_read(&md->pending[READ]) + | 474 | return atomic_read(&md->pending[READ]) + |
475 | atomic_read(&md->pending[WRITE]); | 475 | atomic_read(&md->pending[WRITE]); |
476 | } | 476 | } |
477 | 477 | ||
478 | static void start_io_acct(struct dm_io *io) | 478 | static void start_io_acct(struct dm_io *io) |
479 | { | 479 | { |
480 | struct mapped_device *md = io->md; | 480 | struct mapped_device *md = io->md; |
481 | int cpu; | 481 | int cpu; |
482 | int rw = bio_data_dir(io->bio); | 482 | int rw = bio_data_dir(io->bio); |
483 | 483 | ||
484 | io->start_time = jiffies; | 484 | io->start_time = jiffies; |
485 | 485 | ||
486 | cpu = part_stat_lock(); | 486 | cpu = part_stat_lock(); |
487 | part_round_stats(cpu, &dm_disk(md)->part0); | 487 | part_round_stats(cpu, &dm_disk(md)->part0); |
488 | part_stat_unlock(); | 488 | part_stat_unlock(); |
489 | atomic_set(&dm_disk(md)->part0.in_flight[rw], | 489 | atomic_set(&dm_disk(md)->part0.in_flight[rw], |
490 | atomic_inc_return(&md->pending[rw])); | 490 | atomic_inc_return(&md->pending[rw])); |
491 | } | 491 | } |
492 | 492 | ||
493 | static void end_io_acct(struct dm_io *io) | 493 | static void end_io_acct(struct dm_io *io) |
494 | { | 494 | { |
495 | struct mapped_device *md = io->md; | 495 | struct mapped_device *md = io->md; |
496 | struct bio *bio = io->bio; | 496 | struct bio *bio = io->bio; |
497 | unsigned long duration = jiffies - io->start_time; | 497 | unsigned long duration = jiffies - io->start_time; |
498 | int pending, cpu; | 498 | int pending, cpu; |
499 | int rw = bio_data_dir(bio); | 499 | int rw = bio_data_dir(bio); |
500 | 500 | ||
501 | cpu = part_stat_lock(); | 501 | cpu = part_stat_lock(); |
502 | part_round_stats(cpu, &dm_disk(md)->part0); | 502 | part_round_stats(cpu, &dm_disk(md)->part0); |
503 | part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration); | 503 | part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration); |
504 | part_stat_unlock(); | 504 | part_stat_unlock(); |
505 | 505 | ||
506 | /* | 506 | /* |
507 | * After this is decremented, the bio must not be touched if it is | 507 | * After this is decremented, the bio must not be touched if it is |
508 | * a flush. | 508 | * a flush. |
509 | */ | 509 | */ |
510 | pending = atomic_dec_return(&md->pending[rw]); | 510 | pending = atomic_dec_return(&md->pending[rw]); |
511 | atomic_set(&dm_disk(md)->part0.in_flight[rw], pending); | 511 | atomic_set(&dm_disk(md)->part0.in_flight[rw], pending); |
512 | pending += atomic_read(&md->pending[rw^0x1]); | 512 | pending += atomic_read(&md->pending[rw^0x1]); |
513 | 513 | ||
514 | /* nudge anyone waiting on suspend queue */ | 514 | /* nudge anyone waiting on suspend queue */ |
515 | if (!pending) | 515 | if (!pending) |
516 | wake_up(&md->wait); | 516 | wake_up(&md->wait); |
517 | } | 517 | } |
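The wake_up(&md->wait) above pairs with a waiter that re-checks the same pending counters. A minimal sketch of such a waiter, assuming the usual wait-queue helpers from <linux/wait.h> (the corresponding helper elsewhere in dm.c, not shown in this hunk, also handles interruptible waits; the function name here is hypothetical):

        /* Hypothetical simplification: block until all in-flight I/O has drained. */
        static void wait_for_all_io(struct mapped_device *md)
        {
                DEFINE_WAIT(wait);

                for (;;) {
                        prepare_to_wait(&md->wait, &wait, TASK_UNINTERRUPTIBLE);
                        if (!md_in_flight(md))
                                break;
                        io_schedule();
                }
                finish_wait(&md->wait, &wait);
        }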
518 | 518 | ||
519 | /* | 519 | /* |
520 | * Add the bio to the list of deferred io. | 520 | * Add the bio to the list of deferred io. |
521 | */ | 521 | */ |
522 | static void queue_io(struct mapped_device *md, struct bio *bio) | 522 | static void queue_io(struct mapped_device *md, struct bio *bio) |
523 | { | 523 | { |
524 | unsigned long flags; | 524 | unsigned long flags; |
525 | 525 | ||
526 | spin_lock_irqsave(&md->deferred_lock, flags); | 526 | spin_lock_irqsave(&md->deferred_lock, flags); |
527 | bio_list_add(&md->deferred, bio); | 527 | bio_list_add(&md->deferred, bio); |
528 | spin_unlock_irqrestore(&md->deferred_lock, flags); | 528 | spin_unlock_irqrestore(&md->deferred_lock, flags); |
529 | queue_work(md->wq, &md->work); | 529 | queue_work(md->wq, &md->work); |
530 | } | 530 | } |
531 | 531 | ||
532 | /* | 532 | /* |
533 | * Everyone (including functions in this file) should use this | 533 | * Everyone (including functions in this file) should use this |
534 | * function to access the md->map field, and make sure they call | 534 | * function to access the md->map field, and make sure they call |
535 | * dm_table_put() when finished. | 535 | * dm_table_put() when finished. |
536 | */ | 536 | */ |
537 | struct dm_table *dm_get_live_table(struct mapped_device *md) | 537 | struct dm_table *dm_get_live_table(struct mapped_device *md) |
538 | { | 538 | { |
539 | struct dm_table *t; | 539 | struct dm_table *t; |
540 | unsigned long flags; | 540 | unsigned long flags; |
541 | 541 | ||
542 | read_lock_irqsave(&md->map_lock, flags); | 542 | read_lock_irqsave(&md->map_lock, flags); |
543 | t = md->map; | 543 | t = md->map; |
544 | if (t) | 544 | if (t) |
545 | dm_table_get(t); | 545 | dm_table_get(t); |
546 | read_unlock_irqrestore(&md->map_lock, flags); | 546 | read_unlock_irqrestore(&md->map_lock, flags); |
547 | 547 | ||
548 | return t; | 548 | return t; |
549 | } | 549 | } |
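As the comment above says, any code that needs the live table should take a temporary reference and drop it when done. A minimal sketch of the expected calling pattern (the function name is hypothetical and shown only for illustration; dm_table_get_size() is the existing table helper):

        static sector_t example_live_table_size(struct mapped_device *md)
        {
                struct dm_table *map = dm_get_live_table(md);   /* takes a holder reference */
                sector_t size = 0;

                if (map) {
                        size = dm_table_get_size(map);
                        dm_table_put(map);                      /* always drop the reference */
                }

                return size;
        }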
550 | 550 | ||
551 | /* | 551 | /* |
552 | * Get the geometry associated with a dm device | 552 | * Get the geometry associated with a dm device |
553 | */ | 553 | */ |
554 | int dm_get_geometry(struct mapped_device *md, struct hd_geometry *geo) | 554 | int dm_get_geometry(struct mapped_device *md, struct hd_geometry *geo) |
555 | { | 555 | { |
556 | *geo = md->geometry; | 556 | *geo = md->geometry; |
557 | 557 | ||
558 | return 0; | 558 | return 0; |
559 | } | 559 | } |
560 | 560 | ||
561 | /* | 561 | /* |
562 | * Set the geometry of a device. | 562 | * Set the geometry of a device. |
563 | */ | 563 | */ |
564 | int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo) | 564 | int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo) |
565 | { | 565 | { |
566 | sector_t sz = (sector_t)geo->cylinders * geo->heads * geo->sectors; | 566 | sector_t sz = (sector_t)geo->cylinders * geo->heads * geo->sectors; |
567 | 567 | ||
568 | if (geo->start > sz) { | 568 | if (geo->start > sz) { |
569 | DMWARN("Start sector is beyond the geometry limits."); | 569 | DMWARN("Start sector is beyond the geometry limits."); |
570 | return -EINVAL; | 570 | return -EINVAL; |
571 | } | 571 | } |
572 | 572 | ||
573 | md->geometry = *geo; | 573 | md->geometry = *geo; |
574 | 574 | ||
575 | return 0; | 575 | return 0; |
576 | } | 576 | } |
577 | 577 | ||
578 | /*----------------------------------------------------------------- | 578 | /*----------------------------------------------------------------- |
579 | * CRUD START: | 579 | * CRUD START: |
580 | * A more elegant soln is in the works that uses the queue | 580 | * A more elegant soln is in the works that uses the queue |
581 | * merge fn, unfortunately there are a couple of changes to | 581 | * merge fn, unfortunately there are a couple of changes to |
582 | * the block layer that I want to make for this. So in the | 582 | * the block layer that I want to make for this. So in the |
583 | * interests of getting something for people to use I give | 583 | * interests of getting something for people to use I give |
584 | * you this clearly demarcated crap. | 584 | * you this clearly demarcated crap. |
585 | *---------------------------------------------------------------*/ | 585 | *---------------------------------------------------------------*/ |
586 | 586 | ||
587 | static int __noflush_suspending(struct mapped_device *md) | 587 | static int __noflush_suspending(struct mapped_device *md) |
588 | { | 588 | { |
589 | return test_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); | 589 | return test_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); |
590 | } | 590 | } |
591 | 591 | ||
592 | /* | 592 | /* |
593 | * Decrements the number of outstanding ios that a bio has been | 593 | * Decrements the number of outstanding ios that a bio has been |
594 | * cloned into, completing the original io if necessary. | 594 | * cloned into, completing the original io if necessary. |
595 | */ | 595 | */ |
596 | static void dec_pending(struct dm_io *io, int error) | 596 | static void dec_pending(struct dm_io *io, int error) |
597 | { | 597 | { |
598 | unsigned long flags; | 598 | unsigned long flags; |
599 | int io_error; | 599 | int io_error; |
600 | struct bio *bio; | 600 | struct bio *bio; |
601 | struct mapped_device *md = io->md; | 601 | struct mapped_device *md = io->md; |
602 | 602 | ||
603 | /* Push-back supersedes any I/O errors */ | 603 | /* Push-back supersedes any I/O errors */ |
604 | if (unlikely(error)) { | 604 | if (unlikely(error)) { |
605 | spin_lock_irqsave(&io->endio_lock, flags); | 605 | spin_lock_irqsave(&io->endio_lock, flags); |
606 | if (!(io->error > 0 && __noflush_suspending(md))) | 606 | if (!(io->error > 0 && __noflush_suspending(md))) |
607 | io->error = error; | 607 | io->error = error; |
608 | spin_unlock_irqrestore(&io->endio_lock, flags); | 608 | spin_unlock_irqrestore(&io->endio_lock, flags); |
609 | } | 609 | } |
610 | 610 | ||
611 | if (atomic_dec_and_test(&io->io_count)) { | 611 | if (atomic_dec_and_test(&io->io_count)) { |
612 | if (io->error == DM_ENDIO_REQUEUE) { | 612 | if (io->error == DM_ENDIO_REQUEUE) { |
613 | /* | 613 | /* |
614 | * Target requested pushing back the I/O. | 614 | * Target requested pushing back the I/O. |
615 | */ | 615 | */ |
616 | spin_lock_irqsave(&md->deferred_lock, flags); | 616 | spin_lock_irqsave(&md->deferred_lock, flags); |
617 | if (__noflush_suspending(md)) | 617 | if (__noflush_suspending(md)) |
618 | bio_list_add_head(&md->deferred, io->bio); | 618 | bio_list_add_head(&md->deferred, io->bio); |
619 | else | 619 | else |
620 | /* noflush suspend was interrupted. */ | 620 | /* noflush suspend was interrupted. */ |
621 | io->error = -EIO; | 621 | io->error = -EIO; |
622 | spin_unlock_irqrestore(&md->deferred_lock, flags); | 622 | spin_unlock_irqrestore(&md->deferred_lock, flags); |
623 | } | 623 | } |
624 | 624 | ||
625 | io_error = io->error; | 625 | io_error = io->error; |
626 | bio = io->bio; | 626 | bio = io->bio; |
627 | end_io_acct(io); | 627 | end_io_acct(io); |
628 | free_io(md, io); | 628 | free_io(md, io); |
629 | 629 | ||
630 | if (io_error == DM_ENDIO_REQUEUE) | 630 | if (io_error == DM_ENDIO_REQUEUE) |
631 | return; | 631 | return; |
632 | 632 | ||
633 | if ((bio->bi_rw & REQ_FLUSH) && bio->bi_size) { | 633 | if ((bio->bi_rw & REQ_FLUSH) && bio->bi_size) { |
634 | /* | 634 | /* |
635 | * Preflush done for flush with data, reissue | 635 | * Preflush done for flush with data, reissue |
636 | * without REQ_FLUSH. | 636 | * without REQ_FLUSH. |
637 | */ | 637 | */ |
638 | bio->bi_rw &= ~REQ_FLUSH; | 638 | bio->bi_rw &= ~REQ_FLUSH; |
639 | queue_io(md, bio); | 639 | queue_io(md, bio); |
640 | } else { | 640 | } else { |
641 | /* done with normal IO or empty flush */ | 641 | /* done with normal IO or empty flush */ |
642 | trace_block_bio_complete(md->queue, bio, io_error); | 642 | trace_block_bio_complete(md->queue, bio, io_error); |
643 | bio_endio(bio, io_error); | 643 | bio_endio(bio, io_error); |
644 | } | 644 | } |
645 | } | 645 | } |
646 | } | 646 | } |
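For example, a REQ_FLUSH bio that also carries data completes in two passes: the first pass maps only the empty flush, and when that flush finishes dec_pending() sees REQ_FLUSH together with a non-zero bi_size, clears REQ_FLUSH and requeues the bio via queue_io(), so the data portion is then processed as ordinary I/O.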
647 | 647 | ||
648 | static void clone_endio(struct bio *bio, int error) | 648 | static void clone_endio(struct bio *bio, int error) |
649 | { | 649 | { |
650 | int r = 0; | 650 | int r = 0; |
651 | struct dm_target_io *tio = bio->bi_private; | 651 | struct dm_target_io *tio = bio->bi_private; |
652 | struct dm_io *io = tio->io; | 652 | struct dm_io *io = tio->io; |
653 | struct mapped_device *md = tio->io->md; | 653 | struct mapped_device *md = tio->io->md; |
654 | dm_endio_fn endio = tio->ti->type->end_io; | 654 | dm_endio_fn endio = tio->ti->type->end_io; |
655 | 655 | ||
656 | if (!bio_flagged(bio, BIO_UPTODATE) && !error) | 656 | if (!bio_flagged(bio, BIO_UPTODATE) && !error) |
657 | error = -EIO; | 657 | error = -EIO; |
658 | 658 | ||
659 | if (endio) { | 659 | if (endio) { |
660 | r = endio(tio->ti, bio, error, &tio->info); | 660 | r = endio(tio->ti, bio, error, &tio->info); |
661 | if (r < 0 || r == DM_ENDIO_REQUEUE) | 661 | if (r < 0 || r == DM_ENDIO_REQUEUE) |
662 | /* | 662 | /* |
663 | * error and requeue request are handled | 663 | * error and requeue request are handled |
664 | * in dec_pending(). | 664 | * in dec_pending(). |
665 | */ | 665 | */ |
666 | error = r; | 666 | error = r; |
667 | else if (r == DM_ENDIO_INCOMPLETE) | 667 | else if (r == DM_ENDIO_INCOMPLETE) |
668 | /* The target will handle the io */ | 668 | /* The target will handle the io */ |
669 | return; | 669 | return; |
670 | else if (r) { | 670 | else if (r) { |
671 | DMWARN("unimplemented target endio return value: %d", r); | 671 | DMWARN("unimplemented target endio return value: %d", r); |
672 | BUG(); | 672 | BUG(); |
673 | } | 673 | } |
674 | } | 674 | } |
675 | 675 | ||
676 | /* | 676 | /* |
677 | * Store md for cleanup instead of tio which is about to get freed. | 677 | * Store md for cleanup instead of tio which is about to get freed. |
678 | */ | 678 | */ |
679 | bio->bi_private = md->bs; | 679 | bio->bi_private = md->bs; |
680 | 680 | ||
681 | free_tio(md, tio); | 681 | free_tio(md, tio); |
682 | bio_put(bio); | 682 | bio_put(bio); |
683 | dec_pending(io, error); | 683 | dec_pending(io, error); |
684 | } | 684 | } |
685 | 685 | ||
686 | /* | 686 | /* |
687 | * Partial completion handling for request-based dm | 687 | * Partial completion handling for request-based dm |
688 | */ | 688 | */ |
689 | static void end_clone_bio(struct bio *clone, int error) | 689 | static void end_clone_bio(struct bio *clone, int error) |
690 | { | 690 | { |
691 | struct dm_rq_clone_bio_info *info = clone->bi_private; | 691 | struct dm_rq_clone_bio_info *info = clone->bi_private; |
692 | struct dm_rq_target_io *tio = info->tio; | 692 | struct dm_rq_target_io *tio = info->tio; |
693 | struct bio *bio = info->orig; | 693 | struct bio *bio = info->orig; |
694 | unsigned int nr_bytes = info->orig->bi_size; | 694 | unsigned int nr_bytes = info->orig->bi_size; |
695 | 695 | ||
696 | bio_put(clone); | 696 | bio_put(clone); |
697 | 697 | ||
698 | if (tio->error) | 698 | if (tio->error) |
699 | /* | 699 | /* |
700 | * An error has already been detected on the request. | 700 | * An error has already been detected on the request. |
701 | * Once an error has occurred, just let clone->end_io() handle | 701 | * Once an error has occurred, just let clone->end_io() handle |
702 | * the remainder. | 702 | * the remainder. |
703 | */ | 703 | */ |
704 | return; | 704 | return; |
705 | else if (error) { | 705 | else if (error) { |
706 | /* | 706 | /* |
707 | * Don't report the error to the upper layer yet. | 707 | * Don't report the error to the upper layer yet. |
708 | * The error handling decision is made by the target driver | 708 | * The error handling decision is made by the target driver |
709 | * when the request is completed. | 709 | * when the request is completed. |
710 | */ | 710 | */ |
711 | tio->error = error; | 711 | tio->error = error; |
712 | return; | 712 | return; |
713 | } | 713 | } |
714 | 714 | ||
715 | /* | 715 | /* |
716 | * I/O for the bio successfully completed. | 716 | * I/O for the bio successfully completed. |
717 | * Report the data completion to the upper layer. | 717 | * Report the data completion to the upper layer. |
718 | */ | 718 | */ |
719 | 719 | ||
720 | /* | 720 | /* |
721 | * bios are processed from the head of the list. | 721 | * bios are processed from the head of the list. |
722 | * So the completing bio should always be rq->bio. | 722 | * So the completing bio should always be rq->bio. |
723 | * If it's not, something is wrong. | 723 | * If it's not, something is wrong. |
724 | */ | 724 | */ |
725 | if (tio->orig->bio != bio) | 725 | if (tio->orig->bio != bio) |
726 | DMERR("bio completion is going in the middle of the request"); | 726 | DMERR("bio completion is going in the middle of the request"); |
727 | 727 | ||
728 | /* | 728 | /* |
729 | * Update the original request. | 729 | * Update the original request. |
730 | * Do not use blk_end_request() here, because it may complete | 730 | * Do not use blk_end_request() here, because it may complete |
731 | * the original request before the clone, and break the ordering. | 731 | * the original request before the clone, and break the ordering. |
732 | */ | 732 | */ |
733 | blk_update_request(tio->orig, 0, nr_bytes); | 733 | blk_update_request(tio->orig, 0, nr_bytes); |
734 | } | 734 | } |
735 | 735 | ||
736 | /* | 736 | /* |
737 | * Don't touch any member of the md after calling this function because | 737 | * Don't touch any member of the md after calling this function because |
738 | * the md may be freed in dm_put() at the end of this function. | 738 | * the md may be freed in dm_put() at the end of this function. |
739 | * Or do dm_get() before calling this function and dm_put() later. | 739 | * Or do dm_get() before calling this function and dm_put() later. |
740 | */ | 740 | */ |
741 | static void rq_completed(struct mapped_device *md, int rw, int run_queue) | 741 | static void rq_completed(struct mapped_device *md, int rw, int run_queue) |
742 | { | 742 | { |
743 | atomic_dec(&md->pending[rw]); | 743 | atomic_dec(&md->pending[rw]); |
744 | 744 | ||
745 | /* nudge anyone waiting on suspend queue */ | 745 | /* nudge anyone waiting on suspend queue */ |
746 | if (!md_in_flight(md)) | 746 | if (!md_in_flight(md)) |
747 | wake_up(&md->wait); | 747 | wake_up(&md->wait); |
748 | 748 | ||
749 | if (run_queue) | 749 | if (run_queue) |
750 | blk_run_queue(md->queue); | 750 | blk_run_queue(md->queue); |
751 | 751 | ||
752 | /* | 752 | /* |
753 | * dm_put() must be at the end of this function. See the comment above | 753 | * dm_put() must be at the end of this function. See the comment above |
754 | */ | 754 | */ |
755 | dm_put(md); | 755 | dm_put(md); |
756 | } | 756 | } |
757 | 757 | ||
758 | static void free_rq_clone(struct request *clone) | 758 | static void free_rq_clone(struct request *clone) |
759 | { | 759 | { |
760 | struct dm_rq_target_io *tio = clone->end_io_data; | 760 | struct dm_rq_target_io *tio = clone->end_io_data; |
761 | 761 | ||
762 | blk_rq_unprep_clone(clone); | 762 | blk_rq_unprep_clone(clone); |
763 | free_rq_tio(tio); | 763 | free_rq_tio(tio); |
764 | } | 764 | } |
765 | 765 | ||
766 | /* | 766 | /* |
767 | * Complete the clone and the original request. | 767 | * Complete the clone and the original request. |
768 | * Must be called without queue lock. | 768 | * Must be called without queue lock. |
769 | */ | 769 | */ |
770 | static void dm_end_request(struct request *clone, int error) | 770 | static void dm_end_request(struct request *clone, int error) |
771 | { | 771 | { |
772 | int rw = rq_data_dir(clone); | 772 | int rw = rq_data_dir(clone); |
773 | struct dm_rq_target_io *tio = clone->end_io_data; | 773 | struct dm_rq_target_io *tio = clone->end_io_data; |
774 | struct mapped_device *md = tio->md; | 774 | struct mapped_device *md = tio->md; |
775 | struct request *rq = tio->orig; | 775 | struct request *rq = tio->orig; |
776 | 776 | ||
777 | if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { | 777 | if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { |
778 | rq->errors = clone->errors; | 778 | rq->errors = clone->errors; |
779 | rq->resid_len = clone->resid_len; | 779 | rq->resid_len = clone->resid_len; |
780 | 780 | ||
781 | if (rq->sense) | 781 | if (rq->sense) |
782 | /* | 782 | /* |
783 | * We are using the sense buffer of the original | 783 | * We are using the sense buffer of the original |
784 | * request. | 784 | * request. |
785 | * So setting the length of the sense data is enough. | 785 | * So setting the length of the sense data is enough. |
786 | */ | 786 | */ |
787 | rq->sense_len = clone->sense_len; | 787 | rq->sense_len = clone->sense_len; |
788 | } | 788 | } |
789 | 789 | ||
790 | free_rq_clone(clone); | 790 | free_rq_clone(clone); |
791 | blk_end_request_all(rq, error); | 791 | blk_end_request_all(rq, error); |
792 | rq_completed(md, rw, true); | 792 | rq_completed(md, rw, true); |
793 | } | 793 | } |
794 | 794 | ||
795 | static void dm_unprep_request(struct request *rq) | 795 | static void dm_unprep_request(struct request *rq) |
796 | { | 796 | { |
797 | struct request *clone = rq->special; | 797 | struct request *clone = rq->special; |
798 | 798 | ||
799 | rq->special = NULL; | 799 | rq->special = NULL; |
800 | rq->cmd_flags &= ~REQ_DONTPREP; | 800 | rq->cmd_flags &= ~REQ_DONTPREP; |
801 | 801 | ||
802 | free_rq_clone(clone); | 802 | free_rq_clone(clone); |
803 | } | 803 | } |
804 | 804 | ||
805 | /* | 805 | /* |
806 | * Requeue the original request of a clone. | 806 | * Requeue the original request of a clone. |
807 | */ | 807 | */ |
808 | void dm_requeue_unmapped_request(struct request *clone) | 808 | void dm_requeue_unmapped_request(struct request *clone) |
809 | { | 809 | { |
810 | int rw = rq_data_dir(clone); | 810 | int rw = rq_data_dir(clone); |
811 | struct dm_rq_target_io *tio = clone->end_io_data; | 811 | struct dm_rq_target_io *tio = clone->end_io_data; |
812 | struct mapped_device *md = tio->md; | 812 | struct mapped_device *md = tio->md; |
813 | struct request *rq = tio->orig; | 813 | struct request *rq = tio->orig; |
814 | struct request_queue *q = rq->q; | 814 | struct request_queue *q = rq->q; |
815 | unsigned long flags; | 815 | unsigned long flags; |
816 | 816 | ||
817 | dm_unprep_request(rq); | 817 | dm_unprep_request(rq); |
818 | 818 | ||
819 | spin_lock_irqsave(q->queue_lock, flags); | 819 | spin_lock_irqsave(q->queue_lock, flags); |
820 | blk_requeue_request(q, rq); | 820 | blk_requeue_request(q, rq); |
821 | spin_unlock_irqrestore(q->queue_lock, flags); | 821 | spin_unlock_irqrestore(q->queue_lock, flags); |
822 | 822 | ||
823 | rq_completed(md, rw, 0); | 823 | rq_completed(md, rw, 0); |
824 | } | 824 | } |
825 | EXPORT_SYMBOL_GPL(dm_requeue_unmapped_request); | 825 | EXPORT_SYMBOL_GPL(dm_requeue_unmapped_request); |
826 | 826 | ||
827 | static void __stop_queue(struct request_queue *q) | 827 | static void __stop_queue(struct request_queue *q) |
828 | { | 828 | { |
829 | blk_stop_queue(q); | 829 | blk_stop_queue(q); |
830 | } | 830 | } |
831 | 831 | ||
832 | static void stop_queue(struct request_queue *q) | 832 | static void stop_queue(struct request_queue *q) |
833 | { | 833 | { |
834 | unsigned long flags; | 834 | unsigned long flags; |
835 | 835 | ||
836 | spin_lock_irqsave(q->queue_lock, flags); | 836 | spin_lock_irqsave(q->queue_lock, flags); |
837 | __stop_queue(q); | 837 | __stop_queue(q); |
838 | spin_unlock_irqrestore(q->queue_lock, flags); | 838 | spin_unlock_irqrestore(q->queue_lock, flags); |
839 | } | 839 | } |
840 | 840 | ||
841 | static void __start_queue(struct request_queue *q) | 841 | static void __start_queue(struct request_queue *q) |
842 | { | 842 | { |
843 | if (blk_queue_stopped(q)) | 843 | if (blk_queue_stopped(q)) |
844 | blk_start_queue(q); | 844 | blk_start_queue(q); |
845 | } | 845 | } |
846 | 846 | ||
847 | static void start_queue(struct request_queue *q) | 847 | static void start_queue(struct request_queue *q) |
848 | { | 848 | { |
849 | unsigned long flags; | 849 | unsigned long flags; |
850 | 850 | ||
851 | spin_lock_irqsave(q->queue_lock, flags); | 851 | spin_lock_irqsave(q->queue_lock, flags); |
852 | __start_queue(q); | 852 | __start_queue(q); |
853 | spin_unlock_irqrestore(q->queue_lock, flags); | 853 | spin_unlock_irqrestore(q->queue_lock, flags); |
854 | } | 854 | } |
855 | 855 | ||
856 | static void dm_done(struct request *clone, int error, bool mapped) | 856 | static void dm_done(struct request *clone, int error, bool mapped) |
857 | { | 857 | { |
858 | int r = error; | 858 | int r = error; |
859 | struct dm_rq_target_io *tio = clone->end_io_data; | 859 | struct dm_rq_target_io *tio = clone->end_io_data; |
860 | dm_request_endio_fn rq_end_io = tio->ti->type->rq_end_io; | 860 | dm_request_endio_fn rq_end_io = tio->ti->type->rq_end_io; |
861 | 861 | ||
862 | if (mapped && rq_end_io) | 862 | if (mapped && rq_end_io) |
863 | r = rq_end_io(tio->ti, clone, error, &tio->info); | 863 | r = rq_end_io(tio->ti, clone, error, &tio->info); |
864 | 864 | ||
865 | if (r <= 0) | 865 | if (r <= 0) |
866 | /* The target wants to complete the I/O */ | 866 | /* The target wants to complete the I/O */ |
867 | dm_end_request(clone, r); | 867 | dm_end_request(clone, r); |
868 | else if (r == DM_ENDIO_INCOMPLETE) | 868 | else if (r == DM_ENDIO_INCOMPLETE) |
869 | /* The target will handle the I/O */ | 869 | /* The target will handle the I/O */ |
870 | return; | 870 | return; |
871 | else if (r == DM_ENDIO_REQUEUE) | 871 | else if (r == DM_ENDIO_REQUEUE) |
872 | /* The target wants to requeue the I/O */ | 872 | /* The target wants to requeue the I/O */ |
873 | dm_requeue_unmapped_request(clone); | 873 | dm_requeue_unmapped_request(clone); |
874 | else { | 874 | else { |
875 | DMWARN("unimplemented target endio return value: %d", r); | 875 | DMWARN("unimplemented target endio return value: %d", r); |
876 | BUG(); | 876 | BUG(); |
877 | } | 877 | } |
878 | } | 878 | } |
879 | 879 | ||
880 | /* | 880 | /* |
881 | * Request completion handler for request-based dm | 881 | * Request completion handler for request-based dm |
882 | */ | 882 | */ |
883 | static void dm_softirq_done(struct request *rq) | 883 | static void dm_softirq_done(struct request *rq) |
884 | { | 884 | { |
885 | bool mapped = true; | 885 | bool mapped = true; |
886 | struct request *clone = rq->completion_data; | 886 | struct request *clone = rq->completion_data; |
887 | struct dm_rq_target_io *tio = clone->end_io_data; | 887 | struct dm_rq_target_io *tio = clone->end_io_data; |
888 | 888 | ||
889 | if (rq->cmd_flags & REQ_FAILED) | 889 | if (rq->cmd_flags & REQ_FAILED) |
890 | mapped = false; | 890 | mapped = false; |
891 | 891 | ||
892 | dm_done(clone, tio->error, mapped); | 892 | dm_done(clone, tio->error, mapped); |
893 | } | 893 | } |
894 | 894 | ||
895 | /* | 895 | /* |
896 | * Complete the clone and the original request with the error status | 896 | * Complete the clone and the original request with the error status |
897 | * through softirq context. | 897 | * through softirq context. |
898 | */ | 898 | */ |
899 | static void dm_complete_request(struct request *clone, int error) | 899 | static void dm_complete_request(struct request *clone, int error) |
900 | { | 900 | { |
901 | struct dm_rq_target_io *tio = clone->end_io_data; | 901 | struct dm_rq_target_io *tio = clone->end_io_data; |
902 | struct request *rq = tio->orig; | 902 | struct request *rq = tio->orig; |
903 | 903 | ||
904 | tio->error = error; | 904 | tio->error = error; |
905 | rq->completion_data = clone; | 905 | rq->completion_data = clone; |
906 | blk_complete_request(rq); | 906 | blk_complete_request(rq); |
907 | } | 907 | } |
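blk_complete_request() defers the real completion work to softirq context; the per-queue handler that then runs is dm_softirq_done() above. The registration of that handler is outside this hunk, but it is presumably done during request-based queue setup along these lines:

        /* Assumed registration site, e.g. during request-based queue init. */
        blk_queue_softirq_done(md->queue, dm_softirq_done);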
908 | 908 | ||
909 | /* | 909 | /* |
910 | * Complete the unmapped clone and the original request with the error status | 910 | * Complete the unmapped clone and the original request with the error status |
911 | * through softirq context. | 911 | * through softirq context. |
912 | * The target's rq_end_io() function isn't called. | 912 | * The target's rq_end_io() function isn't called. |
913 | * This may be used when the target's map_rq() function fails. | 913 | * This may be used when the target's map_rq() function fails. |
914 | */ | 914 | */ |
915 | void dm_kill_unmapped_request(struct request *clone, int error) | 915 | void dm_kill_unmapped_request(struct request *clone, int error) |
916 | { | 916 | { |
917 | struct dm_rq_target_io *tio = clone->end_io_data; | 917 | struct dm_rq_target_io *tio = clone->end_io_data; |
918 | struct request *rq = tio->orig; | 918 | struct request *rq = tio->orig; |
919 | 919 | ||
920 | rq->cmd_flags |= REQ_FAILED; | 920 | rq->cmd_flags |= REQ_FAILED; |
921 | dm_complete_request(clone, error); | 921 | dm_complete_request(clone, error); |
922 | } | 922 | } |
923 | EXPORT_SYMBOL_GPL(dm_kill_unmapped_request); | 923 | EXPORT_SYMBOL_GPL(dm_kill_unmapped_request); |
924 | 924 | ||
925 | /* | 925 | /* |
926 | * Called with the queue lock held | 926 | * Called with the queue lock held |
927 | */ | 927 | */ |
928 | static void end_clone_request(struct request *clone, int error) | 928 | static void end_clone_request(struct request *clone, int error) |
929 | { | 929 | { |
930 | /* | 930 | /* |
931 | * This only cleans up the bookkeeping of the queue in which | 931 | * This only cleans up the bookkeeping of the queue in which |
932 | * the clone was dispatched. | 932 | * the clone was dispatched. |
933 | * The clone is *NOT* actually freed here because it was allocated from | 933 | * The clone is *NOT* actually freed here because it was allocated from |
934 | * dm's own mempool and REQ_ALLOCED isn't set in clone->cmd_flags. | 934 | * dm's own mempool and REQ_ALLOCED isn't set in clone->cmd_flags. |
935 | */ | 935 | */ |
936 | __blk_put_request(clone->q, clone); | 936 | __blk_put_request(clone->q, clone); |
937 | 937 | ||
938 | /* | 938 | /* |
939 | * Actual request completion is done in a softirq context which doesn't | 939 | * Actual request completion is done in a softirq context which doesn't |
940 | * hold the queue lock. Otherwise, deadlock could occur because: | 940 | * hold the queue lock. Otherwise, deadlock could occur because: |
941 | * - another request may be submitted by the upper level driver | 941 | * - another request may be submitted by the upper level driver |
942 | * of the stacking during the completion | 942 | * of the stacking during the completion |
943 | * - the submission which requires queue lock may be done | 943 | * - the submission which requires queue lock may be done |
944 | * against this queue | 944 | * against this queue |
945 | */ | 945 | */ |
946 | dm_complete_request(clone, error); | 946 | dm_complete_request(clone, error); |
947 | } | 947 | } |
948 | 948 | ||
949 | /* | 949 | /* |
950 | * Return maximum size of I/O possible at the supplied sector up to the current | 950 | * Return maximum size of I/O possible at the supplied sector up to the current |
951 | * target boundary. | 951 | * target boundary. |
952 | */ | 952 | */ |
953 | static sector_t max_io_len_target_boundary(sector_t sector, struct dm_target *ti) | 953 | static sector_t max_io_len_target_boundary(sector_t sector, struct dm_target *ti) |
954 | { | 954 | { |
955 | sector_t target_offset = dm_target_offset(ti, sector); | 955 | sector_t target_offset = dm_target_offset(ti, sector); |
956 | 956 | ||
957 | return ti->len - target_offset; | 957 | return ti->len - target_offset; |
958 | } | 958 | } |
959 | 959 | ||
960 | static sector_t max_io_len(sector_t sector, struct dm_target *ti) | 960 | static sector_t max_io_len(sector_t sector, struct dm_target *ti) |
961 | { | 961 | { |
962 | sector_t len = max_io_len_target_boundary(sector, ti); | 962 | sector_t len = max_io_len_target_boundary(sector, ti); |
963 | 963 | ||
964 | /* | 964 | /* |
965 | * Does the target need to split even further? | 965 | * Does the target need to split even further? |
966 | */ | 966 | */ |
967 | if (ti->split_io) { | 967 | if (ti->split_io) { |
968 | sector_t boundary; | 968 | sector_t boundary; |
969 | sector_t offset = dm_target_offset(ti, sector); | 969 | sector_t offset = dm_target_offset(ti, sector); |
970 | boundary = ((offset + ti->split_io) & ~(ti->split_io - 1)) | 970 | boundary = ((offset + ti->split_io) & ~(ti->split_io - 1)) |
971 | - offset; | 971 | - offset; |
972 | if (len > boundary) | 972 | if (len > boundary) |
973 | len = boundary; | 973 | len = boundary; |
974 | } | 974 | } |
975 | 975 | ||
976 | return len; | 976 | return len; |
977 | } | 977 | } |
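A worked example of the split_io rounding above: with ti->split_io = 8 and an offset of 5 sectors into the target, boundary = ((5 + 8) & ~7) - 5 = 8 - 5 = 3, so at most 3 sectors are issued and the next clone starts exactly on the 8-sector boundary (the mask arithmetic assumes split_io is a power of two).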
978 | 978 | ||
979 | static void __map_bio(struct dm_target *ti, struct bio *clone, | 979 | static void __map_bio(struct dm_target *ti, struct bio *clone, |
980 | struct dm_target_io *tio) | 980 | struct dm_target_io *tio) |
981 | { | 981 | { |
982 | int r; | 982 | int r; |
983 | sector_t sector; | 983 | sector_t sector; |
984 | struct mapped_device *md; | 984 | struct mapped_device *md; |
985 | 985 | ||
986 | clone->bi_end_io = clone_endio; | 986 | clone->bi_end_io = clone_endio; |
987 | clone->bi_private = tio; | 987 | clone->bi_private = tio; |
988 | 988 | ||
989 | /* | 989 | /* |
990 | * Map the clone. If r == 0 we don't need to do | 990 | * Map the clone. If r == 0 we don't need to do |
991 | * anything; the target has assumed ownership of | 991 | * anything; the target has assumed ownership of |
992 | * this io. | 992 | * this io. |
993 | */ | 993 | */ |
994 | atomic_inc(&tio->io->io_count); | 994 | atomic_inc(&tio->io->io_count); |
995 | sector = clone->bi_sector; | 995 | sector = clone->bi_sector; |
996 | r = ti->type->map(ti, clone, &tio->info); | 996 | r = ti->type->map(ti, clone, &tio->info); |
997 | if (r == DM_MAPIO_REMAPPED) { | 997 | if (r == DM_MAPIO_REMAPPED) { |
998 | /* the bio has been remapped so dispatch it */ | 998 | /* the bio has been remapped so dispatch it */ |
999 | 999 | ||
1000 | trace_block_bio_remap(bdev_get_queue(clone->bi_bdev), clone, | 1000 | trace_block_bio_remap(bdev_get_queue(clone->bi_bdev), clone, |
1001 | tio->io->bio->bi_bdev->bd_dev, sector); | 1001 | tio->io->bio->bi_bdev->bd_dev, sector); |
1002 | 1002 | ||
1003 | generic_make_request(clone); | 1003 | generic_make_request(clone); |
1004 | } else if (r < 0 || r == DM_MAPIO_REQUEUE) { | 1004 | } else if (r < 0 || r == DM_MAPIO_REQUEUE) { |
1005 | /* error the io and bail out, or requeue it if needed */ | 1005 | /* error the io and bail out, or requeue it if needed */ |
1006 | md = tio->io->md; | 1006 | md = tio->io->md; |
1007 | dec_pending(tio->io, r); | 1007 | dec_pending(tio->io, r); |
1008 | /* | 1008 | /* |
1009 | * Store bio_set for cleanup. | 1009 | * Store bio_set for cleanup. |
1010 | */ | 1010 | */ |
1011 | clone->bi_private = md->bs; | 1011 | clone->bi_private = md->bs; |
1012 | bio_put(clone); | 1012 | bio_put(clone); |
1013 | free_tio(md, tio); | 1013 | free_tio(md, tio); |
1014 | } else if (r) { | 1014 | } else if (r) { |
1015 | DMWARN("unimplemented target map return value: %d", r); | 1015 | DMWARN("unimplemented target map return value: %d", r); |
1016 | BUG(); | 1016 | BUG(); |
1017 | } | 1017 | } |
1018 | } | 1018 | } |
1019 | 1019 | ||
1020 | struct clone_info { | 1020 | struct clone_info { |
1021 | struct mapped_device *md; | 1021 | struct mapped_device *md; |
1022 | struct dm_table *map; | 1022 | struct dm_table *map; |
1023 | struct bio *bio; | 1023 | struct bio *bio; |
1024 | struct dm_io *io; | 1024 | struct dm_io *io; |
1025 | sector_t sector; | 1025 | sector_t sector; |
1026 | sector_t sector_count; | 1026 | sector_t sector_count; |
1027 | unsigned short idx; | 1027 | unsigned short idx; |
1028 | }; | 1028 | }; |
1029 | 1029 | ||
1030 | static void dm_bio_destructor(struct bio *bio) | 1030 | static void dm_bio_destructor(struct bio *bio) |
1031 | { | 1031 | { |
1032 | struct bio_set *bs = bio->bi_private; | 1032 | struct bio_set *bs = bio->bi_private; |
1033 | 1033 | ||
1034 | bio_free(bio, bs); | 1034 | bio_free(bio, bs); |
1035 | } | 1035 | } |
1036 | 1036 | ||
1037 | /* | 1037 | /* |
1038 | * Creates a little bio that just does part of a bvec. | 1038 | * Creates a little bio that just does part of a bvec. |
1039 | */ | 1039 | */ |
1040 | static struct bio *split_bvec(struct bio *bio, sector_t sector, | 1040 | static struct bio *split_bvec(struct bio *bio, sector_t sector, |
1041 | unsigned short idx, unsigned int offset, | 1041 | unsigned short idx, unsigned int offset, |
1042 | unsigned int len, struct bio_set *bs) | 1042 | unsigned int len, struct bio_set *bs) |
1043 | { | 1043 | { |
1044 | struct bio *clone; | 1044 | struct bio *clone; |
1045 | struct bio_vec *bv = bio->bi_io_vec + idx; | 1045 | struct bio_vec *bv = bio->bi_io_vec + idx; |
1046 | 1046 | ||
1047 | clone = bio_alloc_bioset(GFP_NOIO, 1, bs); | 1047 | clone = bio_alloc_bioset(GFP_NOIO, 1, bs); |
1048 | clone->bi_destructor = dm_bio_destructor; | 1048 | clone->bi_destructor = dm_bio_destructor; |
1049 | *clone->bi_io_vec = *bv; | 1049 | *clone->bi_io_vec = *bv; |
1050 | 1050 | ||
1051 | clone->bi_sector = sector; | 1051 | clone->bi_sector = sector; |
1052 | clone->bi_bdev = bio->bi_bdev; | 1052 | clone->bi_bdev = bio->bi_bdev; |
1053 | clone->bi_rw = bio->bi_rw; | 1053 | clone->bi_rw = bio->bi_rw; |
1054 | clone->bi_vcnt = 1; | 1054 | clone->bi_vcnt = 1; |
1055 | clone->bi_size = to_bytes(len); | 1055 | clone->bi_size = to_bytes(len); |
1056 | clone->bi_io_vec->bv_offset = offset; | 1056 | clone->bi_io_vec->bv_offset = offset; |
1057 | clone->bi_io_vec->bv_len = clone->bi_size; | 1057 | clone->bi_io_vec->bv_len = clone->bi_size; |
1058 | clone->bi_flags |= 1 << BIO_CLONED; | 1058 | clone->bi_flags |= 1 << BIO_CLONED; |
1059 | 1059 | ||
1060 | if (bio_integrity(bio)) { | 1060 | if (bio_integrity(bio)) { |
1061 | bio_integrity_clone(clone, bio, GFP_NOIO, bs); | 1061 | bio_integrity_clone(clone, bio, GFP_NOIO, bs); |
1062 | bio_integrity_trim(clone, | 1062 | bio_integrity_trim(clone, |
1063 | bio_sector_offset(bio, idx, offset), len); | 1063 | bio_sector_offset(bio, idx, offset), len); |
1064 | } | 1064 | } |
1065 | 1065 | ||
1066 | return clone; | 1066 | return clone; |
1067 | } | 1067 | } |
1068 | 1068 | ||
1069 | /* | 1069 | /* |
1070 | * Creates a bio that consists of a range of complete bvecs. | 1070 | * Creates a bio that consists of a range of complete bvecs. |
1071 | */ | 1071 | */ |
1072 | static struct bio *clone_bio(struct bio *bio, sector_t sector, | 1072 | static struct bio *clone_bio(struct bio *bio, sector_t sector, |
1073 | unsigned short idx, unsigned short bv_count, | 1073 | unsigned short idx, unsigned short bv_count, |
1074 | unsigned int len, struct bio_set *bs) | 1074 | unsigned int len, struct bio_set *bs) |
1075 | { | 1075 | { |
1076 | struct bio *clone; | 1076 | struct bio *clone; |
1077 | 1077 | ||
1078 | clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs); | 1078 | clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs); |
1079 | __bio_clone(clone, bio); | 1079 | __bio_clone(clone, bio); |
1080 | clone->bi_destructor = dm_bio_destructor; | 1080 | clone->bi_destructor = dm_bio_destructor; |
1081 | clone->bi_sector = sector; | 1081 | clone->bi_sector = sector; |
1082 | clone->bi_idx = idx; | 1082 | clone->bi_idx = idx; |
1083 | clone->bi_vcnt = idx + bv_count; | 1083 | clone->bi_vcnt = idx + bv_count; |
1084 | clone->bi_size = to_bytes(len); | 1084 | clone->bi_size = to_bytes(len); |
1085 | clone->bi_flags &= ~(1 << BIO_SEG_VALID); | 1085 | clone->bi_flags &= ~(1 << BIO_SEG_VALID); |
1086 | 1086 | ||
1087 | if (bio_integrity(bio)) { | 1087 | if (bio_integrity(bio)) { |
1088 | bio_integrity_clone(clone, bio, GFP_NOIO, bs); | 1088 | bio_integrity_clone(clone, bio, GFP_NOIO, bs); |
1089 | 1089 | ||
1090 | if (idx != bio->bi_idx || clone->bi_size < bio->bi_size) | 1090 | if (idx != bio->bi_idx || clone->bi_size < bio->bi_size) |
1091 | bio_integrity_trim(clone, | 1091 | bio_integrity_trim(clone, |
1092 | bio_sector_offset(bio, idx, 0), len); | 1092 | bio_sector_offset(bio, idx, 0), len); |
1093 | } | 1093 | } |
1094 | 1094 | ||
1095 | return clone; | 1095 | return clone; |
1096 | } | 1096 | } |
1097 | 1097 | ||
1098 | static struct dm_target_io *alloc_tio(struct clone_info *ci, | 1098 | static struct dm_target_io *alloc_tio(struct clone_info *ci, |
1099 | struct dm_target *ti) | 1099 | struct dm_target *ti) |
1100 | { | 1100 | { |
1101 | struct dm_target_io *tio = mempool_alloc(ci->md->tio_pool, GFP_NOIO); | 1101 | struct dm_target_io *tio = mempool_alloc(ci->md->tio_pool, GFP_NOIO); |
1102 | 1102 | ||
1103 | tio->io = ci->io; | 1103 | tio->io = ci->io; |
1104 | tio->ti = ti; | 1104 | tio->ti = ti; |
1105 | memset(&tio->info, 0, sizeof(tio->info)); | 1105 | memset(&tio->info, 0, sizeof(tio->info)); |
1106 | 1106 | ||
1107 | return tio; | 1107 | return tio; |
1108 | } | 1108 | } |
1109 | 1109 | ||
1110 | static void __issue_target_request(struct clone_info *ci, struct dm_target *ti, | 1110 | static void __issue_target_request(struct clone_info *ci, struct dm_target *ti, |
1111 | unsigned request_nr, sector_t len) | 1111 | unsigned request_nr, sector_t len) |
1112 | { | 1112 | { |
1113 | struct dm_target_io *tio = alloc_tio(ci, ti); | 1113 | struct dm_target_io *tio = alloc_tio(ci, ti); |
1114 | struct bio *clone; | 1114 | struct bio *clone; |
1115 | 1115 | ||
1116 | tio->info.target_request_nr = request_nr; | 1116 | tio->info.target_request_nr = request_nr; |
1117 | 1117 | ||
1118 | /* | 1118 | /* |
1119 | * Discard requests require the bio's inline iovecs be initialized. | 1119 | * Discard requests require the bio's inline iovecs be initialized. |
1120 | * ci->bio->bi_max_vecs is BIO_INLINE_VECS anyway, for both flush | 1120 | * ci->bio->bi_max_vecs is BIO_INLINE_VECS anyway, for both flush |
1121 | * and discard, so no need for concern about wasted bvec allocations. | 1121 | * and discard, so no need for concern about wasted bvec allocations. |
1122 | */ | 1122 | */ |
1123 | clone = bio_alloc_bioset(GFP_NOIO, ci->bio->bi_max_vecs, ci->md->bs); | 1123 | clone = bio_alloc_bioset(GFP_NOIO, ci->bio->bi_max_vecs, ci->md->bs); |
1124 | __bio_clone(clone, ci->bio); | 1124 | __bio_clone(clone, ci->bio); |
1125 | clone->bi_destructor = dm_bio_destructor; | 1125 | clone->bi_destructor = dm_bio_destructor; |
1126 | if (len) { | 1126 | if (len) { |
1127 | clone->bi_sector = ci->sector; | 1127 | clone->bi_sector = ci->sector; |
1128 | clone->bi_size = to_bytes(len); | 1128 | clone->bi_size = to_bytes(len); |
1129 | } | 1129 | } |
1130 | 1130 | ||
1131 | __map_bio(ti, clone, tio); | 1131 | __map_bio(ti, clone, tio); |
1132 | } | 1132 | } |
1133 | 1133 | ||
1134 | static void __issue_target_requests(struct clone_info *ci, struct dm_target *ti, | 1134 | static void __issue_target_requests(struct clone_info *ci, struct dm_target *ti, |
1135 | unsigned num_requests, sector_t len) | 1135 | unsigned num_requests, sector_t len) |
1136 | { | 1136 | { |
1137 | unsigned request_nr; | 1137 | unsigned request_nr; |
1138 | 1138 | ||
1139 | for (request_nr = 0; request_nr < num_requests; request_nr++) | 1139 | for (request_nr = 0; request_nr < num_requests; request_nr++) |
1140 | __issue_target_request(ci, ti, request_nr, len); | 1140 | __issue_target_request(ci, ti, request_nr, len); |
1141 | } | 1141 | } |
1142 | 1142 | ||
1143 | static int __clone_and_map_empty_flush(struct clone_info *ci) | 1143 | static int __clone_and_map_empty_flush(struct clone_info *ci) |
1144 | { | 1144 | { |
1145 | unsigned target_nr = 0; | 1145 | unsigned target_nr = 0; |
1146 | struct dm_target *ti; | 1146 | struct dm_target *ti; |
1147 | 1147 | ||
1148 | BUG_ON(bio_has_data(ci->bio)); | 1148 | BUG_ON(bio_has_data(ci->bio)); |
1149 | while ((ti = dm_table_get_target(ci->map, target_nr++))) | 1149 | while ((ti = dm_table_get_target(ci->map, target_nr++))) |
1150 | __issue_target_requests(ci, ti, ti->num_flush_requests, 0); | 1150 | __issue_target_requests(ci, ti, ti->num_flush_requests, 0); |
1151 | 1151 | ||
1152 | return 0; | 1152 | return 0; |
1153 | } | 1153 | } |
1154 | 1154 | ||
1155 | /* | 1155 | /* |
1156 | * Perform all io with a single clone. | 1156 | * Perform all io with a single clone. |
1157 | */ | 1157 | */ |
1158 | static void __clone_and_map_simple(struct clone_info *ci, struct dm_target *ti) | 1158 | static void __clone_and_map_simple(struct clone_info *ci, struct dm_target *ti) |
1159 | { | 1159 | { |
1160 | struct bio *clone, *bio = ci->bio; | 1160 | struct bio *clone, *bio = ci->bio; |
1161 | struct dm_target_io *tio; | 1161 | struct dm_target_io *tio; |
1162 | 1162 | ||
1163 | tio = alloc_tio(ci, ti); | 1163 | tio = alloc_tio(ci, ti); |
1164 | clone = clone_bio(bio, ci->sector, ci->idx, | 1164 | clone = clone_bio(bio, ci->sector, ci->idx, |
1165 | bio->bi_vcnt - ci->idx, ci->sector_count, | 1165 | bio->bi_vcnt - ci->idx, ci->sector_count, |
1166 | ci->md->bs); | 1166 | ci->md->bs); |
1167 | __map_bio(ti, clone, tio); | 1167 | __map_bio(ti, clone, tio); |
1168 | ci->sector_count = 0; | 1168 | ci->sector_count = 0; |
1169 | } | 1169 | } |
1170 | 1170 | ||
1171 | static int __clone_and_map_discard(struct clone_info *ci) | 1171 | static int __clone_and_map_discard(struct clone_info *ci) |
1172 | { | 1172 | { |
1173 | struct dm_target *ti; | 1173 | struct dm_target *ti; |
1174 | sector_t len; | 1174 | sector_t len; |
1175 | 1175 | ||
1176 | do { | 1176 | do { |
1177 | ti = dm_table_find_target(ci->map, ci->sector); | 1177 | ti = dm_table_find_target(ci->map, ci->sector); |
1178 | if (!dm_target_is_valid(ti)) | 1178 | if (!dm_target_is_valid(ti)) |
1179 | return -EIO; | 1179 | return -EIO; |
1180 | 1180 | ||
1181 | /* | 1181 | /* |
1182 | * Even though the device advertised discard support, | 1182 | * Even though the device advertised discard support, |
1183 | * that does not mean every target supports it, and | 1183 | * that does not mean every target supports it, and |
1184 | * reconfiguration might also have changed that since the | 1184 | * reconfiguration might also have changed that since the |
1185 | * check was performed. | 1185 | * check was performed. |
1186 | */ | 1186 | */ |
1187 | if (!ti->num_discard_requests) | 1187 | if (!ti->num_discard_requests) |
1188 | return -EOPNOTSUPP; | 1188 | return -EOPNOTSUPP; |
1189 | 1189 | ||
1190 | len = min(ci->sector_count, max_io_len_target_boundary(ci->sector, ti)); | 1190 | len = min(ci->sector_count, max_io_len_target_boundary(ci->sector, ti)); |
1191 | 1191 | ||
1192 | __issue_target_requests(ci, ti, ti->num_discard_requests, len); | 1192 | __issue_target_requests(ci, ti, ti->num_discard_requests, len); |
1193 | 1193 | ||
1194 | ci->sector += len; | 1194 | ci->sector += len; |
1195 | } while (ci->sector_count -= len); | 1195 | } while (ci->sector_count -= len); |
1196 | 1196 | ||
1197 | return 0; | 1197 | return 0; |
1198 | } | 1198 | } |
1199 | 1199 | ||
1200 | static int __clone_and_map(struct clone_info *ci) | 1200 | static int __clone_and_map(struct clone_info *ci) |
1201 | { | 1201 | { |
1202 | struct bio *clone, *bio = ci->bio; | 1202 | struct bio *clone, *bio = ci->bio; |
1203 | struct dm_target *ti; | 1203 | struct dm_target *ti; |
1204 | sector_t len = 0, max; | 1204 | sector_t len = 0, max; |
1205 | struct dm_target_io *tio; | 1205 | struct dm_target_io *tio; |
1206 | 1206 | ||
1207 | if (unlikely(bio->bi_rw & REQ_DISCARD)) | 1207 | if (unlikely(bio->bi_rw & REQ_DISCARD)) |
1208 | return __clone_and_map_discard(ci); | 1208 | return __clone_and_map_discard(ci); |
1209 | 1209 | ||
1210 | ti = dm_table_find_target(ci->map, ci->sector); | 1210 | ti = dm_table_find_target(ci->map, ci->sector); |
1211 | if (!dm_target_is_valid(ti)) | 1211 | if (!dm_target_is_valid(ti)) |
1212 | return -EIO; | 1212 | return -EIO; |
1213 | 1213 | ||
1214 | max = max_io_len(ci->sector, ti); | 1214 | max = max_io_len(ci->sector, ti); |
1215 | 1215 | ||
1216 | if (ci->sector_count <= max) { | 1216 | if (ci->sector_count <= max) { |
1217 | /* | 1217 | /* |
1218 | * Optimise for the simple case where we can do all of | 1218 | * Optimise for the simple case where we can do all of |
1219 | * the remaining io with a single clone. | 1219 | * the remaining io with a single clone. |
1220 | */ | 1220 | */ |
1221 | __clone_and_map_simple(ci, ti); | 1221 | __clone_and_map_simple(ci, ti); |
1222 | 1222 | ||
1223 | } else if (to_sector(bio->bi_io_vec[ci->idx].bv_len) <= max) { | 1223 | } else if (to_sector(bio->bi_io_vec[ci->idx].bv_len) <= max) { |
1224 | /* | 1224 | /* |
1225 | * There are some bvecs that don't span targets. | 1225 | * There are some bvecs that don't span targets. |
1226 | * Do as many of these as possible. | 1226 | * Do as many of these as possible. |
1227 | */ | 1227 | */ |
1228 | int i; | 1228 | int i; |
1229 | sector_t remaining = max; | 1229 | sector_t remaining = max; |
1230 | sector_t bv_len; | 1230 | sector_t bv_len; |
1231 | 1231 | ||
1232 | for (i = ci->idx; remaining && (i < bio->bi_vcnt); i++) { | 1232 | for (i = ci->idx; remaining && (i < bio->bi_vcnt); i++) { |
1233 | bv_len = to_sector(bio->bi_io_vec[i].bv_len); | 1233 | bv_len = to_sector(bio->bi_io_vec[i].bv_len); |
1234 | 1234 | ||
1235 | if (bv_len > remaining) | 1235 | if (bv_len > remaining) |
1236 | break; | 1236 | break; |
1237 | 1237 | ||
1238 | remaining -= bv_len; | 1238 | remaining -= bv_len; |
1239 | len += bv_len; | 1239 | len += bv_len; |
1240 | } | 1240 | } |
1241 | 1241 | ||
1242 | tio = alloc_tio(ci, ti); | 1242 | tio = alloc_tio(ci, ti); |
1243 | clone = clone_bio(bio, ci->sector, ci->idx, i - ci->idx, len, | 1243 | clone = clone_bio(bio, ci->sector, ci->idx, i - ci->idx, len, |
1244 | ci->md->bs); | 1244 | ci->md->bs); |
1245 | __map_bio(ti, clone, tio); | 1245 | __map_bio(ti, clone, tio); |
1246 | 1246 | ||
1247 | ci->sector += len; | 1247 | ci->sector += len; |
1248 | ci->sector_count -= len; | 1248 | ci->sector_count -= len; |
1249 | ci->idx = i; | 1249 | ci->idx = i; |
1250 | 1250 | ||
1251 | } else { | 1251 | } else { |
1252 | /* | 1252 | /* |
1253 | * Handle a bvec that must be split between two or more targets. | 1253 | * Handle a bvec that must be split between two or more targets. |
1254 | */ | 1254 | */ |
1255 | struct bio_vec *bv = bio->bi_io_vec + ci->idx; | 1255 | struct bio_vec *bv = bio->bi_io_vec + ci->idx; |
1256 | sector_t remaining = to_sector(bv->bv_len); | 1256 | sector_t remaining = to_sector(bv->bv_len); |
1257 | unsigned int offset = 0; | 1257 | unsigned int offset = 0; |
1258 | 1258 | ||
1259 | do { | 1259 | do { |
1260 | if (offset) { | 1260 | if (offset) { |
1261 | ti = dm_table_find_target(ci->map, ci->sector); | 1261 | ti = dm_table_find_target(ci->map, ci->sector); |
1262 | if (!dm_target_is_valid(ti)) | 1262 | if (!dm_target_is_valid(ti)) |
1263 | return -EIO; | 1263 | return -EIO; |
1264 | 1264 | ||
1265 | max = max_io_len(ci->sector, ti); | 1265 | max = max_io_len(ci->sector, ti); |
1266 | } | 1266 | } |
1267 | 1267 | ||
1268 | len = min(remaining, max); | 1268 | len = min(remaining, max); |
1269 | 1269 | ||
1270 | tio = alloc_tio(ci, ti); | 1270 | tio = alloc_tio(ci, ti); |
1271 | clone = split_bvec(bio, ci->sector, ci->idx, | 1271 | clone = split_bvec(bio, ci->sector, ci->idx, |
1272 | bv->bv_offset + offset, len, | 1272 | bv->bv_offset + offset, len, |
1273 | ci->md->bs); | 1273 | ci->md->bs); |
1274 | 1274 | ||
1275 | __map_bio(ti, clone, tio); | 1275 | __map_bio(ti, clone, tio); |
1276 | 1276 | ||
1277 | ci->sector += len; | 1277 | ci->sector += len; |
1278 | ci->sector_count -= len; | 1278 | ci->sector_count -= len; |
1279 | offset += to_bytes(len); | 1279 | offset += to_bytes(len); |
1280 | } while (remaining -= len); | 1280 | } while (remaining -= len); |
1281 | 1281 | ||
1282 | ci->idx++; | 1282 | ci->idx++; |
1283 | } | 1283 | } |
1284 | 1284 | ||
1285 | return 0; | 1285 | return 0; |
1286 | } | 1286 | } |
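A concrete trace, assuming 4 KiB bvecs (8 sectors each): a three-bvec, 24-sector bio that hits a target with only 16 sectors remaining takes the middle branch, cloning the first two bvecs (16 sectors) in one clone; the caller's loop then invokes __clone_and_map() again and, assuming the next target can take all 8 remaining sectors, the last bvec goes out via the simple single-clone path. Only when a single bvec itself straddles a target boundary does the final branch carve it up with split_bvec().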
1287 | 1287 | ||
1288 | /* | 1288 | /* |
1289 | * Split the bio into several clones and submit it to targets. | 1289 | * Split the bio into several clones and submit it to targets. |
1290 | */ | 1290 | */ |
1291 | static void __split_and_process_bio(struct mapped_device *md, struct bio *bio) | 1291 | static void __split_and_process_bio(struct mapped_device *md, struct bio *bio) |
1292 | { | 1292 | { |
1293 | struct clone_info ci; | 1293 | struct clone_info ci; |
1294 | int error = 0; | 1294 | int error = 0; |
1295 | 1295 | ||
1296 | ci.map = dm_get_live_table(md); | 1296 | ci.map = dm_get_live_table(md); |
1297 | if (unlikely(!ci.map)) { | 1297 | if (unlikely(!ci.map)) { |
1298 | bio_io_error(bio); | 1298 | bio_io_error(bio); |
1299 | return; | 1299 | return; |
1300 | } | 1300 | } |
1301 | 1301 | ||
1302 | ci.md = md; | 1302 | ci.md = md; |
1303 | ci.io = alloc_io(md); | 1303 | ci.io = alloc_io(md); |
1304 | ci.io->error = 0; | 1304 | ci.io->error = 0; |
1305 | atomic_set(&ci.io->io_count, 1); | 1305 | atomic_set(&ci.io->io_count, 1); |
1306 | ci.io->bio = bio; | 1306 | ci.io->bio = bio; |
1307 | ci.io->md = md; | 1307 | ci.io->md = md; |
1308 | spin_lock_init(&ci.io->endio_lock); | 1308 | spin_lock_init(&ci.io->endio_lock); |
1309 | ci.sector = bio->bi_sector; | 1309 | ci.sector = bio->bi_sector; |
1310 | ci.idx = bio->bi_idx; | 1310 | ci.idx = bio->bi_idx; |
1311 | 1311 | ||
1312 | start_io_acct(ci.io); | 1312 | start_io_acct(ci.io); |
1313 | if (bio->bi_rw & REQ_FLUSH) { | 1313 | if (bio->bi_rw & REQ_FLUSH) { |
1314 | ci.bio = &ci.md->flush_bio; | 1314 | ci.bio = &ci.md->flush_bio; |
1315 | ci.sector_count = 0; | 1315 | ci.sector_count = 0; |
1316 | error = __clone_and_map_empty_flush(&ci); | 1316 | error = __clone_and_map_empty_flush(&ci); |
1317 | /* dec_pending submits any data associated with flush */ | 1317 | /* dec_pending submits any data associated with flush */ |
1318 | } else { | 1318 | } else { |
1319 | ci.bio = bio; | 1319 | ci.bio = bio; |
1320 | ci.sector_count = bio_sectors(bio); | 1320 | ci.sector_count = bio_sectors(bio); |
1321 | while (ci.sector_count && !error) | 1321 | while (ci.sector_count && !error) |
1322 | error = __clone_and_map(&ci); | 1322 | error = __clone_and_map(&ci); |
1323 | } | 1323 | } |
1324 | 1324 | ||
1325 | /* drop the extra reference count */ | 1325 | /* drop the extra reference count */ |
1326 | dec_pending(ci.io, error); | 1326 | dec_pending(ci.io, error); |
1327 | dm_table_put(ci.map); | 1327 | dm_table_put(ci.map); |
1328 | } | 1328 | } |
1329 | /*----------------------------------------------------------------- | 1329 | /*----------------------------------------------------------------- |
1330 | * CRUD END | 1330 | * CRUD END |
1331 | *---------------------------------------------------------------*/ | 1331 | *---------------------------------------------------------------*/ |
1332 | 1332 | ||
1333 | static int dm_merge_bvec(struct request_queue *q, | 1333 | static int dm_merge_bvec(struct request_queue *q, |
1334 | struct bvec_merge_data *bvm, | 1334 | struct bvec_merge_data *bvm, |
1335 | struct bio_vec *biovec) | 1335 | struct bio_vec *biovec) |
1336 | { | 1336 | { |
1337 | struct mapped_device *md = q->queuedata; | 1337 | struct mapped_device *md = q->queuedata; |
1338 | struct dm_table *map = dm_get_live_table(md); | 1338 | struct dm_table *map = dm_get_live_table(md); |
1339 | struct dm_target *ti; | 1339 | struct dm_target *ti; |
1340 | sector_t max_sectors; | 1340 | sector_t max_sectors; |
1341 | int max_size = 0; | 1341 | int max_size = 0; |
1342 | 1342 | ||
1343 | if (unlikely(!map)) | 1343 | if (unlikely(!map)) |
1344 | goto out; | 1344 | goto out; |
1345 | 1345 | ||
1346 | ti = dm_table_find_target(map, bvm->bi_sector); | 1346 | ti = dm_table_find_target(map, bvm->bi_sector); |
1347 | if (!dm_target_is_valid(ti)) | 1347 | if (!dm_target_is_valid(ti)) |
1348 | goto out_table; | 1348 | goto out_table; |
1349 | 1349 | ||
1350 | /* | 1350 | /* |
1351 | * Find maximum amount of I/O that won't need splitting | 1351 | * Find maximum amount of I/O that won't need splitting |
1352 | */ | 1352 | */ |
1353 | max_sectors = min(max_io_len(bvm->bi_sector, ti), | 1353 | max_sectors = min(max_io_len(bvm->bi_sector, ti), |
1354 | (sector_t) BIO_MAX_SECTORS); | 1354 | (sector_t) BIO_MAX_SECTORS); |
1355 | max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size; | 1355 | max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size; |
1356 | if (max_size < 0) | 1356 | if (max_size < 0) |
1357 | max_size = 0; | 1357 | max_size = 0; |
1358 | 1358 | ||
1359 | /* | 1359 | /* |
1360 | * merge_bvec_fn() returns number of bytes | 1360 | * merge_bvec_fn() returns number of bytes |
1361 | * it can accept at this offset | 1361 | * it can accept at this offset |
1362 | * max is precomputed maximal io size | 1362 | * max is precomputed maximal io size |
1363 | */ | 1363 | */ |
1364 | if (max_size && ti->type->merge) | 1364 | if (max_size && ti->type->merge) |
1365 | max_size = ti->type->merge(ti, bvm, biovec, max_size); | 1365 | max_size = ti->type->merge(ti, bvm, biovec, max_size); |
1366 | /* | 1366 | /* |
1367 | * If the target doesn't support the merge method and some of the devices | 1367 | * If the target doesn't support the merge method and some of the devices |
1368 | * provided their merge_bvec method (we know this by looking at | 1368 | * provided their merge_bvec method (we know this by looking at |
1369 | * queue_max_hw_sectors), then we can't allow bios with multiple vector | 1369 | * queue_max_hw_sectors), then we can't allow bios with multiple vector |
1370 | * entries. So always set max_size to 0, and the code below allows | 1370 | * entries. So always set max_size to 0, and the code below allows |
1371 | * just one page. | 1371 | * just one page. |
1372 | */ | 1372 | */ |
1373 | else if (queue_max_hw_sectors(q) <= PAGE_SIZE >> 9) | 1373 | else if (queue_max_hw_sectors(q) <= PAGE_SIZE >> 9) |
1374 | 1374 | ||
1375 | max_size = 0; | 1375 | max_size = 0; |
1376 | 1376 | ||
1377 | out_table: | 1377 | out_table: |
1378 | dm_table_put(map); | 1378 | dm_table_put(map); |
1379 | 1379 | ||
1380 | out: | 1380 | out: |
1381 | /* | 1381 | /* |
1382 | * Always allow an entire first page | 1382 | * Always allow an entire first page |
1383 | */ | 1383 | */ |
1384 | if (max_size <= biovec->bv_len && !(bvm->bi_size >> SECTOR_SHIFT)) | 1384 | if (max_size <= biovec->bv_len && !(bvm->bi_size >> SECTOR_SHIFT)) |
1385 | max_size = biovec->bv_len; | 1385 | max_size = biovec->bv_len; |
1386 | 1386 | ||
1387 | return max_size; | 1387 | return max_size; |
1388 | } | 1388 | } |
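dm_merge_bvec() is a queue merge_bvec_fn. Its registration is not part of this hunk, but it is presumably installed on the mapped device's queue roughly as follows:

        /* Assumed registration site during queue setup. */
        blk_queue_merge_bvec(md->queue, dm_merge_bvec);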
1389 | 1389 | ||
1390 | /* | 1390 | /* |
1391 | * The request function that just remaps the bio built up by | 1391 | * The request function that just remaps the bio built up by |
1392 | * dm_merge_bvec. | 1392 | * dm_merge_bvec. |
1393 | */ | 1393 | */ |
1394 | static int _dm_request(struct request_queue *q, struct bio *bio) | 1394 | static int _dm_request(struct request_queue *q, struct bio *bio) |
1395 | { | 1395 | { |
1396 | int rw = bio_data_dir(bio); | 1396 | int rw = bio_data_dir(bio); |
1397 | struct mapped_device *md = q->queuedata; | 1397 | struct mapped_device *md = q->queuedata; |
1398 | int cpu; | 1398 | int cpu; |
1399 | 1399 | ||
1400 | down_read(&md->io_lock); | 1400 | down_read(&md->io_lock); |
1401 | 1401 | ||
1402 | cpu = part_stat_lock(); | 1402 | cpu = part_stat_lock(); |
1403 | part_stat_inc(cpu, &dm_disk(md)->part0, ios[rw]); | 1403 | part_stat_inc(cpu, &dm_disk(md)->part0, ios[rw]); |
1404 | part_stat_add(cpu, &dm_disk(md)->part0, sectors[rw], bio_sectors(bio)); | 1404 | part_stat_add(cpu, &dm_disk(md)->part0, sectors[rw], bio_sectors(bio)); |
1405 | part_stat_unlock(); | 1405 | part_stat_unlock(); |
1406 | 1406 | ||
1407 | /* if we're suspended, we have to queue this io for later */ | 1407 | /* if we're suspended, we have to queue this io for later */ |
1408 | if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) { | 1408 | if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) { |
1409 | up_read(&md->io_lock); | 1409 | up_read(&md->io_lock); |
1410 | 1410 | ||
1411 | if (bio_rw(bio) != READA) | 1411 | if (bio_rw(bio) != READA) |
1412 | queue_io(md, bio); | 1412 | queue_io(md, bio); |
1413 | else | 1413 | else |
1414 | bio_io_error(bio); | 1414 | bio_io_error(bio); |
1415 | return 0; | 1415 | return 0; |
1416 | } | 1416 | } |
1417 | 1417 | ||
1418 | __split_and_process_bio(md, bio); | 1418 | __split_and_process_bio(md, bio); |
1419 | up_read(&md->io_lock); | 1419 | up_read(&md->io_lock); |
1420 | return 0; | 1420 | return 0; |
1421 | } | 1421 | } |
1422 | 1422 | ||
1423 | static int dm_make_request(struct request_queue *q, struct bio *bio) | 1423 | static int dm_make_request(struct request_queue *q, struct bio *bio) |
1424 | { | 1424 | { |
1425 | struct mapped_device *md = q->queuedata; | 1425 | struct mapped_device *md = q->queuedata; |
1426 | 1426 | ||
1427 | return md->saved_make_request_fn(q, bio); /* call __make_request() */ | 1427 | return md->saved_make_request_fn(q, bio); /* call __make_request() */ |
1428 | } | 1428 | } |
1429 | 1429 | ||
1430 | static int dm_request_based(struct mapped_device *md) | 1430 | static int dm_request_based(struct mapped_device *md) |
1431 | { | 1431 | { |
1432 | return blk_queue_stackable(md->queue); | 1432 | return blk_queue_stackable(md->queue); |
1433 | } | 1433 | } |
1434 | 1434 | ||
1435 | static int dm_request(struct request_queue *q, struct bio *bio) | 1435 | static int dm_request(struct request_queue *q, struct bio *bio) |
1436 | { | 1436 | { |
1437 | struct mapped_device *md = q->queuedata; | 1437 | struct mapped_device *md = q->queuedata; |
1438 | 1438 | ||
1439 | if (dm_request_based(md)) | 1439 | if (dm_request_based(md)) |
1440 | return dm_make_request(q, bio); | 1440 | return dm_make_request(q, bio); |
1441 | 1441 | ||
1442 | return _dm_request(q, bio); | 1442 | return _dm_request(q, bio); |
1443 | } | 1443 | } |
1444 | 1444 | ||
1445 | void dm_dispatch_request(struct request *rq) | 1445 | void dm_dispatch_request(struct request *rq) |
1446 | { | 1446 | { |
1447 | int r; | 1447 | int r; |
1448 | 1448 | ||
1449 | if (blk_queue_io_stat(rq->q)) | 1449 | if (blk_queue_io_stat(rq->q)) |
1450 | rq->cmd_flags |= REQ_IO_STAT; | 1450 | rq->cmd_flags |= REQ_IO_STAT; |
1451 | 1451 | ||
1452 | rq->start_time = jiffies; | 1452 | rq->start_time = jiffies; |
1453 | r = blk_insert_cloned_request(rq->q, rq); | 1453 | r = blk_insert_cloned_request(rq->q, rq); |
1454 | if (r) | 1454 | if (r) |
1455 | dm_complete_request(rq, r); | 1455 | dm_complete_request(rq, r); |
1456 | } | 1456 | } |
1457 | EXPORT_SYMBOL_GPL(dm_dispatch_request); | 1457 | EXPORT_SYMBOL_GPL(dm_dispatch_request); |
1458 | 1458 | ||
1459 | static void dm_rq_bio_destructor(struct bio *bio) | 1459 | static void dm_rq_bio_destructor(struct bio *bio) |
1460 | { | 1460 | { |
1461 | struct dm_rq_clone_bio_info *info = bio->bi_private; | 1461 | struct dm_rq_clone_bio_info *info = bio->bi_private; |
1462 | struct mapped_device *md = info->tio->md; | 1462 | struct mapped_device *md = info->tio->md; |
1463 | 1463 | ||
1464 | free_bio_info(info); | 1464 | free_bio_info(info); |
1465 | bio_free(bio, md->bs); | 1465 | bio_free(bio, md->bs); |
1466 | } | 1466 | } |
1467 | 1467 | ||
1468 | static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig, | 1468 | static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig, |
1469 | void *data) | 1469 | void *data) |
1470 | { | 1470 | { |
1471 | struct dm_rq_target_io *tio = data; | 1471 | struct dm_rq_target_io *tio = data; |
1472 | struct mapped_device *md = tio->md; | 1472 | struct mapped_device *md = tio->md; |
1473 | struct dm_rq_clone_bio_info *info = alloc_bio_info(md); | 1473 | struct dm_rq_clone_bio_info *info = alloc_bio_info(md); |
1474 | 1474 | ||
1475 | if (!info) | 1475 | if (!info) |
1476 | return -ENOMEM; | 1476 | return -ENOMEM; |
1477 | 1477 | ||
1478 | info->orig = bio_orig; | 1478 | info->orig = bio_orig; |
1479 | info->tio = tio; | 1479 | info->tio = tio; |
1480 | bio->bi_end_io = end_clone_bio; | 1480 | bio->bi_end_io = end_clone_bio; |
1481 | bio->bi_private = info; | 1481 | bio->bi_private = info; |
1482 | bio->bi_destructor = dm_rq_bio_destructor; | 1482 | bio->bi_destructor = dm_rq_bio_destructor; |
1483 | 1483 | ||
1484 | return 0; | 1484 | return 0; |
1485 | } | 1485 | } |
1486 | 1486 | ||
1487 | static int setup_clone(struct request *clone, struct request *rq, | 1487 | static int setup_clone(struct request *clone, struct request *rq, |
1488 | struct dm_rq_target_io *tio) | 1488 | struct dm_rq_target_io *tio) |
1489 | { | 1489 | { |
1490 | int r; | 1490 | int r; |
1491 | 1491 | ||
1492 | r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC, | 1492 | r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC, |
1493 | dm_rq_bio_constructor, tio); | 1493 | dm_rq_bio_constructor, tio); |
1494 | if (r) | 1494 | if (r) |
1495 | return r; | 1495 | return r; |
1496 | 1496 | ||
1497 | clone->cmd = rq->cmd; | 1497 | clone->cmd = rq->cmd; |
1498 | clone->cmd_len = rq->cmd_len; | 1498 | clone->cmd_len = rq->cmd_len; |
1499 | clone->sense = rq->sense; | 1499 | clone->sense = rq->sense; |
1500 | clone->buffer = rq->buffer; | 1500 | clone->buffer = rq->buffer; |
1501 | clone->end_io = end_clone_request; | 1501 | clone->end_io = end_clone_request; |
1502 | clone->end_io_data = tio; | 1502 | clone->end_io_data = tio; |
1503 | 1503 | ||
1504 | return 0; | 1504 | return 0; |
1505 | } | 1505 | } |
1506 | 1506 | ||
1507 | static struct request *clone_rq(struct request *rq, struct mapped_device *md, | 1507 | static struct request *clone_rq(struct request *rq, struct mapped_device *md, |
1508 | gfp_t gfp_mask) | 1508 | gfp_t gfp_mask) |
1509 | { | 1509 | { |
1510 | struct request *clone; | 1510 | struct request *clone; |
1511 | struct dm_rq_target_io *tio; | 1511 | struct dm_rq_target_io *tio; |
1512 | 1512 | ||
1513 | tio = alloc_rq_tio(md, gfp_mask); | 1513 | tio = alloc_rq_tio(md, gfp_mask); |
1514 | if (!tio) | 1514 | if (!tio) |
1515 | return NULL; | 1515 | return NULL; |
1516 | 1516 | ||
1517 | tio->md = md; | 1517 | tio->md = md; |
1518 | tio->ti = NULL; | 1518 | tio->ti = NULL; |
1519 | tio->orig = rq; | 1519 | tio->orig = rq; |
1520 | tio->error = 0; | 1520 | tio->error = 0; |
1521 | memset(&tio->info, 0, sizeof(tio->info)); | 1521 | memset(&tio->info, 0, sizeof(tio->info)); |
1522 | 1522 | ||
1523 | clone = &tio->clone; | 1523 | clone = &tio->clone; |
1524 | if (setup_clone(clone, rq, tio)) { | 1524 | if (setup_clone(clone, rq, tio)) { |
1525 | /* -ENOMEM */ | 1525 | /* -ENOMEM */ |
1526 | free_rq_tio(tio); | 1526 | free_rq_tio(tio); |
1527 | return NULL; | 1527 | return NULL; |
1528 | } | 1528 | } |
1529 | 1529 | ||
1530 | return clone; | 1530 | return clone; |
1531 | } | 1531 | } |
1532 | 1532 | ||
1533 | /* | 1533 | /* |
1534 | * Called with the queue lock held. | 1534 | * Called with the queue lock held. |
1535 | */ | 1535 | */ |
1536 | static int dm_prep_fn(struct request_queue *q, struct request *rq) | 1536 | static int dm_prep_fn(struct request_queue *q, struct request *rq) |
1537 | { | 1537 | { |
1538 | struct mapped_device *md = q->queuedata; | 1538 | struct mapped_device *md = q->queuedata; |
1539 | struct request *clone; | 1539 | struct request *clone; |
1540 | 1540 | ||
1541 | if (unlikely(rq->special)) { | 1541 | if (unlikely(rq->special)) { |
1542 | DMWARN("Already has something in rq->special."); | 1542 | DMWARN("Already has something in rq->special."); |
1543 | return BLKPREP_KILL; | 1543 | return BLKPREP_KILL; |
1544 | } | 1544 | } |
1545 | 1545 | ||
1546 | clone = clone_rq(rq, md, GFP_ATOMIC); | 1546 | clone = clone_rq(rq, md, GFP_ATOMIC); |
1547 | if (!clone) | 1547 | if (!clone) |
1548 | return BLKPREP_DEFER; | 1548 | return BLKPREP_DEFER; |
1549 | 1549 | ||
1550 | rq->special = clone; | 1550 | rq->special = clone; |
1551 | rq->cmd_flags |= REQ_DONTPREP; | 1551 | rq->cmd_flags |= REQ_DONTPREP; |
1552 | 1552 | ||
1553 | return BLKPREP_OK; | 1553 | return BLKPREP_OK; |
1554 | } | 1554 | } |
1555 | 1555 | ||
1556 | /* | 1556 | /* |
1557 | * Returns: | 1557 | * Returns: |
1558 | * 0 : the request has been processed (not requeued) | 1558 | * 0 : the request has been processed (not requeued) |
1559 | * !0 : the request has been requeued | 1559 | * !0 : the request has been requeued |
1560 | */ | 1560 | */ |
1561 | static int map_request(struct dm_target *ti, struct request *clone, | 1561 | static int map_request(struct dm_target *ti, struct request *clone, |
1562 | struct mapped_device *md) | 1562 | struct mapped_device *md) |
1563 | { | 1563 | { |
1564 | int r, requeued = 0; | 1564 | int r, requeued = 0; |
1565 | struct dm_rq_target_io *tio = clone->end_io_data; | 1565 | struct dm_rq_target_io *tio = clone->end_io_data; |
1566 | 1566 | ||
1567 | /* | 1567 | /* |
1568 | * Hold the md reference here for the in-flight I/O. | 1568 | * Hold the md reference here for the in-flight I/O. |
1569 | * We can't rely on the reference count held by the device opener, | 1569 | * We can't rely on the reference count held by the device opener, |
1570 | * because the device may be closed during request completion, | 1570 | * because the device may be closed during request completion, |
1571 | * once all of its bios have completed. | 1571 | * once all of its bios have completed. |
1572 | * See the comment in rq_completed() too. | 1572 | * See the comment in rq_completed() too. |
1573 | */ | 1573 | */ |
1574 | dm_get(md); | 1574 | dm_get(md); |
1575 | 1575 | ||
1576 | tio->ti = ti; | 1576 | tio->ti = ti; |
1577 | r = ti->type->map_rq(ti, clone, &tio->info); | 1577 | r = ti->type->map_rq(ti, clone, &tio->info); |
1578 | switch (r) { | 1578 | switch (r) { |
1579 | case DM_MAPIO_SUBMITTED: | 1579 | case DM_MAPIO_SUBMITTED: |
1580 | /* The target has taken the I/O to submit by itself later */ | 1580 | /* The target has taken the I/O to submit by itself later */ |
1581 | break; | 1581 | break; |
1582 | case DM_MAPIO_REMAPPED: | 1582 | case DM_MAPIO_REMAPPED: |
1583 | /* The target has remapped the I/O so dispatch it */ | 1583 | /* The target has remapped the I/O so dispatch it */ |
1584 | trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)), | 1584 | trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)), |
1585 | blk_rq_pos(tio->orig)); | 1585 | blk_rq_pos(tio->orig)); |
1586 | dm_dispatch_request(clone); | 1586 | dm_dispatch_request(clone); |
1587 | break; | 1587 | break; |
1588 | case DM_MAPIO_REQUEUE: | 1588 | case DM_MAPIO_REQUEUE: |
1589 | /* The target wants to requeue the I/O */ | 1589 | /* The target wants to requeue the I/O */ |
1590 | dm_requeue_unmapped_request(clone); | 1590 | dm_requeue_unmapped_request(clone); |
1591 | requeued = 1; | 1591 | requeued = 1; |
1592 | break; | 1592 | break; |
1593 | default: | 1593 | default: |
1594 | if (r > 0) { | 1594 | if (r > 0) { |
1595 | DMWARN("unimplemented target map return value: %d", r); | 1595 | DMWARN("unimplemented target map return value: %d", r); |
1596 | BUG(); | 1596 | BUG(); |
1597 | } | 1597 | } |
1598 | 1598 | ||
1599 | /* The target wants to complete the I/O */ | 1599 | /* The target wants to complete the I/O */ |
1600 | dm_kill_unmapped_request(clone, r); | 1600 | dm_kill_unmapped_request(clone, r); |
1601 | break; | 1601 | break; |
1602 | } | 1602 | } |
1603 | 1603 | ||
1604 | return requeued; | 1604 | return requeued; |
1605 | } | 1605 | } |
1606 | 1606 | ||
1607 | /* | 1607 | /* |
1608 | * q->request_fn for request-based dm. | 1608 | * q->request_fn for request-based dm. |
1609 | * Called with the queue lock held. | 1609 | * Called with the queue lock held. |
1610 | */ | 1610 | */ |
1611 | static void dm_request_fn(struct request_queue *q) | 1611 | static void dm_request_fn(struct request_queue *q) |
1612 | { | 1612 | { |
1613 | struct mapped_device *md = q->queuedata; | 1613 | struct mapped_device *md = q->queuedata; |
1614 | struct dm_table *map = dm_get_live_table(md); | 1614 | struct dm_table *map = dm_get_live_table(md); |
1615 | struct dm_target *ti; | 1615 | struct dm_target *ti; |
1616 | struct request *rq, *clone; | 1616 | struct request *rq, *clone; |
1617 | sector_t pos; | 1617 | sector_t pos; |
1618 | 1618 | ||
1619 | /* | 1619 | /* |
1620 | * For suspend, check blk_queue_stopped() and increment | 1620 | * For suspend, check blk_queue_stopped() and increment |
1621 | * ->pending within a single queue_lock so that the number of | 1621 | * ->pending within a single queue_lock so that the number of |
1622 | * in-flight I/Os is not incremented after the queue is stopped | 1622 | * in-flight I/Os is not incremented after the queue is stopped |
1623 | * in dm_suspend(). | 1623 | * in dm_suspend(). |
1624 | */ | 1624 | */ |
1625 | while (!blk_queue_stopped(q)) { | 1625 | while (!blk_queue_stopped(q)) { |
1626 | rq = blk_peek_request(q); | 1626 | rq = blk_peek_request(q); |
1627 | if (!rq) | 1627 | if (!rq) |
1628 | goto delay_and_out; | 1628 | goto delay_and_out; |
1629 | 1629 | ||
1630 | /* always use block 0 to find the target for flushes for now */ | 1630 | /* always use block 0 to find the target for flushes for now */ |
1631 | pos = 0; | 1631 | pos = 0; |
1632 | if (!(rq->cmd_flags & REQ_FLUSH)) | 1632 | if (!(rq->cmd_flags & REQ_FLUSH)) |
1633 | pos = blk_rq_pos(rq); | 1633 | pos = blk_rq_pos(rq); |
1634 | 1634 | ||
1635 | ti = dm_table_find_target(map, pos); | 1635 | ti = dm_table_find_target(map, pos); |
1636 | BUG_ON(!dm_target_is_valid(ti)); | 1636 | BUG_ON(!dm_target_is_valid(ti)); |
1637 | 1637 | ||
1638 | if (ti->type->busy && ti->type->busy(ti)) | 1638 | if (ti->type->busy && ti->type->busy(ti)) |
1639 | goto delay_and_out; | 1639 | goto delay_and_out; |
1640 | 1640 | ||
1641 | blk_start_request(rq); | 1641 | blk_start_request(rq); |
1642 | clone = rq->special; | 1642 | clone = rq->special; |
1643 | atomic_inc(&md->pending[rq_data_dir(clone)]); | 1643 | atomic_inc(&md->pending[rq_data_dir(clone)]); |
1644 | 1644 | ||
1645 | spin_unlock(q->queue_lock); | 1645 | spin_unlock(q->queue_lock); |
1646 | if (map_request(ti, clone, md)) | 1646 | if (map_request(ti, clone, md)) |
1647 | goto requeued; | 1647 | goto requeued; |
1648 | 1648 | ||
1649 | BUG_ON(!irqs_disabled()); | 1649 | BUG_ON(!irqs_disabled()); |
1650 | spin_lock(q->queue_lock); | 1650 | spin_lock(q->queue_lock); |
1651 | } | 1651 | } |
1652 | 1652 | ||
1653 | goto out; | 1653 | goto out; |
1654 | 1654 | ||
1655 | requeued: | 1655 | requeued: |
1656 | BUG_ON(!irqs_disabled()); | 1656 | BUG_ON(!irqs_disabled()); |
1657 | spin_lock(q->queue_lock); | 1657 | spin_lock(q->queue_lock); |
1658 | 1658 | ||
1659 | delay_and_out: | 1659 | delay_and_out: |
1660 | blk_delay_queue(q, HZ / 10); | 1660 | blk_delay_queue(q, HZ / 10); |
1661 | out: | 1661 | out: |
1662 | dm_table_put(map); | 1662 | dm_table_put(map); |
1663 | 1663 | ||
1664 | return; | 1664 | return; |
1665 | } | 1665 | } |
1666 | 1666 | ||
1667 | int dm_underlying_device_busy(struct request_queue *q) | 1667 | int dm_underlying_device_busy(struct request_queue *q) |
1668 | { | 1668 | { |
1669 | return blk_lld_busy(q); | 1669 | return blk_lld_busy(q); |
1670 | } | 1670 | } |
1671 | EXPORT_SYMBOL_GPL(dm_underlying_device_busy); | 1671 | EXPORT_SYMBOL_GPL(dm_underlying_device_busy); |
1672 | 1672 | ||
1673 | static int dm_lld_busy(struct request_queue *q) | 1673 | static int dm_lld_busy(struct request_queue *q) |
1674 | { | 1674 | { |
1675 | int r; | 1675 | int r; |
1676 | struct mapped_device *md = q->queuedata; | 1676 | struct mapped_device *md = q->queuedata; |
1677 | struct dm_table *map = dm_get_live_table(md); | 1677 | struct dm_table *map = dm_get_live_table(md); |
1678 | 1678 | ||
1679 | if (!map || test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) | 1679 | if (!map || test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) |
1680 | r = 1; | 1680 | r = 1; |
1681 | else | 1681 | else |
1682 | r = dm_table_any_busy_target(map); | 1682 | r = dm_table_any_busy_target(map); |
1683 | 1683 | ||
1684 | dm_table_put(map); | 1684 | dm_table_put(map); |
1685 | 1685 | ||
1686 | return r; | 1686 | return r; |
1687 | } | 1687 | } |
1688 | 1688 | ||
1689 | static int dm_any_congested(void *congested_data, int bdi_bits) | 1689 | static int dm_any_congested(void *congested_data, int bdi_bits) |
1690 | { | 1690 | { |
1691 | int r = bdi_bits; | 1691 | int r = bdi_bits; |
1692 | struct mapped_device *md = congested_data; | 1692 | struct mapped_device *md = congested_data; |
1693 | struct dm_table *map; | 1693 | struct dm_table *map; |
1694 | 1694 | ||
1695 | if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { | 1695 | if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { |
1696 | map = dm_get_live_table(md); | 1696 | map = dm_get_live_table(md); |
1697 | if (map) { | 1697 | if (map) { |
1698 | /* | 1698 | /* |
1699 | * Request-based dm only cares about its own queue when | 1699 | * Request-based dm only cares about its own queue when |
1700 | * queried for the congestion status of the request_queue | 1700 | * queried for the congestion status of the request_queue |
1701 | */ | 1701 | */ |
1702 | if (dm_request_based(md)) | 1702 | if (dm_request_based(md)) |
1703 | r = md->queue->backing_dev_info.state & | 1703 | r = md->queue->backing_dev_info.state & |
1704 | bdi_bits; | 1704 | bdi_bits; |
1705 | else | 1705 | else |
1706 | r = dm_table_any_congested(map, bdi_bits); | 1706 | r = dm_table_any_congested(map, bdi_bits); |
1707 | 1707 | ||
1708 | dm_table_put(map); | 1708 | dm_table_put(map); |
1709 | } | 1709 | } |
1710 | } | 1710 | } |
1711 | 1711 | ||
1712 | return r; | 1712 | return r; |
1713 | } | 1713 | } |
1714 | 1714 | ||
1715 | /*----------------------------------------------------------------- | 1715 | /*----------------------------------------------------------------- |
1716 | * An IDR is used to keep track of allocated minor numbers. | 1716 | * An IDR is used to keep track of allocated minor numbers. |
1717 | *---------------------------------------------------------------*/ | 1717 | *---------------------------------------------------------------*/ |
1718 | static void free_minor(int minor) | 1718 | static void free_minor(int minor) |
1719 | { | 1719 | { |
1720 | spin_lock(&_minor_lock); | 1720 | spin_lock(&_minor_lock); |
1721 | idr_remove(&_minor_idr, minor); | 1721 | idr_remove(&_minor_idr, minor); |
1722 | spin_unlock(&_minor_lock); | 1722 | spin_unlock(&_minor_lock); |
1723 | } | 1723 | } |
1724 | 1724 | ||
1725 | /* | 1725 | /* |
1726 | * See if the device with a specific minor # is free. | 1726 | * See if the device with a specific minor # is free. |
1727 | */ | 1727 | */ |
1728 | static int specific_minor(int minor) | 1728 | static int specific_minor(int minor) |
1729 | { | 1729 | { |
1730 | int r, m; | 1730 | int r, m; |
1731 | 1731 | ||
1732 | if (minor >= (1 << MINORBITS)) | 1732 | if (minor >= (1 << MINORBITS)) |
1733 | return -EINVAL; | 1733 | return -EINVAL; |
1734 | 1734 | ||
1735 | r = idr_pre_get(&_minor_idr, GFP_KERNEL); | 1735 | r = idr_pre_get(&_minor_idr, GFP_KERNEL); |
1736 | if (!r) | 1736 | if (!r) |
1737 | return -ENOMEM; | 1737 | return -ENOMEM; |
1738 | 1738 | ||
1739 | spin_lock(&_minor_lock); | 1739 | spin_lock(&_minor_lock); |
1740 | 1740 | ||
1741 | if (idr_find(&_minor_idr, minor)) { | 1741 | if (idr_find(&_minor_idr, minor)) { |
1742 | r = -EBUSY; | 1742 | r = -EBUSY; |
1743 | goto out; | 1743 | goto out; |
1744 | } | 1744 | } |
1745 | 1745 | ||
1746 | r = idr_get_new_above(&_minor_idr, MINOR_ALLOCED, minor, &m); | 1746 | r = idr_get_new_above(&_minor_idr, MINOR_ALLOCED, minor, &m); |
1747 | if (r) | 1747 | if (r) |
1748 | goto out; | 1748 | goto out; |
1749 | 1749 | ||
1750 | if (m != minor) { | 1750 | if (m != minor) { |
1751 | idr_remove(&_minor_idr, m); | 1751 | idr_remove(&_minor_idr, m); |
1752 | r = -EBUSY; | 1752 | r = -EBUSY; |
1753 | goto out; | 1753 | goto out; |
1754 | } | 1754 | } |
1755 | 1755 | ||
1756 | out: | 1756 | out: |
1757 | spin_unlock(&_minor_lock); | 1757 | spin_unlock(&_minor_lock); |
1758 | return r; | 1758 | return r; |
1759 | } | 1759 | } |
1760 | 1760 | ||
1761 | static int next_free_minor(int *minor) | 1761 | static int next_free_minor(int *minor) |
1762 | { | 1762 | { |
1763 | int r, m; | 1763 | int r, m; |
1764 | 1764 | ||
1765 | r = idr_pre_get(&_minor_idr, GFP_KERNEL); | 1765 | r = idr_pre_get(&_minor_idr, GFP_KERNEL); |
1766 | if (!r) | 1766 | if (!r) |
1767 | return -ENOMEM; | 1767 | return -ENOMEM; |
1768 | 1768 | ||
1769 | spin_lock(&_minor_lock); | 1769 | spin_lock(&_minor_lock); |
1770 | 1770 | ||
1771 | r = idr_get_new(&_minor_idr, MINOR_ALLOCED, &m); | 1771 | r = idr_get_new(&_minor_idr, MINOR_ALLOCED, &m); |
1772 | if (r) | 1772 | if (r) |
1773 | goto out; | 1773 | goto out; |
1774 | 1774 | ||
1775 | if (m >= (1 << MINORBITS)) { | 1775 | if (m >= (1 << MINORBITS)) { |
1776 | idr_remove(&_minor_idr, m); | 1776 | idr_remove(&_minor_idr, m); |
1777 | r = -ENOSPC; | 1777 | r = -ENOSPC; |
1778 | goto out; | 1778 | goto out; |
1779 | } | 1779 | } |
1780 | 1780 | ||
1781 | *minor = m; | 1781 | *minor = m; |
1782 | 1782 | ||
1783 | out: | 1783 | out: |
1784 | spin_unlock(&_minor_lock); | 1784 | spin_unlock(&_minor_lock); |
1785 | return r; | 1785 | return r; |
1786 | } | 1786 | } |
1787 | 1787 | ||
1788 | static const struct block_device_operations dm_blk_dops; | 1788 | static const struct block_device_operations dm_blk_dops; |
1789 | 1789 | ||
1790 | static void dm_wq_work(struct work_struct *work); | 1790 | static void dm_wq_work(struct work_struct *work); |
1791 | 1791 | ||
1792 | static void dm_init_md_queue(struct mapped_device *md) | 1792 | static void dm_init_md_queue(struct mapped_device *md) |
1793 | { | 1793 | { |
1794 | /* | 1794 | /* |
1795 | * Request-based dm devices cannot be stacked on top of bio-based dm | 1795 | * Request-based dm devices cannot be stacked on top of bio-based dm |
1796 | * devices. The type of this dm device has not been decided yet. | 1796 | * devices. The type of this dm device has not been decided yet. |
1797 | * The type is decided at the first table loading time. | 1797 | * The type is decided at the first table loading time. |
1798 | * To prevent problematic device stacking, clear the queue flag | 1798 | * To prevent problematic device stacking, clear the queue flag |
1799 | * for request stacking support until then. | 1799 | * for request stacking support until then. |
1800 | * | 1800 | * |
1801 | * This queue is new, so no concurrency on the queue_flags. | 1801 | * This queue is new, so no concurrency on the queue_flags. |
1802 | */ | 1802 | */ |
1803 | queue_flag_clear_unlocked(QUEUE_FLAG_STACKABLE, md->queue); | 1803 | queue_flag_clear_unlocked(QUEUE_FLAG_STACKABLE, md->queue); |
1804 | 1804 | ||
1805 | md->queue->queuedata = md; | 1805 | md->queue->queuedata = md; |
1806 | md->queue->backing_dev_info.congested_fn = dm_any_congested; | 1806 | md->queue->backing_dev_info.congested_fn = dm_any_congested; |
1807 | md->queue->backing_dev_info.congested_data = md; | 1807 | md->queue->backing_dev_info.congested_data = md; |
1808 | blk_queue_make_request(md->queue, dm_request); | 1808 | blk_queue_make_request(md->queue, dm_request); |
1809 | blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY); | 1809 | blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY); |
1810 | blk_queue_merge_bvec(md->queue, dm_merge_bvec); | 1810 | blk_queue_merge_bvec(md->queue, dm_merge_bvec); |
1811 | blk_queue_flush(md->queue, REQ_FLUSH | REQ_FUA); | ||
1812 | } | 1811 | } |
1813 | 1812 | ||
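With the unconditional REQ_FLUSH | REQ_FUA advertisement removed from dm_init_md_queue() above, the flush capability has to be derived from the underlying devices when the table's restrictions are applied. The following C sketch shows one way that table-side logic can look; the helper names device_flush_capable() and dm_table_supports_flush(), and the exact placement inside dm_table_set_restrictions(), are illustrative assumptions rather than a verbatim copy of the dm-table.c hunk.

/*
 * Sketch only (hypothetical helper names); relies on <linux/device-mapper.h>
 * and <linux/blkdev.h>. A device contributes a flush capability only if its
 * queue advertises the corresponding flag.
 */
static int device_flush_capable(struct dm_target *ti, struct dm_dev *dev,
				sector_t start, sector_t len, void *data)
{
	unsigned flush = *(unsigned *) data;
	struct request_queue *q = bdev_get_queue(dev->bdev);

	return q && (q->flush_flags & flush);
}

static bool dm_table_supports_flush(struct dm_table *t, unsigned flush)
{
	struct dm_target *ti;
	unsigned i = 0;

	/* Require at least one underlying device to advertise the capability. */
	while (i < dm_table_get_num_targets(t)) {
		ti = dm_table_get_target(t, i++);

		/* Targets that never issue flushes cannot contribute. */
		if (!ti->num_flush_requests)
			continue;

		if (ti->type->iterate_devices &&
		    ti->type->iterate_devices(ti, device_flush_capable, &flush))
			return true;
	}

	return false;
}

/*
 * Inside dm_table_set_restrictions(q, t, limits), the flags would then be
 * chosen before calling blk_queue_flush():
 */
	unsigned flush = 0;

	if (dm_table_supports_flush(t, REQ_FLUSH)) {
		flush |= REQ_FLUSH;
		if (dm_table_supports_flush(t, REQ_FUA))
			flush |= REQ_FUA;
	}
	blk_queue_flush(q, flush);

The effect is that REQ_FLUSH/REQ_FUA are only advertised for the DM device when at least one underlying device advertises them itself.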
1814 | /* | 1813 | /* |
1815 | * Allocate and initialise a blank device with a given minor. | 1814 | * Allocate and initialise a blank device with a given minor. |
1816 | */ | 1815 | */ |
1817 | static struct mapped_device *alloc_dev(int minor) | 1816 | static struct mapped_device *alloc_dev(int minor) |
1818 | { | 1817 | { |
1819 | int r; | 1818 | int r; |
1820 | struct mapped_device *md = kzalloc(sizeof(*md), GFP_KERNEL); | 1819 | struct mapped_device *md = kzalloc(sizeof(*md), GFP_KERNEL); |
1821 | void *old_md; | 1820 | void *old_md; |
1822 | 1821 | ||
1823 | if (!md) { | 1822 | if (!md) { |
1824 | DMWARN("unable to allocate device, out of memory."); | 1823 | DMWARN("unable to allocate device, out of memory."); |
1825 | return NULL; | 1824 | return NULL; |
1826 | } | 1825 | } |
1827 | 1826 | ||
1828 | if (!try_module_get(THIS_MODULE)) | 1827 | if (!try_module_get(THIS_MODULE)) |
1829 | goto bad_module_get; | 1828 | goto bad_module_get; |
1830 | 1829 | ||
1831 | /* get a minor number for the dev */ | 1830 | /* get a minor number for the dev */ |
1832 | if (minor == DM_ANY_MINOR) | 1831 | if (minor == DM_ANY_MINOR) |
1833 | r = next_free_minor(&minor); | 1832 | r = next_free_minor(&minor); |
1834 | else | 1833 | else |
1835 | r = specific_minor(minor); | 1834 | r = specific_minor(minor); |
1836 | if (r < 0) | 1835 | if (r < 0) |
1837 | goto bad_minor; | 1836 | goto bad_minor; |
1838 | 1837 | ||
1839 | md->type = DM_TYPE_NONE; | 1838 | md->type = DM_TYPE_NONE; |
1840 | init_rwsem(&md->io_lock); | 1839 | init_rwsem(&md->io_lock); |
1841 | mutex_init(&md->suspend_lock); | 1840 | mutex_init(&md->suspend_lock); |
1842 | mutex_init(&md->type_lock); | 1841 | mutex_init(&md->type_lock); |
1843 | spin_lock_init(&md->deferred_lock); | 1842 | spin_lock_init(&md->deferred_lock); |
1844 | rwlock_init(&md->map_lock); | 1843 | rwlock_init(&md->map_lock); |
1845 | atomic_set(&md->holders, 1); | 1844 | atomic_set(&md->holders, 1); |
1846 | atomic_set(&md->open_count, 0); | 1845 | atomic_set(&md->open_count, 0); |
1847 | atomic_set(&md->event_nr, 0); | 1846 | atomic_set(&md->event_nr, 0); |
1848 | atomic_set(&md->uevent_seq, 0); | 1847 | atomic_set(&md->uevent_seq, 0); |
1849 | INIT_LIST_HEAD(&md->uevent_list); | 1848 | INIT_LIST_HEAD(&md->uevent_list); |
1850 | spin_lock_init(&md->uevent_lock); | 1849 | spin_lock_init(&md->uevent_lock); |
1851 | 1850 | ||
1852 | md->queue = blk_alloc_queue(GFP_KERNEL); | 1851 | md->queue = blk_alloc_queue(GFP_KERNEL); |
1853 | if (!md->queue) | 1852 | if (!md->queue) |
1854 | goto bad_queue; | 1853 | goto bad_queue; |
1855 | 1854 | ||
1856 | dm_init_md_queue(md); | 1855 | dm_init_md_queue(md); |
1857 | 1856 | ||
1858 | md->disk = alloc_disk(1); | 1857 | md->disk = alloc_disk(1); |
1859 | if (!md->disk) | 1858 | if (!md->disk) |
1860 | goto bad_disk; | 1859 | goto bad_disk; |
1861 | 1860 | ||
1862 | atomic_set(&md->pending[0], 0); | 1861 | atomic_set(&md->pending[0], 0); |
1863 | atomic_set(&md->pending[1], 0); | 1862 | atomic_set(&md->pending[1], 0); |
1864 | init_waitqueue_head(&md->wait); | 1863 | init_waitqueue_head(&md->wait); |
1865 | INIT_WORK(&md->work, dm_wq_work); | 1864 | INIT_WORK(&md->work, dm_wq_work); |
1866 | init_waitqueue_head(&md->eventq); | 1865 | init_waitqueue_head(&md->eventq); |
1867 | 1866 | ||
1868 | md->disk->major = _major; | 1867 | md->disk->major = _major; |
1869 | md->disk->first_minor = minor; | 1868 | md->disk->first_minor = minor; |
1870 | md->disk->fops = &dm_blk_dops; | 1869 | md->disk->fops = &dm_blk_dops; |
1871 | md->disk->queue = md->queue; | 1870 | md->disk->queue = md->queue; |
1872 | md->disk->private_data = md; | 1871 | md->disk->private_data = md; |
1873 | sprintf(md->disk->disk_name, "dm-%d", minor); | 1872 | sprintf(md->disk->disk_name, "dm-%d", minor); |
1874 | add_disk(md->disk); | 1873 | add_disk(md->disk); |
1875 | format_dev_t(md->name, MKDEV(_major, minor)); | 1874 | format_dev_t(md->name, MKDEV(_major, minor)); |
1876 | 1875 | ||
1877 | md->wq = alloc_workqueue("kdmflush", | 1876 | md->wq = alloc_workqueue("kdmflush", |
1878 | WQ_NON_REENTRANT | WQ_MEM_RECLAIM, 0); | 1877 | WQ_NON_REENTRANT | WQ_MEM_RECLAIM, 0); |
1879 | if (!md->wq) | 1878 | if (!md->wq) |
1880 | goto bad_thread; | 1879 | goto bad_thread; |
1881 | 1880 | ||
1882 | md->bdev = bdget_disk(md->disk, 0); | 1881 | md->bdev = bdget_disk(md->disk, 0); |
1883 | if (!md->bdev) | 1882 | if (!md->bdev) |
1884 | goto bad_bdev; | 1883 | goto bad_bdev; |
1885 | 1884 | ||
1886 | bio_init(&md->flush_bio); | 1885 | bio_init(&md->flush_bio); |
1887 | md->flush_bio.bi_bdev = md->bdev; | 1886 | md->flush_bio.bi_bdev = md->bdev; |
1888 | md->flush_bio.bi_rw = WRITE_FLUSH; | 1887 | md->flush_bio.bi_rw = WRITE_FLUSH; |
1889 | 1888 | ||
1890 | /* Populate the mapping, nobody knows we exist yet */ | 1889 | /* Populate the mapping, nobody knows we exist yet */ |
1891 | spin_lock(&_minor_lock); | 1890 | spin_lock(&_minor_lock); |
1892 | old_md = idr_replace(&_minor_idr, md, minor); | 1891 | old_md = idr_replace(&_minor_idr, md, minor); |
1893 | spin_unlock(&_minor_lock); | 1892 | spin_unlock(&_minor_lock); |
1894 | 1893 | ||
1895 | BUG_ON(old_md != MINOR_ALLOCED); | 1894 | BUG_ON(old_md != MINOR_ALLOCED); |
1896 | 1895 | ||
1897 | return md; | 1896 | return md; |
1898 | 1897 | ||
1899 | bad_bdev: | 1898 | bad_bdev: |
1900 | destroy_workqueue(md->wq); | 1899 | destroy_workqueue(md->wq); |
1901 | bad_thread: | 1900 | bad_thread: |
1902 | del_gendisk(md->disk); | 1901 | del_gendisk(md->disk); |
1903 | put_disk(md->disk); | 1902 | put_disk(md->disk); |
1904 | bad_disk: | 1903 | bad_disk: |
1905 | blk_cleanup_queue(md->queue); | 1904 | blk_cleanup_queue(md->queue); |
1906 | bad_queue: | 1905 | bad_queue: |
1907 | free_minor(minor); | 1906 | free_minor(minor); |
1908 | bad_minor: | 1907 | bad_minor: |
1909 | module_put(THIS_MODULE); | 1908 | module_put(THIS_MODULE); |
1910 | bad_module_get: | 1909 | bad_module_get: |
1911 | kfree(md); | 1910 | kfree(md); |
1912 | return NULL; | 1911 | return NULL; |
1913 | } | 1912 | } |
1914 | 1913 | ||
1915 | static void unlock_fs(struct mapped_device *md); | 1914 | static void unlock_fs(struct mapped_device *md); |
1916 | 1915 | ||
1917 | static void free_dev(struct mapped_device *md) | 1916 | static void free_dev(struct mapped_device *md) |
1918 | { | 1917 | { |
1919 | int minor = MINOR(disk_devt(md->disk)); | 1918 | int minor = MINOR(disk_devt(md->disk)); |
1920 | 1919 | ||
1921 | unlock_fs(md); | 1920 | unlock_fs(md); |
1922 | bdput(md->bdev); | 1921 | bdput(md->bdev); |
1923 | destroy_workqueue(md->wq); | 1922 | destroy_workqueue(md->wq); |
1924 | if (md->tio_pool) | 1923 | if (md->tio_pool) |
1925 | mempool_destroy(md->tio_pool); | 1924 | mempool_destroy(md->tio_pool); |
1926 | if (md->io_pool) | 1925 | if (md->io_pool) |
1927 | mempool_destroy(md->io_pool); | 1926 | mempool_destroy(md->io_pool); |
1928 | if (md->bs) | 1927 | if (md->bs) |
1929 | bioset_free(md->bs); | 1928 | bioset_free(md->bs); |
1930 | blk_integrity_unregister(md->disk); | 1929 | blk_integrity_unregister(md->disk); |
1931 | del_gendisk(md->disk); | 1930 | del_gendisk(md->disk); |
1932 | free_minor(minor); | 1931 | free_minor(minor); |
1933 | 1932 | ||
1934 | spin_lock(&_minor_lock); | 1933 | spin_lock(&_minor_lock); |
1935 | md->disk->private_data = NULL; | 1934 | md->disk->private_data = NULL; |
1936 | spin_unlock(&_minor_lock); | 1935 | spin_unlock(&_minor_lock); |
1937 | 1936 | ||
1938 | put_disk(md->disk); | 1937 | put_disk(md->disk); |
1939 | blk_cleanup_queue(md->queue); | 1938 | blk_cleanup_queue(md->queue); |
1940 | module_put(THIS_MODULE); | 1939 | module_put(THIS_MODULE); |
1941 | kfree(md); | 1940 | kfree(md); |
1942 | } | 1941 | } |
1943 | 1942 | ||
1944 | static void __bind_mempools(struct mapped_device *md, struct dm_table *t) | 1943 | static void __bind_mempools(struct mapped_device *md, struct dm_table *t) |
1945 | { | 1944 | { |
1946 | struct dm_md_mempools *p; | 1945 | struct dm_md_mempools *p; |
1947 | 1946 | ||
1948 | if (md->io_pool && md->tio_pool && md->bs) | 1947 | if (md->io_pool && md->tio_pool && md->bs) |
1949 | /* the md already has necessary mempools */ | 1948 | /* the md already has necessary mempools */ |
1950 | goto out; | 1949 | goto out; |
1951 | 1950 | ||
1952 | p = dm_table_get_md_mempools(t); | 1951 | p = dm_table_get_md_mempools(t); |
1953 | BUG_ON(!p || md->io_pool || md->tio_pool || md->bs); | 1952 | BUG_ON(!p || md->io_pool || md->tio_pool || md->bs); |
1954 | 1953 | ||
1955 | md->io_pool = p->io_pool; | 1954 | md->io_pool = p->io_pool; |
1956 | p->io_pool = NULL; | 1955 | p->io_pool = NULL; |
1957 | md->tio_pool = p->tio_pool; | 1956 | md->tio_pool = p->tio_pool; |
1958 | p->tio_pool = NULL; | 1957 | p->tio_pool = NULL; |
1959 | md->bs = p->bs; | 1958 | md->bs = p->bs; |
1960 | p->bs = NULL; | 1959 | p->bs = NULL; |
1961 | 1960 | ||
1962 | out: | 1961 | out: |
1963 | /* mempool binding completed, the table no longer needs any mempools */ | 1962 | /* mempool binding completed, the table no longer needs any mempools */ |
1964 | dm_table_free_md_mempools(t); | 1963 | dm_table_free_md_mempools(t); |
1965 | } | 1964 | } |
1966 | 1965 | ||
1967 | /* | 1966 | /* |
1968 | * Bind a table to the device. | 1967 | * Bind a table to the device. |
1969 | */ | 1968 | */ |
1970 | static void event_callback(void *context) | 1969 | static void event_callback(void *context) |
1971 | { | 1970 | { |
1972 | unsigned long flags; | 1971 | unsigned long flags; |
1973 | LIST_HEAD(uevents); | 1972 | LIST_HEAD(uevents); |
1974 | struct mapped_device *md = (struct mapped_device *) context; | 1973 | struct mapped_device *md = (struct mapped_device *) context; |
1975 | 1974 | ||
1976 | spin_lock_irqsave(&md->uevent_lock, flags); | 1975 | spin_lock_irqsave(&md->uevent_lock, flags); |
1977 | list_splice_init(&md->uevent_list, &uevents); | 1976 | list_splice_init(&md->uevent_list, &uevents); |
1978 | spin_unlock_irqrestore(&md->uevent_lock, flags); | 1977 | spin_unlock_irqrestore(&md->uevent_lock, flags); |
1979 | 1978 | ||
1980 | dm_send_uevents(&uevents, &disk_to_dev(md->disk)->kobj); | 1979 | dm_send_uevents(&uevents, &disk_to_dev(md->disk)->kobj); |
1981 | 1980 | ||
1982 | atomic_inc(&md->event_nr); | 1981 | atomic_inc(&md->event_nr); |
1983 | wake_up(&md->eventq); | 1982 | wake_up(&md->eventq); |
1984 | } | 1983 | } |
1985 | 1984 | ||
1986 | /* | 1985 | /* |
1987 | * Protected by md->suspend_lock obtained by dm_swap_table(). | 1986 | * Protected by md->suspend_lock obtained by dm_swap_table(). |
1988 | */ | 1987 | */ |
1989 | static void __set_size(struct mapped_device *md, sector_t size) | 1988 | static void __set_size(struct mapped_device *md, sector_t size) |
1990 | { | 1989 | { |
1991 | set_capacity(md->disk, size); | 1990 | set_capacity(md->disk, size); |
1992 | 1991 | ||
1993 | i_size_write(md->bdev->bd_inode, (loff_t)size << SECTOR_SHIFT); | 1992 | i_size_write(md->bdev->bd_inode, (loff_t)size << SECTOR_SHIFT); |
1994 | } | 1993 | } |
1995 | 1994 | ||
1996 | /* | 1995 | /* |
1997 | * Return 1 if the queue has a compulsory merge_bvec_fn function. | 1996 | * Return 1 if the queue has a compulsory merge_bvec_fn function. |
1998 | * | 1997 | * |
1999 | * If this function returns 0, then the device is either a non-dm | 1998 | * If this function returns 0, then the device is either a non-dm |
2000 | * device without a merge_bvec_fn, or it is a dm device that is | 1999 | * device without a merge_bvec_fn, or it is a dm device that is |
2001 | * able to split any bios it receives that are too big. | 2000 | * able to split any bios it receives that are too big. |
2002 | */ | 2001 | */ |
2003 | int dm_queue_merge_is_compulsory(struct request_queue *q) | 2002 | int dm_queue_merge_is_compulsory(struct request_queue *q) |
2004 | { | 2003 | { |
2005 | struct mapped_device *dev_md; | 2004 | struct mapped_device *dev_md; |
2006 | 2005 | ||
2007 | if (!q->merge_bvec_fn) | 2006 | if (!q->merge_bvec_fn) |
2008 | return 0; | 2007 | return 0; |
2009 | 2008 | ||
2010 | if (q->make_request_fn == dm_request) { | 2009 | if (q->make_request_fn == dm_request) { |
2011 | dev_md = q->queuedata; | 2010 | dev_md = q->queuedata; |
2012 | if (test_bit(DMF_MERGE_IS_OPTIONAL, &dev_md->flags)) | 2011 | if (test_bit(DMF_MERGE_IS_OPTIONAL, &dev_md->flags)) |
2013 | return 0; | 2012 | return 0; |
2014 | } | 2013 | } |
2015 | 2014 | ||
2016 | return 1; | 2015 | return 1; |
2017 | } | 2016 | } |
2018 | 2017 | ||
2019 | static int dm_device_merge_is_compulsory(struct dm_target *ti, | 2018 | static int dm_device_merge_is_compulsory(struct dm_target *ti, |
2020 | struct dm_dev *dev, sector_t start, | 2019 | struct dm_dev *dev, sector_t start, |
2021 | sector_t len, void *data) | 2020 | sector_t len, void *data) |
2022 | { | 2021 | { |
2023 | struct block_device *bdev = dev->bdev; | 2022 | struct block_device *bdev = dev->bdev; |
2024 | struct request_queue *q = bdev_get_queue(bdev); | 2023 | struct request_queue *q = bdev_get_queue(bdev); |
2025 | 2024 | ||
2026 | return dm_queue_merge_is_compulsory(q); | 2025 | return dm_queue_merge_is_compulsory(q); |
2027 | } | 2026 | } |
2028 | 2027 | ||
2029 | /* | 2028 | /* |
2030 | * Return 1 if it is acceptable to ignore merge_bvec_fn based | 2029 | * Return 1 if it is acceptable to ignore merge_bvec_fn based |
2031 | * on the properties of the underlying devices. | 2030 | * on the properties of the underlying devices. |
2032 | */ | 2031 | */ |
2033 | static int dm_table_merge_is_optional(struct dm_table *table) | 2032 | static int dm_table_merge_is_optional(struct dm_table *table) |
2034 | { | 2033 | { |
2035 | unsigned i = 0; | 2034 | unsigned i = 0; |
2036 | struct dm_target *ti; | 2035 | struct dm_target *ti; |
2037 | 2036 | ||
2038 | while (i < dm_table_get_num_targets(table)) { | 2037 | while (i < dm_table_get_num_targets(table)) { |
2039 | ti = dm_table_get_target(table, i++); | 2038 | ti = dm_table_get_target(table, i++); |
2040 | 2039 | ||
2041 | if (ti->type->iterate_devices && | 2040 | if (ti->type->iterate_devices && |
2042 | ti->type->iterate_devices(ti, dm_device_merge_is_compulsory, NULL)) | 2041 | ti->type->iterate_devices(ti, dm_device_merge_is_compulsory, NULL)) |
2043 | return 0; | 2042 | return 0; |
2044 | } | 2043 | } |
2045 | 2044 | ||
2046 | return 1; | 2045 | return 1; |
2047 | } | 2046 | } |
2048 | 2047 | ||
2049 | /* | 2048 | /* |
2050 | * Returns old map, which caller must destroy. | 2049 | * Returns old map, which caller must destroy. |
2051 | */ | 2050 | */ |
2052 | static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, | 2051 | static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, |
2053 | struct queue_limits *limits) | 2052 | struct queue_limits *limits) |
2054 | { | 2053 | { |
2055 | struct dm_table *old_map; | 2054 | struct dm_table *old_map; |
2056 | struct request_queue *q = md->queue; | 2055 | struct request_queue *q = md->queue; |
2057 | sector_t size; | 2056 | sector_t size; |
2058 | unsigned long flags; | 2057 | unsigned long flags; |
2059 | int merge_is_optional; | 2058 | int merge_is_optional; |
2060 | 2059 | ||
2061 | size = dm_table_get_size(t); | 2060 | size = dm_table_get_size(t); |
2062 | 2061 | ||
2063 | /* | 2062 | /* |
2064 | * Wipe any geometry if the size of the table changed. | 2063 | * Wipe any geometry if the size of the table changed. |
2065 | */ | 2064 | */ |
2066 | if (size != get_capacity(md->disk)) | 2065 | if (size != get_capacity(md->disk)) |
2067 | memset(&md->geometry, 0, sizeof(md->geometry)); | 2066 | memset(&md->geometry, 0, sizeof(md->geometry)); |
2068 | 2067 | ||
2069 | __set_size(md, size); | 2068 | __set_size(md, size); |
2070 | 2069 | ||
2071 | dm_table_event_callback(t, event_callback, md); | 2070 | dm_table_event_callback(t, event_callback, md); |
2072 | 2071 | ||
2073 | /* | 2072 | /* |
2074 | * The queue hasn't been stopped yet if the old table type wasn't | 2073 | * The queue hasn't been stopped yet if the old table type wasn't |
2075 | * request-based during suspension, so stop it now to prevent | 2074 | * request-based during suspension, so stop it now to prevent |
2076 | * I/O from being mapped before resume. | 2075 | * I/O from being mapped before resume. |
2077 | * This must be done before setting the queue restrictions, | 2076 | * This must be done before setting the queue restrictions, |
2078 | * because request-based dm may start running as soon as they are set. | 2077 | * because request-based dm may start running as soon as they are set. |
2079 | */ | 2078 | */ |
2080 | if (dm_table_request_based(t) && !blk_queue_stopped(q)) | 2079 | if (dm_table_request_based(t) && !blk_queue_stopped(q)) |
2081 | stop_queue(q); | 2080 | stop_queue(q); |
2082 | 2081 | ||
2083 | __bind_mempools(md, t); | 2082 | __bind_mempools(md, t); |
2084 | 2083 | ||
2085 | merge_is_optional = dm_table_merge_is_optional(t); | 2084 | merge_is_optional = dm_table_merge_is_optional(t); |
2086 | 2085 | ||
2087 | write_lock_irqsave(&md->map_lock, flags); | 2086 | write_lock_irqsave(&md->map_lock, flags); |
2088 | old_map = md->map; | 2087 | old_map = md->map; |
2089 | md->map = t; | 2088 | md->map = t; |
2090 | dm_table_set_restrictions(t, q, limits); | 2089 | dm_table_set_restrictions(t, q, limits); |
2091 | if (merge_is_optional) | 2090 | if (merge_is_optional) |
2092 | set_bit(DMF_MERGE_IS_OPTIONAL, &md->flags); | 2091 | set_bit(DMF_MERGE_IS_OPTIONAL, &md->flags); |
2093 | else | 2092 | else |
2094 | clear_bit(DMF_MERGE_IS_OPTIONAL, &md->flags); | 2093 | clear_bit(DMF_MERGE_IS_OPTIONAL, &md->flags); |
2095 | write_unlock_irqrestore(&md->map_lock, flags); | 2094 | write_unlock_irqrestore(&md->map_lock, flags); |
2096 | 2095 | ||
2097 | return old_map; | 2096 | return old_map; |
2098 | } | 2097 | } |
2099 | 2098 | ||
2100 | /* | 2099 | /* |
2101 | * Returns unbound table for the caller to free. | 2100 | * Returns unbound table for the caller to free. |
2102 | */ | 2101 | */ |
2103 | static struct dm_table *__unbind(struct mapped_device *md) | 2102 | static struct dm_table *__unbind(struct mapped_device *md) |
2104 | { | 2103 | { |
2105 | struct dm_table *map = md->map; | 2104 | struct dm_table *map = md->map; |
2106 | unsigned long flags; | 2105 | unsigned long flags; |
2107 | 2106 | ||
2108 | if (!map) | 2107 | if (!map) |
2109 | return NULL; | 2108 | return NULL; |
2110 | 2109 | ||
2111 | dm_table_event_callback(map, NULL, NULL); | 2110 | dm_table_event_callback(map, NULL, NULL); |
2112 | write_lock_irqsave(&md->map_lock, flags); | 2111 | write_lock_irqsave(&md->map_lock, flags); |
2113 | md->map = NULL; | 2112 | md->map = NULL; |
2114 | write_unlock_irqrestore(&md->map_lock, flags); | 2113 | write_unlock_irqrestore(&md->map_lock, flags); |
2115 | 2114 | ||
2116 | return map; | 2115 | return map; |
2117 | } | 2116 | } |
2118 | 2117 | ||
2119 | /* | 2118 | /* |
2120 | * Constructor for a new device. | 2119 | * Constructor for a new device. |
2121 | */ | 2120 | */ |
2122 | int dm_create(int minor, struct mapped_device **result) | 2121 | int dm_create(int minor, struct mapped_device **result) |
2123 | { | 2122 | { |
2124 | struct mapped_device *md; | 2123 | struct mapped_device *md; |
2125 | 2124 | ||
2126 | md = alloc_dev(minor); | 2125 | md = alloc_dev(minor); |
2127 | if (!md) | 2126 | if (!md) |
2128 | return -ENXIO; | 2127 | return -ENXIO; |
2129 | 2128 | ||
2130 | dm_sysfs_init(md); | 2129 | dm_sysfs_init(md); |
2131 | 2130 | ||
2132 | *result = md; | 2131 | *result = md; |
2133 | return 0; | 2132 | return 0; |
2134 | } | 2133 | } |
2135 | 2134 | ||
2136 | /* | 2135 | /* |
2137 | * Functions to manage md->type. | 2136 | * Functions to manage md->type. |
2138 | * All are required to hold md->type_lock. | 2137 | * All are required to hold md->type_lock. |
2139 | */ | 2138 | */ |
2140 | void dm_lock_md_type(struct mapped_device *md) | 2139 | void dm_lock_md_type(struct mapped_device *md) |
2141 | { | 2140 | { |
2142 | mutex_lock(&md->type_lock); | 2141 | mutex_lock(&md->type_lock); |
2143 | } | 2142 | } |
2144 | 2143 | ||
2145 | void dm_unlock_md_type(struct mapped_device *md) | 2144 | void dm_unlock_md_type(struct mapped_device *md) |
2146 | { | 2145 | { |
2147 | mutex_unlock(&md->type_lock); | 2146 | mutex_unlock(&md->type_lock); |
2148 | } | 2147 | } |
2149 | 2148 | ||
2150 | void dm_set_md_type(struct mapped_device *md, unsigned type) | 2149 | void dm_set_md_type(struct mapped_device *md, unsigned type) |
2151 | { | 2150 | { |
2152 | md->type = type; | 2151 | md->type = type; |
2153 | } | 2152 | } |
2154 | 2153 | ||
2155 | unsigned dm_get_md_type(struct mapped_device *md) | 2154 | unsigned dm_get_md_type(struct mapped_device *md) |
2156 | { | 2155 | { |
2157 | return md->type; | 2156 | return md->type; |
2158 | } | 2157 | } |
2159 | 2158 | ||
2160 | /* | 2159 | /* |
2161 | * Fully initialize a request-based queue (->elevator, ->request_fn, etc). | 2160 | * Fully initialize a request-based queue (->elevator, ->request_fn, etc). |
2162 | */ | 2161 | */ |
2163 | static int dm_init_request_based_queue(struct mapped_device *md) | 2162 | static int dm_init_request_based_queue(struct mapped_device *md) |
2164 | { | 2163 | { |
2165 | struct request_queue *q = NULL; | 2164 | struct request_queue *q = NULL; |
2166 | 2165 | ||
2167 | if (md->queue->elevator) | 2166 | if (md->queue->elevator) |
2168 | return 1; | 2167 | return 1; |
2169 | 2168 | ||
2170 | /* Fully initialize the queue */ | 2169 | /* Fully initialize the queue */ |
2171 | q = blk_init_allocated_queue(md->queue, dm_request_fn, NULL); | 2170 | q = blk_init_allocated_queue(md->queue, dm_request_fn, NULL); |
2172 | if (!q) | 2171 | if (!q) |
2173 | return 0; | 2172 | return 0; |
2174 | 2173 | ||
2175 | md->queue = q; | 2174 | md->queue = q; |
2176 | md->saved_make_request_fn = md->queue->make_request_fn; | 2175 | md->saved_make_request_fn = md->queue->make_request_fn; |
2177 | dm_init_md_queue(md); | 2176 | dm_init_md_queue(md); |
2178 | blk_queue_softirq_done(md->queue, dm_softirq_done); | 2177 | blk_queue_softirq_done(md->queue, dm_softirq_done); |
2179 | blk_queue_prep_rq(md->queue, dm_prep_fn); | 2178 | blk_queue_prep_rq(md->queue, dm_prep_fn); |
2180 | blk_queue_lld_busy(md->queue, dm_lld_busy); | 2179 | blk_queue_lld_busy(md->queue, dm_lld_busy); |
2181 | 2180 | ||
2182 | elv_register_queue(md->queue); | 2181 | elv_register_queue(md->queue); |
2183 | 2182 | ||
2184 | return 1; | 2183 | return 1; |
2185 | } | 2184 | } |
2186 | 2185 | ||
2187 | /* | 2186 | /* |
2188 | * Setup the DM device's queue based on md's type | 2187 | * Setup the DM device's queue based on md's type |
2189 | */ | 2188 | */ |
2190 | int dm_setup_md_queue(struct mapped_device *md) | 2189 | int dm_setup_md_queue(struct mapped_device *md) |
2191 | { | 2190 | { |
2192 | if ((dm_get_md_type(md) == DM_TYPE_REQUEST_BASED) && | 2191 | if ((dm_get_md_type(md) == DM_TYPE_REQUEST_BASED) && |
2193 | !dm_init_request_based_queue(md)) { | 2192 | !dm_init_request_based_queue(md)) { |
2194 | DMWARN("Cannot initialize queue for request-based mapped device"); | 2193 | DMWARN("Cannot initialize queue for request-based mapped device"); |
2195 | return -EINVAL; | 2194 | return -EINVAL; |
2196 | } | 2195 | } |
2197 | 2196 | ||
2198 | return 0; | 2197 | return 0; |
2199 | } | 2198 | } |
2200 | 2199 | ||
2201 | static struct mapped_device *dm_find_md(dev_t dev) | 2200 | static struct mapped_device *dm_find_md(dev_t dev) |
2202 | { | 2201 | { |
2203 | struct mapped_device *md; | 2202 | struct mapped_device *md; |
2204 | unsigned minor = MINOR(dev); | 2203 | unsigned minor = MINOR(dev); |
2205 | 2204 | ||
2206 | if (MAJOR(dev) != _major || minor >= (1 << MINORBITS)) | 2205 | if (MAJOR(dev) != _major || minor >= (1 << MINORBITS)) |
2207 | return NULL; | 2206 | return NULL; |
2208 | 2207 | ||
2209 | spin_lock(&_minor_lock); | 2208 | spin_lock(&_minor_lock); |
2210 | 2209 | ||
2211 | md = idr_find(&_minor_idr, minor); | 2210 | md = idr_find(&_minor_idr, minor); |
2212 | if (md && (md == MINOR_ALLOCED || | 2211 | if (md && (md == MINOR_ALLOCED || |
2213 | (MINOR(disk_devt(dm_disk(md))) != minor) || | 2212 | (MINOR(disk_devt(dm_disk(md))) != minor) || |
2214 | dm_deleting_md(md) || | 2213 | dm_deleting_md(md) || |
2215 | test_bit(DMF_FREEING, &md->flags))) { | 2214 | test_bit(DMF_FREEING, &md->flags))) { |
2216 | md = NULL; | 2215 | md = NULL; |
2217 | goto out; | 2216 | goto out; |
2218 | } | 2217 | } |
2219 | 2218 | ||
2220 | out: | 2219 | out: |
2221 | spin_unlock(&_minor_lock); | 2220 | spin_unlock(&_minor_lock); |
2222 | 2221 | ||
2223 | return md; | 2222 | return md; |
2224 | } | 2223 | } |
2225 | 2224 | ||
2226 | struct mapped_device *dm_get_md(dev_t dev) | 2225 | struct mapped_device *dm_get_md(dev_t dev) |
2227 | { | 2226 | { |
2228 | struct mapped_device *md = dm_find_md(dev); | 2227 | struct mapped_device *md = dm_find_md(dev); |
2229 | 2228 | ||
2230 | if (md) | 2229 | if (md) |
2231 | dm_get(md); | 2230 | dm_get(md); |
2232 | 2231 | ||
2233 | return md; | 2232 | return md; |
2234 | } | 2233 | } |
2235 | 2234 | ||
2236 | void *dm_get_mdptr(struct mapped_device *md) | 2235 | void *dm_get_mdptr(struct mapped_device *md) |
2237 | { | 2236 | { |
2238 | return md->interface_ptr; | 2237 | return md->interface_ptr; |
2239 | } | 2238 | } |
2240 | 2239 | ||
2241 | void dm_set_mdptr(struct mapped_device *md, void *ptr) | 2240 | void dm_set_mdptr(struct mapped_device *md, void *ptr) |
2242 | { | 2241 | { |
2243 | md->interface_ptr = ptr; | 2242 | md->interface_ptr = ptr; |
2244 | } | 2243 | } |
2245 | 2244 | ||
2246 | void dm_get(struct mapped_device *md) | 2245 | void dm_get(struct mapped_device *md) |
2247 | { | 2246 | { |
2248 | atomic_inc(&md->holders); | 2247 | atomic_inc(&md->holders); |
2249 | BUG_ON(test_bit(DMF_FREEING, &md->flags)); | 2248 | BUG_ON(test_bit(DMF_FREEING, &md->flags)); |
2250 | } | 2249 | } |
2251 | 2250 | ||
2252 | const char *dm_device_name(struct mapped_device *md) | 2251 | const char *dm_device_name(struct mapped_device *md) |
2253 | { | 2252 | { |
2254 | return md->name; | 2253 | return md->name; |
2255 | } | 2254 | } |
2256 | EXPORT_SYMBOL_GPL(dm_device_name); | 2255 | EXPORT_SYMBOL_GPL(dm_device_name); |
2257 | 2256 | ||
2258 | static void __dm_destroy(struct mapped_device *md, bool wait) | 2257 | static void __dm_destroy(struct mapped_device *md, bool wait) |
2259 | { | 2258 | { |
2260 | struct dm_table *map; | 2259 | struct dm_table *map; |
2261 | 2260 | ||
2262 | might_sleep(); | 2261 | might_sleep(); |
2263 | 2262 | ||
2264 | spin_lock(&_minor_lock); | 2263 | spin_lock(&_minor_lock); |
2265 | map = dm_get_live_table(md); | 2264 | map = dm_get_live_table(md); |
2266 | idr_replace(&_minor_idr, MINOR_ALLOCED, MINOR(disk_devt(dm_disk(md)))); | 2265 | idr_replace(&_minor_idr, MINOR_ALLOCED, MINOR(disk_devt(dm_disk(md)))); |
2267 | set_bit(DMF_FREEING, &md->flags); | 2266 | set_bit(DMF_FREEING, &md->flags); |
2268 | spin_unlock(&_minor_lock); | 2267 | spin_unlock(&_minor_lock); |
2269 | 2268 | ||
2270 | if (!dm_suspended_md(md)) { | 2269 | if (!dm_suspended_md(md)) { |
2271 | dm_table_presuspend_targets(map); | 2270 | dm_table_presuspend_targets(map); |
2272 | dm_table_postsuspend_targets(map); | 2271 | dm_table_postsuspend_targets(map); |
2273 | } | 2272 | } |
2274 | 2273 | ||
2275 | /* | 2274 | /* |
2276 | * Rare, but there may still be I/O requests in flight that have | 2275 | * Rare, but there may still be I/O requests in flight that have |
2277 | * yet to complete. Wait for all references to disappear. | 2276 | * yet to complete. Wait for all references to disappear. |
2278 | * No one should increment the reference count of the mapped_device | 2277 | * No one should increment the reference count of the mapped_device |
2279 | * after its state becomes DMF_FREEING. | 2278 | * after its state becomes DMF_FREEING. |
2280 | */ | 2279 | */ |
2281 | if (wait) | 2280 | if (wait) |
2282 | while (atomic_read(&md->holders)) | 2281 | while (atomic_read(&md->holders)) |
2283 | msleep(1); | 2282 | msleep(1); |
2284 | else if (atomic_read(&md->holders)) | 2283 | else if (atomic_read(&md->holders)) |
2285 | DMWARN("%s: Forcibly removing mapped_device still in use! (%d users)", | 2284 | DMWARN("%s: Forcibly removing mapped_device still in use! (%d users)", |
2286 | dm_device_name(md), atomic_read(&md->holders)); | 2285 | dm_device_name(md), atomic_read(&md->holders)); |
2287 | 2286 | ||
2288 | dm_sysfs_exit(md); | 2287 | dm_sysfs_exit(md); |
2289 | dm_table_put(map); | 2288 | dm_table_put(map); |
2290 | dm_table_destroy(__unbind(md)); | 2289 | dm_table_destroy(__unbind(md)); |
2291 | free_dev(md); | 2290 | free_dev(md); |
2292 | } | 2291 | } |
2293 | 2292 | ||
2294 | void dm_destroy(struct mapped_device *md) | 2293 | void dm_destroy(struct mapped_device *md) |
2295 | { | 2294 | { |
2296 | __dm_destroy(md, true); | 2295 | __dm_destroy(md, true); |
2297 | } | 2296 | } |
2298 | 2297 | ||
2299 | void dm_destroy_immediate(struct mapped_device *md) | 2298 | void dm_destroy_immediate(struct mapped_device *md) |
2300 | { | 2299 | { |
2301 | __dm_destroy(md, false); | 2300 | __dm_destroy(md, false); |
2302 | } | 2301 | } |
2303 | 2302 | ||
2304 | void dm_put(struct mapped_device *md) | 2303 | void dm_put(struct mapped_device *md) |
2305 | { | 2304 | { |
2306 | atomic_dec(&md->holders); | 2305 | atomic_dec(&md->holders); |
2307 | } | 2306 | } |
2308 | EXPORT_SYMBOL_GPL(dm_put); | 2307 | EXPORT_SYMBOL_GPL(dm_put); |
2309 | 2308 | ||
2310 | static int dm_wait_for_completion(struct mapped_device *md, int interruptible) | 2309 | static int dm_wait_for_completion(struct mapped_device *md, int interruptible) |
2311 | { | 2310 | { |
2312 | int r = 0; | 2311 | int r = 0; |
2313 | DECLARE_WAITQUEUE(wait, current); | 2312 | DECLARE_WAITQUEUE(wait, current); |
2314 | 2313 | ||
2315 | add_wait_queue(&md->wait, &wait); | 2314 | add_wait_queue(&md->wait, &wait); |
2316 | 2315 | ||
2317 | while (1) { | 2316 | while (1) { |
2318 | set_current_state(interruptible); | 2317 | set_current_state(interruptible); |
2319 | 2318 | ||
2320 | smp_mb(); | 2319 | smp_mb(); |
2321 | if (!md_in_flight(md)) | 2320 | if (!md_in_flight(md)) |
2322 | break; | 2321 | break; |
2323 | 2322 | ||
2324 | if (interruptible == TASK_INTERRUPTIBLE && | 2323 | if (interruptible == TASK_INTERRUPTIBLE && |
2325 | signal_pending(current)) { | 2324 | signal_pending(current)) { |
2326 | r = -EINTR; | 2325 | r = -EINTR; |
2327 | break; | 2326 | break; |
2328 | } | 2327 | } |
2329 | 2328 | ||
2330 | io_schedule(); | 2329 | io_schedule(); |
2331 | } | 2330 | } |
2332 | set_current_state(TASK_RUNNING); | 2331 | set_current_state(TASK_RUNNING); |
2333 | 2332 | ||
2334 | remove_wait_queue(&md->wait, &wait); | 2333 | remove_wait_queue(&md->wait, &wait); |
2335 | 2334 | ||
2336 | return r; | 2335 | return r; |
2337 | } | 2336 | } |

/*
 * Process the deferred bios
 */
static void dm_wq_work(struct work_struct *work)
{
	struct mapped_device *md = container_of(work, struct mapped_device,
						work);
	struct bio *c;

	down_read(&md->io_lock);

	while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
		spin_lock_irq(&md->deferred_lock);
		c = bio_list_pop(&md->deferred);
		spin_unlock_irq(&md->deferred_lock);

		if (!c)
			break;

		up_read(&md->io_lock);

		if (dm_request_based(md))
			generic_make_request(c);
		else
			__split_and_process_bio(md, c);

		down_read(&md->io_lock);
	}

	up_read(&md->io_lock);
}

static void dm_queue_flush(struct mapped_device *md)
{
	clear_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
	smp_mb__after_clear_bit();
	queue_work(md->wq, &md->work);
}
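/*
 * Illustrative sketch, not part of this file: the producer side of the
 * deferred list is the bio submission path, which queues bios while
 * DMF_BLOCK_IO_FOR_SUSPEND is set and kicks the workqueue serviced by
 * dm_wq_work().  Roughly:
 */
#if 0	/* sketch only -- names and error handling simplified */
static void example_defer_bio(struct mapped_device *md, struct bio *bio)
{
	spin_lock_irq(&md->deferred_lock);
	bio_list_add(&md->deferred, bio);	/* popped later by dm_wq_work() */
	spin_unlock_irq(&md->deferred_lock);

	queue_work(md->wq, &md->work);
}
#endif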

/*
 * Swap in a new table, returning the old one for the caller to destroy.
 */
struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table)
{
	struct dm_table *map = ERR_PTR(-EINVAL);
	struct queue_limits limits;
	int r;

	mutex_lock(&md->suspend_lock);

	/* device must be suspended */
	if (!dm_suspended_md(md))
		goto out;

	r = dm_calculate_queue_limits(table, &limits);
	if (r) {
		map = ERR_PTR(r);
		goto out;
	}

	map = __bind(md, table, &limits);

out:
	mutex_unlock(&md->suspend_lock);
	return map;
}
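/*
 * Illustrative sketch, not part of this file: dm_swap_table() only succeeds
 * on a suspended device and hands back the old table for the caller to
 * destroy, so a caller (typically the ioctl resume path) drives the whole
 * suspend/swap/resume sequence.  example_replace_table() is hypothetical:
 */
#if 0	/* sketch only */
static int example_replace_table(struct mapped_device *md, struct dm_table *new_map)
{
	struct dm_table *old_map;

	if (!dm_suspended_md(md))
		dm_suspend(md, DM_SUSPEND_LOCKFS_FLAG);

	old_map = dm_swap_table(md, new_map);
	if (IS_ERR(old_map))
		return PTR_ERR(old_map);
	if (old_map)
		dm_table_destroy(old_map);	/* the old table now belongs to us */

	return dm_resume(md);
}
#endif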

/*
 * Functions to lock and unlock any filesystem running on the
 * device.
 */
static int lock_fs(struct mapped_device *md)
{
	int r;

	WARN_ON(md->frozen_sb);

	md->frozen_sb = freeze_bdev(md->bdev);
	if (IS_ERR(md->frozen_sb)) {
		r = PTR_ERR(md->frozen_sb);
		md->frozen_sb = NULL;
		return r;
	}

	set_bit(DMF_FROZEN, &md->flags);

	return 0;
}

static void unlock_fs(struct mapped_device *md)
{
	if (!test_bit(DMF_FROZEN, &md->flags))
		return;

	thaw_bdev(md->bdev, md->frozen_sb);
	md->frozen_sb = NULL;
	clear_bit(DMF_FROZEN, &md->flags);
}

/*
 * We need to be able to change a mapping table under a mounted
 * filesystem. For example we might want to move some data in
 * the background. Before the table can be swapped with
 * dm_bind_table, dm_suspend must be called to flush any in
 * flight bios and ensure that any further io gets deferred.
 */
/*
 * Suspend mechanism in request-based dm.
 *
 * 1. Flush all I/Os by lock_fs() if needed.
 * 2. Stop dispatching any I/O by stopping the request_queue.
 * 3. Wait for all in-flight I/Os to be completed or requeued.
 *
 * To abort suspend, start the request_queue.
 */
int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
{
	struct dm_table *map = NULL;
	int r = 0;
	int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0;
	int noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG ? 1 : 0;

	mutex_lock(&md->suspend_lock);

	if (dm_suspended_md(md)) {
		r = -EINVAL;
		goto out_unlock;
	}

	map = dm_get_live_table(md);

	/*
	 * DMF_NOFLUSH_SUSPENDING must be set before presuspend.
	 * This flag is cleared before dm_suspend returns.
	 */
	if (noflush)
		set_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);

	/* This does not get reverted if there's an error later. */
	dm_table_presuspend_targets(map);

	/*
	 * Flush I/O to the device.
	 * Any I/O submitted after lock_fs() may not be flushed.
	 * noflush takes precedence over do_lockfs.
	 * (lock_fs() flushes I/Os and waits for them to complete.)
	 */
	if (!noflush && do_lockfs) {
		r = lock_fs(md);
		if (r)
			goto out;
	}

	/*
	 * Here we must make sure that no processes are submitting requests
	 * to target drivers i.e. no one may be executing
	 * __split_and_process_bio. This is called from dm_request and
	 * dm_wq_work.
	 *
	 * To get all processes out of __split_and_process_bio in dm_request,
	 * we take the write lock. To prevent any process from reentering
	 * __split_and_process_bio from dm_request and to quiesce the thread
	 * (dm_wq_work), we set DMF_BLOCK_IO_FOR_SUSPEND and call
	 * flush_workqueue(md->wq).
	 */
	down_write(&md->io_lock);
	set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
	up_write(&md->io_lock);

	/*
	 * Stop md->queue before flushing md->wq in case request-based
	 * dm defers requests to md->wq from md->queue.
	 */
	if (dm_request_based(md))
		stop_queue(md->queue);

	flush_workqueue(md->wq);

	/*
	 * At this point no more requests are entering target request routines.
	 * We call dm_wait_for_completion to wait for all existing requests
	 * to finish.
	 */
	r = dm_wait_for_completion(md, TASK_INTERRUPTIBLE);

	down_write(&md->io_lock);
	if (noflush)
		clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
	up_write(&md->io_lock);

	/* were we interrupted ? */
	if (r < 0) {
		dm_queue_flush(md);

		if (dm_request_based(md))
			start_queue(md->queue);

		unlock_fs(md);
		goto out; /* pushback list is already flushed, so skip flush */
	}

	/*
	 * If dm_wait_for_completion returned 0, the device is completely
	 * quiescent now. There is no request-processing activity. All new
	 * requests are being added to md->deferred list.
	 */

	set_bit(DMF_SUSPENDED, &md->flags);

	dm_table_postsuspend_targets(map);

out:
	dm_table_put(map);

out_unlock:
	mutex_unlock(&md->suspend_lock);
	return r;
}

int dm_resume(struct mapped_device *md)
{
	int r = -EINVAL;
	struct dm_table *map = NULL;

	mutex_lock(&md->suspend_lock);
	if (!dm_suspended_md(md))
		goto out;

	map = dm_get_live_table(md);
	if (!map || !dm_table_get_size(map))
		goto out;

	r = dm_table_resume_targets(map);
	if (r)
		goto out;

	dm_queue_flush(md);

	/*
	 * Flushing deferred I/Os must be done after targets are resumed
	 * so that mapping of targets can work correctly.
	 * Request-based dm is queueing the deferred I/Os in its request_queue.
	 */
	if (dm_request_based(md))
		start_queue(md->queue);

	unlock_fs(md);

	clear_bit(DMF_SUSPENDED, &md->flags);

	r = 0;
out:
	dm_table_put(map);
	mutex_unlock(&md->suspend_lock);

	return r;
}
2596 | 2595 | ||
2597 | /*----------------------------------------------------------------- | 2596 | /*----------------------------------------------------------------- |
2598 | * Event notification. | 2597 | * Event notification. |
2599 | *---------------------------------------------------------------*/ | 2598 | *---------------------------------------------------------------*/ |
2600 | int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action, | 2599 | int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action, |
2601 | unsigned cookie) | 2600 | unsigned cookie) |
2602 | { | 2601 | { |
2603 | char udev_cookie[DM_COOKIE_LENGTH]; | 2602 | char udev_cookie[DM_COOKIE_LENGTH]; |
2604 | char *envp[] = { udev_cookie, NULL }; | 2603 | char *envp[] = { udev_cookie, NULL }; |
2605 | 2604 | ||
2606 | if (!cookie) | 2605 | if (!cookie) |
2607 | return kobject_uevent(&disk_to_dev(md->disk)->kobj, action); | 2606 | return kobject_uevent(&disk_to_dev(md->disk)->kobj, action); |
2608 | else { | 2607 | else { |
2609 | snprintf(udev_cookie, DM_COOKIE_LENGTH, "%s=%u", | 2608 | snprintf(udev_cookie, DM_COOKIE_LENGTH, "%s=%u", |
2610 | DM_COOKIE_ENV_VAR_NAME, cookie); | 2609 | DM_COOKIE_ENV_VAR_NAME, cookie); |
2611 | return kobject_uevent_env(&disk_to_dev(md->disk)->kobj, | 2610 | return kobject_uevent_env(&disk_to_dev(md->disk)->kobj, |
2612 | action, envp); | 2611 | action, envp); |
2613 | } | 2612 | } |
2614 | } | 2613 | } |
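/*
 * Illustrative sketch, not part of this file: callers pass a cookie supplied
 * by userspace so udev rule processing can be synchronised with the ioctl
 * that triggered the event.  example_notify_change() is hypothetical:
 */
#if 0	/* sketch only */
static void example_notify_change(struct mapped_device *md, unsigned cookie)
{
	/* cookie == 0 sends a plain uevent; otherwise DM_COOKIE=<cookie> is added */
	dm_kobject_uevent(md, KOBJ_CHANGE, cookie);
}
#endif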

uint32_t dm_next_uevent_seq(struct mapped_device *md)
{
	return atomic_add_return(1, &md->uevent_seq);
}

uint32_t dm_get_event_nr(struct mapped_device *md)
{
	return atomic_read(&md->event_nr);
}

int dm_wait_event(struct mapped_device *md, int event_nr)
{
	return wait_event_interruptible(md->eventq,
			(event_nr != atomic_read(&md->event_nr)));
}
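/*
 * Illustrative sketch, not part of this file: the event counter lets a caller
 * wait for "something changed on this device" without racing: sample the
 * counter first, then sleep until it advances.  example_wait_for_next_event()
 * is hypothetical:
 */
#if 0	/* sketch only */
static int example_wait_for_next_event(struct mapped_device *md)
{
	uint32_t seen = dm_get_event_nr(md);

	/* returns 0 once the counter has moved past 'seen',
	   or -ERESTARTSYS if interrupted by a signal */
	return dm_wait_event(md, seen);
}
#endif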

void dm_uevent_add(struct mapped_device *md, struct list_head *elist)
{
	unsigned long flags;

	spin_lock_irqsave(&md->uevent_lock, flags);
	list_add(elist, &md->uevent_list);
	spin_unlock_irqrestore(&md->uevent_lock, flags);
}

/*
 * The gendisk is only valid as long as you have a reference
 * count on 'md'.
 */
struct gendisk *dm_disk(struct mapped_device *md)
{
	return md->disk;
}

struct kobject *dm_kobject(struct mapped_device *md)
{
	return &md->kobj;
}

/*
 * struct mapped_device should not be exported outside of dm.c
 * so use this check to verify that kobj is part of md structure
 */
struct mapped_device *dm_get_from_kobject(struct kobject *kobj)
{
	struct mapped_device *md;

	md = container_of(kobj, struct mapped_device, kobj);
	if (&md->kobj != kobj)
		return NULL;

	if (test_bit(DMF_FREEING, &md->flags) ||
	    dm_deleting_md(md))
		return NULL;

	dm_get(md);
	return md;
}
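/*
 * Illustrative sketch, not part of this file: dm_get_from_kobject() is the
 * bridge used by dm's sysfs attributes -- recover the mapped_device from the
 * embedded kobject, take a reference, use it, drop it.  example_attr_show()
 * is a hypothetical, simplified show routine:
 */
#if 0	/* sketch only */
static ssize_t example_attr_show(struct kobject *kobj, char *buf)
{
	struct mapped_device *md = dm_get_from_kobject(kobj);
	ssize_t ret;

	if (!md)
		return -EINVAL;

	ret = sprintf(buf, "%s\n", dm_device_name(md));
	dm_put(md);

	return ret;
}
#endif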

int dm_suspended_md(struct mapped_device *md)
{
	return test_bit(DMF_SUSPENDED, &md->flags);
}

int dm_suspended(struct dm_target *ti)
{
	return dm_suspended_md(dm_table_get_md(ti->table));
}
EXPORT_SYMBOL_GPL(dm_suspended);

int dm_noflush_suspending(struct dm_target *ti)
{
	return __noflush_suspending(dm_table_get_md(ti->table));
}
EXPORT_SYMBOL_GPL(dm_noflush_suspending);
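/*
 * Illustrative sketch, not part of this file: targets typically consult
 * dm_noflush_suspending() in their error paths to decide whether a failed
 * I/O should be requeued for retry after resume rather than failed outright.
 * example_target_end_io() is hypothetical:
 */
#if 0	/* sketch only */
static int example_target_end_io(struct dm_target *ti, struct bio *bio,
				 int error, union map_info *map_context)
{
	if (error && dm_noflush_suspending(ti))
		return DM_ENDIO_REQUEUE;	/* retry once the device is resumed */

	return error;
}
#endif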

struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity)
{
	struct dm_md_mempools *pools = kmalloc(sizeof(*pools), GFP_KERNEL);
	unsigned int pool_size = (type == DM_TYPE_BIO_BASED) ? 16 : MIN_IOS;

	if (!pools)
		return NULL;

	pools->io_pool = (type == DM_TYPE_BIO_BASED) ?
			 mempool_create_slab_pool(MIN_IOS, _io_cache) :
			 mempool_create_slab_pool(MIN_IOS, _rq_bio_info_cache);
	if (!pools->io_pool)
		goto free_pools_and_out;

	pools->tio_pool = (type == DM_TYPE_BIO_BASED) ?
			  mempool_create_slab_pool(MIN_IOS, _tio_cache) :
			  mempool_create_slab_pool(MIN_IOS, _rq_tio_cache);
	if (!pools->tio_pool)
		goto free_io_pool_and_out;

	pools->bs = bioset_create(pool_size, 0);
	if (!pools->bs)
		goto free_tio_pool_and_out;

	if (integrity && bioset_integrity_create(pools->bs, pool_size))
		goto free_bioset_and_out;

	return pools;

free_bioset_and_out:
	bioset_free(pools->bs);

free_tio_pool_and_out:
	mempool_destroy(pools->tio_pool);

free_io_pool_and_out:
	mempool_destroy(pools->io_pool);

free_pools_and_out:
	kfree(pools);

	return NULL;
}
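/*
 * Illustrative sketch, not part of this file: the mempools are sized for the
 * table type and attached to the mapped_device so clone allocations cannot
 * fail under memory pressure.  A hedged sketch of pairing the helpers;
 * example_setup_pools() is hypothetical:
 */
#if 0	/* sketch only */
static struct dm_md_mempools *example_setup_pools(unsigned type, unsigned integrity)
{
	struct dm_md_mempools *pools;

	pools = dm_alloc_md_mempools(type, integrity);	/* e.g. type == DM_TYPE_BIO_BASED */
	if (!pools)
		return NULL;	/* caller treats this as -ENOMEM */

	/* ... hand 'pools' to the mapped_device; on teardown call
	 * dm_free_md_mempools(pools) */
	return pools;
}
#endif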

void dm_free_md_mempools(struct dm_md_mempools *pools)
{
	if (!pools)
		return;

	if (pools->io_pool)
		mempool_destroy(pools->io_pool);

	if (pools->tio_pool)
		mempool_destroy(pools->tio_pool);

	if (pools->bs)
		bioset_free(pools->bs);

	kfree(pools);
}

static const struct block_device_operations dm_blk_dops = {
	.open = dm_blk_open,
	.release = dm_blk_close,
	.ioctl = dm_blk_ioctl,
	.getgeo = dm_blk_getgeo,
	.owner = THIS_MODULE
};
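/*
 * Illustrative sketch, not part of this file: this ops table is what makes a
 * dm device behave as an ordinary block device; it is wired up when the
 * gendisk for the mapped_device is created.  The lines below are assumptions
 * about that creation path, shown only for orientation:
 */
#if 0	/* sketch only */
	/* inside the device-creation path, after alloc_disk(): */
	md->disk->fops = &dm_blk_dops;
	md->disk->queue = md->queue;
	add_disk(md->disk);
#endif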

EXPORT_SYMBOL(dm_get_mapinfo);

/*
 * module hooks
 */
module_init(dm_init);
module_exit(dm_exit);

module_param(major, uint, 0);
MODULE_PARM_DESC(major, "The major number of the device mapper");
MODULE_DESCRIPTION(DM_NAME " driver");
MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");