Commit 31e6306a4046926b598484f1cacf69309382eac6
Committed by
Trond Myklebust
1 parent
650e2d39bd
Exists in
master
and in
4 other branches
pnfsblock: note written INVAL areas for layoutcommit
Signed-off-by: Peng Tao <peng_tao@emc.com>
Signed-off-by: Fred Isaman <iisaman@citi.umich.edu>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Benny Halevy <bhalevy@tonian.com>
Signed-off-by: Jim Rees <rees@umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Showing 3 changed files with 129 additions and 0 deletions Side-by-side Diff
fs/nfs/blocklayout/blocklayout.c
... | ... | @@ -329,6 +329,30 @@ |
329 | 329 | return PNFS_NOT_ATTEMPTED; |
330 | 330 | } |
331 | 331 | |
332 | +static void mark_extents_written(struct pnfs_block_layout *bl, | |
333 | + __u64 offset, __u32 count) | |
334 | +{ | |
335 | + sector_t isect, end; | |
336 | + struct pnfs_block_extent *be; | |
337 | + | |
338 | + dprintk("%s(%llu, %u)\n", __func__, offset, count); | |
339 | + if (count == 0) | |
340 | + return; | |
341 | + isect = (offset & (long)(PAGE_CACHE_MASK)) >> SECTOR_SHIFT; | |
342 | + end = (offset + count + PAGE_CACHE_SIZE - 1) & (long)(PAGE_CACHE_MASK); | |
343 | + end >>= SECTOR_SHIFT; | |
344 | + while (isect < end) { | |
345 | + sector_t len; | |
346 | + be = bl_find_get_extent(bl, isect, NULL); | |
347 | + BUG_ON(!be); /* FIXME */ | |
348 | + len = min(end, be->be_f_offset + be->be_length) - isect; | |
349 | + if (be->be_state == PNFS_BLOCK_INVALID_DATA) | |
350 | + bl_mark_for_commit(be, isect, len); /* FIXME: return value is ignored - what if this fails? */ | |
351 | + isect += len; | |
352 | + bl_put_extent(be); | |
353 | + } | |
354 | +} | |
355 | + | |
332 | 356 | /* This is basically copied from mpage_end_io_read */ |
333 | 357 | static void bl_end_io_write(struct bio *bio, int err) |
334 | 358 | { |
... | ... | @@ -355,6 +379,14 @@ |
355 | 379 | dprintk("%s enter\n", __func__); |
356 | 380 | task = container_of(work, struct rpc_task, u.tk_work); |
357 | 381 | wdata = container_of(task, struct nfs_write_data, task); |
382 | + if (!wdata->task.tk_status) { | |
383 | + /* Marks for LAYOUTCOMMIT */ | |
384 | + /* BUG - this should be called after each bio, not after | |
385 | + * all finish, unless we have some way of storing success/failure | |
386 | + */ | |
387 | + mark_extents_written(BLK_LSEG2EXT(wdata->lseg), | |
388 | + wdata->args.offset, wdata->args.count); | |
389 | + } | |
358 | 390 | pnfs_ld_write_done(wdata); |
359 | 391 | } |
360 | 392 |
fs/nfs/blocklayout/blocklayout.h
... | ... | @@ -201,6 +201,8 @@ |
201 | 201 | int status); |
202 | 202 | int bl_add_merge_extent(struct pnfs_block_layout *bl, |
203 | 203 | struct pnfs_block_extent *new); |
204 | +int bl_mark_for_commit(struct pnfs_block_extent *be, | |
205 | + sector_t offset, sector_t length); | |
204 | 206 | |
205 | 207 | #endif /* FS_NFS_NFS4BLOCKLAYOUT_H */ |
fs/nfs/blocklayout/extents.c
... | ... | @@ -217,6 +217,48 @@ |
217 | 217 | return rv; |
218 | 218 | } |
219 | 219 | |
220 | +/* Assume start, end already sector aligned */ | |
221 | +static int | |
222 | +_range_has_tag(struct my_tree *tree, u64 start, u64 end, int32_t tag) | |
223 | +{ | |
224 | + struct pnfs_inval_tracking *pos; | |
225 | + u64 expect = 0; | |
226 | + | |
227 | + dprintk("%s(%llu, %llu, %i) enter\n", __func__, start, end, tag); | |
228 | + list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) { | |
229 | + if (pos->it_sector >= end) | |
230 | + continue; | |
231 | + if (!expect) { | |
232 | + if ((pos->it_sector == end - tree->mtt_step_size) && | |
233 | + (pos->it_tags & (1 << tag))) { | |
234 | + expect = pos->it_sector - tree->mtt_step_size; | |
235 | + if (pos->it_sector < tree->mtt_step_size || expect < start) | |
236 | + return 1; | |
237 | + continue; | |
238 | + } else { | |
239 | + return 0; | |
240 | + } | |
241 | + } | |
242 | + if (pos->it_sector != expect || !(pos->it_tags & (1 << tag))) | |
243 | + return 0; | |
244 | + expect -= tree->mtt_step_size; | |
245 | + if (expect < start) | |
246 | + return 1; | |
247 | + } | |
248 | + return 0; | |
249 | +} | |
250 | + | |
251 | +static int is_range_written(struct pnfs_inval_markings *marks, | |
252 | + sector_t start, sector_t end) | |
253 | +{ | |
254 | + int rv; | |
255 | + | |
256 | + spin_lock(&marks->im_lock); | |
257 | + rv = _range_has_tag(&marks->im_tree, start, end, EXTENT_WRITTEN); | |
258 | + spin_unlock(&marks->im_lock); | |
259 | + return rv; | |
260 | +} | |
261 | + | |
220 | 262 | /* Marks sectors in [offset, offset + length) as having been initialized. |
221 | 263 | * All lengths are step-aligned, where step is min(pagesize, blocksize). |
222 | 264 | * Notes where partial block is initialized, and helps prepare it for |
... | ... | @@ -394,6 +436,59 @@ |
394 | 436 | } |
395 | 437 | dprintk("%s: after merging\n", __func__); |
396 | 438 | print_clist(clist, bl->bl_count); |
439 | +} | |
440 | + | |
441 | +/* Note: the range described by (offset, length) is guaranteed to be contained | |
442 | + * within be. | |
443 | + */ | |
444 | +int bl_mark_for_commit(struct pnfs_block_extent *be, | |
445 | + sector_t offset, sector_t length) | |
446 | +{ | |
447 | + sector_t new_end, end = offset + length; | |
448 | + struct pnfs_block_short_extent *new; | |
449 | + struct pnfs_block_layout *bl = container_of(be->be_inval, | |
450 | + struct pnfs_block_layout, | |
451 | + bl_inval); | |
452 | + | |
453 | + new = kmalloc(sizeof(*new), GFP_NOFS); | |
454 | + if (!new) | |
455 | + return -ENOMEM; | |
456 | + | |
457 | + mark_written_sectors(be->be_inval, offset, length); | |
458 | + /* We want to add the range to the commit list, but it must first be | |
459 | + * block-normalized, and we must verify that the normalized range has | |
460 | + * been entirely written to disk. | |
461 | + */ | |
462 | + new->bse_f_offset = offset; | |
463 | + offset = normalize(offset, bl->bl_blocksize); | |
464 | + if (offset < new->bse_f_offset) { | |
465 | + if (is_range_written(be->be_inval, offset, new->bse_f_offset)) | |
466 | + new->bse_f_offset = offset; | |
467 | + else | |
468 | + new->bse_f_offset = offset + bl->bl_blocksize; | |
469 | + } | |
470 | + new_end = normalize_up(end, bl->bl_blocksize); | |
471 | + if (end < new_end) { | |
472 | + if (is_range_written(be->be_inval, end, new_end)) | |
473 | + end = new_end; | |
474 | + else | |
475 | + end = new_end - bl->bl_blocksize; | |
476 | + } | |
477 | + if (end <= new->bse_f_offset) { | |
478 | + kfree(new); | |
479 | + return 0; | |
480 | + } | |
481 | + new->bse_length = end - new->bse_f_offset; | |
482 | + new->bse_devid = be->be_devid; | |
483 | + new->bse_mdev = be->be_mdev; | |
484 | + | |
485 | + spin_lock(&bl->bl_ext_lock); | |
486 | + /* new will be freed, either by add_to_commitlist if it decides not | |
487 | + * to use it, or after LAYOUTCOMMIT uses it in the commitlist. | |
488 | + */ | |
489 | + add_to_commitlist(bl, new); | |
490 | + spin_unlock(&bl->bl_ext_lock); | |
491 | + return 0; | |
397 | 492 | } |
398 | 493 | |
399 | 494 | static void print_bl_extent(struct pnfs_block_extent *be) |