Commit 31e6306a4046926b598484f1cacf69309382eac6

Authored by Fred Isaman
Committed by Trond Myklebust
1 parent 650e2d39bd

pnfsblock: note written INVAL areas for layoutcommit

Signed-off-by: Peng Tao <peng_tao@emc.com>
Signed-off-by: Fred Isaman <iisaman@citi.umich.edu>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Benny Halevy <bhalevy@tonian.com>
Signed-off-by: Jim Rees <rees@umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

Showing 3 changed files with 129 additions and 0 deletions Side-by-side Diff

fs/nfs/blocklayout/blocklayout.c
... ... @@ -329,6 +329,30 @@
329 329 return PNFS_NOT_ATTEMPTED;
330 330 }
331 331  
  /* Note, for a later LAYOUTCOMMIT, which parts of a written byte range fall
   * in extents whose state is PNFS_BLOCK_INVALID_DATA.
   *
   * The range [offset, offset + count) is widened using PAGE_CACHE_MASK and
   * PAGE_CACHE_SIZE (presumably to whole pages - confirm) and shifted by
   * SECTOR_SHIFT into sector units, then walked one extent at a time.
   * A zero count is a no-op.
   */
  332 +static void mark_extents_written(struct pnfs_block_layout *bl,
  333 + __u64 offset, __u32 count)
  334 +{
  335 + sector_t isect, end;
  336 + struct pnfs_block_extent *be;
  337 +
  338 + dprintk("%s(%llu, %u)\n", __func__, offset, count);
  339 + if (count == 0)
  340 + return;
        /* isect is the first sector to visit; end is exclusive (the loop
         * below runs while isect < end).
         */
  341 + isect = (offset & (long)(PAGE_CACHE_MASK)) >> SECTOR_SHIFT;
  342 + end = (offset + count + PAGE_CACHE_SIZE - 1) & (long)(PAGE_CACHE_MASK);
  343 + end >>= SECTOR_SHIFT;
  344 + while (isect < end) {
  345 + sector_t len;
  346 + be = bl_find_get_extent(bl, isect, NULL);
  347 + BUG_ON(!be); /* FIXME: a missing extent for isect trips BUG_ON */
        /* Step no further than the end of this extent or of the range. */
  348 + len = min(end, be->be_f_offset + be->be_length) - isect;
  349 + if (be->be_state == PNFS_BLOCK_INVALID_DATA)
  350 + bl_mark_for_commit(be, isect, len); /* What if fails? Result is ignored. */
  351 + isect += len;
        /* Presumably drops the reference taken by bl_find_get_extent(). */
  352 + bl_put_extent(be);
  353 + }
  354 +}
  355 +
332 356 /* This is basically copied from mpage_end_io_read */
333 357 static void bl_end_io_write(struct bio *bio, int err)
334 358 {
... ... @@ -355,6 +379,14 @@
355 379 dprintk("%s enter\n", __func__);
356 380 task = container_of(work, struct rpc_task, u.tk_work);
357 381 wdata = container_of(task, struct nfs_write_data, task);
  382 + if (!wdata->task.tk_status) {
  383 + /* Marks for LAYOUTCOMMIT */
  384 + /* BUG - this should be called after each bio, not after
  385 + * all finish, unless we have some way of storing success/failure
  386 + */
  387 + mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
  388 + wdata->args.offset, wdata->args.count);
  389 + }
358 390 pnfs_ld_write_done(wdata);
359 391 }
360 392  
fs/nfs/blocklayout/blocklayout.h
... ... @@ -201,6 +201,8 @@
201 201 int status);
202 202 int bl_add_merge_extent(struct pnfs_block_layout *bl,
203 203 struct pnfs_block_extent *new);
  /* Defined in extents.c.  Returns -ENOMEM when its allocation fails and
   * 0 otherwise; offset and length are in sectors (sector_t).
   */
  204 +int bl_mark_for_commit(struct pnfs_block_extent *be,
  205 + sector_t offset, sector_t length);
204 206  
205 207 #endif /* FS_NFS_NFS4BLOCKLAYOUT_H */
fs/nfs/blocklayout/extents.c
... ... @@ -217,6 +217,48 @@
217 217 return rv;
218 218 }
219 219  
  220 +/* Assume start, end already sector aligned.
        *
        * Walks tree->mtt_stub from its tail towards its head, skipping entries
        * whose it_sector is at or beyond end.  The first remaining entry must
        * sit at end - mtt_step_size and have bit (1 << tag) set in it_tags;
        * every following entry must sit exactly one mtt_step_size lower and
        * carry the same bit.
        *
        * Returns 1 once the expected sector has stepped below start (or the
        * first matching entry is below mtt_step_size, so there is no lower
        * slot).  Returns 0 on a gap, on an entry without the tag, or when the
        * list is exhausted first.
        *
        * NOTE(review): relies on the list being ordered by ascending
        * it_sector - confirm against the code that inserts entries.
        * NOTE(review): expect == 0 doubles as the nothing-matched-yet marker.
        * If the walk legitimately reaches expect == 0 with start == 0, the
        * next entry appears to be tested by the first-entry branch again and
        * the function returns 0 for a fully tagged range - confirm.
        */
  221 +static int
  222 +_range_has_tag(struct my_tree *tree, u64 start, u64 end, int32_t tag)
  223 +{
  224 + struct pnfs_inval_tracking *pos;
  225 + u64 expect = 0;
  226 +
  227 + dprintk("%s(%llu, %llu, %i) enter\n", __func__, start, end, tag);
  228 + list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) {
        /* Entries at or past end are outside the range of interest. */
  229 + if (pos->it_sector >= end)
  230 + continue;
        /* No expected sector yet: this must be the topmost slot. */
  231 + if (!expect) {
  232 + if ((pos->it_sector == end - tree->mtt_step_size) &&
  233 + (pos->it_tags & (1 << tag))) {
  234 + expect = pos->it_sector - tree->mtt_step_size;
        /* The first test guards the unsigned subtraction above. */
  235 + if (pos->it_sector < tree->mtt_step_size || expect < start)
  236 + return 1;
  237 + continue;
  238 + } else {
  239 + return 0;
  240 + }
  241 + }
        /* Later entries: must be contiguous and tagged. */
  242 + if (pos->it_sector != expect || !(pos->it_tags & (1 << tag)))
  243 + return 0;
  244 + expect -= tree->mtt_step_size;
  245 + if (expect < start)
  246 + return 1;
  247 + }
        /* Ran out of entries before reaching start. */
  248 + return 0;
  249 +}
  250 +
  /* Locked wrapper: takes marks->im_lock, asks _range_has_tag() whether
   * the sectors from start up to end in marks->im_tree all carry
   * EXTENT_WRITTEN, and returns that answer (nonzero means yes).
   */
  251 +static int is_range_written(struct pnfs_inval_markings *marks,
  252 + sector_t start, sector_t end)
  253 +{
  254 + int rv;
  255 +
  256 + spin_lock(&marks->im_lock);
  257 + rv = _range_has_tag(&marks->im_tree, start, end, EXTENT_WRITTEN);
  258 + spin_unlock(&marks->im_lock);
  259 + return rv;
  260 +}
  261 +
220 262 /* Marks sectors in [offset, offset+length) as having been initialized.
221 263 * All lengths are step-aligned, where step is min(pagesize, blocksize).
222 264 * Notes where partial block is initialized, and helps prepare it for
... ... @@ -394,6 +436,59 @@
394 436 }
395 437 dprintk("%s: after merging\n", __func__);
396 438 print_clist(clist, bl->bl_count);
  439 +}
  440 +
  441 +/* Note the range described by offset, length is guaranteed to be contained
  442 + * within be.
        *
        * Records the sectors as written via mark_written_sectors() and then
        * queues a short extent on the commit list of the owning layout.  The
        * queued range is trimmed or grown to block boundaries: a partial
        * block at either end is included only if is_range_written() reports
        * the remainder of that block as written, otherwise it is dropped.
        *
        * Returns -ENOMEM if the short extent cannot be allocated, and 0
        * otherwise (including when nothing is left to queue after trimming).
        */
  443 + */
  444 +int bl_mark_for_commit(struct pnfs_block_extent *be,
  445 + sector_t offset, sector_t length)
  446 +{
  447 + sector_t new_end, end = offset + length;
  448 + struct pnfs_block_short_extent *new;
        /* be->be_inval points at the bl_inval member of the owning layout. */
  449 + struct pnfs_block_layout *bl = container_of(be->be_inval,
  450 + struct pnfs_block_layout,
  451 + bl_inval);
  452 +
        /* Allocate first so that failure leaves the markings untouched. */
  453 + new = kmalloc(sizeof(*new), GFP_NOFS);
  454 + if (!new)
  455 + return -ENOMEM;
  456 +
        /* Any result of mark_written_sectors() is not examined here. */
  457 + mark_written_sectors(be->be_inval, offset, length);
  458 + /* We want to add the range to commit list, but it must be
  459 + * block-normalized, and verified that the normalized range has
  460 + * been entirely written to disk.
  461 + */
  462 + new->bse_f_offset = offset;
        /* Presumably normalize() rounds down to a bl_blocksize boundary. */
  463 + offset = normalize(offset, bl->bl_blocksize);
  464 + if (offset < new->bse_f_offset) {
        /* Leading partial block: keep it only if its head is written. */
  465 + if (is_range_written(be->be_inval, offset, new->bse_f_offset))
  466 + new->bse_f_offset = offset;
  467 + else
  468 + new->bse_f_offset = offset + bl->bl_blocksize;
  469 + }
        /* Presumably normalize_up() rounds up to a bl_blocksize boundary. */
  470 + new_end = normalize_up(end, bl->bl_blocksize);
  471 + if (end < new_end) {
        /* Trailing partial block: keep it only if its tail is written. */
  472 + if (is_range_written(be->be_inval, end, new_end))
  473 + end = new_end;
  474 + else
  475 + end = new_end - bl->bl_blocksize;
  476 + }
        /* Nothing block-sized remains: discard the unused allocation. */
  477 + if (end <= new->bse_f_offset) {
  478 + kfree(new);
  479 + return 0;
  480 + }
  481 + new->bse_length = end - new->bse_f_offset;
  482 + new->bse_devid = be->be_devid;
  483 + new->bse_mdev = be->be_mdev;
  484 +
  485 + spin_lock(&bl->bl_ext_lock);
  486 + /* new will be freed, either by add_to_commitlist if it decides not
  487 + * to use it, or after LAYOUTCOMMIT uses it in the commitlist.
  488 + */
  489 + add_to_commitlist(bl, new);
  490 + spin_unlock(&bl->bl_ext_lock);
  491 + return 0;
397 492 }
398 493  
399 494 static void print_bl_extent(struct pnfs_block_extent *be)