Commit a12587b00388d1694933252e97abca237bc3a6b8
Exists in master and in 6 other branches
Merge tag 'nfs-for-3.3-2' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
NFS client bugfixes and cleanups for Linux 3.3 (pull 2)

* tag 'nfs-for-3.3-2' of git://git.linux-nfs.org/projects/trondmy/linux-nfs:
  - pnfsblock: alloc short extent before submit bio
  - pnfsblock: remove rpc_call_ops from struct parallel_io
  - pnfsblock: move find lock page logic out of bl_write_pagelist
  - pnfsblock: cleanup bl_mark_sectors_init
  - pnfsblock: limit bio page count
  - pnfsblock: don't spinlock when freeing block_dev
  - pnfsblock: clean up _add_entry
  - pnfsblock: set read/write tk_status to pnfs_error
  - pnfsblock: acquire im_lock in _preload_range
  - NFS4: fix compile warnings in nfs4proc.c
  - nfs: check for integer overflow in decode_devicenotify_args()
  - NFS: cleanup endian type in decode_ds_addr()
  - NFS: add an endian notation
Showing 7 changed files Side-by-side Diff
fs/nfs/blocklayout/blocklayout.c
... | ... | @@ -90,9 +90,9 @@ |
90 | 90 | */ |
91 | 91 | struct parallel_io { |
92 | 92 | struct kref refcnt; |
93 | - struct rpc_call_ops call_ops; | |
94 | - void (*pnfs_callback) (void *data); | |
93 | + void (*pnfs_callback) (void *data, int num_se); | |
95 | 94 | void *data; |
95 | + int bse_count; | |
96 | 96 | }; |
97 | 97 | |
98 | 98 | static inline struct parallel_io *alloc_parallel(void *data) |
... | ... | @@ -103,6 +103,7 @@ |
103 | 103 | if (rv) { |
104 | 104 | rv->data = data; |
105 | 105 | kref_init(&rv->refcnt); |
106 | + rv->bse_count = 0; | |
106 | 107 | } |
107 | 108 | return rv; |
108 | 109 | } |
... | ... | @@ -117,7 +118,7 @@ |
117 | 118 | struct parallel_io *p = container_of(kref, struct parallel_io, refcnt); |
118 | 119 | |
119 | 120 | dprintk("%s enter\n", __func__); |
120 | - p->pnfs_callback(p->data); | |
121 | + p->pnfs_callback(p->data, p->bse_count); | |
121 | 122 | kfree(p); |
122 | 123 | } |
123 | 124 | |
124 | 125 | |
125 | 126 | |
... | ... | @@ -146,14 +147,19 @@ |
146 | 147 | { |
147 | 148 | struct bio *bio; |
148 | 149 | |
150 | + npg = min(npg, BIO_MAX_PAGES); | |
149 | 151 | bio = bio_alloc(GFP_NOIO, npg); |
150 | - if (!bio) | |
151 | - return NULL; | |
152 | + if (!bio && (current->flags & PF_MEMALLOC)) { | |
153 | + while (!bio && (npg /= 2)) | |
154 | + bio = bio_alloc(GFP_NOIO, npg); | |
155 | + } | |
152 | 156 | |
153 | - bio->bi_sector = isect - be->be_f_offset + be->be_v_offset; | |
154 | - bio->bi_bdev = be->be_mdev; | |
155 | - bio->bi_end_io = end_io; | |
156 | - bio->bi_private = par; | |
157 | + if (bio) { | |
158 | + bio->bi_sector = isect - be->be_f_offset + be->be_v_offset; | |
159 | + bio->bi_bdev = be->be_mdev; | |
160 | + bio->bi_end_io = end_io; | |
161 | + bio->bi_private = par; | |
162 | + } | |
157 | 163 | return bio; |
158 | 164 | } |
159 | 165 | |
160 | 166 | |
161 | 167 | |
... | ... | @@ -212,22 +218,15 @@ |
212 | 218 | } |
213 | 219 | |
214 | 220 | static void |
215 | -bl_end_par_io_read(void *data) | |
221 | +bl_end_par_io_read(void *data, int unused) | |
216 | 222 | { |
217 | 223 | struct nfs_read_data *rdata = data; |
218 | 224 | |
225 | + rdata->task.tk_status = rdata->pnfs_error; | |
219 | 226 | INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup); |
220 | 227 | schedule_work(&rdata->task.u.tk_work); |
221 | 228 | } |
222 | 229 | |
223 | -/* We don't want normal .rpc_call_done callback used, so we replace it | |
224 | - * with this stub. | |
225 | - */ | |
226 | -static void bl_rpc_do_nothing(struct rpc_task *task, void *calldata) | |
227 | -{ | |
228 | - return; | |
229 | -} | |
230 | - | |
231 | 230 | static enum pnfs_try_status |
232 | 231 | bl_read_pagelist(struct nfs_read_data *rdata) |
233 | 232 | { |
... | ... | @@ -247,8 +246,6 @@ |
247 | 246 | par = alloc_parallel(rdata); |
248 | 247 | if (!par) |
249 | 248 | goto use_mds; |
250 | - par->call_ops = *rdata->mds_ops; | |
251 | - par->call_ops.rpc_call_done = bl_rpc_do_nothing; | |
252 | 249 | par->pnfs_callback = bl_end_par_io_read; |
253 | 250 | /* At this point, we can no longer jump to use_mds */ |
254 | 251 | |
... | ... | @@ -322,6 +319,7 @@ |
322 | 319 | { |
323 | 320 | sector_t isect, end; |
324 | 321 | struct pnfs_block_extent *be; |
322 | + struct pnfs_block_short_extent *se; | |
325 | 323 | |
326 | 324 | dprintk("%s(%llu, %u)\n", __func__, offset, count); |
327 | 325 | if (count == 0) |
... | ... | @@ -334,8 +332,11 @@ |
334 | 332 | be = bl_find_get_extent(bl, isect, NULL); |
335 | 333 | BUG_ON(!be); /* FIXME */ |
336 | 334 | len = min(end, be->be_f_offset + be->be_length) - isect; |
337 | - if (be->be_state == PNFS_BLOCK_INVALID_DATA) | |
338 | - bl_mark_for_commit(be, isect, len); /* What if fails? */ | |
335 | + if (be->be_state == PNFS_BLOCK_INVALID_DATA) { | |
336 | + se = bl_pop_one_short_extent(be->be_inval); | |
337 | + BUG_ON(!se); | |
338 | + bl_mark_for_commit(be, isect, len, se); | |
339 | + } | |
339 | 340 | isect += len; |
340 | 341 | bl_put_extent(be); |
341 | 342 | } |
... | ... | @@ -357,7 +358,8 @@ |
357 | 358 | end_page_writeback(page); |
358 | 359 | page_cache_release(page); |
359 | 360 | } while (bvec >= bio->bi_io_vec); |
360 | - if (!uptodate) { | |
361 | + | |
362 | + if (unlikely(!uptodate)) { | |
361 | 363 | if (!wdata->pnfs_error) |
362 | 364 | wdata->pnfs_error = -EIO; |
363 | 365 | pnfs_set_lo_fail(wdata->lseg); |
... | ... | @@ -366,7 +368,6 @@ |
366 | 368 | put_parallel(par); |
367 | 369 | } |
368 | 370 | |
369 | -/* This is basically copied from mpage_end_io_read */ | |
370 | 371 | static void bl_end_io_write(struct bio *bio, int err) |
371 | 372 | { |
372 | 373 | struct parallel_io *par = bio->bi_private; |
... | ... | @@ -392,7 +393,7 @@ |
392 | 393 | dprintk("%s enter\n", __func__); |
393 | 394 | task = container_of(work, struct rpc_task, u.tk_work); |
394 | 395 | wdata = container_of(task, struct nfs_write_data, task); |
395 | - if (!wdata->pnfs_error) { | |
396 | + if (likely(!wdata->pnfs_error)) { | |
396 | 397 | /* Marks for LAYOUTCOMMIT */ |
397 | 398 | mark_extents_written(BLK_LSEG2EXT(wdata->lseg), |
398 | 399 | wdata->args.offset, wdata->args.count); |
399 | 400 | |
... | ... | @@ -401,11 +402,16 @@ |
401 | 402 | } |
402 | 403 | |
403 | 404 | /* Called when last of bios associated with a bl_write_pagelist call finishes */ |
404 | -static void bl_end_par_io_write(void *data) | |
405 | +static void bl_end_par_io_write(void *data, int num_se) | |
405 | 406 | { |
406 | 407 | struct nfs_write_data *wdata = data; |
407 | 408 | |
408 | - wdata->task.tk_status = 0; | |
409 | + if (unlikely(wdata->pnfs_error)) { | |
410 | + bl_free_short_extents(&BLK_LSEG2EXT(wdata->lseg)->bl_inval, | |
411 | + num_se); | |
412 | + } | |
413 | + | |
414 | + wdata->task.tk_status = wdata->pnfs_error; | |
409 | 415 | wdata->verf.committed = NFS_FILE_SYNC; |
410 | 416 | INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup); |
411 | 417 | schedule_work(&wdata->task.u.tk_work); |
... | ... | @@ -484,6 +490,55 @@ |
484 | 490 | return ret; |
485 | 491 | } |
486 | 492 | |
493 | +/* Find or create a zeroing page marked being writeback. | |
494 | + * Return ERR_PTR on error, NULL to indicate skip this page and page itself | |
495 | + * to indicate write out. | |
496 | + */ | |
497 | +static struct page * | |
498 | +bl_find_get_zeroing_page(struct inode *inode, pgoff_t index, | |
499 | + struct pnfs_block_extent *cow_read) | |
500 | +{ | |
501 | + struct page *page; | |
502 | + int locked = 0; | |
503 | + page = find_get_page(inode->i_mapping, index); | |
504 | + if (page) | |
505 | + goto check_page; | |
506 | + | |
507 | + page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); | |
508 | + if (unlikely(!page)) { | |
509 | + dprintk("%s oom\n", __func__); | |
510 | + return ERR_PTR(-ENOMEM); | |
511 | + } | |
512 | + locked = 1; | |
513 | + | |
514 | +check_page: | |
515 | + /* PageDirty: Other will write this out | |
516 | + * PageWriteback: Other is writing this out | |
517 | + * PageUptodate: It was read before | |
518 | + */ | |
519 | + if (PageDirty(page) || PageWriteback(page)) { | |
520 | + print_page(page); | |
521 | + if (locked) | |
522 | + unlock_page(page); | |
523 | + page_cache_release(page); | |
524 | + return NULL; | |
525 | + } | |
526 | + | |
527 | + if (!locked) { | |
528 | + lock_page(page); | |
529 | + locked = 1; | |
530 | + goto check_page; | |
531 | + } | |
532 | + if (!PageUptodate(page)) { | |
533 | + /* New page, readin or zero it */ | |
534 | + init_page_for_write(page, cow_read); | |
535 | + } | |
536 | + set_page_writeback(page); | |
537 | + unlock_page(page); | |
538 | + | |
539 | + return page; | |
540 | +} | |
541 | + | |
487 | 542 | static enum pnfs_try_status |
488 | 543 | bl_write_pagelist(struct nfs_write_data *wdata, int sync) |
489 | 544 | { |
... | ... | @@ -508,9 +563,7 @@ |
508 | 563 | */ |
509 | 564 | par = alloc_parallel(wdata); |
510 | 565 | if (!par) |
511 | - return PNFS_NOT_ATTEMPTED; | |
512 | - par->call_ops = *wdata->mds_ops; | |
513 | - par->call_ops.rpc_call_done = bl_rpc_do_nothing; | |
566 | + goto out_mds; | |
514 | 567 | par->pnfs_callback = bl_end_par_io_write; |
515 | 568 | /* At this point, have to be more careful with error handling */ |
516 | 569 | |
517 | 570 | |
... | ... | @@ -518,12 +571,15 @@ |
518 | 571 | be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), isect, &cow_read); |
519 | 572 | if (!be || !is_writable(be, isect)) { |
520 | 573 | dprintk("%s no matching extents!\n", __func__); |
521 | - wdata->pnfs_error = -EINVAL; | |
522 | - goto out; | |
574 | + goto out_mds; | |
523 | 575 | } |
524 | 576 | |
525 | 577 | /* First page inside INVALID extent */ |
526 | 578 | if (be->be_state == PNFS_BLOCK_INVALID_DATA) { |
579 | + if (likely(!bl_push_one_short_extent(be->be_inval))) | |
580 | + par->bse_count++; | |
581 | + else | |
582 | + goto out_mds; | |
527 | 583 | temp = offset >> PAGE_CACHE_SHIFT; |
528 | 584 | npg_zero = do_div(temp, npg_per_block); |
529 | 585 | isect = (sector_t) (((offset - npg_zero * PAGE_CACHE_SIZE) & |
530 | 586 | |
531 | 587 | |
532 | 588 | |
... | ... | @@ -543,36 +599,16 @@ |
543 | 599 | dprintk("%s zero %dth page: index %lu isect %llu\n", |
544 | 600 | __func__, npg_zero, index, |
545 | 601 | (unsigned long long)isect); |
546 | - page = | |
547 | - find_or_create_page(wdata->inode->i_mapping, index, | |
548 | - GFP_NOFS); | |
549 | - if (!page) { | |
550 | - dprintk("%s oom\n", __func__); | |
551 | - wdata->pnfs_error = -ENOMEM; | |
602 | + page = bl_find_get_zeroing_page(wdata->inode, index, | |
603 | + cow_read); | |
604 | + if (unlikely(IS_ERR(page))) { | |
605 | + wdata->pnfs_error = PTR_ERR(page); | |
552 | 606 | goto out; |
553 | - } | |
554 | - | |
555 | - /* PageDirty: Other will write this out | |
556 | - * PageWriteback: Other is writing this out | |
557 | - * PageUptodate: It was read before | |
558 | - * sector_initialized: already written out | |
559 | - */ | |
560 | - if (PageDirty(page) || PageWriteback(page)) { | |
561 | - print_page(page); | |
562 | - unlock_page(page); | |
563 | - page_cache_release(page); | |
607 | + } else if (page == NULL) | |
564 | 608 | goto next_page; |
565 | - } | |
566 | - if (!PageUptodate(page)) { | |
567 | - /* New page, readin or zero it */ | |
568 | - init_page_for_write(page, cow_read); | |
569 | - } | |
570 | - set_page_writeback(page); | |
571 | - unlock_page(page); | |
572 | 609 | |
573 | 610 | ret = bl_mark_sectors_init(be->be_inval, isect, |
574 | - PAGE_CACHE_SECTORS, | |
575 | - NULL); | |
611 | + PAGE_CACHE_SECTORS); | |
576 | 612 | if (unlikely(ret)) { |
577 | 613 | dprintk("%s bl_mark_sectors_init fail %d\n", |
578 | 614 | __func__, ret); |
... | ... | @@ -581,6 +617,19 @@ |
581 | 617 | wdata->pnfs_error = ret; |
582 | 618 | goto out; |
583 | 619 | } |
620 | + if (likely(!bl_push_one_short_extent(be->be_inval))) | |
621 | + par->bse_count++; | |
622 | + else { | |
623 | + end_page_writeback(page); | |
624 | + page_cache_release(page); | |
625 | + wdata->pnfs_error = -ENOMEM; | |
626 | + goto out; | |
627 | + } | |
628 | + /* FIXME: This should be done in bi_end_io */ | |
629 | + mark_extents_written(BLK_LSEG2EXT(wdata->lseg), | |
630 | + page->index << PAGE_CACHE_SHIFT, | |
631 | + PAGE_CACHE_SIZE); | |
632 | + | |
584 | 633 | bio = bl_add_page_to_bio(bio, npg_zero, WRITE, |
585 | 634 | isect, page, be, |
586 | 635 | bl_end_io_write_zero, par); |
... | ... | @@ -589,10 +638,6 @@ |
589 | 638 | bio = NULL; |
590 | 639 | goto out; |
591 | 640 | } |
592 | - /* FIXME: This should be done in bi_end_io */ | |
593 | - mark_extents_written(BLK_LSEG2EXT(wdata->lseg), | |
594 | - page->index << PAGE_CACHE_SHIFT, | |
595 | - PAGE_CACHE_SIZE); | |
596 | 641 | next_page: |
597 | 642 | isect += PAGE_CACHE_SECTORS; |
598 | 643 | extent_length -= PAGE_CACHE_SECTORS; |
599 | 644 | |
... | ... | @@ -616,13 +661,21 @@ |
616 | 661 | wdata->pnfs_error = -EINVAL; |
617 | 662 | goto out; |
618 | 663 | } |
664 | + if (be->be_state == PNFS_BLOCK_INVALID_DATA) { | |
665 | + if (likely(!bl_push_one_short_extent( | |
666 | + be->be_inval))) | |
667 | + par->bse_count++; | |
668 | + else { | |
669 | + wdata->pnfs_error = -ENOMEM; | |
670 | + goto out; | |
671 | + } | |
672 | + } | |
619 | 673 | extent_length = be->be_length - |
620 | 674 | (isect - be->be_f_offset); |
621 | 675 | } |
622 | 676 | if (be->be_state == PNFS_BLOCK_INVALID_DATA) { |
623 | 677 | ret = bl_mark_sectors_init(be->be_inval, isect, |
624 | - PAGE_CACHE_SECTORS, | |
625 | - NULL); | |
678 | + PAGE_CACHE_SECTORS); | |
626 | 679 | if (unlikely(ret)) { |
627 | 680 | dprintk("%s bl_mark_sectors_init fail %d\n", |
628 | 681 | __func__, ret); |
... | ... | @@ -664,6 +717,10 @@ |
664 | 717 | bl_submit_bio(WRITE, bio); |
665 | 718 | put_parallel(par); |
666 | 719 | return PNFS_ATTEMPTED; |
720 | +out_mds: | |
721 | + bl_put_extent(be); | |
722 | + kfree(par); | |
723 | + return PNFS_NOT_ATTEMPTED; | |
667 | 724 | } |
668 | 725 | |
669 | 726 | /* FIXME - range ignored */ |
670 | 727 | |
... | ... | @@ -690,11 +747,17 @@ |
690 | 747 | release_inval_marks(struct pnfs_inval_markings *marks) |
691 | 748 | { |
692 | 749 | struct pnfs_inval_tracking *pos, *temp; |
750 | + struct pnfs_block_short_extent *se, *stemp; | |
693 | 751 | |
694 | 752 | list_for_each_entry_safe(pos, temp, &marks->im_tree.mtt_stub, it_link) { |
695 | 753 | list_del(&pos->it_link); |
696 | 754 | kfree(pos); |
697 | 755 | } |
756 | + | |
757 | + list_for_each_entry_safe(se, stemp, &marks->im_extents, bse_node) { | |
758 | + list_del(&se->bse_node); | |
759 | + kfree(se); | |
760 | + } | |
698 | 761 | return; |
699 | 762 | } |
700 | 763 | |
701 | 764 | |
... | ... | @@ -779,16 +842,13 @@ |
779 | 842 | static void free_blk_mountid(struct block_mount_id *mid) |
780 | 843 | { |
781 | 844 | if (mid) { |
782 | - struct pnfs_block_dev *dev; | |
783 | - spin_lock(&mid->bm_lock); | |
784 | - while (!list_empty(&mid->bm_devlist)) { | |
785 | - dev = list_first_entry(&mid->bm_devlist, | |
786 | - struct pnfs_block_dev, | |
787 | - bm_node); | |
845 | + struct pnfs_block_dev *dev, *tmp; | |
846 | + | |
847 | + /* No need to take bm_lock as we are last user freeing bm_devlist */ | |
848 | + list_for_each_entry_safe(dev, tmp, &mid->bm_devlist, bm_node) { | |
788 | 849 | list_del(&dev->bm_node); |
789 | 850 | bl_free_block_dev(dev); |
790 | 851 | } |
791 | - spin_unlock(&mid->bm_lock); | |
792 | 852 | kfree(mid); |
793 | 853 | } |
794 | 854 | } |
fs/nfs/blocklayout/blocklayout.h
... | ... | @@ -70,6 +70,7 @@ |
70 | 70 | spinlock_t im_lock; |
71 | 71 | struct my_tree im_tree; /* Sectors that need LAYOUTCOMMIT */ |
72 | 72 | sector_t im_block_size; /* Server blocksize in sectors */ |
73 | + struct list_head im_extents; /* Short extents for INVAL->RW conversion */ | |
73 | 74 | }; |
74 | 75 | |
75 | 76 | struct pnfs_inval_tracking { |
... | ... | @@ -105,6 +106,7 @@ |
105 | 106 | { |
106 | 107 | spin_lock_init(&marks->im_lock); |
107 | 108 | INIT_LIST_HEAD(&marks->im_tree.mtt_stub); |
109 | + INIT_LIST_HEAD(&marks->im_extents); | |
108 | 110 | marks->im_block_size = blocksize; |
109 | 111 | marks->im_tree.mtt_step_size = min((sector_t)PAGE_CACHE_SECTORS, |
110 | 112 | blocksize); |
... | ... | @@ -186,8 +188,7 @@ |
186 | 188 | bl_find_get_extent(struct pnfs_block_layout *bl, sector_t isect, |
187 | 189 | struct pnfs_block_extent **cow_read); |
188 | 190 | int bl_mark_sectors_init(struct pnfs_inval_markings *marks, |
189 | - sector_t offset, sector_t length, | |
190 | - sector_t **pages); | |
191 | + sector_t offset, sector_t length); | |
191 | 192 | void bl_put_extent(struct pnfs_block_extent *be); |
192 | 193 | struct pnfs_block_extent *bl_alloc_extent(void); |
193 | 194 | int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect); |
... | ... | @@ -200,7 +201,12 @@ |
200 | 201 | int bl_add_merge_extent(struct pnfs_block_layout *bl, |
201 | 202 | struct pnfs_block_extent *new); |
202 | 203 | int bl_mark_for_commit(struct pnfs_block_extent *be, |
203 | - sector_t offset, sector_t length); | |
204 | + sector_t offset, sector_t length, | |
205 | + struct pnfs_block_short_extent *new); | |
206 | +int bl_push_one_short_extent(struct pnfs_inval_markings *marks); | |
207 | +struct pnfs_block_short_extent * | |
208 | +bl_pop_one_short_extent(struct pnfs_inval_markings *marks); | |
209 | +void bl_free_short_extents(struct pnfs_inval_markings *marks, int num_to_free); | |
204 | 210 | |
205 | 211 | #endif /* FS_NFS_NFS4BLOCKLAYOUT_H */ |
fs/nfs/blocklayout/extents.c
... | ... | @@ -110,13 +110,7 @@ |
110 | 110 | return 0; |
111 | 111 | } else { |
112 | 112 | struct pnfs_inval_tracking *new; |
113 | - if (storage) | |
114 | - new = storage; | |
115 | - else { | |
116 | - new = kmalloc(sizeof(*new), GFP_NOFS); | |
117 | - if (!new) | |
118 | - return -ENOMEM; | |
119 | - } | |
113 | + new = storage; | |
120 | 114 | new->it_sector = s; |
121 | 115 | new->it_tags = (1 << tag); |
122 | 116 | list_add(&new->it_link, &pos->it_link); |
123 | 117 | |
... | ... | @@ -139,11 +133,13 @@ |
139 | 133 | } |
140 | 134 | |
141 | 135 | /* Ensure that future operations on given range of tree will not malloc */ |
142 | -static int _preload_range(struct my_tree *tree, u64 offset, u64 length) | |
136 | +static int _preload_range(struct pnfs_inval_markings *marks, | |
137 | + u64 offset, u64 length) | |
143 | 138 | { |
144 | 139 | u64 start, end, s; |
145 | 140 | int count, i, used = 0, status = -ENOMEM; |
146 | 141 | struct pnfs_inval_tracking **storage; |
142 | + struct my_tree *tree = &marks->im_tree; | |
147 | 143 | |
148 | 144 | dprintk("%s(%llu, %llu) enter\n", __func__, offset, length); |
149 | 145 | start = normalize(offset, tree->mtt_step_size); |
150 | 146 | |
151 | 147 | |
... | ... | @@ -161,12 +157,11 @@ |
161 | 157 | goto out_cleanup; |
162 | 158 | } |
163 | 159 | |
164 | - /* Now need lock - HOW??? */ | |
165 | - | |
160 | + spin_lock_bh(&marks->im_lock); | |
166 | 161 | for (s = start; s < end; s += tree->mtt_step_size) |
167 | 162 | used += _add_entry(tree, s, INTERNAL_EXISTS, storage[used]); |
163 | + spin_unlock_bh(&marks->im_lock); | |
168 | 164 | |
169 | - /* Unlock - HOW??? */ | |
170 | 165 | status = 0; |
171 | 166 | |
172 | 167 | out_cleanup: |
173 | 168 | |
174 | 169 | |
... | ... | @@ -179,41 +174,14 @@ |
179 | 174 | return status; |
180 | 175 | } |
181 | 176 | |
182 | -static void set_needs_init(sector_t *array, sector_t offset) | |
183 | -{ | |
184 | - sector_t *p = array; | |
185 | - | |
186 | - dprintk("%s enter\n", __func__); | |
187 | - if (!p) | |
188 | - return; | |
189 | - while (*p < offset) | |
190 | - p++; | |
191 | - if (*p == offset) | |
192 | - return; | |
193 | - else if (*p == ~0) { | |
194 | - *p++ = offset; | |
195 | - *p = ~0; | |
196 | - return; | |
197 | - } else { | |
198 | - sector_t *save = p; | |
199 | - dprintk("%s Adding %llu\n", __func__, (u64)offset); | |
200 | - while (*p != ~0) | |
201 | - p++; | |
202 | - p++; | |
203 | - memmove(save + 1, save, (char *)p - (char *)save); | |
204 | - *save = offset; | |
205 | - return; | |
206 | - } | |
207 | -} | |
208 | - | |
209 | 177 | /* We are relying on page lock to serialize this */ |
210 | 178 | int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect) |
211 | 179 | { |
212 | 180 | int rv; |
213 | 181 | |
214 | - spin_lock(&marks->im_lock); | |
182 | + spin_lock_bh(&marks->im_lock); | |
215 | 183 | rv = _has_tag(&marks->im_tree, isect, EXTENT_INITIALIZED); |
216 | - spin_unlock(&marks->im_lock); | |
184 | + spin_unlock_bh(&marks->im_lock); | |
217 | 185 | return rv; |
218 | 186 | } |
219 | 187 | |
220 | 188 | |
221 | 189 | |
222 | 190 | |
223 | 191 | |
224 | 192 | |
225 | 193 | |
226 | 194 | |
227 | 195 | |
228 | 196 | |
229 | 197 | |
230 | 198 | |
... | ... | @@ -253,78 +221,39 @@ |
253 | 221 | { |
254 | 222 | int rv; |
255 | 223 | |
256 | - spin_lock(&marks->im_lock); | |
224 | + spin_lock_bh(&marks->im_lock); | |
257 | 225 | rv = _range_has_tag(&marks->im_tree, start, end, EXTENT_WRITTEN); |
258 | - spin_unlock(&marks->im_lock); | |
226 | + spin_unlock_bh(&marks->im_lock); | |
259 | 227 | return rv; |
260 | 228 | } |
261 | 229 | |
262 | 230 | /* Marks sectors in [offest, offset_length) as having been initialized. |
263 | 231 | * All lengths are step-aligned, where step is min(pagesize, blocksize). |
264 | - * Notes where partial block is initialized, and helps prepare it for | |
265 | - * complete initialization later. | |
232 | + * Currently assumes offset is page-aligned | |
266 | 233 | */ |
267 | -/* Currently assumes offset is page-aligned */ | |
268 | 234 | int bl_mark_sectors_init(struct pnfs_inval_markings *marks, |
269 | - sector_t offset, sector_t length, | |
270 | - sector_t **pages) | |
235 | + sector_t offset, sector_t length) | |
271 | 236 | { |
272 | - sector_t s, start, end; | |
273 | - sector_t *array = NULL; /* Pages to mark */ | |
237 | + sector_t start, end; | |
274 | 238 | |
275 | 239 | dprintk("%s(offset=%llu,len=%llu) enter\n", |
276 | 240 | __func__, (u64)offset, (u64)length); |
277 | - s = max((sector_t) 3, | |
278 | - 2 * (marks->im_block_size / (PAGE_CACHE_SECTORS))); | |
279 | - dprintk("%s set max=%llu\n", __func__, (u64)s); | |
280 | - if (pages) { | |
281 | - array = kmalloc(s * sizeof(sector_t), GFP_NOFS); | |
282 | - if (!array) | |
283 | - goto outerr; | |
284 | - array[0] = ~0; | |
285 | - } | |
286 | 241 | |
287 | 242 | start = normalize(offset, marks->im_block_size); |
288 | 243 | end = normalize_up(offset + length, marks->im_block_size); |
289 | - if (_preload_range(&marks->im_tree, start, end - start)) | |
244 | + if (_preload_range(marks, start, end - start)) | |
290 | 245 | goto outerr; |
291 | 246 | |
292 | - spin_lock(&marks->im_lock); | |
293 | - | |
294 | - for (s = normalize_up(start, PAGE_CACHE_SECTORS); | |
295 | - s < offset; s += PAGE_CACHE_SECTORS) { | |
296 | - dprintk("%s pre-area pages\n", __func__); | |
297 | - /* Portion of used block is not initialized */ | |
298 | - if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED)) | |
299 | - set_needs_init(array, s); | |
300 | - } | |
247 | + spin_lock_bh(&marks->im_lock); | |
301 | 248 | if (_set_range(&marks->im_tree, EXTENT_INITIALIZED, offset, length)) |
302 | 249 | goto out_unlock; |
303 | - for (s = normalize_up(offset + length, PAGE_CACHE_SECTORS); | |
304 | - s < end; s += PAGE_CACHE_SECTORS) { | |
305 | - dprintk("%s post-area pages\n", __func__); | |
306 | - if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED)) | |
307 | - set_needs_init(array, s); | |
308 | - } | |
250 | + spin_unlock_bh(&marks->im_lock); | |
309 | 251 | |
310 | - spin_unlock(&marks->im_lock); | |
311 | - | |
312 | - if (pages) { | |
313 | - if (array[0] == ~0) { | |
314 | - kfree(array); | |
315 | - *pages = NULL; | |
316 | - } else | |
317 | - *pages = array; | |
318 | - } | |
319 | 252 | return 0; |
320 | 253 | |
321 | - out_unlock: | |
322 | - spin_unlock(&marks->im_lock); | |
323 | - outerr: | |
324 | - if (pages) { | |
325 | - kfree(array); | |
326 | - *pages = NULL; | |
327 | - } | |
254 | +out_unlock: | |
255 | + spin_unlock_bh(&marks->im_lock); | |
256 | +outerr: | |
328 | 257 | return -ENOMEM; |
329 | 258 | } |
330 | 259 | |
331 | 260 | |
... | ... | @@ -338,9 +267,9 @@ |
338 | 267 | |
339 | 268 | dprintk("%s(offset=%llu,len=%llu) enter\n", __func__, |
340 | 269 | (u64)offset, (u64)length); |
341 | - spin_lock(&marks->im_lock); | |
270 | + spin_lock_bh(&marks->im_lock); | |
342 | 271 | status = _set_range(&marks->im_tree, EXTENT_WRITTEN, offset, length); |
343 | - spin_unlock(&marks->im_lock); | |
272 | + spin_unlock_bh(&marks->im_lock); | |
344 | 273 | return status; |
345 | 274 | } |
346 | 275 | |
347 | 276 | |
348 | 277 | |
349 | 278 | |
... | ... | @@ -440,20 +369,18 @@ |
440 | 369 | |
441 | 370 | /* Note the range described by offset, length is guaranteed to be contained |
442 | 371 | * within be. |
372 | + * new will be freed, either by this function or add_to_commitlist if they | |
373 | + * decide not to use it, or after LAYOUTCOMMIT uses it in the commitlist. | |
443 | 374 | */ |
444 | 375 | int bl_mark_for_commit(struct pnfs_block_extent *be, |
445 | - sector_t offset, sector_t length) | |
376 | + sector_t offset, sector_t length, | |
377 | + struct pnfs_block_short_extent *new) | |
446 | 378 | { |
447 | 379 | sector_t new_end, end = offset + length; |
448 | - struct pnfs_block_short_extent *new; | |
449 | 380 | struct pnfs_block_layout *bl = container_of(be->be_inval, |
450 | 381 | struct pnfs_block_layout, |
451 | 382 | bl_inval); |
452 | 383 | |
453 | - new = kmalloc(sizeof(*new), GFP_NOFS); | |
454 | - if (!new) | |
455 | - return -ENOMEM; | |
456 | - | |
457 | 384 | mark_written_sectors(be->be_inval, offset, length); |
458 | 385 | /* We want to add the range to commit list, but it must be |
459 | 386 | * block-normalized, and verified that the normalized range has |
... | ... | @@ -483,9 +410,6 @@ |
483 | 410 | new->bse_mdev = be->be_mdev; |
484 | 411 | |
485 | 412 | spin_lock(&bl->bl_ext_lock); |
486 | - /* new will be freed, either by add_to_commitlist if it decides not | |
487 | - * to use it, or after LAYOUTCOMMIT uses it in the commitlist. | |
488 | - */ | |
489 | 413 | add_to_commitlist(bl, new); |
490 | 414 | spin_unlock(&bl->bl_ext_lock); |
491 | 415 | return 0; |
... | ... | @@ -932,5 +856,55 @@ |
932 | 856 | spin_unlock(&bl->bl_ext_lock); |
933 | 857 | } |
934 | 858 | } |
859 | +} | |
860 | + | |
861 | +int bl_push_one_short_extent(struct pnfs_inval_markings *marks) | |
862 | +{ | |
863 | + struct pnfs_block_short_extent *new; | |
864 | + | |
865 | + new = kmalloc(sizeof(*new), GFP_NOFS); | |
866 | + if (unlikely(!new)) | |
867 | + return -ENOMEM; | |
868 | + | |
869 | + spin_lock_bh(&marks->im_lock); | |
870 | + list_add(&new->bse_node, &marks->im_extents); | |
871 | + spin_unlock_bh(&marks->im_lock); | |
872 | + | |
873 | + return 0; | |
874 | +} | |
875 | + | |
876 | +struct pnfs_block_short_extent * | |
877 | +bl_pop_one_short_extent(struct pnfs_inval_markings *marks) | |
878 | +{ | |
879 | + struct pnfs_block_short_extent *rv = NULL; | |
880 | + | |
881 | + spin_lock_bh(&marks->im_lock); | |
882 | + if (!list_empty(&marks->im_extents)) { | |
883 | + rv = list_entry((&marks->im_extents)->next, | |
884 | + struct pnfs_block_short_extent, bse_node); | |
885 | + list_del_init(&rv->bse_node); | |
886 | + } | |
887 | + spin_unlock_bh(&marks->im_lock); | |
888 | + | |
889 | + return rv; | |
890 | +} | |
891 | + | |
892 | +void bl_free_short_extents(struct pnfs_inval_markings *marks, int num_to_free) | |
893 | +{ | |
894 | + struct pnfs_block_short_extent *se = NULL, *tmp; | |
895 | + | |
896 | + if (num_to_free <= 0) | |
897 | + return; | |
898 | + | |
899 | + spin_lock(&marks->im_lock); | |
900 | + list_for_each_entry_safe(se, tmp, &marks->im_extents, bse_node) { | |
901 | + list_del(&se->bse_node); | |
902 | + kfree(se); | |
903 | + if (--num_to_free == 0) | |
904 | + break; | |
905 | + } | |
906 | + spin_unlock(&marks->im_lock); | |
907 | + | |
908 | + BUG_ON(num_to_free > 0); | |
935 | 909 | } |
fs/nfs/callback.h
fs/nfs/callback_xdr.c
... | ... | @@ -305,6 +305,10 @@ |
305 | 305 | n = ntohl(*p++); |
306 | 306 | if (n <= 0) |
307 | 307 | goto out; |
308 | + if (n > ULONG_MAX / sizeof(*args->devs)) { | |
309 | + status = htonl(NFS4ERR_BADXDR); | |
310 | + goto out; | |
311 | + } | |
308 | 312 | |
309 | 313 | args->devs = kmalloc(n * sizeof(*args->devs), GFP_KERNEL); |
310 | 314 | if (!args->devs) { |
fs/nfs/nfs4filelayoutdev.c
fs/nfs/nfs4proc.c
... | ... | @@ -3587,7 +3587,7 @@ |
3587 | 3587 | res.acl_flags |= NFS4_ACL_LEN_REQUEST; |
3588 | 3588 | resp_buf = page_address(pages[0]); |
3589 | 3589 | |
3590 | - dprintk("%s buf %p buflen %ld npages %d args.acl_len %ld\n", | |
3590 | + dprintk("%s buf %p buflen %zu npages %d args.acl_len %zu\n", | |
3591 | 3591 | __func__, buf, buflen, npages, args.acl_len); |
3592 | 3592 | ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode), |
3593 | 3593 | &msg, &args.seq_args, &res.seq_res, 0); |