Commit a12587b00388d1694933252e97abca237bc3a6b8

Authored by Linus Torvalds

Merge tag 'nfs-for-3.3-2' of git://git.linux-nfs.org/projects/trondmy/linux-nfs

NFS client bugfixes and cleanups for Linux 3.3 (pull 2)

* tag 'nfs-for-3.3-2' of git://git.linux-nfs.org/projects/trondmy/linux-nfs:
  pnfsblock: alloc short extent before submit bio
  pnfsblock: remove rpc_call_ops from struct parallel_io
  pnfsblock: move find lock page logic out of bl_write_pagelist
  pnfsblock: cleanup bl_mark_sectors_init
  pnfsblock: limit bio page count
  pnfsblock: don't spinlock when freeing block_dev
  pnfsblock: clean up _add_entry
  pnfsblock: set read/write tk_status to pnfs_error
  pnfsblock: acquire im_lock in _preload_range
  NFS4: fix compile warnings in nfs4proc.c
  nfs: check for integer overflow in decode_devicenotify_args()
  NFS: cleanup endian type in decode_ds_addr()
  NFS: add an endian notation
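
The integer-overflow fix in decode_devicenotify_args() below uses the standard guard for a count-times-size allocation: reject any element count n for which n * sizeof(element) would wrap before handing it to the allocator. A minimal userspace sketch of the same idiom (the struct and function names here are illustrative, not taken from the kernel sources):

#include <limits.h>
#include <stdlib.h>

struct notify_entry { unsigned int type; unsigned int layout_type; };

/* Allocate an array of n entries, refusing any count whose
 * n * sizeof(entry) product would overflow the multiplication. */
static struct notify_entry *alloc_notify_array(unsigned long n)
{
	if (n == 0 || n > ULONG_MAX / sizeof(struct notify_entry))
		return NULL;
	return malloc(n * sizeof(struct notify_entry));
}

The kernel change returns NFS4ERR_BADXDR instead of NULL, but the guard condition before the kmalloc() call is the same.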

Showing 7 changed files

fs/nfs/blocklayout/blocklayout.c
... ... @@ -90,9 +90,9 @@
90 90 */
91 91 struct parallel_io {
92 92 struct kref refcnt;
93   - struct rpc_call_ops call_ops;
94   - void (*pnfs_callback) (void *data);
  93 + void (*pnfs_callback) (void *data, int num_se);
95 94 void *data;
  95 + int bse_count;
96 96 };
97 97  
98 98 static inline struct parallel_io *alloc_parallel(void *data)
... ... @@ -103,6 +103,7 @@
103 103 if (rv) {
104 104 rv->data = data;
105 105 kref_init(&rv->refcnt);
  106 + rv->bse_count = 0;
106 107 }
107 108 return rv;
108 109 }
... ... @@ -117,7 +118,7 @@
117 118 struct parallel_io *p = container_of(kref, struct parallel_io, refcnt);
118 119  
119 120 dprintk("%s enter\n", __func__);
120   - p->pnfs_callback(p->data);
  121 + p->pnfs_callback(p->data, p->bse_count);
121 122 kfree(p);
122 123 }
123 124  
... ... @@ -146,14 +147,19 @@
146 147 {
147 148 struct bio *bio;
148 149  
  150 + npg = min(npg, BIO_MAX_PAGES);
149 151 bio = bio_alloc(GFP_NOIO, npg);
150   - if (!bio)
151   - return NULL;
  152 + if (!bio && (current->flags & PF_MEMALLOC)) {
  153 + while (!bio && (npg /= 2))
  154 + bio = bio_alloc(GFP_NOIO, npg);
  155 + }
152 156  
153   - bio->bi_sector = isect - be->be_f_offset + be->be_v_offset;
154   - bio->bi_bdev = be->be_mdev;
155   - bio->bi_end_io = end_io;
156   - bio->bi_private = par;
  157 + if (bio) {
  158 + bio->bi_sector = isect - be->be_f_offset + be->be_v_offset;
  159 + bio->bi_bdev = be->be_mdev;
  160 + bio->bi_end_io = end_io;
  161 + bio->bi_private = par;
  162 + }
157 163 return bio;
158 164 }
159 165  
... ... @@ -212,22 +218,15 @@
212 218 }
213 219  
214 220 static void
215   -bl_end_par_io_read(void *data)
  221 +bl_end_par_io_read(void *data, int unused)
216 222 {
217 223 struct nfs_read_data *rdata = data;
218 224  
  225 + rdata->task.tk_status = rdata->pnfs_error;
219 226 INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup);
220 227 schedule_work(&rdata->task.u.tk_work);
221 228 }
222 229  
223   -/* We don't want normal .rpc_call_done callback used, so we replace it
224   - * with this stub.
225   - */
226   -static void bl_rpc_do_nothing(struct rpc_task *task, void *calldata)
227   -{
228   - return;
229   -}
230   -
231 230 static enum pnfs_try_status
232 231 bl_read_pagelist(struct nfs_read_data *rdata)
233 232 {
... ... @@ -247,8 +246,6 @@
247 246 par = alloc_parallel(rdata);
248 247 if (!par)
249 248 goto use_mds;
250   - par->call_ops = *rdata->mds_ops;
251   - par->call_ops.rpc_call_done = bl_rpc_do_nothing;
252 249 par->pnfs_callback = bl_end_par_io_read;
253 250 /* At this point, we can no longer jump to use_mds */
254 251  
... ... @@ -322,6 +319,7 @@
322 319 {
323 320 sector_t isect, end;
324 321 struct pnfs_block_extent *be;
  322 + struct pnfs_block_short_extent *se;
325 323  
326 324 dprintk("%s(%llu, %u)\n", __func__, offset, count);
327 325 if (count == 0)
... ... @@ -334,8 +332,11 @@
334 332 be = bl_find_get_extent(bl, isect, NULL);
335 333 BUG_ON(!be); /* FIXME */
336 334 len = min(end, be->be_f_offset + be->be_length) - isect;
337   - if (be->be_state == PNFS_BLOCK_INVALID_DATA)
338   - bl_mark_for_commit(be, isect, len); /* What if fails? */
  335 + if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
  336 + se = bl_pop_one_short_extent(be->be_inval);
  337 + BUG_ON(!se);
  338 + bl_mark_for_commit(be, isect, len, se);
  339 + }
339 340 isect += len;
340 341 bl_put_extent(be);
341 342 }
... ... @@ -357,7 +358,8 @@
357 358 end_page_writeback(page);
358 359 page_cache_release(page);
359 360 } while (bvec >= bio->bi_io_vec);
360   - if (!uptodate) {
  361 +
  362 + if (unlikely(!uptodate)) {
361 363 if (!wdata->pnfs_error)
362 364 wdata->pnfs_error = -EIO;
363 365 pnfs_set_lo_fail(wdata->lseg);
... ... @@ -366,7 +368,6 @@
366 368 put_parallel(par);
367 369 }
368 370  
369   -/* This is basically copied from mpage_end_io_read */
370 371 static void bl_end_io_write(struct bio *bio, int err)
371 372 {
372 373 struct parallel_io *par = bio->bi_private;
... ... @@ -392,7 +393,7 @@
392 393 dprintk("%s enter\n", __func__);
393 394 task = container_of(work, struct rpc_task, u.tk_work);
394 395 wdata = container_of(task, struct nfs_write_data, task);
395   - if (!wdata->pnfs_error) {
  396 + if (likely(!wdata->pnfs_error)) {
396 397 /* Marks for LAYOUTCOMMIT */
397 398 mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
398 399 wdata->args.offset, wdata->args.count);
... ... @@ -401,11 +402,16 @@
401 402 }
402 403  
403 404 /* Called when last of bios associated with a bl_write_pagelist call finishes */
404   -static void bl_end_par_io_write(void *data)
  405 +static void bl_end_par_io_write(void *data, int num_se)
405 406 {
406 407 struct nfs_write_data *wdata = data;
407 408  
408   - wdata->task.tk_status = 0;
  409 + if (unlikely(wdata->pnfs_error)) {
  410 + bl_free_short_extents(&BLK_LSEG2EXT(wdata->lseg)->bl_inval,
  411 + num_se);
  412 + }
  413 +
  414 + wdata->task.tk_status = wdata->pnfs_error;
409 415 wdata->verf.committed = NFS_FILE_SYNC;
410 416 INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup);
411 417 schedule_work(&wdata->task.u.tk_work);
... ... @@ -484,6 +490,55 @@
484 490 return ret;
485 491 }
486 492  
  493 +/* Find or create a zeroing page marked being writeback.
  494 + * Return ERR_PTR on error, NULL to indicate skip this page and page itself
  495 + * to indicate write out.
  496 + */
  497 +static struct page *
  498 +bl_find_get_zeroing_page(struct inode *inode, pgoff_t index,
  499 + struct pnfs_block_extent *cow_read)
  500 +{
  501 + struct page *page;
  502 + int locked = 0;
  503 + page = find_get_page(inode->i_mapping, index);
  504 + if (page)
  505 + goto check_page;
  506 +
  507 + page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
  508 + if (unlikely(!page)) {
  509 + dprintk("%s oom\n", __func__);
  510 + return ERR_PTR(-ENOMEM);
  511 + }
  512 + locked = 1;
  513 +
  514 +check_page:
  515 + /* PageDirty: Other will write this out
  516 + * PageWriteback: Other is writing this out
  517 + * PageUptodate: It was read before
  518 + */
  519 + if (PageDirty(page) || PageWriteback(page)) {
  520 + print_page(page);
  521 + if (locked)
  522 + unlock_page(page);
  523 + page_cache_release(page);
  524 + return NULL;
  525 + }
  526 +
  527 + if (!locked) {
  528 + lock_page(page);
  529 + locked = 1;
  530 + goto check_page;
  531 + }
  532 + if (!PageUptodate(page)) {
  533 + /* New page, readin or zero it */
  534 + init_page_for_write(page, cow_read);
  535 + }
  536 + set_page_writeback(page);
  537 + unlock_page(page);
  538 +
  539 + return page;
  540 +}
  541 +
487 542 static enum pnfs_try_status
488 543 bl_write_pagelist(struct nfs_write_data *wdata, int sync)
489 544 {
... ... @@ -508,9 +563,7 @@
508 563 */
509 564 par = alloc_parallel(wdata);
510 565 if (!par)
511   - return PNFS_NOT_ATTEMPTED;
512   - par->call_ops = *wdata->mds_ops;
513   - par->call_ops.rpc_call_done = bl_rpc_do_nothing;
  566 + goto out_mds;
514 567 par->pnfs_callback = bl_end_par_io_write;
515 568 /* At this point, have to be more careful with error handling */
516 569  
... ... @@ -518,12 +571,15 @@
518 571 be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), isect, &cow_read);
519 572 if (!be || !is_writable(be, isect)) {
520 573 dprintk("%s no matching extents!\n", __func__);
521   - wdata->pnfs_error = -EINVAL;
522   - goto out;
  574 + goto out_mds;
523 575 }
524 576  
525 577 /* First page inside INVALID extent */
526 578 if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
  579 + if (likely(!bl_push_one_short_extent(be->be_inval)))
  580 + par->bse_count++;
  581 + else
  582 + goto out_mds;
527 583 temp = offset >> PAGE_CACHE_SHIFT;
528 584 npg_zero = do_div(temp, npg_per_block);
529 585 isect = (sector_t) (((offset - npg_zero * PAGE_CACHE_SIZE) &
... ... @@ -543,36 +599,16 @@
543 599 dprintk("%s zero %dth page: index %lu isect %llu\n",
544 600 __func__, npg_zero, index,
545 601 (unsigned long long)isect);
546   - page =
547   - find_or_create_page(wdata->inode->i_mapping, index,
548   - GFP_NOFS);
549   - if (!page) {
550   - dprintk("%s oom\n", __func__);
551   - wdata->pnfs_error = -ENOMEM;
  602 + page = bl_find_get_zeroing_page(wdata->inode, index,
  603 + cow_read);
  604 + if (unlikely(IS_ERR(page))) {
  605 + wdata->pnfs_error = PTR_ERR(page);
552 606 goto out;
553   - }
554   -
555   - /* PageDirty: Other will write this out
556   - * PageWriteback: Other is writing this out
557   - * PageUptodate: It was read before
558   - * sector_initialized: already written out
559   - */
560   - if (PageDirty(page) || PageWriteback(page)) {
561   - print_page(page);
562   - unlock_page(page);
563   - page_cache_release(page);
  607 + } else if (page == NULL)
564 608 goto next_page;
565   - }
566   - if (!PageUptodate(page)) {
567   - /* New page, readin or zero it */
568   - init_page_for_write(page, cow_read);
569   - }
570   - set_page_writeback(page);
571   - unlock_page(page);
572 609  
573 610 ret = bl_mark_sectors_init(be->be_inval, isect,
574   - PAGE_CACHE_SECTORS,
575   - NULL);
  611 + PAGE_CACHE_SECTORS);
576 612 if (unlikely(ret)) {
577 613 dprintk("%s bl_mark_sectors_init fail %d\n",
578 614 __func__, ret);
... ... @@ -581,6 +617,19 @@
581 617 wdata->pnfs_error = ret;
582 618 goto out;
583 619 }
  620 + if (likely(!bl_push_one_short_extent(be->be_inval)))
  621 + par->bse_count++;
  622 + else {
  623 + end_page_writeback(page);
  624 + page_cache_release(page);
  625 + wdata->pnfs_error = -ENOMEM;
  626 + goto out;
  627 + }
  628 + /* FIXME: This should be done in bi_end_io */
  629 + mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
  630 + page->index << PAGE_CACHE_SHIFT,
  631 + PAGE_CACHE_SIZE);
  632 +
584 633 bio = bl_add_page_to_bio(bio, npg_zero, WRITE,
585 634 isect, page, be,
586 635 bl_end_io_write_zero, par);
... ... @@ -589,10 +638,6 @@
589 638 bio = NULL;
590 639 goto out;
591 640 }
592   - /* FIXME: This should be done in bi_end_io */
593   - mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
594   - page->index << PAGE_CACHE_SHIFT,
595   - PAGE_CACHE_SIZE);
596 641 next_page:
597 642 isect += PAGE_CACHE_SECTORS;
598 643 extent_length -= PAGE_CACHE_SECTORS;
... ... @@ -616,13 +661,21 @@
616 661 wdata->pnfs_error = -EINVAL;
617 662 goto out;
618 663 }
  664 + if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
  665 + if (likely(!bl_push_one_short_extent(
  666 + be->be_inval)))
  667 + par->bse_count++;
  668 + else {
  669 + wdata->pnfs_error = -ENOMEM;
  670 + goto out;
  671 + }
  672 + }
619 673 extent_length = be->be_length -
620 674 (isect - be->be_f_offset);
621 675 }
622 676 if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
623 677 ret = bl_mark_sectors_init(be->be_inval, isect,
624   - PAGE_CACHE_SECTORS,
625   - NULL);
  678 + PAGE_CACHE_SECTORS);
626 679 if (unlikely(ret)) {
627 680 dprintk("%s bl_mark_sectors_init fail %d\n",
628 681 __func__, ret);
... ... @@ -664,6 +717,10 @@
664 717 bl_submit_bio(WRITE, bio);
665 718 put_parallel(par);
666 719 return PNFS_ATTEMPTED;
  720 +out_mds:
  721 + bl_put_extent(be);
  722 + kfree(par);
  723 + return PNFS_NOT_ATTEMPTED;
667 724 }
668 725  
669 726 /* FIXME - range ignored */
... ... @@ -690,11 +747,17 @@
690 747 release_inval_marks(struct pnfs_inval_markings *marks)
691 748 {
692 749 struct pnfs_inval_tracking *pos, *temp;
  750 + struct pnfs_block_short_extent *se, *stemp;
693 751  
694 752 list_for_each_entry_safe(pos, temp, &marks->im_tree.mtt_stub, it_link) {
695 753 list_del(&pos->it_link);
696 754 kfree(pos);
697 755 }
  756 +
  757 + list_for_each_entry_safe(se, stemp, &marks->im_extents, bse_node) {
  758 + list_del(&se->bse_node);
  759 + kfree(se);
  760 + }
698 761 return;
699 762 }
700 763  
... ... @@ -779,16 +842,13 @@
779 842 static void free_blk_mountid(struct block_mount_id *mid)
780 843 {
781 844 if (mid) {
782   - struct pnfs_block_dev *dev;
783   - spin_lock(&mid->bm_lock);
784   - while (!list_empty(&mid->bm_devlist)) {
785   - dev = list_first_entry(&mid->bm_devlist,
786   - struct pnfs_block_dev,
787   - bm_node);
  845 + struct pnfs_block_dev *dev, *tmp;
  846 +
  847 + /* No need to take bm_lock as we are last user freeing bm_devlist */
  848 + list_for_each_entry_safe(dev, tmp, &mid->bm_devlist, bm_node) {
788 849 list_del(&dev->bm_node);
789 850 bl_free_block_dev(dev);
790 851 }
791   - spin_unlock(&mid->bm_lock);
792 852 kfree(mid);
793 853 }
794 854 }
fs/nfs/blocklayout/blocklayout.h
... ... @@ -70,6 +70,7 @@
70 70 spinlock_t im_lock;
71 71 struct my_tree im_tree; /* Sectors that need LAYOUTCOMMIT */
72 72 sector_t im_block_size; /* Server blocksize in sectors */
  73 + struct list_head im_extents; /* Short extents for INVAL->RW conversion */
73 74 };
74 75  
75 76 struct pnfs_inval_tracking {
... ... @@ -105,6 +106,7 @@
105 106 {
106 107 spin_lock_init(&marks->im_lock);
107 108 INIT_LIST_HEAD(&marks->im_tree.mtt_stub);
  109 + INIT_LIST_HEAD(&marks->im_extents);
108 110 marks->im_block_size = blocksize;
109 111 marks->im_tree.mtt_step_size = min((sector_t)PAGE_CACHE_SECTORS,
110 112 blocksize);
... ... @@ -186,8 +188,7 @@
186 188 bl_find_get_extent(struct pnfs_block_layout *bl, sector_t isect,
187 189 struct pnfs_block_extent **cow_read);
188 190 int bl_mark_sectors_init(struct pnfs_inval_markings *marks,
189   - sector_t offset, sector_t length,
190   - sector_t **pages);
  191 + sector_t offset, sector_t length);
191 192 void bl_put_extent(struct pnfs_block_extent *be);
192 193 struct pnfs_block_extent *bl_alloc_extent(void);
193 194 int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect);
... ... @@ -200,7 +201,12 @@
200 201 int bl_add_merge_extent(struct pnfs_block_layout *bl,
201 202 struct pnfs_block_extent *new);
202 203 int bl_mark_for_commit(struct pnfs_block_extent *be,
203   - sector_t offset, sector_t length);
  204 + sector_t offset, sector_t length,
  205 + struct pnfs_block_short_extent *new);
  206 +int bl_push_one_short_extent(struct pnfs_inval_markings *marks);
  207 +struct pnfs_block_short_extent *
  208 +bl_pop_one_short_extent(struct pnfs_inval_markings *marks);
  209 +void bl_free_short_extents(struct pnfs_inval_markings *marks, int num_to_free);
204 210  
205 211 #endif /* FS_NFS_NFS4BLOCKLAYOUT_H */
fs/nfs/blocklayout/extents.c
... ... @@ -110,13 +110,7 @@
110 110 return 0;
111 111 } else {
112 112 struct pnfs_inval_tracking *new;
113   - if (storage)
114   - new = storage;
115   - else {
116   - new = kmalloc(sizeof(*new), GFP_NOFS);
117   - if (!new)
118   - return -ENOMEM;
119   - }
  113 + new = storage;
120 114 new->it_sector = s;
121 115 new->it_tags = (1 << tag);
122 116 list_add(&new->it_link, &pos->it_link);
123 117  
... ... @@ -139,11 +133,13 @@
139 133 }
140 134  
141 135 /* Ensure that future operations on given range of tree will not malloc */
142   -static int _preload_range(struct my_tree *tree, u64 offset, u64 length)
  136 +static int _preload_range(struct pnfs_inval_markings *marks,
  137 + u64 offset, u64 length)
143 138 {
144 139 u64 start, end, s;
145 140 int count, i, used = 0, status = -ENOMEM;
146 141 struct pnfs_inval_tracking **storage;
  142 + struct my_tree *tree = &marks->im_tree;
147 143  
148 144 dprintk("%s(%llu, %llu) enter\n", __func__, offset, length);
149 145 start = normalize(offset, tree->mtt_step_size);
... ... @@ -161,12 +157,11 @@
161 157 goto out_cleanup;
162 158 }
163 159  
164   - /* Now need lock - HOW??? */
165   -
  160 + spin_lock_bh(&marks->im_lock);
166 161 for (s = start; s < end; s += tree->mtt_step_size)
167 162 used += _add_entry(tree, s, INTERNAL_EXISTS, storage[used]);
  163 + spin_unlock_bh(&marks->im_lock);
168 164  
169   - /* Unlock - HOW??? */
170 165 status = 0;
171 166  
172 167 out_cleanup:
... ... @@ -179,41 +174,14 @@
179 174 return status;
180 175 }
181 176  
182   -static void set_needs_init(sector_t *array, sector_t offset)
183   -{
184   - sector_t *p = array;
185   -
186   - dprintk("%s enter\n", __func__);
187   - if (!p)
188   - return;
189   - while (*p < offset)
190   - p++;
191   - if (*p == offset)
192   - return;
193   - else if (*p == ~0) {
194   - *p++ = offset;
195   - *p = ~0;
196   - return;
197   - } else {
198   - sector_t *save = p;
199   - dprintk("%s Adding %llu\n", __func__, (u64)offset);
200   - while (*p != ~0)
201   - p++;
202   - p++;
203   - memmove(save + 1, save, (char *)p - (char *)save);
204   - *save = offset;
205   - return;
206   - }
207   -}
208   -
209 177 /* We are relying on page lock to serialize this */
210 178 int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect)
211 179 {
212 180 int rv;
213 181  
214   - spin_lock(&marks->im_lock);
  182 + spin_lock_bh(&marks->im_lock);
215 183 rv = _has_tag(&marks->im_tree, isect, EXTENT_INITIALIZED);
216   - spin_unlock(&marks->im_lock);
  184 + spin_unlock_bh(&marks->im_lock);
217 185 return rv;
218 186 }
219 187  
... ... @@ -253,78 +221,39 @@
253 221 {
254 222 int rv;
255 223  
256   - spin_lock(&marks->im_lock);
  224 + spin_lock_bh(&marks->im_lock);
257 225 rv = _range_has_tag(&marks->im_tree, start, end, EXTENT_WRITTEN);
258   - spin_unlock(&marks->im_lock);
  226 + spin_unlock_bh(&marks->im_lock);
259 227 return rv;
260 228 }
261 229  
262 230 /* Marks sectors in [offest, offset_length) as having been initialized.
263 231 * All lengths are step-aligned, where step is min(pagesize, blocksize).
264   - * Notes where partial block is initialized, and helps prepare it for
265   - * complete initialization later.
  232 + * Currently assumes offset is page-aligned
266 233 */
267   -/* Currently assumes offset is page-aligned */
268 234 int bl_mark_sectors_init(struct pnfs_inval_markings *marks,
269   - sector_t offset, sector_t length,
270   - sector_t **pages)
  235 + sector_t offset, sector_t length)
271 236 {
272   - sector_t s, start, end;
273   - sector_t *array = NULL; /* Pages to mark */
  237 + sector_t start, end;
274 238  
275 239 dprintk("%s(offset=%llu,len=%llu) enter\n",
276 240 __func__, (u64)offset, (u64)length);
277   - s = max((sector_t) 3,
278   - 2 * (marks->im_block_size / (PAGE_CACHE_SECTORS)));
279   - dprintk("%s set max=%llu\n", __func__, (u64)s);
280   - if (pages) {
281   - array = kmalloc(s * sizeof(sector_t), GFP_NOFS);
282   - if (!array)
283   - goto outerr;
284   - array[0] = ~0;
285   - }
286 241  
287 242 start = normalize(offset, marks->im_block_size);
288 243 end = normalize_up(offset + length, marks->im_block_size);
289   - if (_preload_range(&marks->im_tree, start, end - start))
  244 + if (_preload_range(marks, start, end - start))
290 245 goto outerr;
291 246  
292   - spin_lock(&marks->im_lock);
293   -
294   - for (s = normalize_up(start, PAGE_CACHE_SECTORS);
295   - s < offset; s += PAGE_CACHE_SECTORS) {
296   - dprintk("%s pre-area pages\n", __func__);
297   - /* Portion of used block is not initialized */
298   - if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED))
299   - set_needs_init(array, s);
300   - }
  247 + spin_lock_bh(&marks->im_lock);
301 248 if (_set_range(&marks->im_tree, EXTENT_INITIALIZED, offset, length))
302 249 goto out_unlock;
303   - for (s = normalize_up(offset + length, PAGE_CACHE_SECTORS);
304   - s < end; s += PAGE_CACHE_SECTORS) {
305   - dprintk("%s post-area pages\n", __func__);
306   - if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED))
307   - set_needs_init(array, s);
308   - }
  250 + spin_unlock_bh(&marks->im_lock);
309 251  
310   - spin_unlock(&marks->im_lock);
311   -
312   - if (pages) {
313   - if (array[0] == ~0) {
314   - kfree(array);
315   - *pages = NULL;
316   - } else
317   - *pages = array;
318   - }
319 252 return 0;
320 253  
321   - out_unlock:
322   - spin_unlock(&marks->im_lock);
323   - outerr:
324   - if (pages) {
325   - kfree(array);
326   - *pages = NULL;
327   - }
  254 +out_unlock:
  255 + spin_unlock_bh(&marks->im_lock);
  256 +outerr:
328 257 return -ENOMEM;
329 258 }
330 259  
... ... @@ -338,9 +267,9 @@
338 267  
339 268 dprintk("%s(offset=%llu,len=%llu) enter\n", __func__,
340 269 (u64)offset, (u64)length);
341   - spin_lock(&marks->im_lock);
  270 + spin_lock_bh(&marks->im_lock);
342 271 status = _set_range(&marks->im_tree, EXTENT_WRITTEN, offset, length);
343   - spin_unlock(&marks->im_lock);
  272 + spin_unlock_bh(&marks->im_lock);
344 273 return status;
345 274 }
346 275  
... ... @@ -440,20 +369,18 @@
440 369  
441 370 /* Note the range described by offset, length is guaranteed to be contained
442 371 * within be.
  372 + * new will be freed, either by this function or add_to_commitlist if they
  373 + * decide not to use it, or after LAYOUTCOMMIT uses it in the commitlist.
443 374 */
444 375 int bl_mark_for_commit(struct pnfs_block_extent *be,
445   - sector_t offset, sector_t length)
  376 + sector_t offset, sector_t length,
  377 + struct pnfs_block_short_extent *new)
446 378 {
447 379 sector_t new_end, end = offset + length;
448   - struct pnfs_block_short_extent *new;
449 380 struct pnfs_block_layout *bl = container_of(be->be_inval,
450 381 struct pnfs_block_layout,
451 382 bl_inval);
452 383  
453   - new = kmalloc(sizeof(*new), GFP_NOFS);
454   - if (!new)
455   - return -ENOMEM;
456   -
457 384 mark_written_sectors(be->be_inval, offset, length);
458 385 /* We want to add the range to commit list, but it must be
459 386 * block-normalized, and verified that the normalized range has
... ... @@ -483,9 +410,6 @@
483 410 new->bse_mdev = be->be_mdev;
484 411  
485 412 spin_lock(&bl->bl_ext_lock);
486   - /* new will be freed, either by add_to_commitlist if it decides not
487   - * to use it, or after LAYOUTCOMMIT uses it in the commitlist.
488   - */
489 413 add_to_commitlist(bl, new);
490 414 spin_unlock(&bl->bl_ext_lock);
491 415 return 0;
... ... @@ -932,5 +856,55 @@
932 856 spin_unlock(&bl->bl_ext_lock);
933 857 }
934 858 }
  859 +}
  860 +
  861 +int bl_push_one_short_extent(struct pnfs_inval_markings *marks)
  862 +{
  863 + struct pnfs_block_short_extent *new;
  864 +
  865 + new = kmalloc(sizeof(*new), GFP_NOFS);
  866 + if (unlikely(!new))
  867 + return -ENOMEM;
  868 +
  869 + spin_lock_bh(&marks->im_lock);
  870 + list_add(&new->bse_node, &marks->im_extents);
  871 + spin_unlock_bh(&marks->im_lock);
  872 +
  873 + return 0;
  874 +}
  875 +
  876 +struct pnfs_block_short_extent *
  877 +bl_pop_one_short_extent(struct pnfs_inval_markings *marks)
  878 +{
  879 + struct pnfs_block_short_extent *rv = NULL;
  880 +
  881 + spin_lock_bh(&marks->im_lock);
  882 + if (!list_empty(&marks->im_extents)) {
  883 + rv = list_entry((&marks->im_extents)->next,
  884 + struct pnfs_block_short_extent, bse_node);
  885 + list_del_init(&rv->bse_node);
  886 + }
  887 + spin_unlock_bh(&marks->im_lock);
  888 +
  889 + return rv;
  890 +}
  891 +
  892 +void bl_free_short_extents(struct pnfs_inval_markings *marks, int num_to_free)
  893 +{
  894 + struct pnfs_block_short_extent *se = NULL, *tmp;
  895 +
  896 + if (num_to_free <= 0)
  897 + return;
  898 +
  899 + spin_lock(&marks->im_lock);
  900 + list_for_each_entry_safe(se, tmp, &marks->im_extents, bse_node) {
  901 + list_del(&se->bse_node);
  902 + kfree(se);
  903 + if (--num_to_free == 0)
  904 + break;
  905 + }
  906 + spin_unlock(&marks->im_lock);
  907 +
  908 + BUG_ON(num_to_free > 0);
935 909 }
fs/nfs/callback.h
... ... @@ -162,7 +162,7 @@
162 162 };
163 163 };
164 164  
165   -extern unsigned nfs4_callback_layoutrecall(
  165 +extern __be32 nfs4_callback_layoutrecall(
166 166 struct cb_layoutrecallargs *args,
167 167 void *dummy, struct cb_process_state *cps);
168 168  
fs/nfs/callback_xdr.c
... ... @@ -305,6 +305,10 @@
305 305 n = ntohl(*p++);
306 306 if (n <= 0)
307 307 goto out;
  308 + if (n > ULONG_MAX / sizeof(*args->devs)) {
  309 + status = htonl(NFS4ERR_BADXDR);
  310 + goto out;
  311 + }
308 312  
309 313 args->devs = kmalloc(n * sizeof(*args->devs), GFP_KERNEL);
310 314 if (!args->devs) {
fs/nfs/nfs4filelayoutdev.c
... ... @@ -382,7 +382,7 @@
382 382 {
383 383 struct nfs4_pnfs_ds_addr *da = NULL;
384 384 char *buf, *portstr;
385   - u32 port;
  385 + __be16 port;
386 386 int nlen, rlen;
387 387 int tmp[2];
388 388 __be32 *p;
fs/nfs/nfs4proc.c
... ... @@ -3587,7 +3587,7 @@
3587 3587 res.acl_flags |= NFS4_ACL_LEN_REQUEST;
3588 3588 resp_buf = page_address(pages[0]);
3589 3589  
3590   - dprintk("%s buf %p buflen %ld npages %d args.acl_len %ld\n",
  3590 + dprintk("%s buf %p buflen %zu npages %d args.acl_len %zu\n",
3591 3591 __func__, buf, buflen, npages, args.acl_len);
3592 3592 ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode),
3593 3593 &msg, &args.seq_args, &res.seq_res, 0);