Commit 03158cd7eb3374843de68421142ca5900df845d9

Authored by Nick Piggin
Committed by Linus Torvalds
1 parent b6af1bcd87

fs: restore nobh

Implement nobh in the new aops.  This is a bit tricky.  FWIW, nobh_truncate is
now implemented so that it does not create blocks in sparse regions, which the
old implementation did for no good reason.

ext2 survives fsx and fsstress.  jfs is converted as well; ext3 should be
straightforward to convert, but that has not been done yet.

[akpm@linux-foundation.org: coding-style fixes]
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
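
For orientation, a minimal sketch of how a nobh filesystem is expected to wire the
new hooks into its address_space_operations, mirroring the ext2 and jfs conversions
in the diff below.  The myfs_* names are hypothetical placeholders; myfs_get_block
is assumed to be an ordinary get_block_t defined elsewhere in the filesystem.

#include <linux/fs.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
#include <linux/buffer_head.h>

/* hypothetical block-mapping callback, defined elsewhere in the fs */
static int myfs_get_block(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create);

static int myfs_readpage(struct file *file, struct page *page)
{
	return mpage_readpage(page, myfs_get_block);
}

static int myfs_writepage(struct page *page, struct writeback_control *wbc)
{
	/* write the page out without ever attaching buffer_heads to it */
	return nobh_writepage(page, myfs_get_block, wbc);
}

static int myfs_write_begin(struct file *file, struct address_space *mapping,
		loff_t pos, unsigned len, unsigned flags,
		struct page **pagep, void **fsdata)
{
	/*
	 * nobh_write_begin() grabs and locks the page itself, maps blocks
	 * with myfs_get_block() and stashes its temporary buffer_head list
	 * in *fsdata for nobh_write_end() to free.
	 */
	return nobh_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
				myfs_get_block);
}

static const struct address_space_operations myfs_nobh_aops = {
	.readpage	= myfs_readpage,
	.writepage	= myfs_writepage,
	.sync_page	= block_sync_page,
	.write_begin	= myfs_write_begin,
	.write_end	= nobh_write_end,
};

The truncate path would call nobh_truncate_page(mapping, inode->i_size, myfs_get_block)
under the same assumptions, as the ext2 and jfs hunks below do.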

Showing 4 changed files with 178 additions and 88 deletions

fs/buffer.c
... ... @@ -2369,7 +2369,7 @@
2369 2369 }
2370 2370  
2371 2371 /*
2372   - * nobh_prepare_write()'s prereads are special: the buffer_heads are freed
  2372 + * nobh_write_begin()'s prereads are special: the buffer_heads are freed
2373 2373 * immediately, while under the page lock. So it needs a special end_io
2374 2374 * handler which does not touch the bh after unlocking it.
2375 2375 */
... ... @@ -2379,16 +2379,45 @@
2379 2379 }
2380 2380  
2381 2381 /*
  2382 + * Attach the singly-linked list of buffers created by nobh_write_begin, to
  2383 + * the page (converting it to circular linked list and taking care of page
  2384 + * dirty races).
  2385 + */
  2386 +static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
  2387 +{
  2388 + struct buffer_head *bh;
  2389 +
  2390 + BUG_ON(!PageLocked(page));
  2391 +
  2392 + spin_lock(&page->mapping->private_lock);
  2393 + bh = head;
  2394 + do {
  2395 + if (PageDirty(page))
  2396 + set_buffer_dirty(bh);
  2397 + if (!bh->b_this_page)
  2398 + bh->b_this_page = head;
  2399 + bh = bh->b_this_page;
  2400 + } while (bh != head);
  2401 + attach_page_buffers(page, head);
  2402 + spin_unlock(&page->mapping->private_lock);
  2403 +}
  2404 +
  2405 +/*
2382 2406 * On entry, the page is fully not uptodate.
2383 2407 * On exit the page is fully uptodate in the areas outside (from,to)
2384 2408 */
2385   -int nobh_prepare_write(struct page *page, unsigned from, unsigned to,
  2409 +int nobh_write_begin(struct file *file, struct address_space *mapping,
  2410 + loff_t pos, unsigned len, unsigned flags,
  2411 + struct page **pagep, void **fsdata,
2386 2412 get_block_t *get_block)
2387 2413 {
2388   - struct inode *inode = page->mapping->host;
  2414 + struct inode *inode = mapping->host;
2389 2415 const unsigned blkbits = inode->i_blkbits;
2390 2416 const unsigned blocksize = 1 << blkbits;
2391 2417 struct buffer_head *head, *bh;
  2418 + struct page *page;
  2419 + pgoff_t index;
  2420 + unsigned from, to;
2392 2421 unsigned block_in_page;
2393 2422 unsigned block_start, block_end;
2394 2423 sector_t block_in_file;
... ... @@ -2397,9 +2426,24 @@
2397 2426 int ret = 0;
2398 2427 int is_mapped_to_disk = 1;
2399 2428  
2400   - if (page_has_buffers(page))
2401   - return block_prepare_write(page, from, to, get_block);
  2429 + index = pos >> PAGE_CACHE_SHIFT;
  2430 + from = pos & (PAGE_CACHE_SIZE - 1);
  2431 + to = from + len;
2402 2432  
  2433 + page = __grab_cache_page(mapping, index);
  2434 + if (!page)
  2435 + return -ENOMEM;
  2436 + *pagep = page;
  2437 + *fsdata = NULL;
  2438 +
  2439 + if (page_has_buffers(page)) {
  2440 + unlock_page(page);
  2441 + page_cache_release(page);
  2442 + *pagep = NULL;
  2443 + return block_write_begin(file, mapping, pos, len, flags, pagep,
  2444 + fsdata, get_block);
  2445 + }
  2446 +
2403 2447 if (PageMappedToDisk(page))
2404 2448 return 0;
2405 2449  
... ... @@ -2413,8 +2457,10 @@
2413 2457 * than the circular one we're used to.
2414 2458 */
2415 2459 head = alloc_page_buffers(page, blocksize, 0);
2416   - if (!head)
2417   - return -ENOMEM;
  2460 + if (!head) {
  2461 + ret = -ENOMEM;
  2462 + goto out_release;
  2463 + }
2418 2464  
2419 2465 block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
2420 2466  
... ... @@ -2483,15 +2529,12 @@
2483 2529 if (is_mapped_to_disk)
2484 2530 SetPageMappedToDisk(page);
2485 2531  
2486   - do {
2487   - bh = head;
2488   - head = head->b_this_page;
2489   - free_buffer_head(bh);
2490   - } while (head);
  2532 + *fsdata = head; /* to be released by nobh_write_end */
2491 2533  
2492 2534 return 0;
2493 2535  
2494 2536 failed:
  2537 + BUG_ON(!ret);
2495 2538 /*
2496 2539 * Error recovery is a bit difficult. We need to zero out blocks that
2497 2540 * were newly allocated, and dirty them to ensure they get written out.
... ... @@ -2499,64 +2542,57 @@
2499 2542 * the handling of potential IO errors during writeout would be hard
2500 2543 * (could try doing synchronous writeout, but what if that fails too?)
2501 2544 */
2502   - spin_lock(&page->mapping->private_lock);
2503   - bh = head;
2504   - block_start = 0;
2505   - do {
2506   - if (PageUptodate(page))
2507   - set_buffer_uptodate(bh);
2508   - if (PageDirty(page))
2509   - set_buffer_dirty(bh);
  2545 + attach_nobh_buffers(page, head);
  2546 + page_zero_new_buffers(page, from, to);
2510 2547  
2511   - block_end = block_start+blocksize;
2512   - if (block_end <= from)
2513   - goto next;
2514   - if (block_start >= to)
2515   - goto next;
  2548 +out_release:
  2549 + unlock_page(page);
  2550 + page_cache_release(page);
  2551 + *pagep = NULL;
2516 2552  
2517   - if (buffer_new(bh)) {
2518   - clear_buffer_new(bh);
2519   - if (!buffer_uptodate(bh)) {
2520   - zero_user_page(page, block_start, bh->b_size, KM_USER0);
2521   - set_buffer_uptodate(bh);
2522   - }
2523   - mark_buffer_dirty(bh);
2524   - }
2525   -next:
2526   - block_start = block_end;
2527   - if (!bh->b_this_page)
2528   - bh->b_this_page = head;
2529   - bh = bh->b_this_page;
2530   - } while (bh != head);
2531   - attach_page_buffers(page, head);
2532   - spin_unlock(&page->mapping->private_lock);
  2553 + if (pos + len > inode->i_size)
  2554 + vmtruncate(inode, inode->i_size);
2533 2555  
2534 2556 return ret;
2535 2557 }
2536   -EXPORT_SYMBOL(nobh_prepare_write);
  2558 +EXPORT_SYMBOL(nobh_write_begin);
2537 2559  
2538   -/*
2539   - * Make sure any changes to nobh_commit_write() are reflected in
2540   - * nobh_truncate_page(), since it doesn't call commit_write().
2541   - */
2542   -int nobh_commit_write(struct file *file, struct page *page,
2543   - unsigned from, unsigned to)
  2560 +int nobh_write_end(struct file *file, struct address_space *mapping,
  2561 + loff_t pos, unsigned len, unsigned copied,
  2562 + struct page *page, void *fsdata)
2544 2563 {
2545 2564 struct inode *inode = page->mapping->host;
2546   - loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
  2565 + struct buffer_head *head = NULL;
  2566 + struct buffer_head *bh;
2547 2567  
2548   - if (page_has_buffers(page))
2549   - return generic_commit_write(file, page, from, to);
  2568 + if (!PageMappedToDisk(page)) {
  2569 + if (unlikely(copied < len) && !page_has_buffers(page))
  2570 + attach_nobh_buffers(page, head);
  2571 + if (page_has_buffers(page))
  2572 + return generic_write_end(file, mapping, pos, len,
  2573 + copied, page, fsdata);
  2574 + }
2550 2575  
2551 2576 SetPageUptodate(page);
2552 2577 set_page_dirty(page);
2553   - if (pos > inode->i_size) {
2554   - i_size_write(inode, pos);
  2578 + if (pos+copied > inode->i_size) {
  2579 + i_size_write(inode, pos+copied);
2555 2580 mark_inode_dirty(inode);
2556 2581 }
2557   - return 0;
  2582 +
  2583 + unlock_page(page);
  2584 + page_cache_release(page);
  2585 +
  2586 + head = fsdata;
  2587 + while (head) {
  2588 + bh = head;
  2589 + head = head->b_this_page;
  2590 + free_buffer_head(bh);
  2591 + }
  2592 +
  2593 + return copied;
2558 2594 }
2559   -EXPORT_SYMBOL(nobh_commit_write);
  2595 +EXPORT_SYMBOL(nobh_write_end);
2560 2596  
2561 2597 /*
2562 2598 * nobh_writepage() - based on block_full_write_page() except
... ... @@ -2609,44 +2645,79 @@
2609 2645 }
2610 2646 EXPORT_SYMBOL(nobh_writepage);
2611 2647  
2612   -/*
2613   - * This function assumes that ->prepare_write() uses nobh_prepare_write().
2614   - */
2615   -int nobh_truncate_page(struct address_space *mapping, loff_t from)
  2648 +int nobh_truncate_page(struct address_space *mapping,
  2649 + loff_t from, get_block_t *get_block)
2616 2650 {
2617   - struct inode *inode = mapping->host;
2618   - unsigned blocksize = 1 << inode->i_blkbits;
2619 2651 pgoff_t index = from >> PAGE_CACHE_SHIFT;
2620 2652 unsigned offset = from & (PAGE_CACHE_SIZE-1);
2621   - unsigned to;
  2653 + unsigned blocksize;
  2654 + sector_t iblock;
  2655 + unsigned length, pos;
  2656 + struct inode *inode = mapping->host;
2622 2657 struct page *page;
2623   - const struct address_space_operations *a_ops = mapping->a_ops;
2624   - int ret = 0;
  2658 + struct buffer_head map_bh;
  2659 + int err;
2625 2660  
2626   - if ((offset & (blocksize - 1)) == 0)
2627   - goto out;
  2661 + blocksize = 1 << inode->i_blkbits;
  2662 + length = offset & (blocksize - 1);
2628 2663  
2629   - ret = -ENOMEM;
  2664 + /* Block boundary? Nothing to do */
  2665 + if (!length)
  2666 + return 0;
  2667 +
  2668 + length = blocksize - length;
  2669 + iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
  2670 +
2630 2671 page = grab_cache_page(mapping, index);
  2672 + err = -ENOMEM;
2631 2673 if (!page)
2632 2674 goto out;
2633 2675  
2634   - to = (offset + blocksize) & ~(blocksize - 1);
2635   - ret = a_ops->prepare_write(NULL, page, offset, to);
2636   - if (ret == 0) {
2637   - zero_user_page(page, offset, PAGE_CACHE_SIZE - offset,
2638   - KM_USER0);
2639   - /*
2640   - * It would be more correct to call aops->commit_write()
2641   - * here, but this is more efficient.
2642   - */
2643   - SetPageUptodate(page);
2644   - set_page_dirty(page);
  2676 + if (page_has_buffers(page)) {
  2677 +has_buffers:
  2678 + unlock_page(page);
  2679 + page_cache_release(page);
  2680 + return block_truncate_page(mapping, from, get_block);
2645 2681 }
  2682 +
  2683 + /* Find the buffer that contains "offset" */
  2684 + pos = blocksize;
  2685 + while (offset >= pos) {
  2686 + iblock++;
  2687 + pos += blocksize;
  2688 + }
  2689 +
  2690 + err = get_block(inode, iblock, &map_bh, 0);
  2691 + if (err)
  2692 + goto unlock;
  2693 + /* unmapped? It's a hole - nothing to do */
  2694 + if (!buffer_mapped(&map_bh))
  2695 + goto unlock;
  2696 +
  2697 + /* Ok, it's mapped. Make sure it's up-to-date */
  2698 + if (!PageUptodate(page)) {
  2699 + err = mapping->a_ops->readpage(NULL, page);
  2700 + if (err) {
  2701 + page_cache_release(page);
  2702 + goto out;
  2703 + }
  2704 + lock_page(page);
  2705 + if (!PageUptodate(page)) {
  2706 + err = -EIO;
  2707 + goto unlock;
  2708 + }
  2709 + if (page_has_buffers(page))
  2710 + goto has_buffers;
  2711 + }
  2712 + zero_user_page(page, offset, length, KM_USER0);
  2713 + set_page_dirty(page);
  2714 + err = 0;
  2715 +
  2716 +unlock:
2646 2717 unlock_page(page);
2647 2718 page_cache_release(page);
2648 2719 out:
2649   - return ret;
  2720 + return err;
2650 2721 }
2651 2722 EXPORT_SYMBOL(nobh_truncate_page);
2652 2723  
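
The key change above is that nobh_write_begin() no longer frees its temporary
buffer_head list on success; it parks the list in *fsdata and nobh_write_end()
frees it.  A simplified, hypothetical caller-side sketch of how the generic
buffered-write path threads that fsdata pointer between the two hooks (not the
actual mm/filemap.c loop; locking, iov handling and short-copy retries omitted):

#include <linux/fs.h>
#include <linux/pagemap.h>

static int sketch_one_write(struct file *file, struct address_space *mapping,
		loff_t pos, unsigned bytes)
{
	const struct address_space_operations *a_ops = mapping->a_ops;
	struct page *page;
	void *fsdata;
	unsigned copied;
	int status;

	/* nobh_write_begin() locks the page and stashes its bh list in fsdata */
	status = a_ops->write_begin(file, mapping, pos, bytes, 0,
				    &page, &fsdata);
	if (status)
		return status;

	/* ... the real loop copies the user's data into the page here ... */
	copied = bytes;		/* assume the copy fully succeeded */

	/* nobh_write_end() dirties the page and frees the bh list from fsdata */
	return a_ops->write_end(file, mapping, pos, bytes, copied,
				page, fsdata);
}
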
fs/ext2/inode.c
... ... @@ -659,6 +659,20 @@
659 659 return __ext2_write_begin(file, mapping, pos, len, flags, pagep,fsdata);
660 660 }
661 661  
  662 +static int
  663 +ext2_nobh_write_begin(struct file *file, struct address_space *mapping,
  664 + loff_t pos, unsigned len, unsigned flags,
  665 + struct page **pagep, void **fsdata)
  666 +{
  667 + /*
  668 + * Dir-in-pagecache still uses ext2_write_begin. Would have to rework
  669 + * directory handling code to pass around offsets rather than struct
  670 + * pages in order to make this work easily.
  671 + */
  672 + return nobh_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
  673 + ext2_get_block);
  674 +}
  675 +
662 676 static int ext2_nobh_writepage(struct page *page,
663 677 struct writeback_control *wbc)
664 678 {
... ... @@ -710,7 +724,8 @@
710 724 .readpages = ext2_readpages,
711 725 .writepage = ext2_nobh_writepage,
712 726 .sync_page = block_sync_page,
713   - /* XXX: todo */
  727 + .write_begin = ext2_nobh_write_begin,
  728 + .write_end = nobh_write_end,
714 729 .bmap = ext2_bmap,
715 730 .direct_IO = ext2_direct_IO,
716 731 .writepages = ext2_writepages,
... ... @@ -927,7 +942,8 @@
927 942 if (mapping_is_xip(inode->i_mapping))
928 943 xip_truncate_page(inode->i_mapping, inode->i_size);
929 944 else if (test_opt(inode->i_sb, NOBH))
930   - nobh_truncate_page(inode->i_mapping, inode->i_size);
  945 + nobh_truncate_page(inode->i_mapping,
  946 + inode->i_size, ext2_get_block);
931 947 else
932 948 block_truncate_page(inode->i_mapping,
933 949 inode->i_size, ext2_get_block);
fs/jfs/inode.c
... ... @@ -279,8 +279,7 @@
279 279 loff_t pos, unsigned len, unsigned flags,
280 280 struct page **pagep, void **fsdata)
281 281 {
282   - *pagep = NULL;
283   - return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
  282 + return nobh_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
284 283 jfs_get_block);
285 284 }
286 285  
... ... @@ -306,7 +305,7 @@
306 305 .writepages = jfs_writepages,
307 306 .sync_page = block_sync_page,
308 307 .write_begin = jfs_write_begin,
309   - .write_end = generic_write_end,
  308 + .write_end = nobh_write_end,
310 309 .bmap = jfs_bmap,
311 310 .direct_IO = jfs_direct_IO,
312 311 };
... ... @@ -359,7 +358,7 @@
359 358 {
360 359 jfs_info("jfs_truncate: size = 0x%lx", (ulong) ip->i_size);
361 360  
362   - block_truncate_page(ip->i_mapping, ip->i_size, jfs_get_block);
  361 + nobh_truncate_page(ip->i_mapping, ip->i_size, jfs_get_block);
363 362  
364 363 IWRITE_LOCK(ip, RDWRLOCK_NORMAL);
365 364 jfs_truncate_nolock(ip, ip->i_size);
include/linux/buffer_head.h
... ... @@ -226,9 +226,13 @@
226 226 int generic_commit_write(struct file *, struct page *, unsigned, unsigned);
227 227 int block_truncate_page(struct address_space *, loff_t, get_block_t *);
228 228 int file_fsync(struct file *, struct dentry *, int);
229   -int nobh_prepare_write(struct page*, unsigned, unsigned, get_block_t*);
230   -int nobh_commit_write(struct file *, struct page *, unsigned, unsigned);
231   -int nobh_truncate_page(struct address_space *, loff_t);
  229 +int nobh_write_begin(struct file *, struct address_space *,
  230 + loff_t, unsigned, unsigned,
  231 + struct page **, void **, get_block_t*);
  232 +int nobh_write_end(struct file *, struct address_space *,
  233 + loff_t, unsigned, unsigned,
  234 + struct page *, void *);
  235 +int nobh_truncate_page(struct address_space *, loff_t, get_block_t *);
232 236 int nobh_writepage(struct page *page, get_block_t *get_block,
233 237 struct writeback_control *wbc);
234 238