Commit 03158cd7eb3374843de68421142ca5900df845d9
Committed by: Linus Torvalds
1 parent: b6af1bcd87
Exists in: master and 39 other branches
fs: restore nobh
Implement nobh in new aops. This is a bit tricky. FWIW, nobh_truncate is
now implemented in a way that does not create blocks in sparse regions,
which is a silly thing for it to have been doing (isn't it?)

ext2 survives fsx and fsstress. jfs is converted as well... ext3 should be
easy to do (but not done yet).

[akpm@linux-foundation.org: coding-style fixes]
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
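For readers converting another filesystem, the following is a minimal sketch (not part of this commit) of how the new nobh hooks are wired up, mirroring the ext2 and jfs changes below. The foo_* names are hypothetical placeholders; foo_get_block stands in for the filesystem's real get_block_t callback.

    /* Hypothetical "foo" filesystem: glue for the new nobh aops. */
    static int foo_write_begin(struct file *file, struct address_space *mapping,
                               loff_t pos, unsigned len, unsigned flags,
                               struct page **pagep, void **fsdata)
    {
            /* nobh_write_begin grabs and locks the page itself and hands its
             * temporary buffer_head list back through *fsdata. */
            return nobh_write_begin(file, mapping, pos, len, flags, pagep,
                                    fsdata, foo_get_block);
    }

    static const struct address_space_operations foo_aops = {
            .readpage    = foo_readpage,
            .writepage   = foo_nobh_writepage,
            .sync_page   = block_sync_page,
            .write_begin = foo_write_begin,
            .write_end   = nobh_write_end,  /* frees the list handed over in *fsdata */
            .bmap        = foo_bmap,
    };

    /* In the truncate path, the get_block callback is now passed explicitly: */
    nobh_truncate_page(inode->i_mapping, inode->i_size, foo_get_block);

Compared with the old nobh_prepare_write()/nobh_commit_write() pair, the caller no longer supplies a locked page: nobh_write_begin() takes (pos, len), grabs the page itself, and nobh_write_end() unlocks and releases it and frees the buffer_head list.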
Showing 4 changed files with 178 additions and 88 deletions
fs/buffer.c
@@ -2369,7 +2369,7 @@
 }
 
 /*
- * nobh_prepare_write()'s prereads are special: the buffer_heads are freed
+ * nobh_write_begin()'s prereads are special: the buffer_heads are freed
  * immediately, while under the page lock. So it needs a special end_io
  * handler which does not touch the bh after unlocking it.
  */
@@ -2379,16 +2379,45 @@
 }
 
 /*
+ * Attach the singly-linked list of buffers created by nobh_write_begin, to
+ * the page (converting it to circular linked list and taking care of page
+ * dirty races).
+ */
+static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
+{
+	struct buffer_head *bh;
+
+	BUG_ON(!PageLocked(page));
+
+	spin_lock(&page->mapping->private_lock);
+	bh = head;
+	do {
+		if (PageDirty(page))
+			set_buffer_dirty(bh);
+		if (!bh->b_this_page)
+			bh->b_this_page = head;
+		bh = bh->b_this_page;
+	} while (bh != head);
+	attach_page_buffers(page, head);
+	spin_unlock(&page->mapping->private_lock);
+}
+
+/*
  * On entry, the page is fully not uptodate.
  * On exit the page is fully uptodate in the areas outside (from,to)
  */
-int nobh_prepare_write(struct page *page, unsigned from, unsigned to,
+int nobh_write_begin(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned flags,
+			struct page **pagep, void **fsdata,
 			get_block_t *get_block)
 {
-	struct inode *inode = page->mapping->host;
+	struct inode *inode = mapping->host;
 	const unsigned blkbits = inode->i_blkbits;
 	const unsigned blocksize = 1 << blkbits;
 	struct buffer_head *head, *bh;
+	struct page *page;
+	pgoff_t index;
+	unsigned from, to;
 	unsigned block_in_page;
 	unsigned block_start, block_end;
 	sector_t block_in_file;
@@ -2397,9 +2426,24 @@
 	int ret = 0;
 	int is_mapped_to_disk = 1;
 
-	if (page_has_buffers(page))
-		return block_prepare_write(page, from, to, get_block);
+	index = pos >> PAGE_CACHE_SHIFT;
+	from = pos & (PAGE_CACHE_SIZE - 1);
+	to = from + len;
 
+	page = __grab_cache_page(mapping, index);
+	if (!page)
+		return -ENOMEM;
+	*pagep = page;
+	*fsdata = NULL;
+
+	if (page_has_buffers(page)) {
+		unlock_page(page);
+		page_cache_release(page);
+		*pagep = NULL;
+		return block_write_begin(file, mapping, pos, len, flags, pagep,
+					fsdata, get_block);
+	}
+
 	if (PageMappedToDisk(page))
 		return 0;
 
@@ -2413,8 +2457,10 @@
 	 * than the circular one we're used to.
 	 */
 	head = alloc_page_buffers(page, blocksize, 0);
-	if (!head)
-		return -ENOMEM;
+	if (!head) {
+		ret = -ENOMEM;
+		goto out_release;
+	}
 
 	block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
 
@@ -2483,15 +2529,12 @@
 	if (is_mapped_to_disk)
 		SetPageMappedToDisk(page);
 
-	do {
-		bh = head;
-		head = head->b_this_page;
-		free_buffer_head(bh);
-	} while (head);
+	*fsdata = head; /* to be released by nobh_write_end */
 
 	return 0;
 
 failed:
+	BUG_ON(!ret);
 	/*
 	 * Error recovery is a bit difficult. We need to zero out blocks that
 	 * were newly allocated, and dirty them to ensure they get written out.
@@ -2499,64 +2542,57 @@
 	 * the handling of potential IO errors during writeout would be hard
 	 * (could try doing synchronous writeout, but what if that fails too?)
 	 */
-	spin_lock(&page->mapping->private_lock);
-	bh = head;
-	block_start = 0;
-	do {
-		if (PageUptodate(page))
-			set_buffer_uptodate(bh);
-		if (PageDirty(page))
-			set_buffer_dirty(bh);
+	attach_nobh_buffers(page, head);
+	page_zero_new_buffers(page, from, to);
 
-		block_end = block_start+blocksize;
-		if (block_end <= from)
-			goto next;
-		if (block_start >= to)
-			goto next;
+out_release:
+	unlock_page(page);
+	page_cache_release(page);
+	*pagep = NULL;
 
-		if (buffer_new(bh)) {
-			clear_buffer_new(bh);
-			if (!buffer_uptodate(bh)) {
-				zero_user_page(page, block_start, bh->b_size, KM_USER0);
-				set_buffer_uptodate(bh);
-			}
-			mark_buffer_dirty(bh);
-		}
-next:
-		block_start = block_end;
-		if (!bh->b_this_page)
-			bh->b_this_page = head;
-		bh = bh->b_this_page;
-	} while (bh != head);
-	attach_page_buffers(page, head);
-	spin_unlock(&page->mapping->private_lock);
+	if (pos + len > inode->i_size)
+		vmtruncate(inode, inode->i_size);
 
 	return ret;
 }
-EXPORT_SYMBOL(nobh_prepare_write);
+EXPORT_SYMBOL(nobh_write_begin);
 
-/*
- * Make sure any changes to nobh_commit_write() are reflected in
- * nobh_truncate_page(), since it doesn't call commit_write().
- */
-int nobh_commit_write(struct file *file, struct page *page,
-		unsigned from, unsigned to)
+int nobh_write_end(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned copied,
+			struct page *page, void *fsdata)
 {
 	struct inode *inode = page->mapping->host;
-	loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
+	struct buffer_head *head = NULL;
+	struct buffer_head *bh;
 
-	if (page_has_buffers(page))
-		return generic_commit_write(file, page, from, to);
+	if (!PageMappedToDisk(page)) {
+		if (unlikely(copied < len) && !page_has_buffers(page))
+			attach_nobh_buffers(page, head);
+		if (page_has_buffers(page))
+			return generic_write_end(file, mapping, pos, len,
+						copied, page, fsdata);
+	}
 
 	SetPageUptodate(page);
 	set_page_dirty(page);
-	if (pos > inode->i_size) {
-		i_size_write(inode, pos);
+	if (pos+copied > inode->i_size) {
+		i_size_write(inode, pos+copied);
 		mark_inode_dirty(inode);
 	}
-	return 0;
+
+	unlock_page(page);
+	page_cache_release(page);
+
+	head = fsdata;
+	while (head) {
+		bh = head;
+		head = head->b_this_page;
+		free_buffer_head(bh);
+	}
+
+	return copied;
 }
-EXPORT_SYMBOL(nobh_commit_write);
+EXPORT_SYMBOL(nobh_write_end);
 
 /*
  * nobh_writepage() - based on block_full_write_page() except
@@ -2609,44 +2645,79 @@
 }
 EXPORT_SYMBOL(nobh_writepage);
 
-/*
- * This function assumes that ->prepare_write() uses nobh_prepare_write().
- */
-int nobh_truncate_page(struct address_space *mapping, loff_t from)
+int nobh_truncate_page(struct address_space *mapping,
+			loff_t from, get_block_t *get_block)
 {
-	struct inode *inode = mapping->host;
-	unsigned blocksize = 1 << inode->i_blkbits;
 	pgoff_t index = from >> PAGE_CACHE_SHIFT;
 	unsigned offset = from & (PAGE_CACHE_SIZE-1);
-	unsigned to;
+	unsigned blocksize;
+	sector_t iblock;
+	unsigned length, pos;
+	struct inode *inode = mapping->host;
 	struct page *page;
-	const struct address_space_operations *a_ops = mapping->a_ops;
-	int ret = 0;
+	struct buffer_head map_bh;
+	int err;
 
-	if ((offset & (blocksize - 1)) == 0)
-		goto out;
+	blocksize = 1 << inode->i_blkbits;
+	length = offset & (blocksize - 1);
 
-	ret = -ENOMEM;
+	/* Block boundary? Nothing to do */
+	if (!length)
+		return 0;
+
+	length = blocksize - length;
+	iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+
 	page = grab_cache_page(mapping, index);
+	err = -ENOMEM;
 	if (!page)
 		goto out;
 
-	to = (offset + blocksize) & ~(blocksize - 1);
-	ret = a_ops->prepare_write(NULL, page, offset, to);
-	if (ret == 0) {
-		zero_user_page(page, offset, PAGE_CACHE_SIZE - offset,
-				KM_USER0);
-		/*
-		 * It would be more correct to call aops->commit_write()
-		 * here, but this is more efficient.
-		 */
-		SetPageUptodate(page);
-		set_page_dirty(page);
+	if (page_has_buffers(page)) {
+has_buffers:
+		unlock_page(page);
+		page_cache_release(page);
+		return block_truncate_page(mapping, from, get_block);
 	}
+
+	/* Find the buffer that contains "offset" */
+	pos = blocksize;
+	while (offset >= pos) {
+		iblock++;
+		pos += blocksize;
+	}
+
+	err = get_block(inode, iblock, &map_bh, 0);
+	if (err)
+		goto unlock;
+	/* unmapped? It's a hole - nothing to do */
+	if (!buffer_mapped(&map_bh))
+		goto unlock;
+
+	/* Ok, it's mapped. Make sure it's up-to-date */
+	if (!PageUptodate(page)) {
+		err = mapping->a_ops->readpage(NULL, page);
+		if (err) {
+			page_cache_release(page);
+			goto out;
+		}
+		lock_page(page);
+		if (!PageUptodate(page)) {
+			err = -EIO;
+			goto unlock;
+		}
+		if (page_has_buffers(page))
+			goto has_buffers;
+	}
+	zero_user_page(page, offset, length, KM_USER0);
+	set_page_dirty(page);
+	err = 0;
+
+unlock:
 	unlock_page(page);
 	page_cache_release(page);
 out:
-	return ret;
+	return err;
 }
 EXPORT_SYMBOL(nobh_truncate_page);
fs/ext2/inode.c
@@ -659,6 +659,20 @@
 	return __ext2_write_begin(file, mapping, pos, len, flags, pagep,fsdata);
 }
 
+static int
+ext2_nobh_write_begin(struct file *file, struct address_space *mapping,
+		loff_t pos, unsigned len, unsigned flags,
+		struct page **pagep, void **fsdata)
+{
+	/*
+	 * Dir-in-pagecache still uses ext2_write_begin. Would have to rework
+	 * directory handling code to pass around offsets rather than struct
+	 * pages in order to make this work easily.
+	 */
+	return nobh_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+							ext2_get_block);
+}
+
 static int ext2_nobh_writepage(struct page *page,
 		struct writeback_control *wbc)
 {
@@ -710,7 +724,8 @@
 	.readpages		= ext2_readpages,
 	.writepage		= ext2_nobh_writepage,
 	.sync_page		= block_sync_page,
-	/* XXX: todo */
+	.write_begin		= ext2_nobh_write_begin,
+	.write_end		= nobh_write_end,
 	.bmap			= ext2_bmap,
 	.direct_IO		= ext2_direct_IO,
 	.writepages		= ext2_writepages,
@@ -927,7 +942,8 @@
 	if (mapping_is_xip(inode->i_mapping))
 		xip_truncate_page(inode->i_mapping, inode->i_size);
 	else if (test_opt(inode->i_sb, NOBH))
-		nobh_truncate_page(inode->i_mapping, inode->i_size);
+		nobh_truncate_page(inode->i_mapping,
+				inode->i_size, ext2_get_block);
 	else
 		block_truncate_page(inode->i_mapping,
 			inode->i_size, ext2_get_block);
fs/jfs/inode.c
@@ -279,8 +279,7 @@
 		loff_t pos, unsigned len, unsigned flags,
 		struct page **pagep, void **fsdata)
 {
-	*pagep = NULL;
-	return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+	return nobh_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
 				jfs_get_block);
 }
 
@@ -306,7 +305,7 @@
 	.writepages	= jfs_writepages,
 	.sync_page	= block_sync_page,
 	.write_begin	= jfs_write_begin,
-	.write_end	= generic_write_end,
+	.write_end	= nobh_write_end,
 	.bmap		= jfs_bmap,
 	.direct_IO	= jfs_direct_IO,
 };
@@ -359,7 +358,7 @@
 {
 	jfs_info("jfs_truncate: size = 0x%lx", (ulong) ip->i_size);
 
-	block_truncate_page(ip->i_mapping, ip->i_size, jfs_get_block);
+	nobh_truncate_page(ip->i_mapping, ip->i_size, jfs_get_block);
 
 	IWRITE_LOCK(ip, RDWRLOCK_NORMAL);
 	jfs_truncate_nolock(ip, ip->i_size);
include/linux/buffer_head.h
@@ -226,9 +226,13 @@
 int generic_commit_write(struct file *, struct page *, unsigned, unsigned);
 int block_truncate_page(struct address_space *, loff_t, get_block_t *);
 int file_fsync(struct file *, struct dentry *, int);
-int nobh_prepare_write(struct page*, unsigned, unsigned, get_block_t*);
-int nobh_commit_write(struct file *, struct page *, unsigned, unsigned);
-int nobh_truncate_page(struct address_space *, loff_t);
+int nobh_write_begin(struct file *, struct address_space *,
+				loff_t, unsigned, unsigned,
+				struct page **, void **, get_block_t*);
+int nobh_write_end(struct file *, struct address_space *,
+				loff_t, unsigned, unsigned,
+				struct page *, void *);
+int nobh_truncate_page(struct address_space *, loff_t, get_block_t *);
 int nobh_writepage(struct page *page, get_block_t *get_block,
 			struct writeback_control *wbc);
 