Commit 8c26c4e2694a163d525976e804d81cd955bbb40c

Authored by Vyacheslav Dubeyko
Committed by Linus Torvalds
1 parent 9151b3982d

nilfs2: fix issue with flush kernel thread after remount in RO mode because of driver's internal error or metadata corruption

The NILFS2 driver remounts itself in RO mode when it discovers metadata
corruption (for example, a broken bmap).  Usually, however, file system
operations have already taken place before the remount in RO mode.

As a result, the NILFS2 driver can be in RO mode while dirty pages are
still present in the address spaces of modified inodes.  The flush kernel
thread then tries endlessly to flush these dirty pages in RO mode.  This
produces side effects such as: (1) the flush kernel thread occupies
50% - 99% of CPU time; (2) the system can't be shut down without manually
switching off the power.

SYMPTOMS:
(1) System log contains error message: "Remounting filesystem read-only".
(2) The flush kernel thread occupies 50% - 99% of CPU time.
(3) The system can't be shut down without manually switching off the power.

REPRODUCTION PATH:
(1) Create a volume group named "unencrypted" using the vgcreate utility.
(2) Run script (prepared by Anthony Doggett <Anthony2486@interfaces.org.uk>):

  ----------------[BEGIN SCRIPT]--------------------
  #!/bin/bash

  VG=unencrypted
  #apt-get install nilfs-tools darcs
  lvcreate --size 2G --name ntest $VG
  mkfs.nilfs2 -b 1024 -B 8192 /dev/mapper/$VG-ntest
  mkdir /var/tmp/n
  mkdir /var/tmp/n/ntest
  mount /dev/mapper/$VG-ntest /var/tmp/n/ntest
  mkdir /var/tmp/n/ntest/thedir
  cd /var/tmp/n/ntest/thedir
  sleep 2
  date
  darcs init
  sleep 2
  dmesg|tail -n 5
  date
  darcs whatsnew || true
  date
  sleep 2
  dmesg|tail -n 5
  ----------------[END SCRIPT]--------------------

(3) Try to shut down the system.

REPRODUCIBILITY: 100%

FIX:

This patch adds a check of the NILFS2 driver's mount state to the
nilfs_writepage(), nilfs_writepages() and nilfs_mdt_write_page()
methods.  If the RO mount state is detected, all dirty pages are simply
discarded and warning messages are written to the system log.

[akpm@linux-foundation.org: fix printk warning]
Signed-off-by: Vyacheslav Dubeyko <slava@dubeyko.com>
Acked-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Cc: Anthony Doggett <Anthony2486@interfaces.org.uk>
Cc: ARAI Shun-ichi <hermes@ceres.dti.ne.jp>
Cc: Piotr Szymaniak <szarpaj@grubelek.pl>
Cc: Zahid Chowdhury <zahid.chowdhury@starsolutions.com>
Cc: Elmer Zhang <freeboy6716@gmail.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 4 changed files with 86 additions and 23 deletions Side-by-side Diff

... ... @@ -175,6 +175,11 @@
175 175 struct inode *inode = mapping->host;
176 176 int err = 0;
177 177  
  178 + if (inode->i_sb->s_flags & MS_RDONLY) {
  179 + nilfs_clear_dirty_pages(mapping, false);
  180 + return -EROFS;
  181 + }
  182 +
178 183 if (wbc->sync_mode == WB_SYNC_ALL)
179 184 err = nilfs_construct_dsync_segment(inode->i_sb, inode,
180 185 wbc->range_start,
... ... @@ -186,6 +191,18 @@
186 191 {
187 192 struct inode *inode = page->mapping->host;
188 193 int err;
  194 +
  195 + if (inode && (inode->i_sb->s_flags & MS_RDONLY)) {
  196 + /*
  197 + * It means that filesystem was remounted in read-only
  198 + * mode because of error or metadata corruption. But we
  199 + * have dirty pages that try to be flushed in background.
  200 + * So, here we simply discard this dirty page.
  201 + */
  202 + nilfs_clear_dirty_page(page, false);
  203 + unlock_page(page);
  204 + return -EROFS;
  205 + }
189 206  
190 207 redirty_page_for_writepage(wbc, page);
191 208 unlock_page(page);
... ... @@ -375,14 +375,25 @@
375 375 static int
376 376 nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc)
377 377 {
378   - struct inode *inode;
  378 + struct inode *inode = page->mapping->host;
379 379 struct super_block *sb;
380 380 int err = 0;
381 381  
  382 + if (inode && (inode->i_sb->s_flags & MS_RDONLY)) {
  383 + /*
  384 + * It means that filesystem was remounted in read-only
  385 + * mode because of error or metadata corruption. But we
  386 + * have dirty pages that try to be flushed in background.
  387 + * So, here we simply discard this dirty page.
  388 + */
  389 + nilfs_clear_dirty_page(page, false);
  390 + unlock_page(page);
  391 + return -EROFS;
  392 + }
  393 +
382 394 redirty_page_for_writepage(wbc, page);
383 395 unlock_page(page);
384 396  
385   - inode = page->mapping->host;
386 397 if (!inode)
387 398 return 0;
388 399  
389 400  
... ... @@ -561,10 +572,10 @@
561 572 if (mi->mi_palloc_cache)
562 573 nilfs_palloc_clear_cache(inode);
563 574  
564   - nilfs_clear_dirty_pages(inode->i_mapping);
  575 + nilfs_clear_dirty_pages(inode->i_mapping, true);
565 576 nilfs_copy_back_pages(inode->i_mapping, &shadow->frozen_data);
566 577  
567   - nilfs_clear_dirty_pages(&ii->i_btnode_cache);
  578 + nilfs_clear_dirty_pages(&ii->i_btnode_cache, true);
568 579 nilfs_copy_back_pages(&ii->i_btnode_cache, &shadow->frozen_btnodes);
569 580  
570 581 nilfs_bmap_restore(ii->i_bmap, &shadow->bmap_store);
... ... @@ -370,7 +370,12 @@
370 370 goto repeat;
371 371 }
372 372  
373   -void nilfs_clear_dirty_pages(struct address_space *mapping)
  373 +/**
  374 + * nilfs_clear_dirty_pages - discard dirty pages in address space
  375 + * @mapping: address space with dirty pages for discarding
  376 + * @silent: suppress [true] or print [false] warning messages
  377 + */
  378 +void nilfs_clear_dirty_pages(struct address_space *mapping, bool silent)
374 379 {
375 380 struct pagevec pvec;
376 381 unsigned int i;
377 382  
378 383  
... ... @@ -382,30 +387,59 @@
382 387 PAGEVEC_SIZE)) {
383 388 for (i = 0; i < pagevec_count(&pvec); i++) {
384 389 struct page *page = pvec.pages[i];
385   - struct buffer_head *bh, *head;
386 390  
387 391 lock_page(page);
388   - ClearPageUptodate(page);
389   - ClearPageMappedToDisk(page);
390   - bh = head = page_buffers(page);
391   - do {
392   - lock_buffer(bh);
393   - clear_buffer_dirty(bh);
394   - clear_buffer_nilfs_volatile(bh);
395   - clear_buffer_nilfs_checked(bh);
396   - clear_buffer_nilfs_redirected(bh);
397   - clear_buffer_uptodate(bh);
398   - clear_buffer_mapped(bh);
399   - unlock_buffer(bh);
400   - bh = bh->b_this_page;
401   - } while (bh != head);
402   -
403   - __nilfs_clear_page_dirty(page);
  392 + nilfs_clear_dirty_page(page, silent);
404 393 unlock_page(page);
405 394 }
406 395 pagevec_release(&pvec);
407 396 cond_resched();
408 397 }
  398 +}
  399 +
  400 +/**
  401 + * nilfs_clear_dirty_page - discard dirty page
  402 + * @page: dirty page that will be discarded
  403 + * @silent: suppress [true] or print [false] warning messages
  404 + */
  405 +void nilfs_clear_dirty_page(struct page *page, bool silent)
  406 +{
  407 + struct inode *inode = page->mapping->host;
  408 + struct super_block *sb = inode->i_sb;
  409 +
  410 + BUG_ON(!test_bit(PG_locked, &page->flags));
  411 +
  412 + if (!silent) {
  413 + nilfs_warning(sb, __func__,
  414 + "discard page: offset %lld, ino %lu",
  415 + page_offset(page), inode->i_ino);
  416 + }
  417 +
  418 + ClearPageUptodate(page);
  419 + ClearPageMappedToDisk(page);
  420 +
  421 + if (page_has_buffers(page)) {
  422 + struct buffer_head *bh, *head;
  423 +
  424 + bh = head = page_buffers(page);
  425 + do {
  426 + lock_buffer(bh);
  427 + if (!silent) {
  428 + nilfs_warning(sb, __func__,
  429 + "discard block %llu, size %zu",
  430 + (u64)bh->b_blocknr, bh->b_size);
  431 + }
  432 + clear_buffer_dirty(bh);
  433 + clear_buffer_nilfs_volatile(bh);
  434 + clear_buffer_nilfs_checked(bh);
  435 + clear_buffer_nilfs_redirected(bh);
  436 + clear_buffer_uptodate(bh);
  437 + clear_buffer_mapped(bh);
  438 + unlock_buffer(bh);
  439 + } while (bh = bh->b_this_page, bh != head);
  440 + }
  441 +
  442 + __nilfs_clear_page_dirty(page);
409 443 }
410 444  
411 445 unsigned nilfs_page_count_clean_buffers(struct page *page,
... ... @@ -55,7 +55,8 @@
55 55  
56 56 int nilfs_copy_dirty_pages(struct address_space *, struct address_space *);
57 57 void nilfs_copy_back_pages(struct address_space *, struct address_space *);
58   -void nilfs_clear_dirty_pages(struct address_space *);
  58 +void nilfs_clear_dirty_page(struct page *, bool);
  59 +void nilfs_clear_dirty_pages(struct address_space *, bool);
59 60 void nilfs_mapping_init(struct address_space *mapping, struct inode *inode,
60 61 struct backing_dev_info *bdi);
61 62 unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned);