Commit 7d6e1f5461d0c16eb6aa8d226976995856d85e4e

Authored by Sha Zhengju
Committed by Sage Weil
1 parent ee7289bfad

ceph: use vfs __set_page_dirty_nobuffers interface instead of doing it inside filesystem

Following we will begin to add memcg dirty page accounting around
__set_page_dirty_{buffers,nobuffers} in vfs layer, so we'd better use vfs interface to
avoid exporting those details to filesystems.

Since vfs set_page_dirty() should be called under page lock, here we don't need elaborate
codes to handle racy anymore, and two WARN_ON() are added to detect such exceptions.
Thanks very much for Sage and Yan Zheng's coaching!

I tested it in a two server's ceph environment that one is client and the other is
mds/osd/mon, and run the following fsx test from xfstests:

  ./fsx   1MB -N 50000 -p 10000 -l 1048576
  ./fsx  10MB -N 50000 -p 10000 -l 10485760
  ./fsx 100MB -N 50000 -p 10000 -l 104857600

The fsx does lots of mmap-read/mmap-write/truncate operations and the tests completed
successfully without triggering any of WARN_ON.

Signed-off-by: Sha Zhengju <handai.szj@taobao.com>
Reviewed-by: Sage Weil <sage@inktank.com>

Showing 1 changed file with 14 additions and 29 deletions Side-by-side Diff

... ... @@ -70,15 +70,16 @@
70 70 struct address_space *mapping = page->mapping;
71 71 struct inode *inode;
72 72 struct ceph_inode_info *ci;
73   - int undo = 0;
74 73 struct ceph_snap_context *snapc;
  74 + int ret;
75 75  
76 76 if (unlikely(!mapping))
77 77 return !TestSetPageDirty(page);
78 78  
79   - if (TestSetPageDirty(page)) {
  79 + if (PageDirty(page)) {
80 80 dout("%p set_page_dirty %p idx %lu -- already dirty\n",
81 81 mapping->host, page, page->index);
  82 + BUG_ON(!PagePrivate(page));
82 83 return 0;
83 84 }
84 85  
85 86  
86 87  
... ... @@ -107,35 +108,19 @@
107 108 snapc, snapc->seq, snapc->num_snaps);
108 109 spin_unlock(&ci->i_ceph_lock);
109 110  
110   - /* now adjust page */
111   - spin_lock_irq(&mapping->tree_lock);
112   - if (page->mapping) { /* Race with truncate? */
113   - WARN_ON_ONCE(!PageUptodate(page));
114   - account_page_dirtied(page, page->mapping);
115   - radix_tree_tag_set(&mapping->page_tree,
116   - page_index(page), PAGECACHE_TAG_DIRTY);
  111 + /*
  112 + * Reference snap context in page->private. Also set
  113 + * PagePrivate so that we get invalidatepage callback.
  114 + */
  115 + BUG_ON(PagePrivate(page));
  116 + page->private = (unsigned long)snapc;
  117 + SetPagePrivate(page);
117 118  
118   - /*
119   - * Reference snap context in page->private. Also set
120   - * PagePrivate so that we get invalidatepage callback.
121   - */
122   - page->private = (unsigned long)snapc;
123   - SetPagePrivate(page);
124   - } else {
125   - dout("ANON set_page_dirty %p (raced truncate?)\n", page);
126   - undo = 1;
127   - }
  119 + ret = __set_page_dirty_nobuffers(page);
  120 + WARN_ON(!PageLocked(page));
  121 + WARN_ON(!page->mapping);
128 122  
129   - spin_unlock_irq(&mapping->tree_lock);
130   -
131   - if (undo)
132   - /* whoops, we failed to dirty the page */
133   - ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
134   -
135   - __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
136   -
137   - BUG_ON(!PageDirty(page));
138   - return 1;
  123 + return ret;
139 124 }
140 125  
141 126 /*