Commit f8e6cc013b896d75d6ce4ec9e168014af1257fd8

Authored by Ryusuke Konishi
1 parent 7c397a81fe

nilfs2: fix buffer head leak in nilfs_btnode_submit_block

nilfs_btnode_submit_block() refers to buffer head just before
returning from the function, but it releases the buffer head earlier
than that if nilfs_dat_translate() gets an error.

This has the potential to cause an oops (the buffer head's page is
dereferenced after the buffer head has been released) in the error
case.  This patch fixes the issue.

Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>

Showing 1 changed file with 4 additions and 2 deletions Inline Diff

1 /* 1 /*
2 * btnode.c - NILFS B-tree node cache 2 * btnode.c - NILFS B-tree node cache
3 * 3 *
4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. 4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify 6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by 7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or 8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version. 9 * (at your option) any later version.
10 * 10 *
11 * This program is distributed in the hope that it will be useful, 11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details. 14 * GNU General Public License for more details.
15 * 15 *
16 * You should have received a copy of the GNU General Public License 16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software 17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 * 19 *
20 * This file was originally written by Seiji Kihara <kihara@osrg.net> 20 * This file was originally written by Seiji Kihara <kihara@osrg.net>
21 * and fully revised by Ryusuke Konishi <ryusuke@osrg.net> for 21 * and fully revised by Ryusuke Konishi <ryusuke@osrg.net> for
22 * stabilization and simplification. 22 * stabilization and simplification.
23 * 23 *
24 */ 24 */
25 25
26 #include <linux/types.h> 26 #include <linux/types.h>
27 #include <linux/buffer_head.h> 27 #include <linux/buffer_head.h>
28 #include <linux/mm.h> 28 #include <linux/mm.h>
29 #include <linux/backing-dev.h> 29 #include <linux/backing-dev.h>
30 #include <linux/gfp.h> 30 #include <linux/gfp.h>
31 #include "nilfs.h" 31 #include "nilfs.h"
32 #include "mdt.h" 32 #include "mdt.h"
33 #include "dat.h" 33 #include "dat.h"
34 #include "page.h" 34 #include "page.h"
35 #include "btnode.h" 35 #include "btnode.h"
36 36
37 37
/*
 * One-time initialization of an address_space used as a B-tree node
 * cache: clear the structure, then set up the page radix tree, the
 * private buffer list, and the mmap bookkeeping with their locks.
 */
void nilfs_btnode_cache_init_once(struct address_space *btnc)
{
	memset(btnc, 0, sizeof(*btnc));
	INIT_RADIX_TREE(&btnc->page_tree, GFP_ATOMIC);
	spin_lock_init(&btnc->tree_lock);
	INIT_LIST_HEAD(&btnc->private_list);
	spin_lock_init(&btnc->private_lock);

	spin_lock_init(&btnc->i_mmap_lock);
	INIT_RAW_PRIO_TREE_ROOT(&btnc->i_mmap);
	INIT_LIST_HEAD(&btnc->i_mmap_nonlinear);
}
50 50
/* Default address_space operations for the btnode cache; only sync_page
 * is provided. */
static const struct address_space_operations def_btnode_aops = {
	.sync_page		= block_sync_page,
};
54 54
/*
 * Per-instance initialization of a btnode cache: no host inode,
 * NOFS allocation mask (we may be called from the writeback path),
 * and the given backing device.
 */
void nilfs_btnode_cache_init(struct address_space *btnc,
			     struct backing_dev_info *bdi)
{
	btnc->host = NULL;  /* can safely set to host inode ? */
	btnc->flags = 0;
	mapping_set_gfp_mask(btnc, GFP_NOFS);
	btnc->assoc_mapping = NULL;
	btnc->backing_dev_info = bdi;
	btnc->a_ops = &def_btnode_aops;
}
65 65
/* Drop all pages cached in the btnode cache. */
void nilfs_btnode_cache_clear(struct address_space *btnc)
{
	invalidate_mapping_pages(btnc, 0, -1);
	truncate_inode_pages(btnc, 0);
}
71 71
/*
 * nilfs_btnode_create_block - create a new buffer in the btnode cache
 * @btnc: btnode cache (address_space)
 * @blocknr: block number, used as the buffer/page index
 *
 * Grabs a fresh buffer via nilfs_grab_buffer(), zero-fills it and marks
 * it mapped and uptodate.  Returns the referenced buffer head (its page
 * unlocked and released), or NULL if nilfs_grab_buffer() fails.
 * A buffer that is already mapped, uptodate or dirty here indicates
 * cache-state corruption, hence the BUG().
 */
struct buffer_head *
nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr)
{
	struct inode *inode = NILFS_BTNC_I(btnc);
	struct buffer_head *bh;

	bh = nilfs_grab_buffer(inode, btnc, blocknr, 1 << BH_NILFS_Node);
	if (unlikely(!bh))
		return NULL;

	if (unlikely(buffer_mapped(bh) || buffer_uptodate(bh) ||
		     buffer_dirty(bh))) {
		brelse(bh);
		BUG();
	}
	memset(bh->b_data, 0, 1 << inode->i_blkbits);
	bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev;
	bh->b_blocknr = blocknr;
	set_buffer_mapped(bh);
	set_buffer_uptodate(bh);

	unlock_page(bh->b_page);
	page_cache_release(bh->b_page);
	return bh;
}
97 97
98 int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, 98 int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
99 sector_t pblocknr, struct buffer_head **pbh) 99 sector_t pblocknr, struct buffer_head **pbh)
100 { 100 {
101 struct buffer_head *bh; 101 struct buffer_head *bh;
102 struct inode *inode = NILFS_BTNC_I(btnc); 102 struct inode *inode = NILFS_BTNC_I(btnc);
103 struct page *page;
103 int err; 104 int err;
104 105
105 bh = nilfs_grab_buffer(inode, btnc, blocknr, 1 << BH_NILFS_Node); 106 bh = nilfs_grab_buffer(inode, btnc, blocknr, 1 << BH_NILFS_Node);
106 if (unlikely(!bh)) 107 if (unlikely(!bh))
107 return -ENOMEM; 108 return -ENOMEM;
108 109
109 err = -EEXIST; /* internal code */ 110 err = -EEXIST; /* internal code */
111 page = bh->b_page;
110 112
111 if (buffer_uptodate(bh) || buffer_dirty(bh)) 113 if (buffer_uptodate(bh) || buffer_dirty(bh))
112 goto found; 114 goto found;
113 115
114 if (pblocknr == 0) { 116 if (pblocknr == 0) {
115 pblocknr = blocknr; 117 pblocknr = blocknr;
116 if (inode->i_ino != NILFS_DAT_INO) { 118 if (inode->i_ino != NILFS_DAT_INO) {
117 struct inode *dat = 119 struct inode *dat =
118 nilfs_dat_inode(NILFS_I_NILFS(inode)); 120 nilfs_dat_inode(NILFS_I_NILFS(inode));
119 121
120 /* blocknr is a virtual block number */ 122 /* blocknr is a virtual block number */
121 err = nilfs_dat_translate(dat, blocknr, &pblocknr); 123 err = nilfs_dat_translate(dat, blocknr, &pblocknr);
122 if (unlikely(err)) { 124 if (unlikely(err)) {
123 brelse(bh); 125 brelse(bh);
124 goto out_locked; 126 goto out_locked;
125 } 127 }
126 } 128 }
127 } 129 }
128 lock_buffer(bh); 130 lock_buffer(bh);
129 if (buffer_uptodate(bh)) { 131 if (buffer_uptodate(bh)) {
130 unlock_buffer(bh); 132 unlock_buffer(bh);
131 err = -EEXIST; /* internal code */ 133 err = -EEXIST; /* internal code */
132 goto found; 134 goto found;
133 } 135 }
134 set_buffer_mapped(bh); 136 set_buffer_mapped(bh);
135 bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev; 137 bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev;
136 bh->b_blocknr = pblocknr; /* set block address for read */ 138 bh->b_blocknr = pblocknr; /* set block address for read */
137 bh->b_end_io = end_buffer_read_sync; 139 bh->b_end_io = end_buffer_read_sync;
138 get_bh(bh); 140 get_bh(bh);
139 submit_bh(READ, bh); 141 submit_bh(READ, bh);
140 bh->b_blocknr = blocknr; /* set back to the given block address */ 142 bh->b_blocknr = blocknr; /* set back to the given block address */
141 err = 0; 143 err = 0;
142 found: 144 found:
143 *pbh = bh; 145 *pbh = bh;
144 146
145 out_locked: 147 out_locked:
146 unlock_page(bh->b_page); 148 unlock_page(page);
147 page_cache_release(bh->b_page); 149 page_cache_release(page);
148 return err; 150 return err;
149 } 151 }
150 152
/**
 * nilfs_btnode_delete - delete B-tree node buffer
 * @bh: buffer to be deleted
 *
 * nilfs_btnode_delete() invalidates the specified buffer and delete the page
 * including the buffer if the page gets unbusy.
 */
void nilfs_btnode_delete(struct buffer_head *bh)
{
	struct address_space *mapping;
	struct page *page = bh->b_page;
	pgoff_t index = page_index(page);
	int still_dirty;

	/* hold a page reference across the forget so the page cannot
	 * vanish while we inspect it */
	page_cache_get(page);
	lock_page(page);
	wait_on_page_writeback(page);

	nilfs_forget_buffer(bh);
	still_dirty = PageDirty(page);
	mapping = page->mapping;
	unlock_page(page);
	page_cache_release(page);

	/* drop the page itself if no other buffer on it is still dirty */
	if (!still_dirty && mapping)
		invalidate_inode_pages2_range(mapping, index, index);
}
178 180
/**
 * nilfs_btnode_prepare_change_key
 *  prepare to move contents of the block for old key to one of new key.
 *  the old buffer will not be removed, but might be reused for new buffer.
 *  it might return -ENOMEM because of memory allocation errors,
 *  and might return -EIO because of disk read errors.
 */
int nilfs_btnode_prepare_change_key(struct address_space *btnc,
				    struct nilfs_btnode_chkey_ctxt *ctxt)
{
	struct buffer_head *obh, *nbh;
	struct inode *inode = NILFS_BTNC_I(btnc);
	__u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey;
	int err;

	if (oldkey == newkey)
		return 0;

	obh = ctxt->bh;
	ctxt->newbh = NULL;

	if (inode->i_blkbits == PAGE_CACHE_SHIFT) {
		/* one block per page: try to move the page itself to the
		 * new key by re-inserting it into the radix tree */
		lock_page(obh->b_page);
		/*
		 * We cannot call radix_tree_preload for the kernels older
		 * than 2.6.23, because it is not exported for modules.
		 */
retry:
		err = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
		if (err)
			goto failed_unlock;
		/* BUG_ON(oldkey != obh->b_page->index); */
		if (unlikely(oldkey != obh->b_page->index))
			NILFS_PAGE_BUG(obh->b_page,
				       "invalid oldkey %lld (newkey=%lld)",
				       (unsigned long long)oldkey,
				       (unsigned long long)newkey);

		spin_lock_irq(&btnc->tree_lock);
		err = radix_tree_insert(&btnc->page_tree, newkey, obh->b_page);
		spin_unlock_irq(&btnc->tree_lock);
		/*
		 * Note: page->index will not change to newkey until
		 * nilfs_btnode_commit_change_key() will be called.
		 * To protect the page in intermediate state, the page lock
		 * is held.
		 */
		radix_tree_preload_end();
		if (!err)
			return 0;	/* page stays locked until commit/abort */
		else if (err != -EEXIST)
			goto failed_unlock;

		/* a stale page occupies the new key; invalidate and retry */
		err = invalidate_inode_pages2_range(btnc, newkey, newkey);
		if (!err)
			goto retry;
		/* fallback to copy mode */
		unlock_page(obh->b_page);
	}

	/* copy mode: allocate a fresh buffer at the new key */
	nbh = nilfs_btnode_create_block(btnc, newkey);
	if (!nbh)
		return -ENOMEM;

	BUG_ON(nbh == obh);
	ctxt->newbh = nbh;
	return 0;

failed_unlock:
	unlock_page(obh->b_page);
	return err;
}
251 253
/**
 * nilfs_btnode_commit_change_key
 *  commit the change_key operation prepared by prepare_change_key().
 */
void nilfs_btnode_commit_change_key(struct address_space *btnc,
				    struct nilfs_btnode_chkey_ctxt *ctxt)
{
	struct buffer_head *obh = ctxt->bh, *nbh = ctxt->newbh;
	__u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey;
	struct page *opage;

	if (oldkey == newkey)
		return;

	if (nbh == NULL) {	/* blocksize == pagesize */
		/* move mode: retarget the existing page (already inserted
		 * at newkey by prepare) and drop the oldkey slot */
		opage = obh->b_page;
		if (unlikely(oldkey != opage->index))
			NILFS_PAGE_BUG(opage,
				       "invalid oldkey %lld (newkey=%lld)",
				       (unsigned long long)oldkey,
				       (unsigned long long)newkey);
		nilfs_btnode_mark_dirty(obh);

		spin_lock_irq(&btnc->tree_lock);
		radix_tree_delete(&btnc->page_tree, oldkey);
		radix_tree_tag_set(&btnc->page_tree, newkey,
				   PAGECACHE_TAG_DIRTY);
		spin_unlock_irq(&btnc->tree_lock);

		opage->index = obh->b_blocknr = newkey;
		unlock_page(opage);	/* was locked since prepare */
	} else {
		/* copy mode: copy contents into the new buffer and delete
		 * the old one */
		nilfs_copy_buffer(nbh, obh);
		nilfs_btnode_mark_dirty(nbh);

		nbh->b_blocknr = newkey;
		ctxt->bh = nbh;
		nilfs_btnode_delete(obh); /* will decrement bh->b_count */
	}
}
292 294
/**
 * nilfs_btnode_abort_change_key
 *  abort the change_key operation prepared by prepare_change_key().
 */
void nilfs_btnode_abort_change_key(struct address_space *btnc,
				   struct nilfs_btnode_chkey_ctxt *ctxt)
{
	struct buffer_head *nbh = ctxt->newbh;
	__u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey;

	if (oldkey == newkey)
		return;

	if (nbh == NULL) {	/* blocksize == pagesize */
		/* move mode: undo the radix-tree insert made by prepare
		 * and release the page lock held since then */
		spin_lock_irq(&btnc->tree_lock);
		radix_tree_delete(&btnc->page_tree, newkey);
		spin_unlock_irq(&btnc->tree_lock);
		unlock_page(ctxt->bh->b_page);
	} else
		/* copy mode: just drop the new buffer */
		brelse(nbh);
}
314 316