Commit 9421502b4fc894cc477be8fc49776830e37ca157

Authored by Joern Engel
1 parent 5c564c2a04

[LogFS] Fix bdev erases

Erases for block devices were always just emulated by writing 0xff.
Some time back the write was removed and only the page cache was
changed to 0xff.  Superficially a good idea with two problems:
1. Touching the page cache isn't necessary either.
2. However, writing out 0xff _is_ necessary for the journal.  As the
   journal is scanned linearly, an old non-overwritten commit entry
   can be used on next mount and cause havoc.

This should fix both aspects.

Showing 6 changed files with 97 additions and 20 deletions Side-by-side Diff

... ... @@ -167,27 +167,91 @@
167 167 generic_unplug_device(bdev_get_queue(logfs_super(sb)->s_bdev));
168 168 }
169 169  
170   -static int bdev_erase(struct super_block *sb, loff_t to, size_t len)
  170 +
  171 +static void erase_end_io(struct bio *bio, int err)
  172 +{
  173 + const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
  174 + struct super_block *sb = bio->bi_private;
  175 + struct logfs_super *super = logfs_super(sb);
  176 +
  177 + BUG_ON(!uptodate); /* FIXME: Retry io or write elsewhere */
  178 + BUG_ON(err);
  179 + BUG_ON(bio->bi_vcnt == 0);
  180 + bio_put(bio);
  181 + if (atomic_dec_and_test(&super->s_pending_writes))
  182 + wake_up(&wq);
  183 +}
  184 +
  185 +static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index,
  186 + size_t nr_pages)
171 187 {
172 188 struct logfs_super *super = logfs_super(sb);
173   - struct address_space *mapping = super->s_mapping_inode->i_mapping;
174   - struct page *page;
175   - pgoff_t index = to >> PAGE_SHIFT;
176   - int i, nr_pages = len >> PAGE_SHIFT;
  189 + struct bio *bio;
  190 + struct request_queue *q = bdev_get_queue(sb->s_bdev);
  191 + unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9);
  192 + int i;
177 193  
  194 + bio = bio_alloc(GFP_NOFS, max_pages);
  195 + BUG_ON(!bio); /* FIXME: handle this */
  196 +
  197 + for (i = 0; i < nr_pages; i++) {
  198 + if (i >= max_pages) {
  199 + /* Block layer cannot split bios :( */
  200 + bio->bi_vcnt = i;
  201 + bio->bi_idx = 0;
  202 + bio->bi_size = i * PAGE_SIZE;
  203 + bio->bi_bdev = super->s_bdev;
  204 + bio->bi_sector = ofs >> 9;
  205 + bio->bi_private = sb;
  206 + bio->bi_end_io = erase_end_io;
  207 + atomic_inc(&super->s_pending_writes);
  208 + submit_bio(WRITE, bio);
  209 +
  210 + ofs += i * PAGE_SIZE;
  211 + index += i;
  212 + nr_pages -= i;
  213 + i = 0;
  214 +
  215 + bio = bio_alloc(GFP_NOFS, max_pages);
  216 + BUG_ON(!bio);
  217 + }
  218 + bio->bi_io_vec[i].bv_page = super->s_erase_page;
  219 + bio->bi_io_vec[i].bv_len = PAGE_SIZE;
  220 + bio->bi_io_vec[i].bv_offset = 0;
  221 + }
  222 + bio->bi_vcnt = nr_pages;
  223 + bio->bi_idx = 0;
  224 + bio->bi_size = nr_pages * PAGE_SIZE;
  225 + bio->bi_bdev = super->s_bdev;
  226 + bio->bi_sector = ofs >> 9;
  227 + bio->bi_private = sb;
  228 + bio->bi_end_io = erase_end_io;
  229 + atomic_inc(&super->s_pending_writes);
  230 + submit_bio(WRITE, bio);
  231 + return 0;
  232 +}
  233 +
  234 +static int bdev_erase(struct super_block *sb, loff_t to, size_t len,
  235 + int ensure_write)
  236 +{
  237 + struct logfs_super *super = logfs_super(sb);
  238 +
178 239 BUG_ON(to & (PAGE_SIZE - 1));
179 240 BUG_ON(len & (PAGE_SIZE - 1));
180 241  
181   - if (logfs_super(sb)->s_flags & LOGFS_SB_FLAG_RO)
  242 + if (super->s_flags & LOGFS_SB_FLAG_RO)
182 243 return -EROFS;
183 244  
184   - for (i = 0; i < nr_pages; i++) {
185   - page = find_get_page(mapping, index + i);
186   - if (page) {
187   - memset(page_address(page), 0xFF, PAGE_SIZE);
188   - page_cache_release(page);
189   - }
  245 + if (ensure_write) {
  246 + /*
  247 + * Object store doesn't care whether erases happen or not.
  248 + * But for the journal they are required. Otherwise a scan
  249 + * can find an old commit entry and assume it is the current
  250 + * one, travelling back in time.
  251 + */
  252 + do_erase(sb, to, to >> PAGE_SHIFT, len >> PAGE_SHIFT);
190 253 }
  254 +
191 255 return 0;
192 256 }
193 257  
... ... @@ -83,7 +83,8 @@
83 83 return 0;
84 84 }
85 85  
86   -static int mtd_erase(struct super_block *sb, loff_t ofs, size_t len)
  86 +static int mtd_erase(struct super_block *sb, loff_t ofs, size_t len,
  87 + int ensure_write)
87 88 {
88 89 struct mtd_info *mtd = logfs_super(sb)->s_mtd;
89 90 struct erase_info ei;
... ... @@ -392,7 +392,7 @@
392 392 u64 ofs;
393 393 int err;
394 394  
395   - err = logfs_erase_segment(sb, area->a_segno);
  395 + err = logfs_erase_segment(sb, area->a_segno, 1);
396 396 if (err)
397 397 return err;
398 398  
... ... @@ -151,7 +151,8 @@
151 151 int (*write_sb)(struct super_block *sb, struct page *page);
152 152 int (*readpage)(void *_sb, struct page *page);
153 153 void (*writeseg)(struct super_block *sb, u64 ofs, size_t len);
154   - int (*erase)(struct super_block *sb, loff_t ofs, size_t len);
  154 + int (*erase)(struct super_block *sb, loff_t ofs, size_t len,
  155 + int ensure_write);
155 156 void (*sync)(struct super_block *sb);
156 157 void (*put_device)(struct super_block *sb);
157 158 };
... ... @@ -327,6 +328,7 @@
327 328 u64 s_feature_compat;
328 329 u64 s_feature_flags;
329 330 u64 s_sb_ofs[2];
  331 + struct page *s_erase_page; /* for dev_bdev.c */
330 332 /* alias.c fields */
331 333 struct btree_head32 s_segment_alias; /* remapped segments */
332 334 int s_no_object_aliases;
... ... @@ -572,7 +574,7 @@
572 574 extern struct logfs_block_ops indirect_block_ops;
573 575  
574 576 /* segment.c */
575   -int logfs_erase_segment(struct super_block *sb, u32 ofs);
  577 +int logfs_erase_segment(struct super_block *sb, u32 ofs, int ensure_erase);
576 578 int wbuf_read(struct super_block *sb, u64 ofs, size_t len, void *buf);
577 579 int logfs_segment_read(struct inode *inode, struct page *page, u64 ofs, u64 bix,
578 580 level_t level);
... ... @@ -25,14 +25,14 @@
25 25 return 0;
26 26 }
27 27  
28   -int logfs_erase_segment(struct super_block *sb, u32 segno)
  28 +int logfs_erase_segment(struct super_block *sb, u32 segno, int ensure_erase)
29 29 {
30 30 struct logfs_super *super = logfs_super(sb);
31 31  
32 32 super->s_gec++;
33 33  
34 34 return super->s_devops->erase(sb, (u64)segno << super->s_segshift,
35   - super->s_segsize);
  35 + super->s_segsize, ensure_erase);
36 36 }
37 37  
38 38 static s64 logfs_get_free_bytes(struct logfs_area *area, size_t bytes)
... ... @@ -798,7 +798,7 @@
798 798 u64 ofs;
799 799 int err;
800 800  
801   - err = logfs_erase_segment(sb, area->a_segno);
  801 + err = logfs_erase_segment(sb, area->a_segno, 0);
802 802 if (err)
803 803 return err;
804 804  
... ... @@ -317,6 +317,7 @@
317 317  
318 318 static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt)
319 319 {
  320 + struct logfs_super *super = logfs_super(sb);
320 321 struct inode *rootdir;
321 322 int err;
322 323  
323 324  
324 325  
... ... @@ -329,15 +330,22 @@
329 330 if (!sb->s_root)
330 331 goto fail;
331 332  
  333 + super->s_erase_page = alloc_pages(GFP_KERNEL, 0);
  334 + if (!super->s_erase_page)
  335 + goto fail2;
  336 + memset(page_address(super->s_erase_page), 0xFF, PAGE_SIZE);
  337 +
332 338 /* FIXME: check for read-only mounts */
333 339 err = logfs_make_writeable(sb);
334 340 if (err)
335   - goto fail2;
  341 + goto fail3;
336 342  
337 343 log_super("LogFS: Finished mounting\n");
338 344 simple_set_mnt(mnt, sb);
339 345 return 0;
340 346  
  347 +fail3:
  348 + __free_page(super->s_erase_page);
341 349 fail2:
342 350 iput(rootdir);
343 351 fail:
... ... @@ -498,6 +506,8 @@
498 506 logfs_cleanup_journal(sb);
499 507 logfs_cleanup_areas(sb);
500 508 logfs_cleanup_rw(sb);
  509 + if (super->s_erase_page)
  510 + __free_page(super->s_erase_page);
501 511 super->s_devops->put_device(sb);
502 512 mempool_destroy(super->s_btree_pool);
503 513 mempool_destroy(super->s_alias_pool);