Blame view

fs/buffer.c 89.5 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
  /*
   *  linux/fs/buffer.c
   *
   *  Copyright (C) 1991, 1992, 2002  Linus Torvalds
   */
  
  /*
   * Start bdflush() with kernel_thread not syscall - Paul Gortmaker, 12/95
   *
   * Removed a lot of unnecessary code and simplified things now that
   * the buffer cache isn't our primary cache - Andrew Tridgell 12/96
   *
   * Speed up hash, lru, and free list operations.  Use gfp() for allocating
   * hash table, use SLAB cache for buffer heads. SMP threading.  -DaveM
   *
   * Added 32k buffer block sizes - these are required on older ARM systems. - RMK
   *
   * async buffer flushing, 1999 Andrea Arcangeli <andrea@suse.de>
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
20
21
22
23
24
25
  #include <linux/kernel.h>
  #include <linux/syscalls.h>
  #include <linux/fs.h>
  #include <linux/mm.h>
  #include <linux/percpu.h>
  #include <linux/slab.h>
16f7e0fe2   Randy Dunlap   [PATCH] capable/c...
26
  #include <linux/capability.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
27
28
29
30
31
32
33
34
35
  #include <linux/blkdev.h>
  #include <linux/file.h>
  #include <linux/quotaops.h>
  #include <linux/highmem.h>
  #include <linux/module.h>
  #include <linux/writeback.h>
  #include <linux/hash.h>
  #include <linux/suspend.h>
  #include <linux/buffer_head.h>
55e829af0   Andrew Morton   [PATCH] io-accoun...
36
  #include <linux/task_io_accounting_ops.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
37
38
39
40
41
  #include <linux/bio.h>
  #include <linux/notifier.h>
  #include <linux/cpu.h>
  #include <linux/bitops.h>
  #include <linux/mpage.h>
fb1c8f93d   Ingo Molnar   [PATCH] spinlock ...
42
  #include <linux/bit_spinlock.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
43
44
  
  static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
  
  /* Recover the buffer_head that embeds the given b_assoc_buffers list node. */
  #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
  
  /*
   * Install an I/O completion callback and its private cookie on a buffer.
   *
   * @bh:      buffer head to configure
   * @handler: stored in bh->b_end_io
   * @private: stored in bh->b_private
   */
  inline void init_buffer(struct buffer_head *bh, bh_end_io_t *handler,
  			void *private)
  {
  	bh->b_private = private;
  	bh->b_end_io = handler;
  }
  
  static int sync_buffer(void *word)
  {
  	struct block_device *bd;
  	struct buffer_head *bh
  		= container_of(word, struct buffer_head, b_state);
  
  	smp_mb();
  	bd = bh->b_bdev;
  	if (bd)
  		blk_run_address_space(bd->bd_inode->i_mapping);
  	io_schedule();
  	return 0;
  }
fc9b52cd8   Harvey Harrison   fs: remove fastca...
68
  void __lock_buffer(struct buffer_head *bh)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
69
70
71
72
73
  {
  	wait_on_bit_lock(&bh->b_state, BH_Lock, sync_buffer,
  							TASK_UNINTERRUPTIBLE);
  }
  EXPORT_SYMBOL(__lock_buffer);
fc9b52cd8   Harvey Harrison   fs: remove fastca...
74
  void unlock_buffer(struct buffer_head *bh)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
75
  {
51b07fc3c   Nick Piggin   fs: buffer lock u...
76
  	clear_bit_unlock(BH_Lock, &bh->b_state);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
  	smp_mb__after_clear_bit();
  	wake_up_bit(&bh->b_state, BH_Lock);
  }
  
  /*
   * Sleep (uninterruptibly) until BH_Lock is clear on @bh.  Nothing stops
   * another task from locking the buffer again straight afterwards - take
   * the lock yourself if the buffer's state has to be preserved.
   */
  void __wait_on_buffer(struct buffer_head *bh)
  {
  	wait_on_bit(&bh->b_state, BH_Lock, sync_buffer,
  		    TASK_UNINTERRUPTIBLE);
  }
  
  /*
   * Detach buffer bookkeeping from @page: clear PagePrivate, zero the page's
   * private word and release one page reference via page_cache_release().
   */
  static void
  __clear_page_buffers(struct page *page)
  {
  	ClearPagePrivate(page);
  	set_page_private(page, 0);
  	page_cache_release(page);
  }
08bafc034   Keith Mannthey   block: Supress Bu...
98
99
100
101
102
103
104
  
  /*
   * Decide whether an I/O error report for @bh should be suppressed.
   *
   * Returns 1 (stay quiet) when BH_Quiet is set on the buffer or when
   * printk_ratelimit() declines the message, and 0 otherwise.
   */
  static int quiet_error(struct buffer_head *bh)
  {
  	if (test_bit(BH_Quiet, &bh->b_state))
  		return 1;

  	return !printk_ratelimit();
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
105
106
107
  static void buffer_io_error(struct buffer_head *bh)
  {
  	char b[BDEVNAME_SIZE];
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
108
109
110
111
112
113
114
  	printk(KERN_ERR "Buffer I/O error on device %s, logical block %Lu
  ",
  			bdevname(bh->b_bdev, b),
  			(unsigned long long)bh->b_blocknr);
  }
  
  /*
   * End-of-IO handler helper function which does not touch the bh after
   * unlocking it.
   * Note: unlock_buffer() sort-of does touch the bh after unlocking it, but
   * a race there is benign: unlock_buffer() only uses the bh's address for
   * hashing after unlocking the buffer, so it doesn't actually touch the bh
   * itself.
   *
   * @bh:       buffer whose read I/O has finished
   * @uptodate: non-zero marks the buffer uptodate, zero clears that state
   */
  static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
  {
  	if (uptodate) {
  		set_buffer_uptodate(bh);
  	} else {
  		/* This happens, due to failed READA attempts. */
  		clear_buffer_uptodate(bh);
  	}
  	unlock_buffer(bh);
  }
  
  /*
   * Default synchronous end-of-IO handler.  Just mark it up-to-date (or not),
   * unlock the buffer and drop a reference with put_bh().  This is what
   * ll_rw_block uses too.
   */
  void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
  {
  	__end_buffer_read_notouch(bh, uptodate);
  	put_bh(bh);
  }
  
  void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
  {
  	char b[BDEVNAME_SIZE];
  
  	if (uptodate) {
  		set_buffer_uptodate(bh);
  	} else {
08bafc034   Keith Mannthey   block: Supress Bu...
150
  		if (!buffer_eopnotsupp(bh) && !quiet_error(bh)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
  			buffer_io_error(bh);
  			printk(KERN_WARNING "lost page write due to "
  					"I/O error on %s
  ",
  				       bdevname(bh->b_bdev, b));
  		}
  		set_buffer_write_io_error(bh);
  		clear_buffer_uptodate(bh);
  	}
  	unlock_buffer(bh);
  	put_bh(bh);
  }
  
  /*
   * Write out and wait upon all the dirty data associated with a block
   * device via its mapping.  Does not take the superblock lock.
   */
  int sync_blockdev(struct block_device *bdev)
  {
  	int ret = 0;
28fd12982   OGAWA Hirofumi   [PATCH] Fix and a...
171
172
  	if (bdev)
  		ret = filemap_write_and_wait(bdev->bd_inode->i_mapping);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
173
174
175
  	return ret;
  }
  EXPORT_SYMBOL(sync_blockdev);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
  /*
   * Flush and wait for every piece of dirty data belonging to this device:
   * the filesystem's data when a superblock is found on it, otherwise just
   * the underlying block device.  Takes the superblock lock.
   */
  int fsync_bdev(struct block_device *bdev)
  {
  	struct super_block *sb;
  	int res;

  	sb = get_super(bdev);
  	if (!sb)
  		return sync_blockdev(bdev);

  	res = fsync_super(sb);
  	drop_super(sb);
  	return res;
  }
  
  /**
   * freeze_bdev  --  lock a filesystem and force it into a consistent state
   * @bdev:	blockdevice to lock
   *
f73ca1b76   David Chinner   [PATCH] Revert bd...
196
   * This takes the block device bd_mount_sem to make sure no new mounts
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
197
198
199
   * happen on bdev until thaw_bdev() is called.
   * If a superblock is found on this device, we take the s_umount semaphore
   * on it to make sure nobody unmounts until the snapshot creation is done.
fcccf5025   Takashi Sato   filesystem freeze...
200
201
202
203
204
   * The reference counter (bd_fsfreeze_count) guarantees that only the last
   * unfreeze process can unfreeze the frozen filesystem actually when multiple
   * freeze requests arrive simultaneously. It counts up in freeze_bdev() and
   * count down in thaw_bdev(). When it becomes 0, thaw_bdev() will unfreeze
   * actually.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
205
206
207
208
   */
  struct super_block *freeze_bdev(struct block_device *bdev)
  {
  	struct super_block *sb;
fcccf5025   Takashi Sato   filesystem freeze...
209
210
211
212
213
214
215
216
217
218
  	int error = 0;
  
  	mutex_lock(&bdev->bd_fsfreeze_mutex);
  	if (bdev->bd_fsfreeze_count > 0) {
  		bdev->bd_fsfreeze_count++;
  		sb = get_super(bdev);
  		mutex_unlock(&bdev->bd_fsfreeze_mutex);
  		return sb;
  	}
  	bdev->bd_fsfreeze_count++;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
219

f73ca1b76   David Chinner   [PATCH] Revert bd...
220
  	down(&bdev->bd_mount_sem);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
221
222
223
  	sb = get_super(bdev);
  	if (sb && !(sb->s_flags & MS_RDONLY)) {
  		sb->s_frozen = SB_FREEZE_WRITE;
d59dd4620   Andrew Morton   [PATCH] use smp_m...
224
  		smp_wmb();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
225

d25b9a1ff   OGAWA Hirofumi   [PATCH] freeze_bd...
226
  		__fsync_super(sb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
227
228
  
  		sb->s_frozen = SB_FREEZE_TRANS;
d59dd4620   Andrew Morton   [PATCH] use smp_m...
229
  		smp_wmb();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
230
231
  
  		sync_blockdev(sb->s_bdev);
fcccf5025   Takashi Sato   filesystem freeze...
232
233
234
235
236
237
238
239
240
241
242
243
244
245
  		if (sb->s_op->freeze_fs) {
  			error = sb->s_op->freeze_fs(sb);
  			if (error) {
  				printk(KERN_ERR
  					"VFS:Filesystem freeze failed
  ");
  				sb->s_frozen = SB_UNFROZEN;
  				drop_super(sb);
  				up(&bdev->bd_mount_sem);
  				bdev->bd_fsfreeze_count--;
  				mutex_unlock(&bdev->bd_fsfreeze_mutex);
  				return ERR_PTR(error);
  			}
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
246
247
248
  	}
  
  	sync_blockdev(bdev);
fcccf5025   Takashi Sato   filesystem freeze...
249
  	mutex_unlock(&bdev->bd_fsfreeze_mutex);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
250
251
252
253
254
255
256
257
258
259
260
  	return sb;	/* thaw_bdev releases s->s_umount and bd_mount_sem */
  }
  EXPORT_SYMBOL(freeze_bdev);
  
  /**
   * thaw_bdev  -- unlock filesystem
   * @bdev:	blockdevice to unlock
   * @sb:		associated superblock
   *
   * Unlocks the filesystem and marks it writeable again after freeze_bdev().
   */
fcccf5025   Takashi Sato   filesystem freeze...
261
  int thaw_bdev(struct block_device *bdev, struct super_block *sb)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
262
  {
fcccf5025   Takashi Sato   filesystem freeze...
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
  	int error = 0;
  
  	mutex_lock(&bdev->bd_fsfreeze_mutex);
  	if (!bdev->bd_fsfreeze_count) {
  		mutex_unlock(&bdev->bd_fsfreeze_mutex);
  		return -EINVAL;
  	}
  
  	bdev->bd_fsfreeze_count--;
  	if (bdev->bd_fsfreeze_count > 0) {
  		if (sb)
  			drop_super(sb);
  		mutex_unlock(&bdev->bd_fsfreeze_mutex);
  		return 0;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
278
279
  	if (sb) {
  		BUG_ON(sb->s_bdev != bdev);
fcccf5025   Takashi Sato   filesystem freeze...
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
  		if (!(sb->s_flags & MS_RDONLY)) {
  			if (sb->s_op->unfreeze_fs) {
  				error = sb->s_op->unfreeze_fs(sb);
  				if (error) {
  					printk(KERN_ERR
  						"VFS:Filesystem thaw failed
  ");
  					sb->s_frozen = SB_FREEZE_TRANS;
  					bdev->bd_fsfreeze_count++;
  					mutex_unlock(&bdev->bd_fsfreeze_mutex);
  					return error;
  				}
  			}
  			sb->s_frozen = SB_UNFROZEN;
  			smp_wmb();
  			wake_up(&sb->s_wait_unfrozen);
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
297
298
  		drop_super(sb);
  	}
f73ca1b76   David Chinner   [PATCH] Revert bd...
299
  	up(&bdev->bd_mount_sem);
fcccf5025   Takashi Sato   filesystem freeze...
300
301
  	mutex_unlock(&bdev->bd_fsfreeze_mutex);
  	return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
302
303
304
305
  }
  EXPORT_SYMBOL(thaw_bdev);
  
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
306
307
308
309
310
311
312
313
314
315
316
   * Various filesystems appear to want __find_get_block to be non-blocking.
   * But it's the page lock which protects the buffers.  To get around this,
   * we get exclusion from try_to_free_buffers with the blockdev mapping's
   * private_lock.
   *
   * Hack idea: for the blockdev mapping, i_bufferlist_lock contention
   * may be quite high.  This code could TryLock the page, and if that
   * succeeds, there is no need to take private_lock. (But if
   * private_lock is contended then so is mapping->tree_lock).
   */
  static struct buffer_head *
385fd4c59   Coywolf Qi Hunt   [PATCH] __find_ge...
317
  __find_get_block_slow(struct block_device *bdev, sector_t block)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
  {
  	struct inode *bd_inode = bdev->bd_inode;
  	struct address_space *bd_mapping = bd_inode->i_mapping;
  	struct buffer_head *ret = NULL;
  	pgoff_t index;
  	struct buffer_head *bh;
  	struct buffer_head *head;
  	struct page *page;
  	int all_mapped = 1;
  
  	index = block >> (PAGE_CACHE_SHIFT - bd_inode->i_blkbits);
  	page = find_get_page(bd_mapping, index);
  	if (!page)
  		goto out;
  
  	spin_lock(&bd_mapping->private_lock);
  	if (!page_has_buffers(page))
  		goto out_unlock;
  	head = page_buffers(page);
  	bh = head;
  	do {
  		if (bh->b_blocknr == block) {
  			ret = bh;
  			get_bh(bh);
  			goto out_unlock;
  		}
  		if (!buffer_mapped(bh))
  			all_mapped = 0;
  		bh = bh->b_this_page;
  	} while (bh != head);
  
  	/* we might be here because some of the buffers on this page are
  	 * not mapped.  This is due to various races between
  	 * file io on the block device and getblk.  It gets dealt with
  	 * elsewhere, don't buffer_error if we had some unmapped buffers
  	 */
  	if (all_mapped) {
  		printk("__find_get_block_slow() failed. "
  			"block=%llu, b_blocknr=%llu
  ",
205f87f6b   Badari Pulavarty   [PATCH] change bu...
358
359
360
361
362
  			(unsigned long long)block,
  			(unsigned long long)bh->b_blocknr);
  		printk("b_state=0x%08lx, b_size=%zu
  ",
  			bh->b_state, bh->b_size);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
  		printk("device blocksize: %d
  ", 1 << bd_inode->i_blkbits);
  	}
  out_unlock:
  	spin_unlock(&bd_mapping->private_lock);
  	page_cache_release(page);
  out:
  	return ret;
  }
  
  /* If invalidate_buffers() will trash dirty buffers, it means some kind
     of fs corruption is going on. Trashing dirty data always imply losing
     information that was supposed to be just stored on the physical layer
     by the user.
  
     Thus invalidate_buffers in general usage is not allowed to trash
     dirty buffers. For example ioctl(FLSBLKBUF) expects dirty data to
     be preserved.  These buffers are simply skipped.
    
     We also skip buffers which are still in use.  For example this can
     happen if a userspace program is reading the block device.
  
     NOTE: In the case where the user removed a removable-media-disk even if
     there's still dirty data not synced on disk (due a bug in the device driver
     or due an error of the user), by not destroying the dirty buffers we could
     generate corruption also on the next media inserted, thus a parameter is
     necessary to handle this case in the most safe way possible (trying
     to not corrupt also the new disk inserted with the data belonging to
     the old now corrupted disk). Also for the ramdisk the natural thing
     to do in order to release the ramdisk memory is to destroy dirty buffers.
  
     These are two special cases. Normal usage imply the device driver
     to issue a sync on the device (without waiting I/O completion) and
     then an invalidate_buffers call that doesn't trash dirty buffers.
  
     For handling cache coherency with the blkdev pagecache the 'update' case
     has been introduced. It is needed to re-read from disk any pinned
     buffer. NOTE: re-reading from disk is destructive so we can do it only
     when we assume nobody is changing the buffercache under our I/O and when
     we think the disk contains more recent information than the buffercache.
     The update == 1 pass marks the buffers we need to update, the update == 2
     pass does the actual I/O. */
f98393a64   Peter Zijlstra   mm: remove destro...
405
  void invalidate_bdev(struct block_device *bdev)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
406
  {
0e1dfc66b   Andrew Morton   [PATCH] invalidat...
407
408
409
410
  	struct address_space *mapping = bdev->bd_inode->i_mapping;
  
  	if (mapping->nrpages == 0)
  		return;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
411
  	invalidate_bh_lrus();
fc0ecff69   Andrew Morton   [PATCH] remove in...
412
  	invalidate_mapping_pages(mapping, 0, -1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
413
414
415
416
417
418
419
  }
  
  /*
   * Kick pdflush then try to free up some ZONE_NORMAL memory.
   */
  static void free_more_memory(void)
  {
19770b326   Mel Gorman   mm: filter based ...
420
  	struct zone *zone;
0e88460da   Mel Gorman   mm: introduce nod...
421
  	int nid;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
422

687a21cee   Pekka J Enberg   [PATCH] rename wa...
423
  	wakeup_pdflush(1024);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
424
  	yield();
0e88460da   Mel Gorman   mm: introduce nod...
425
  	for_each_online_node(nid) {
19770b326   Mel Gorman   mm: filter based ...
426
427
428
429
  		(void)first_zones_zonelist(node_zonelist(nid, GFP_NOFS),
  						gfp_zone(GFP_NOFS), NULL,
  						&zone);
  		if (zone)
54a6eb5c4   Mel Gorman   mm: use two zonel...
430
431
  			try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
  						GFP_NOFS);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
432
433
434
435
436
437
438
439
440
  	}
  }
  
  /*
   * I/O completion handler for block_read_full_page() - pages
   * which come unlocked at the end of I/O.
   *
   * @bh:       buffer whose async read has completed (must have
   *            BH_Async_Read set)
   * @uptodate: non-zero if the read succeeded
   *
   * The page is unlocked (and marked uptodate when every buffer is uptodate
   * and the page has no error) only once no buffer on the page is still
   * flagged async-read.
   */
  static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
  {
  	unsigned long flags;
  	struct buffer_head *first;
  	struct buffer_head *tmp;
  	struct page *page;
  	int page_uptodate = 1;

  	BUG_ON(!buffer_async_read(bh));

  	page = bh->b_page;
  	if (uptodate) {
  		set_buffer_uptodate(bh);
  	} else {
  		clear_buffer_uptodate(bh);
  		if (!quiet_error(bh))
  			buffer_io_error(bh);
  		SetPageError(page);
  	}

  	/*
  	 * Be _very_ careful from here on. Bad things can happen if
  	 * two buffer heads end IO at almost the same time and both
  	 * decide that the page is now completely done.
  	 */
  	first = page_buffers(page);
  	local_irq_save(flags);
  	bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
  	clear_buffer_async_read(bh);
  	unlock_buffer(bh);
  	tmp = bh;
  	do {
  		if (!buffer_uptodate(tmp))
  			page_uptodate = 0;
  		if (buffer_async_read(tmp)) {
  			BUG_ON(!buffer_locked(tmp));
  			goto still_busy;
  		}
  		tmp = tmp->b_this_page;
  	} while (tmp != bh);
  	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  	local_irq_restore(flags);

  	/*
  	 * If none of the buffers had errors and they are all
  	 * uptodate then we can set the page uptodate.
  	 */
  	if (page_uptodate && !PageError(page))
  		SetPageUptodate(page);
  	unlock_page(page);
  	return;

  still_busy:
  	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  	local_irq_restore(flags);
  	return;
  }
  
  /*
   * Completion handler for block_write_full_page() - pages which are unlocked
   * during I/O, and which have PageWriteback cleared upon I/O completion.
   */
b6cd0b772   Adrian Bunk   [PATCH] fs/buffer...
501
  static void end_buffer_async_write(struct buffer_head *bh, int uptodate)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
502
503
  {
  	char b[BDEVNAME_SIZE];
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
504
  	unsigned long flags;
a39722034   Nick Piggin   [PATCH] page_upto...
505
  	struct buffer_head *first;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
506
507
508
509
510
511
512
513
514
  	struct buffer_head *tmp;
  	struct page *page;
  
  	BUG_ON(!buffer_async_write(bh));
  
  	page = bh->b_page;
  	if (uptodate) {
  		set_buffer_uptodate(bh);
  	} else {
08bafc034   Keith Mannthey   block: Supress Bu...
515
  		if (!quiet_error(bh)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
516
517
518
519
520
521
522
  			buffer_io_error(bh);
  			printk(KERN_WARNING "lost page write due to "
  					"I/O error on %s
  ",
  			       bdevname(bh->b_bdev, b));
  		}
  		set_bit(AS_EIO, &page->mapping->flags);
58ff407be   Jan Kara   [PATCH] Fix IO er...
523
  		set_buffer_write_io_error(bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
524
525
526
  		clear_buffer_uptodate(bh);
  		SetPageError(page);
  	}
a39722034   Nick Piggin   [PATCH] page_upto...
527
528
529
  	first = page_buffers(page);
  	local_irq_save(flags);
  	bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
530
531
532
533
534
535
536
537
538
539
  	clear_buffer_async_write(bh);
  	unlock_buffer(bh);
  	tmp = bh->b_this_page;
  	while (tmp != bh) {
  		if (buffer_async_write(tmp)) {
  			BUG_ON(!buffer_locked(tmp));
  			goto still_busy;
  		}
  		tmp = tmp->b_this_page;
  	}
a39722034   Nick Piggin   [PATCH] page_upto...
540
541
  	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  	local_irq_restore(flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
542
543
544
545
  	end_page_writeback(page);
  	return;
  
  still_busy:
a39722034   Nick Piggin   [PATCH] page_upto...
546
547
  	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  	local_irq_restore(flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
  	return;
  }
  
  /*
   * If a page's buffers are under async readin (end_buffer_async_read
   * completion) then there is a possibility that another thread of
   * control could lock one of the buffers after it has completed
   * but while some of the other buffers have not completed.  This
   * locked buffer would confuse end_buffer_async_read() into not unlocking
   * the page.  So the absence of BH_Async_Read tells end_buffer_async_read()
   * that this buffer is not under async I/O.
   *
   * The page comes unlocked when it has no locked buffer_async buffers
   * left.
   *
   * PageLocked prevents anyone starting new async I/O reads against any of
   * the buffers.
   *
   * PageWriteback is used to prevent simultaneous writeout of the same
   * page.
   *
   * PageLocked prevents anyone from starting writeback of a page which is
   * under read I/O (PageWriteback is only ever set against a locked page).
   */
  /* Route @bh's completion to end_buffer_async_read() and flag it async-read. */
  static void
  mark_buffer_async_read(struct buffer_head *bh)
  {
  	bh->b_end_io = end_buffer_async_read;

  	set_buffer_async_read(bh);
  }
  
  /* Route @bh's completion to end_buffer_async_write() and flag it async-write. */
  void
  mark_buffer_async_write(struct buffer_head *bh)
  {
  	bh->b_end_io = end_buffer_async_write;

  	set_buffer_async_write(bh);
  }
  EXPORT_SYMBOL(mark_buffer_async_write);
  
  
  /*
   * fs/buffer.c contains helper functions for buffer-backed address space's
   * fsync functions.  A common requirement for buffer-based filesystems is
   * that certain data from the backing blockdev needs to be written out for
   * a successful fsync().  For example, ext2 indirect blocks need to be
   * written back and waited upon before fsync() returns.
   *
   * The functions mark_buffer_inode_dirty(), fsync_inode_buffers(),
   * inode_has_buffers() and invalidate_inode_buffers() are provided for the
   * management of a list of dependent buffers at ->i_mapping->private_list.
   *
   * Locking is a little subtle: try_to_free_buffers() will remove buffers
   * from their controlling inode's queue when they are being freed.  But
   * try_to_free_buffers() will be operating against the *blockdev* mapping
   * at the time, not against the S_ISREG file which depends on those buffers.
   * So the locking for private_list is via the private_lock in the address_space
   * which backs the buffers.  Which is different from the address_space 
   * against which the buffers are listed.  So for a particular address_space,
   * mapping->private_lock does *not* protect mapping->private_list!  In fact,
   * mapping->private_list will always be protected by the backing blockdev's
   * ->private_lock.
   *
   * Which introduces a requirement: all buffers on an address_space's
   * ->private_list must be from the same address_space: the blockdev's.
   *
   * address_spaces which do not place buffers at ->private_list via these
   * utility functions are free to use private_lock and private_list for
   * whatever they want.  The only requirement is that list_empty(private_list)
   * be true at clear_inode() time.
   *
   * FIXME: clear_inode should not call invalidate_inode_buffers().  The
   * filesystems should do that.  invalidate_inode_buffers() should just go
   * BUG_ON(!list_empty).
   *
   * FIXME: mark_buffer_dirty_inode() is a data-plane operation.  It should
   * take an address_space, not an inode.  And it should be called
   * mark_buffer_dirty_fsync() to clearly define why those buffers are being
   * queued up.
   *
   * FIXME: mark_buffer_dirty_inode() doesn't need to add the buffer to the
   * list if it is already on a list.  Because if the buffer is on a list,
   * it *must* already be on the right one.  If not, the filesystem is being
   * silly.  This will save a ton of locking.  But first we have to ensure
   * that buffers are taken *off* the old inode's list when they are freed
   * (presumably in truncate).  That requires careful auditing of all
   * filesystems (do it inside bforget()).  It could also be done by bringing
   * b_inode back.
   */
  
  /*
   * The buffer's backing address_space's private_lock must be held
   */
dbacefc9c   Thomas Petazzoni   fs/buffer.c: unin...
638
  static void __remove_assoc_queue(struct buffer_head *bh)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
639
640
  {
  	list_del_init(&bh->b_assoc_buffers);
58ff407be   Jan Kara   [PATCH] Fix IO er...
641
642
643
644
  	WARN_ON(!bh->b_assoc_map);
  	if (buffer_write_io_error(bh))
  		set_bit(AS_EIO, &bh->b_assoc_map->flags);
  	bh->b_assoc_map = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
  }
  
  /*
   * Returns 1 when @inode's i_data.private_list holds at least one entry,
   * 0 when that list is empty.
   */
  int inode_has_buffers(struct inode *inode)
  {
  	if (list_empty(&inode->i_data.private_list))
  		return 0;

  	return 1;
  }
  
  /*
   * osync exists for O_SYNC io: it waits synchronously for IO that has
   * already been submitted to finish, and never queues new writes itself.
   *
   * For O_SYNC writes, queue the buffer writes with ll_rw_block as the
   * buffers are dirtied and then call osync_inode_buffers to wait for them.
   * Dirty buffers which have not been queued for write are not flushed by
   * the osync.
   *
   * @lock: spinlock guarding @list; dropped while waiting on a buffer
   * @list: list of buffers linked through b_assoc_buffers
   *
   * Returns 0, or -EIO if any buffer waited upon was not uptodate afterwards.
   */
  static int osync_buffers_list(spinlock_t *lock, struct list_head *list)
  {
  	struct list_head *pos;
  	int ret = 0;

  	spin_lock(lock);
  	for (;;) {
  		struct buffer_head *busy = NULL;

  		/* Scan from the tail for the first buffer that is still locked. */
  		list_for_each_prev(pos, list) {
  			struct buffer_head *cand = BH_ENTRY(pos);

  			if (buffer_locked(cand)) {
  				busy = cand;
  				break;
  			}
  		}
  		if (!busy)
  			break;

  		/* Pin the buffer so it survives while the lock is dropped. */
  		get_bh(busy);
  		spin_unlock(lock);
  		wait_on_buffer(busy);
  		if (!buffer_uptodate(busy))
  			ret = -EIO;
  		brelse(busy);

  		/* The list may have changed meanwhile: rescan from scratch. */
  		spin_lock(lock);
  	}
  	spin_unlock(lock);
  	return ret;
  }
  
  /**
78a4a50a8   Randy Dunlap   docbook: fix file...
688
   * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers
67be2dd1b   Martin Waitz   [PATCH] DocBook: ...
689
   * @mapping: the mapping which wants those buffers written
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
690
691
692
693
   *
   * Starts I/O against the buffers at mapping->private_list, and waits upon
   * that I/O.
   *
67be2dd1b   Martin Waitz   [PATCH] DocBook: ...
694
695
696
   * Basically, this is a convenience function for fsync().
   * @mapping is a file or directory which needs those buffers to be written for
   * a successful fsync().
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
   */
  int sync_mapping_buffers(struct address_space *mapping)
  {
  	struct address_space *backing = mapping->assoc_mapping;

  	/* No associated mapping recorded, or nothing queued: done. */
  	if (!backing)
  		return 0;
  	if (list_empty(&mapping->private_list))
  		return 0;

  	/* The list is walked under the associated mapping's private_lock. */
  	return fsync_buffers_list(&backing->private_lock,
  					&mapping->private_list);
  }
  EXPORT_SYMBOL(sync_mapping_buffers);
  
  /*
   * Invoked after block `bblock' has just been written and is known to have
   * been a buffer_boundary() buffer.  The block at `bblock + 1' is then
   * probably a dirty indirect block: look it up and, if it is dirty, start
   * IO on it so that indirects merge nicely with their data.
   */
  void write_boundary_block(struct block_device *bdev,
  			sector_t bblock, unsigned blocksize)
  {
  	struct buffer_head *next;

  	next = __find_get_block(bdev, bblock + 1, blocksize);
  	if (!next)
  		return;

  	if (buffer_dirty(next))
  		ll_rw_block(WRITE, 1, &next);
  	put_bh(next);
  }
  
  /*
   * Dirty @bh and queue it on @inode's mapping->private_list.
   *
   * The first call for a mapping records the buffer's page mapping as
   * mapping->assoc_mapping; later calls BUG if a buffer from a different
   * mapping is passed.  A buffer that already has b_assoc_map set is left
   * on whatever list it is on.
   */
  void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
  {
  	struct address_space *mapping = inode->i_mapping;
  	struct address_space *buffer_mapping = bh->b_page->mapping;

  	mark_buffer_dirty(bh);
  	if (!mapping->assoc_mapping) {
  		mapping->assoc_mapping = buffer_mapping;
  	} else {
  		BUG_ON(mapping->assoc_mapping != buffer_mapping);
  	}
  	if (!bh->b_assoc_map) {
  		/* private_list is guarded by the buffer mapping's lock. */
  		spin_lock(&buffer_mapping->private_lock);
  		list_move_tail(&bh->b_assoc_buffers,
  				&mapping->private_list);
  		bh->b_assoc_map = mapping;
  		spin_unlock(&buffer_mapping->private_lock);
  	}
  }
  EXPORT_SYMBOL(mark_buffer_dirty_inode);
  
  /*
787d2214c   Nick Piggin   fs: introduce som...
749
750
751
752
753
754
755
756
757
758
759
760
761
762
   * Mark the page dirty, and set it dirty in the radix tree, and mark the inode
   * dirty.
   *
   * If warn is true, then emit a warning if the page is not uptodate and has
   * not been truncated.
   */
  static int __set_page_dirty(struct page *page,
  		struct address_space *mapping, int warn)
  {
  	if (unlikely(!mapping))
  		return !TestSetPageDirty(page);
  
  	if (TestSetPageDirty(page))
  		return 0;
19fd62312   Nick Piggin   mm: spinlock tree...
763
  	spin_lock_irq(&mapping->tree_lock);
787d2214c   Nick Piggin   fs: introduce som...
764
765
766
767
768
  	if (page->mapping) {	/* Race with truncate? */
  		WARN_ON_ONCE(warn && !PageUptodate(page));
  
  		if (mapping_cap_account_dirty(mapping)) {
  			__inc_zone_page_state(page, NR_FILE_DIRTY);
c9e51e418   Peter Zijlstra   mm: count reclaim...
769
770
  			__inc_bdi_stat(mapping->backing_dev_info,
  					BDI_RECLAIMABLE);
1cf6e7d83   Nick Piggin   mm: task dirty ac...
771
  			task_dirty_inc(current);
787d2214c   Nick Piggin   fs: introduce som...
772
773
774
775
776
  			task_io_account_write(PAGE_CACHE_SIZE);
  		}
  		radix_tree_tag_set(&mapping->page_tree,
  				page_index(page), PAGECACHE_TAG_DIRTY);
  	}
19fd62312   Nick Piggin   mm: spinlock tree...
777
  	spin_unlock_irq(&mapping->tree_lock);
787d2214c   Nick Piggin   fs: introduce som...
778
779
780
781
782
783
  	__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
  
  	return 1;
  }
  
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
   * Add a page to the dirty page list.
   *
   * It is a sad fact of life that this function is called from several places
   * deeply under spinlocking.  It may not sleep.
   *
   * If the page has buffers, the uptodate buffers are set dirty, to preserve
   * dirty-state coherency between the page and the buffers.  It the page does
   * not have buffers then when they are later attached they will all be set
   * dirty.
   *
   * The buffers are dirtied before the page is dirtied.  There's a small race
   * window in which a writepage caller may see the page cleanness but not the
   * buffer dirtiness.  That's fine.  If this code were to set the page dirty
   * before the buffers, a concurrent writepage caller could clear the page dirty
   * bit, see a bunch of clean buffers and we'd end up with dirty buffers/clean
   * page on the dirty page list.
   *
   * We use private_lock to lock against try_to_free_buffers while using the
   * page's buffer list.  Also use this to protect against clean buffers being
   * added to the page after it was set dirty.
   *
   * FIXME: may need to call ->reservepage here as well.  That's rather up to the
   * address_space though.
   */
  int __set_page_dirty_buffers(struct page *page)
  {
787d2214c   Nick Piggin   fs: introduce som...
810
  	struct address_space *mapping = page_mapping(page);
ebf7a227d   Nick Piggin   [PATCH] mm: bug i...
811
812
813
  
  	if (unlikely(!mapping))
  		return !TestSetPageDirty(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
814
815
816
817
818
819
820
821
822
823
824
825
  
  	spin_lock(&mapping->private_lock);
  	if (page_has_buffers(page)) {
  		struct buffer_head *head = page_buffers(page);
  		struct buffer_head *bh = head;
  
  		do {
  			set_buffer_dirty(bh);
  			bh = bh->b_this_page;
  		} while (bh != head);
  	}
  	spin_unlock(&mapping->private_lock);
787d2214c   Nick Piggin   fs: introduce som...
826
  	return __set_page_dirty(page, mapping, 1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
  }
  EXPORT_SYMBOL(__set_page_dirty_buffers);
  
  /*
   * Write out and wait upon a list of buffers.
   *
   * We have conflicting pressures: we want to make sure that all
   * initially dirty buffers get waited on, but that any subsequently
   * dirtied buffers don't.  After all, we don't want fsync to last
   * forever if somebody is actively writing to the file.
   *
   * Do this in two main stages: first we move dirty or locked buffers to a
   * temporary list, queueing the writes as we go.  Then we clean up,
   * waiting for those writes to complete.
   *
   * During this second stage, any subsequent updates to the file may end
   * up refiling the buffer on the original inode's dirty list again, so
   * there is a chance we will end up with a buffer queued for write but
   * not yet completed on that list.  So, as a final cleanup we go through
   * the osync code to catch these locked, dirty buffers without requeuing
   * any newly dirty buffers for write.
   *
   * Returns -EIO if any waited-on buffer ended up not uptodate; otherwise
   * the result of osync_buffers_list().
   */
  static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
  {
  	struct list_head pending;
  	struct address_space *mapping;
  	struct buffer_head *bh;
  	int err = 0;
  	int osync_err;
  
  	INIT_LIST_HEAD(&pending);
  
  	/* Stage one: pull buffers off @list and start writeback. */
  	spin_lock(lock);
  	while (!list_empty(list)) {
  		bh = BH_ENTRY(list->next);
  		mapping = bh->b_assoc_map;
  		__remove_assoc_queue(bh);
  		/* Avoid race with mark_buffer_dirty_inode() which does
  		 * a lockless check and we rely on seeing the dirty bit */
  		smp_mb();
  		if (!buffer_dirty(bh) && !buffer_locked(bh))
  			continue;
  
  		list_add(&bh->b_assoc_buffers, &pending);
  		bh->b_assoc_map = mapping;
  		if (!buffer_dirty(bh))
  			continue;
  
  		get_bh(bh);
  		spin_unlock(lock);
  		/*
  		 * Ensure any pending I/O completes so that
  		 * ll_rw_block() actually writes the current
  		 * contents - it is a noop if I/O is still in
  		 * flight on potentially older contents.
  		 */
  		ll_rw_block(SWRITE_SYNC, 1, &bh);
  		brelse(bh);
  		spin_lock(lock);
  	}
  
  	/* Stage two: wait for each buffer collected above. */
  	while (!list_empty(&pending)) {
  		bh = BH_ENTRY(pending.prev);
  		get_bh(bh);
  		mapping = bh->b_assoc_map;
  		__remove_assoc_queue(bh);
  		/* Avoid race with mark_buffer_dirty_inode() which does
  		 * a lockless check and we rely on seeing the dirty bit */
  		smp_mb();
  		if (buffer_dirty(bh)) {
  			/* Redirtied meanwhile: put it back on its mapping. */
  			list_add(&bh->b_assoc_buffers, &mapping->private_list);
  			bh->b_assoc_map = mapping;
  		}
  		spin_unlock(lock);
  		wait_on_buffer(bh);
  		if (!buffer_uptodate(bh))
  			err = -EIO;
  		brelse(bh);
  		spin_lock(lock);
  	}
  
  	spin_unlock(lock);
  	osync_err = osync_buffers_list(lock, list);
  
  	/* An error from the waits above takes precedence. */
  	return err ? err : osync_err;
  }
  
  /*
   * Invalidate any and all dirty buffers on a given inode.  We are
   * probably unmounting the fs, but that doesn't mean we have already
   * done a sync().  Just drop the buffers from the inode list.
   *
   * NOTE: we take the inode's blockdev's mapping's private_lock.  Which
   * assumes that all the buffers are against the blockdev.  Not true
   * for reiserfs.
   */
  void invalidate_inode_buffers(struct inode *inode)
  {
  	struct address_space *mapping;
  	struct address_space *buffer_mapping;
  	struct list_head *assoc_list;
  
  	if (!inode_has_buffers(inode))
  		return;
  
  	mapping = &inode->i_data;
  	assoc_list = &mapping->private_list;
  	buffer_mapping = mapping->assoc_mapping;
  
  	spin_lock(&buffer_mapping->private_lock);
  	while (!list_empty(assoc_list))
  		__remove_assoc_queue(BH_ENTRY(assoc_list->next));
  	spin_unlock(&buffer_mapping->private_lock);
  }
  EXPORT_SYMBOL(invalidate_inode_buffers);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
  
  /*
   * Remove any clean buffers from the inode's buffer list.  This is called
   * when we're trying to free the inode itself.  Those buffers can pin it.
   *
   * Removal stops at the first dirty buffer found at the head of the list.
   *
   * Returns true if all buffers were removed.
   */
  int remove_inode_buffers(struct inode *inode)
  {
  	struct address_space *mapping;
  	struct address_space *buffer_mapping;
  	struct list_head *assoc_list;
  	int all_removed = 1;
  
  	if (!inode_has_buffers(inode))
  		return all_removed;
  
  	mapping = &inode->i_data;
  	assoc_list = &mapping->private_list;
  	buffer_mapping = mapping->assoc_mapping;
  
  	spin_lock(&buffer_mapping->private_lock);
  	while (!list_empty(assoc_list)) {
  		struct buffer_head *bh = BH_ENTRY(assoc_list->next);
  
  		if (buffer_dirty(bh)) {
  			all_removed = 0;
  			break;
  		}
  		__remove_assoc_queue(bh);
  	}
  	spin_unlock(&buffer_mapping->private_lock);
  
  	return all_removed;
  }
  
  /*
   * Create the appropriate buffers when given a page for data area and
   * the size of each buffer.  The buffers are chained through
   * bh->b_this_page, terminated by NULL; the returned head is the buffer
   * with the lowest offset into the page.  Return NULL if unable to create
   * more buffers.
   *
   * The retry flag is used to differentiate async IO (paging, swapping)
   * which may not fail from ordinary buffer allocations: when it is set,
   * this function keeps calling free_more_memory() and starting over until
   * the whole chain has been allocated.
   */
  struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
  		int retry)
  {
  	for (;;) {
  		struct buffer_head *head = NULL;
  		long offset = PAGE_SIZE;
  		int out_of_memory = 0;
  
  		/* Allocate from the end of the page back towards offset 0. */
  		while ((offset -= size) >= 0) {
  			struct buffer_head *bh = alloc_buffer_head(GFP_NOFS);
  
  			if (!bh) {
  				out_of_memory = 1;
  				break;
  			}
  
  			bh->b_bdev = NULL;
  			bh->b_blocknr = -1;
  			bh->b_state = 0;
  			atomic_set(&bh->b_count, 0);
  			bh->b_private = NULL;
  			bh->b_size = size;
  
  			/* Push onto the front of the chain built so far. */
  			bh->b_this_page = head;
  			head = bh;
  
  			/* Link the buffer to its page */
  			set_bh_page(bh, page, offset);
  			init_buffer(bh, NULL, NULL);
  		}
  		if (!out_of_memory)
  			return head;
  
  		/*
  		 * In case anything failed, we just free everything we got:
  		 * we don't want tasks sleeping with partially complete
  		 * buffers.
  		 */
  		while (head) {
  			struct buffer_head *victim = head;
  
  			head = head->b_this_page;
  			free_buffer_head(victim);
  		}
  
  		/*
  		 * Return failure for non-async IO requests.  Async IO
  		 * requests are not allowed to fail, so we have to wait
  		 * until buffer heads become available.
  		 */
  		if (!retry)
  			return NULL;
  
  		/*
  		 * We're _really_ low on memory.  Now we just wait for old
  		 * buffer heads to become free due to finishing IO, then
  		 * start over.
  		 */
  		free_more_memory();
  	}
  }
  EXPORT_SYMBOL_GPL(alloc_page_buffers);
  
  /*
   * Close the NULL-terminated b_this_page chain starting at @head into a
   * ring (last buffer points back at @head) and attach it to @page.
   */
  static inline void
  link_dev_buffers(struct page *page, struct buffer_head *head)
  {
  	struct buffer_head *last = head;
  
  	while (last->b_this_page)
  		last = last->b_this_page;
  	last->b_this_page = head;
  
  	attach_page_buffers(page, head);
  }
  
  /*
   * Initialise the state of a blockdev page's buffers.
   *
   * Walks the page's buffer ring; every buffer that is not yet mapped is
   * pointed at @bdev with consecutive block numbers starting from @block,
   * marked uptodate if the page itself is uptodate, and marked mapped.
   * Already-mapped buffers are left alone.  @size is not used here.
   */
  static void
  init_page_buffers(struct page *page, struct block_device *bdev,
  			sector_t block, int size)
  {
  	struct buffer_head *const first = page_buffers(page);
  	struct buffer_head *cur = first;
  	const int page_uptodate = PageUptodate(page);
  
  	for (;;) {
  		if (!buffer_mapped(cur)) {
  			init_buffer(cur, NULL, NULL);
  			cur->b_bdev = bdev;
  			cur->b_blocknr = block;
  			if (page_uptodate)
  				set_buffer_uptodate(cur);
  			set_buffer_mapped(cur);
  		}
  		block++;
  		cur = cur->b_this_page;
  		if (cur == first)
  			break;
  	}
  }
  
  /*
   * Create the page-cache page that contains the requested block.
   *
   * This is used purely for blockdev mappings.
   *
   * On success the page is returned still locked and with the reference
   * obtained from find_or_create_page(); the caller unlocks and releases it.
   * The page then carries buffers of @size bytes, initialised for @bdev
   * starting at @block.
   *
   * Returns NULL if the page could not be obtained, if existing buffers of a
   * different size could not be freed, or if new buffer heads could not be
   * allocated.  These are ordinary (if rare) failures, e.g. under memory
   * pressure, so the page is unlocked and released rather than crashing the
   * machine with BUG(); __getblk_slow() reacts to the failure by freeing
   * memory and retrying.
   */
  static struct page *
  grow_dev_page(struct block_device *bdev, sector_t block,
  		pgoff_t index, int size)
  {
  	struct inode *inode = bdev->bd_inode;
  	struct page *page;
  	struct buffer_head *bh;
  
  	page = find_or_create_page(inode->i_mapping, index,
  		(mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE);
  	if (!page)
  		return NULL;
  
  	BUG_ON(!PageLocked(page));
  
  	if (page_has_buffers(page)) {
  		bh = page_buffers(page);
  		if (bh->b_size == size) {
  			/* Right-sized buffers already exist: just (re)init. */
  			init_page_buffers(page, bdev, block, size);
  			return page;
  		}
  		/* Wrong-sized buffers must go before new ones are attached. */
  		if (!try_to_free_buffers(page))
  			goto failed;
  	}
  
  	/*
  	 * Allocate some buffers for this page
  	 */
  	bh = alloc_page_buffers(page, size, 0);
  	if (!bh)
  		goto failed;
  
  	/*
  	 * Link the page to the buffers and initialise them.  Take the
  	 * lock to be atomic wrt __find_get_block(), which does not
  	 * run under the page lock.
  	 */
  	spin_lock(&inode->i_mapping->private_lock);
  	link_dev_buffers(page, bh);
  	init_page_buffers(page, bdev, block, size);
  	spin_unlock(&inode->i_mapping->private_lock);
  	return page;
  
  failed:
  	unlock_page(page);
  	page_cache_release(page);
  	return NULL;
  }
  
  /*
   * Create buffers for the specified block device block's page.  If
   * that page was dirty, the buffers are set dirty also.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1132
   */
858119e15   Arjan van de Ven   [PATCH] Unlinline...
1133
  static int
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
  grow_buffers(struct block_device *bdev, sector_t block, int size)
  {
  	struct page *page;
  	pgoff_t index;
  	int sizebits;
  
  	sizebits = -1;
  	do {
  		sizebits++;
  	} while ((size << sizebits) < PAGE_SIZE);
  
  	index = block >> sizebits;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1146

e56579338   Andrew Morton   [PATCH] grow_buff...
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
  	/*
  	 * Check for a block which wants to lie outside our maximum possible
  	 * pagecache index.  (this comparison is done using sector_t types).
  	 */
  	if (unlikely(index != block >> sizebits)) {
  		char b[BDEVNAME_SIZE];
  
  		printk(KERN_ERR "%s: requested out-of-range block %llu for "
  			"device %s
  ",
8e24eea72   Harvey Harrison   fs: replace remai...
1157
  			__func__, (unsigned long long)block,
e56579338   Andrew Morton   [PATCH] grow_buff...
1158
1159
1160
1161
  			bdevname(bdev, b));
  		return -EIO;
  	}
  	block = index << sizebits;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1162
1163
1164
1165
1166
1167
1168
1169
  	/* Create a page with the proper size buffers.. */
  	page = grow_dev_page(bdev, block, index, size);
  	if (!page)
  		return 0;
  	unlock_page(page);
  	page_cache_release(page);
  	return 1;
  }
75c96f858   Adrian Bunk   [PATCH] make some...
1170
  static struct buffer_head *
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
  __getblk_slow(struct block_device *bdev, sector_t block, int size)
  {
  	/* Size must be multiple of hard sectorsize */
  	if (unlikely(size & (bdev_hardsect_size(bdev)-1) ||
  			(size < 512 || size > PAGE_SIZE))) {
  		printk(KERN_ERR "getblk(): invalid block size %d requested
  ",
  					size);
  		printk(KERN_ERR "hardsect size: %d
  ",
  					bdev_hardsect_size(bdev));
  
  		dump_stack();
  		return NULL;
  	}
  
  	for (;;) {
  		struct buffer_head * bh;
e56579338   Andrew Morton   [PATCH] grow_buff...
1189
  		int ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1190
1191
1192
1193
  
  		bh = __find_get_block(bdev, block, size);
  		if (bh)
  			return bh;
e56579338   Andrew Morton   [PATCH] grow_buff...
1194
1195
1196
1197
  		ret = grow_buffers(bdev, block, size);
  		if (ret < 0)
  			return NULL;
  		if (ret == 0)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
  			free_more_memory();
  	}
  }
  
  /*
   * The relationship between dirty buffers and dirty pages:
   *
   * Whenever a page has any dirty buffers, the page's dirty bit is set, and
   * the page is tagged dirty in its radix tree.
   *
   * At all times, the dirtiness of the buffers represents the dirtiness of
   * subsections of the page.  If the page has buffers, the page dirty bit is
   * merely a hint about the true dirty state.
   *
   * When a page is set dirty in its entirety, all its buffers are marked dirty
   * (if the page has buffers).
   *
   * When a buffer is marked dirty, its page is dirtied, but the page's other
   * buffers are not.
   *
   * Also.  When blockdev buffers are explicitly read with bread(), they
   * individually become uptodate.  But their backing page remains not
   * uptodate - even if all of its buffers are uptodate.  A subsequent
   * block_read_full_page() against that page will discover all the uptodate
   * buffers, will set the page uptodate and will perform no I/O.
   */
  
  /**
   * mark_buffer_dirty - mark a buffer_head as needing writeout
67be2dd1b   Martin Waitz   [PATCH] DocBook: ...
1227
   * @bh: the buffer_head to mark dirty
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1228
1229
1230
1231
1232
1233
1234
1235
1236
   *
   * mark_buffer_dirty() will set the dirty bit against the buffer, then set its
   * backing page dirty, then tag the page as dirty in its address_space's radix
   * tree and then attach the address_space's inode to its superblock's dirty
   * inode list.
   *
   * mark_buffer_dirty() is atomic.  It takes bh->b_page->mapping->private_lock,
   * mapping->tree_lock and the global inode_lock.
   */
fc9b52cd8   Harvey Harrison   fs: remove fastca...
1237
  void mark_buffer_dirty(struct buffer_head *bh)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1238
  {
787d2214c   Nick Piggin   fs: introduce som...
1239
  	WARN_ON_ONCE(!buffer_uptodate(bh));
1be62dc19   Linus Torvalds   Be more careful a...
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
  
  	/*
  	 * Very *carefully* optimize the it-is-already-dirty case.
  	 *
  	 * Don't let the final "is it dirty" escape to before we
  	 * perhaps modified the buffer.
  	 */
  	if (buffer_dirty(bh)) {
  		smp_mb();
  		if (buffer_dirty(bh))
  			return;
  	}
  
  	if (!test_set_buffer_dirty(bh))
787d2214c   Nick Piggin   fs: introduce som...
1254
  		__set_page_dirty(bh->b_page, page_mapping(bh->b_page), 0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
  }
  
  /*
   * Decrement a buffer_head's reference count.  If all buffers against a page
   * have zero reference count, are clean and unlocked, and if the page is clean
   * and unlocked then try_to_free_buffers() may strip the buffers from the page
   * in preparation for freeing it (sometimes, rarely, buffers are removed from
   * a page but it ends up not being freed, and buffers may later be reattached).
   */
  void __brelse(struct buffer_head * buf)
  {
  	if (atomic_read(&buf->b_count)) {
  		put_bh(buf);
  		return;
  	}
5c752ad9f   Arjan van de Ven   Use WARN() in fs/
1270
1271
  	WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer
  ");
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1272
1273
1274
1275
1276
1277
1278
1279
1280
  }
  
  /*
   * bforget() is like brelse(), except it discards any
   * potentially dirty data.
   */
  void __bforget(struct buffer_head *bh)
  {
  	clear_buffer_dirty(bh);
535ee2fbf   Jan Kara   buffer_head: fix ...
1281
  	if (bh->b_assoc_map) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1282
1283
1284
1285
  		struct address_space *buffer_mapping = bh->b_page->mapping;
  
  		spin_lock(&buffer_mapping->private_lock);
  		list_del_init(&bh->b_assoc_buffers);
58ff407be   Jan Kara   [PATCH] Fix IO er...
1286
  		bh->b_assoc_map = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
  		spin_unlock(&buffer_mapping->private_lock);
  	}
  	__brelse(bh);
  }
  
  /*
   * Read @bh synchronously unless it turned uptodate in the meantime.
   *
   * Returns @bh if it is uptodate afterwards.  If the read leaves the buffer
   * not uptodate, the caller's reference is dropped and NULL is returned.
   */
  static struct buffer_head *__bread_slow(struct buffer_head *bh)
  {
  	lock_buffer(bh);
  
  	/* Recheck under the buffer lock; someone may have read it already. */
  	if (buffer_uptodate(bh)) {
  		unlock_buffer(bh);
  		return bh;
  	}
  
  	get_bh(bh);
  	bh->b_end_io = end_buffer_read_sync;
  	submit_bh(READ, bh);
  	wait_on_buffer(bh);
  
  	if (!buffer_uptodate(bh)) {
  		brelse(bh);
  		return NULL;
  	}
  	return bh;
  }
  
  /*
   * Per-cpu buffer LRU implementation.  To reduce the cost of __find_get_block().
   * The bhs[] array is sorted - newest buffer is at bhs[0].  Buffers have their
   * refcount elevated by one when they're in an LRU.  A buffer can only appear
   * once in a particular CPU's LRU.  A single buffer can be present in multiple
   * CPU's LRUs at the same time.
   *
   * This is a transparent caching front-end to sb_bread(), sb_getblk() and
   * sb_find_get_block().
   *
   * The LRUs themselves only need locking against invalidate_bh_lrus.  We use
   * a local interrupt disable for that.
   */
  
  /* Number of buffer_head slots in each CPU's LRU. */
  #define BH_LRU_SIZE	8
  
  /* One CPU's LRU: bhs[0] is the most recently used entry, NULL = empty slot. */
  struct bh_lru {
  	struct buffer_head *bhs[BH_LRU_SIZE];
  };
  
  /* The per-cpu LRU instances, all slots initially empty. */
  static DEFINE_PER_CPU(struct bh_lru, bh_lrus) = {{ NULL }};
  
  /*
   * Protection for the current CPU's LRU while it is read or rearranged:
   * local interrupts are disabled on SMP builds, preemption is disabled
   * otherwise.
   */
  #ifdef CONFIG_SMP
  #define bh_lru_lock()	local_irq_disable()
  #define bh_lru_unlock()	local_irq_enable()
  #else
  #define bh_lru_lock()	preempt_disable()
  #define bh_lru_unlock()	preempt_enable()
  #endif
  
  /*
   * Sanity check for the LRU entry points: BUG if interrupts are currently
   * disabled.  The check is compiled in only when irqs_disabled is available
   * as a macro.
   */
  static inline void check_irqs_on(void)
  {
  #if defined(irqs_disabled)
  	BUG_ON(irqs_disabled());
  #endif
  }
  
  /*
   * Put @bh at the front of this CPU's LRU.
   *
   * The LRU management algorithm is dopey-but-simple.  Sorry.  A new array
   * is built with @bh first, followed by the old entries in order; an old
   * slot that already held @bh is dropped, and if the array overflows, the
   * last old entry is evicted.  The evictee's reference is released only
   * after the LRU lock has been dropped.
   */
  static void bh_lru_install(struct buffer_head *bh)
  {
  	struct buffer_head *reordered[BH_LRU_SIZE];
  	struct buffer_head *evictee = NULL;
  	struct bh_lru *lru;
  	int src;
  	int dst;
  
  	check_irqs_on();
  	bh_lru_lock();
  	lru = &__get_cpu_var(bh_lrus);
  	if (lru->bhs[0] == bh)
  		goto unlock;		/* already the newest entry */
  
  	get_bh(bh);			/* the LRU holds its own reference */
  	reordered[0] = bh;
  	dst = 1;
  	for (src = 0; src < BH_LRU_SIZE; src++) {
  		struct buffer_head *old = lru->bhs[src];
  
  		if (old == bh) {
  			/* Drop the reference held by the stale slot. */
  			__brelse(old);
  		} else if (dst < BH_LRU_SIZE) {
  			reordered[dst++] = old;
  		} else {
  			/* At most one entry can fall off the end. */
  			BUG_ON(evictee != NULL);
  			evictee = old;
  		}
  	}
  	for (; dst < BH_LRU_SIZE; dst++)
  		reordered[dst] = NULL;
  	memcpy(lru->bhs, reordered, sizeof(reordered));
  unlock:
  	bh_lru_unlock();
  
  	if (evictee)
  		__brelse(evictee);
  }
  
  /*
   * Look up the bh in this cpu's LRU.  If it's there, move it to the head.
   */
858119e15   Arjan van de Ven   [PATCH] Unlinline...
1392
  static struct buffer_head *
3991d3bd1   Tomasz Kvarsin   [PATCH] warning f...
1393
  lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1394
1395
1396
  {
  	struct buffer_head *ret = NULL;
  	struct bh_lru *lru;
3991d3bd1   Tomasz Kvarsin   [PATCH] warning f...
1397
  	unsigned int i;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
  
  	check_irqs_on();
  	bh_lru_lock();
  	lru = &__get_cpu_var(bh_lrus);
  	for (i = 0; i < BH_LRU_SIZE; i++) {
  		struct buffer_head *bh = lru->bhs[i];
  
  		if (bh && bh->b_bdev == bdev &&
  				bh->b_blocknr == block && bh->b_size == size) {
  			if (i) {
  				while (i) {
  					lru->bhs[i] = lru->bhs[i - 1];
  					i--;
  				}
  				lru->bhs[0] = bh;
  			}
  			get_bh(bh);
  			ret = bh;
  			break;
  		}
  	}
  	bh_lru_unlock();
  	return ret;
  }
  
  /*
   * Perform a pagecache lookup for the matching buffer.  If it's there, refresh
   * it in the LRU and mark it as accessed.  If it is not present then return
   * NULL
   */
  struct buffer_head *
3991d3bd1   Tomasz Kvarsin   [PATCH] warning f...
1429
  __find_get_block(struct block_device *bdev, sector_t block, unsigned size)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1430
1431
1432
1433
  {
  	struct buffer_head *bh = lookup_bh_lru(bdev, block, size);
  
  	if (bh == NULL) {
385fd4c59   Coywolf Qi Hunt   [PATCH] __find_ge...
1434
  		bh = __find_get_block_slow(bdev, block);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
  		if (bh)
  			bh_lru_install(bh);
  	}
  	if (bh)
  		touch_buffer(bh);
  	return bh;
  }
  EXPORT_SYMBOL(__find_get_block);
  
  /*
   * __getblk will locate (and, if necessary, create) the buffer_head
   * which corresponds to the passed block_device, block and size. The
   * returned buffer has its reference count incremented.
   *
   * __getblk() cannot fail - it just keeps trying.  If you pass it an
   * illegal block number, __getblk() will happily return a buffer_head
   * which represents the non-existent block.  Very weird.
   *
   * __getblk() will lock up the machine if grow_dev_page's try_to_free_buffers()
   * attempt is failing.  FIXME, perhaps?
   */
  struct buffer_head *
3991d3bd1   Tomasz Kvarsin   [PATCH] warning f...
1457
  __getblk(struct block_device *bdev, sector_t block, unsigned size)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
  {
  	struct buffer_head *bh = __find_get_block(bdev, block, size);
  
  	might_sleep();
  	if (bh == NULL)
  		bh = __getblk_slow(bdev, block, size);
  	return bh;
  }
  EXPORT_SYMBOL(__getblk);
  
  /*
   * Do async read-ahead on a buffer..
   */
3991d3bd1   Tomasz Kvarsin   [PATCH] warning f...
1471
  void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1472
1473
  {
  	struct buffer_head *bh = __getblk(bdev, block, size);
a3e713b5f   Andrew Morton   [PATCH] __bread o...
1474
1475
1476
1477
  	if (likely(bh)) {
  		ll_rw_block(READA, 1, &bh);
  		brelse(bh);
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1478
1479
1480
1481
1482
  }
  EXPORT_SYMBOL(__breadahead);
  
  /**
   *  __bread() - reads a specified block and returns the bh
67be2dd1b   Martin Waitz   [PATCH] DocBook: ...
1483
   *  @bdev: the block_device to read from
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1484
1485
1486
1487
1488
1489
1490
   *  @block: number of block
   *  @size: size (in bytes) to read
   * 
   *  Reads a specified block, and returns buffer head that contains it.
   *  It returns NULL if the block was unreadable.
   */
  struct buffer_head *
3991d3bd1   Tomasz Kvarsin   [PATCH] warning f...
1491
  __bread(struct block_device *bdev, sector_t block, unsigned size)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1492
1493
  {
  	struct buffer_head *bh = __getblk(bdev, block, size);
a3e713b5f   Andrew Morton   [PATCH] __bread o...
1494
  	if (likely(bh) && !buffer_uptodate(bh))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
  		bh = __bread_slow(bh);
  	return bh;
  }
  EXPORT_SYMBOL(__bread);
  
  /*
   * invalidate_bh_lrus() is called rarely - but not only at unmount.
   * This doesn't race because it runs in each cpu either in irq
   * or with preempt disabled.
   *
   * Per-cpu worker: release every buffer held in this CPU's LRU and empty
   * the slots.  @arg is unused.
   */
  static void invalidate_bh_lru(void *arg)
  {
  	struct bh_lru *lru = &get_cpu_var(bh_lrus);
  	struct buffer_head **slot;
  
  	for (slot = lru->bhs; slot < lru->bhs + BH_LRU_SIZE; slot++) {
  		brelse(*slot);
  		*slot = NULL;
  	}
  	put_cpu_var(bh_lrus);
  }
  
  /*
   * Empty the buffer_head LRU of every CPU by running invalidate_bh_lru()
   * on each of them.
   */
  void invalidate_bh_lrus(void)
  {
  	on_each_cpu(invalidate_bh_lru, NULL, 1);
  }
  EXPORT_SYMBOL_GPL(invalidate_bh_lrus);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1522
1523
1524
1525
1526
  
  void set_bh_page(struct buffer_head *bh,
  		struct page *page, unsigned long offset)
  {
  	bh->b_page = page;
e827f9235   Eric Sesterhenn   BUG_ON() Conversi...
1527
  	BUG_ON(offset >= PAGE_SIZE);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
  	if (PageHighMem(page))
  		/*
  		 * This catches illegal uses and preserves the offset:
  		 */
  		bh->b_data = (char *)(0 + offset);
  	else
  		bh->b_data = page_address(page) + offset;
  }
  EXPORT_SYMBOL(set_bh_page);
  
  /*
   * Called when truncating a buffer on a page completely.
   */
858119e15   Arjan van de Ven   [PATCH] Unlinline...
1541
  static void discard_buffer(struct buffer_head * bh)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1542
1543
1544
1545
1546
1547
1548
1549
  {
  	lock_buffer(bh);
  	clear_buffer_dirty(bh);
  	bh->b_bdev = NULL;
  	clear_buffer_mapped(bh);
  	clear_buffer_req(bh);
  	clear_buffer_new(bh);
  	clear_buffer_delay(bh);
33a266dda   David Chinner   [PATCH] Make BH_U...
1550
  	clear_buffer_unwritten(bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1551
1552
1553
1554
  	unlock_buffer(bh);
  }
  
  /**
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
   * block_invalidatepage - invalidate part of all of a buffer-backed page
   *
   * @page: the page which is affected
   * @offset: the index of the truncation point
   *
   * block_invalidatepage() is called when all or part of the page has become
   * invalidatedby a truncate operation.
   *
   * block_invalidatepage() does not have to release all buffers, but it must
   * ensure that no dirty buffer is left outside @offset and that no I/O
   * is underway against any of the blocks which are outside the truncation
   * point.  Because the caller is about to free (and possibly reuse) those
   * blocks on-disk.
   */
2ff28e22b   NeilBrown   [PATCH] Make addr...
1569
  void block_invalidatepage(struct page *page, unsigned long offset)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1570
1571
1572
  {
  	struct buffer_head *head, *bh, *next;
  	unsigned int curr_off = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
  
  	BUG_ON(!PageLocked(page));
  	if (!page_has_buffers(page))
  		goto out;
  
  	head = page_buffers(page);
  	bh = head;
  	do {
  		unsigned int next_off = curr_off + bh->b_size;
  		next = bh->b_this_page;
  
  		/*
  		 * is this block fully invalidated?
  		 */
  		if (offset <= curr_off)
  			discard_buffer(bh);
  		curr_off = next_off;
  		bh = next;
  	} while (bh != head);
  
  	/*
  	 * We release buffers only if the entire page is being invalidated.
  	 * The get_block cached value has been unconditionally invalidated,
  	 * so real IO is not possible anymore.
  	 */
  	if (offset == 0)
2ff28e22b   NeilBrown   [PATCH] Make addr...
1599
  		try_to_release_page(page, 0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1600
  out:
2ff28e22b   NeilBrown   [PATCH] Make addr...
1601
  	return;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
  }
  EXPORT_SYMBOL(block_invalidatepage);
  
  /*
   * We attach and possibly dirty the buffers atomically wrt
   * __set_page_dirty_buffers() via private_lock.  try_to_free_buffers
   * is already excluded via the page lock.
   */
  void create_empty_buffers(struct page *page,
  			unsigned long blocksize, unsigned long b_state)
  {
  	struct buffer_head *bh, *head, *tail;
  
  	head = alloc_page_buffers(page, blocksize, 1);
  	bh = head;
  	do {
  		bh->b_state |= b_state;
  		tail = bh;
  		bh = bh->b_this_page;
  	} while (bh);
  	tail->b_this_page = head;
  
  	spin_lock(&page->mapping->private_lock);
  	if (PageUptodate(page) || PageDirty(page)) {
  		bh = head;
  		do {
  			if (PageDirty(page))
  				set_buffer_dirty(bh);
  			if (PageUptodate(page))
  				set_buffer_uptodate(bh);
  			bh = bh->b_this_page;
  		} while (bh != head);
  	}
  	attach_page_buffers(page, head);
  	spin_unlock(&page->mapping->private_lock);
  }
  EXPORT_SYMBOL(create_empty_buffers);
  
  /*
   * We are taking a block for data and we don't want any output from any
   * buffer-cache aliases starting from return from that function and
   * until the moment when something will explicitly mark the buffer
   * dirty (hopefully that will not happen until we will free that block ;-)
   * We don't even need to mark it not-uptodate - nobody can expect
   * anything from a newly allocated buffer anyway. We used to use
   * unmap_buffer() for such invalidation, but that was wrong. We definitely
   * don't want to mark the alias unmapped, for example - it would confuse
   * anyone who might pick it with bread() afterwards...
   *
   * Also..  Note that bforget() doesn't lock the buffer.  So there can
   * be writeout I/O going on against recently-freed buffers.  We don't
   * wait on that I/O in bforget() - it's more efficient to wait on the I/O
   * only if we really need to.  That happens here.
   *
   * May sleep.  Does nothing when no buffer for (@bdev, @block) is found.
   */
  void unmap_underlying_metadata(struct block_device *bdev, sector_t block)
  {
  	struct buffer_head *old_bh;
  
  	might_sleep();
  
  	old_bh = __find_get_block_slow(bdev, block);
  	if (!old_bh)
  		return;
  
  	/* Clean the alias, wait for the buffer, then clear its req bit. */
  	clear_buffer_dirty(old_bh);
  	wait_on_buffer(old_bh);
  	clear_buffer_req(old_bh);
  	/* Release the buffer obtained from the lookup above. */
  	__brelse(old_bh);
  }
  EXPORT_SYMBOL(unmap_underlying_metadata);
  
  /*
   * NOTE! All mapped/uptodate combinations are valid:
   *
   *	Mapped	Uptodate	Meaning
   *
   *	No	No		"unknown" - must do get_block()
   *	No	Yes		"hole" - zero-filled
   *	Yes	No		"allocated" - allocated on disk, not read in
   *	Yes	Yes		"valid" - allocated and up-to-date in memory.
   *
   * "Dirty" is valid only with the last case (mapped+uptodate).
   */
  
  /*
   * While block_write_full_page is writing back the dirty buffers under
   * the page lock, whoever dirtied the buffers may decide to clean them
   * again at any time.  We handle that by only looking at the buffer
   * state inside lock_buffer().
   *
   * If block_write_full_page() is called for regular writeback
   * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a
   * locked buffer.   This only can happen if someone has written the buffer
   * directly, with submit_bh().  At the address_space level PageWriteback
   * prevents this contention from occurring.
   *
   * @inode:     inode owning @page; supplies i_size and i_blkbits
   * @page:      locked page to write; it is unlocked before returning
   * @get_block: used to map dirty buffers that are unmapped or delayed
   * @wbc:       writeback control; sync_mode and nonblocking decide whether
   *             buffer locks are waited for or only tried
   *
   * Returns 0 on success, or the error returned by @get_block.
   */
  static int __block_write_full_page(struct inode *inode, struct page *page,
  			get_block_t *get_block, struct writeback_control *wbc)
  {
  	int err;
  	sector_t block;
  	sector_t last_block;
  	struct buffer_head *bh, *head;
  	const unsigned blocksize = 1 << inode->i_blkbits;
  	int nr_underway = 0;
  
  	BUG_ON(!PageLocked(page));
  
  	/* Index of the last block that lies inside i_size. */
  	last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
  
  	if (!page_has_buffers(page)) {
  		create_empty_buffers(page, blocksize,
  					(1 << BH_Dirty)|(1 << BH_Uptodate));
  	}
  
  	/*
  	 * Be very careful.  We have no exclusion from __set_page_dirty_buffers
  	 * here, and the (potentially unmapped) buffers may become dirty at
  	 * any time.  If a buffer becomes dirty here after we've inspected it
  	 * then we just miss that fact, and the page stays dirty.
  	 *
  	 * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
  	 * handle that here by just cleaning them.
  	 */
  	block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
  	head = page_buffers(page);
  	bh = head;
  
  	/*
  	 * Get all the dirty buffers mapped to disk addresses and
  	 * handle any aliases from the underlying blockdev's mapping.
  	 */
  	do {
  		if (block > last_block) {
  			/*
  			 * mapped buffers outside i_size will occur, because
  			 * this page can be outside i_size when there is a
  			 * truncate in progress.
  			 */
  			/*
  			 * The buffer was zeroed by block_write_full_page()
  			 */
  			clear_buffer_dirty(bh);
  			set_buffer_uptodate(bh);
  		} else if ((!buffer_mapped(bh) || buffer_delay(bh)) &&
  			   buffer_dirty(bh)) {
  			WARN_ON(bh->b_size != blocksize);
  			err = get_block(inode, block, bh, 1);
  			if (err)
  				goto recover;
  			clear_buffer_delay(bh);
  			if (buffer_new(bh)) {
  				/* blockdev mappings never come here */
  				clear_buffer_new(bh);
  				unmap_underlying_metadata(bh->b_bdev,
  							bh->b_blocknr);
  			}
  		}
  		bh = bh->b_this_page;
  		block++;
  	} while (bh != head);
  
  	/* bh == head again here: lock and tag the buffers to be written. */
  	do {
  		if (!buffer_mapped(bh))
  			continue;
  		/*
  		 * If it's a fully non-blocking write attempt and we cannot
  		 * lock the buffer then redirty the page.  Note that this can
  		 * potentially cause a busy-wait loop from pdflush and kswapd
  		 * activity, but those code paths have their own higher-level
  		 * throttling.
  		 */
  		if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
  			lock_buffer(bh);
  		} else if (!trylock_buffer(bh)) {
  			redirty_page_for_writepage(wbc, page);
  			continue;
  		}
  		if (test_clear_buffer_dirty(bh)) {
  			mark_buffer_async_write(bh);
  		} else {
  			unlock_buffer(bh);
  		}
  	} while ((bh = bh->b_this_page) != head);
  
  	/*
  	 * The page and its buffers are protected by PageWriteback(), so we can
  	 * drop the bh refcounts early.
  	 */
  	BUG_ON(PageWriteback(page));
  	set_page_writeback(page);
  
  	do {
  		/* Read the successor before the buffer is submitted. */
  		struct buffer_head *next = bh->b_this_page;
  		if (buffer_async_write(bh)) {
  			submit_bh(WRITE, bh);
  			nr_underway++;
  		}
  		bh = next;
  	} while (bh != head);
  	unlock_page(page);
  
  	err = 0;
  done:
  	if (nr_underway == 0) {
  		/*
  		 * The page was marked dirty, but the buffers were
  		 * clean.  Someone wrote them back by hand with
  		 * ll_rw_block/submit_bh.  A rare case.
  		 */
  		end_page_writeback(page);
  
  		/*
  		 * The page and buffer_heads can be released at any time from
  		 * here on.
  		 */
  	}
  	return err;
  
  recover:
  	/*
  	 * ENOSPC, or some other error.  We may already have added some
  	 * blocks to the file, so we need to write these out to avoid
  	 * exposing stale data.
  	 * The page is currently locked and not marked for writeback
  	 */
  	bh = head;
  	/* Recovery: lock and submit the mapped buffers */
  	do {
  		if (buffer_mapped(bh) && buffer_dirty(bh) &&
  		    !buffer_delay(bh)) {
  			lock_buffer(bh);
  			mark_buffer_async_write(bh);
  		} else {
  			/*
  			 * The buffer may have been set dirty during
  			 * attachment to a dirty page.
  			 */
  			clear_buffer_dirty(bh);
  		}
  	} while ((bh = bh->b_this_page) != head);
  	SetPageError(page);
  	BUG_ON(PageWriteback(page));
  	/* Record the get_block error on the mapping as well as the page. */
  	mapping_set_error(page->mapping, err);
  	set_page_writeback(page);
  	do {
  		struct buffer_head *next = bh->b_this_page;
  		if (buffer_async_write(bh)) {
  			clear_buffer_dirty(bh);
  			submit_bh(WRITE, bh);
  			nr_underway++;
  		}
  		bh = next;
  	} while (bh != head);
  	unlock_page(page);
  	goto done;
  }
afddba49d   Nick Piggin   fs: introduce wri...
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
  /*
   * If a page has any new buffers, zero them out here, and mark them uptodate
   * and dirty so they'll be written out (in order to prevent uninitialised
   * block data from leaking). And clear the new bit.
   *
   * Only buffers overlapping the byte range [@from, @to) of the page are
   * touched.  The page must be locked; a page without buffers is ignored.
   */
  void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
  {
  	unsigned int block_start, block_end;
  	struct buffer_head *head, *bh;
  
  	BUG_ON(!PageLocked(page));
  	if (!page_has_buffers(page))
  		return;
  
  	bh = head = page_buffers(page);
  	block_start = 0;
  	do {
  		block_end = block_start + bh->b_size;
  
  		if (buffer_new(bh)) {
  			if (block_end > from && block_start < to) {
  				/*
  				 * Zero only when the page is not uptodate,
  				 * and only the part inside [from, to).
  				 */
  				if (!PageUptodate(page)) {
  					unsigned start, size;
  
  					start = max(from, block_start);
  					size = min(to, block_end) - start;
  
  					zero_user(page, start, size);
  					set_buffer_uptodate(bh);
  				}
  
  				clear_buffer_new(bh);
  				mark_buffer_dirty(bh);
  			}
  		}
  
  		block_start = block_end;
  		bh = bh->b_this_page;
  	} while (bh != head);
  }
  EXPORT_SYMBOL(page_zero_new_buffers);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
  /*
   * Prepare the buffers of a locked @page for a write to the byte range
   * [@from, @to): attach buffers if the page has none, map unmapped buffers
   * in the range with @get_block (create == 1), zero the parts of newly
   * allocated blocks that fall outside the range, and read in blocks that
   * are only partially covered and not yet uptodate.
   *
   * Returns 0 on success, the error from @get_block, or -EIO when one of
   * the reads issued here did not bring its buffer uptodate.  On error,
   * new buffers in the range are zeroed via page_zero_new_buffers().
   */
  static int __block_prepare_write(struct inode *inode, struct page *page,
  		unsigned from, unsigned to, get_block_t *get_block)
  {
  	unsigned block_start, block_end;
  	sector_t block;
  	int err = 0;
  	unsigned blocksize, bbits;
  	/* wait[] collects the buffers for which a read is issued below. */
  	struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;
  
  	BUG_ON(!PageLocked(page));
  	BUG_ON(from > PAGE_CACHE_SIZE);
  	BUG_ON(to > PAGE_CACHE_SIZE);
  	BUG_ON(from > to);
  
  	blocksize = 1 << inode->i_blkbits;
  	if (!page_has_buffers(page))
  		create_empty_buffers(page, blocksize, 0);
  	head = page_buffers(page);
  
  	bbits = inode->i_blkbits;
  	block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
  
  	/*
  	 * Walk the buffer ring once.  "!block_start" lets the first
  	 * iteration run even though bh == head at that point.
  	 */
  	for(bh = head, block_start = 0; bh != head || !block_start;
  	    block++, block_start=block_end, bh = bh->b_this_page) {
  		block_end = block_start + blocksize;
  		/* Buffer entirely outside the write range. */
  		if (block_end <= from || block_start >= to) {
  			if (PageUptodate(page)) {
  				if (!buffer_uptodate(bh))
  					set_buffer_uptodate(bh);
  			}
  			continue;
  		}
  		if (buffer_new(bh))
  			clear_buffer_new(bh);
  		if (!buffer_mapped(bh)) {
  			WARN_ON(bh->b_size != blocksize);
  			err = get_block(inode, block, bh, 1);
  			if (err)
  				break;
  			if (buffer_new(bh)) {
  				unmap_underlying_metadata(bh->b_bdev,
  							bh->b_blocknr);
  				if (PageUptodate(page)) {
  					clear_buffer_new(bh);
  					set_buffer_uptodate(bh);
  					mark_buffer_dirty(bh);
  					continue;
  				}
  				/*
  				 * Zero the parts of the new block that lie
  				 * outside [from, to).
  				 */
  				if (block_end > to || block_start < from)
  					zero_user_segments(page,
  						to, block_end,
  						block_start, from);
  				continue;
  			}
  		}
  		if (PageUptodate(page)) {
  			if (!buffer_uptodate(bh))
  				set_buffer_uptodate(bh);
  			continue;
  		}
  		/* Partially overwritten block with no valid contents yet. */
  		if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
  		    !buffer_unwritten(bh) &&
  		     (block_start < from || block_end > to)) {
  			ll_rw_block(READ, 1, &bh);
  			*wait_bh++=bh;
  		}
  	}
  	/*
  	 * If we issued read requests - let them complete.
  	 */
  	while(wait_bh > wait) {
  		wait_on_buffer(*--wait_bh);
  		if (!buffer_uptodate(*wait_bh))
  			err = -EIO;
  	}
  	if (unlikely(err))
  		page_zero_new_buffers(page, from, to);
  	return err;
  }
  
  /*
   * Commit a write to the byte range [@from, @to) of @page: buffers that
   * overlap the range are marked uptodate and dirty, the new bit is cleared
   * on every buffer, and the page is marked uptodate when no buffer outside
   * the range is found to be not uptodate.
   *
   * Always returns 0.
   */
  static int __block_commit_write(struct inode *inode, struct page *page,
  		unsigned from, unsigned to)
  {
  	unsigned block_start, block_end;
  	int partial = 0;
  	unsigned blocksize;
  	struct buffer_head *bh, *head;
  
  	blocksize = 1 << inode->i_blkbits;
  
  	/*
  	 * Walk the buffer ring once.  "!block_start" lets the first
  	 * iteration run even though bh == head at that point.
  	 */
  	for(bh = head = page_buffers(page), block_start = 0;
  	    bh != head || !block_start;
  	    block_start=block_end, bh = bh->b_this_page) {
  		block_end = block_start + blocksize;
  		if (block_end <= from || block_start >= to) {
  			if (!buffer_uptodate(bh))
  				partial = 1;
  		} else {
  			set_buffer_uptodate(bh);
  			mark_buffer_dirty(bh);
  		}
  		clear_buffer_new(bh);
  	}
  
  	/*
  	 * If this is a partial write which happened to make all buffers
  	 * uptodate then we can optimize away a bogus readpage() for
  	 * the next read(). Here we 'discover' whether the page went
  	 * uptodate as a result of this (potentially partial) write.
  	 */
  	if (!partial)
  		SetPageUptodate(page);
  	return 0;
  }
  
  /*
   * block_write_begin takes care of the basic task of block allocation and
   * bringing partial write blocks uptodate first.
   *
   * If *pagep is not NULL, then block_write_begin uses the locked page
   * at *pagep rather than allocating its own. In this case, the page will
   * not be unlocked or deallocated on failure.
   *
   * Returns 0 on success, -ENOMEM if no page could be obtained, or the
   * error from __block_prepare_write().  When a page grabbed here has to be
   * given up because of a failure, *pagep is reset to NULL.
   */
  int block_write_begin(struct file *file, struct address_space *mapping,
  			loff_t pos, unsigned len, unsigned flags,
  			struct page **pagep, void **fsdata,
  			get_block_t *get_block)
  {
  	struct inode *inode = mapping->host;
  	int status = 0;
  	struct page *page;
  	pgoff_t index;
  	unsigned start, end;
  	int ownpage = 0;	/* set when the page was grabbed here */
  
  	/* Split pos into a page index and a byte range inside that page. */
  	index = pos >> PAGE_CACHE_SHIFT;
  	start = pos & (PAGE_CACHE_SIZE - 1);
  	end = start + len;
  
  	page = *pagep;
  	if (page == NULL) {
  		ownpage = 1;
  		page = grab_cache_page_write_begin(mapping, index, flags);
  		if (!page) {
  			status = -ENOMEM;
  			goto out;
  		}
  		*pagep = page;
  	} else
  		BUG_ON(!PageLocked(page));
  
  	status = __block_prepare_write(inode, page, start, end, get_block);
  	if (unlikely(status)) {
  		ClearPageUptodate(page);
  
  		if (ownpage) {
  			unlock_page(page);
  			page_cache_release(page);
  			*pagep = NULL;
  
  			/*
  			 * prepare_write() may have instantiated a few blocks
  			 * outside i_size.  Trim these off again. Don't need
  			 * i_size_read because we hold i_mutex.
  			 */
  			if (pos + len > inode->i_size)
  				vmtruncate(inode, inode->i_size);
  		}
  	}
  
  out:
  	return status;
  }
  EXPORT_SYMBOL(block_write_begin);
  
  /*
   * Finish a write of @len bytes at @pos, of which @copied bytes were
   * actually copied into @page.  Returns the number of bytes committed,
   * which is 0 for a short copy into a page that is not uptodate.
   */
  int block_write_end(struct file *file, struct address_space *mapping,
  			loff_t pos, unsigned len, unsigned copied,
  			struct page *page, void *fsdata)
  {
  	struct inode *host = mapping->host;
  	unsigned page_off = pos & (PAGE_CACHE_SIZE - 1);
  
  	if (unlikely(copied < len)) {
  		/*
  		 * Short copy.  Buffers that were completely written are
  		 * uptodate and safe from a later readpage, but a buffer that
  		 * was only partly written is not marked uptodate, so a
  		 * readpage could overwrite the partial data.
  		 *
  		 * Keep it simple: on a page that is not uptodate, treat the
  		 * short write as a zero-length one and let the caller retry
  		 * the whole thing.
  		 */
  		copied = PageUptodate(page) ? copied : 0;
  
  		page_zero_new_buffers(page, page_off + copied,
  					page_off + len);
  	}
  	flush_dcache_page(page);
  
  	/* The committed range may be short, or even empty. */
  	__block_commit_write(host, page, page_off, page_off + copied);
  
  	return copied;
  }
  EXPORT_SYMBOL(block_write_end);
  
  /*
   * Complete a write through block_write_end(), extend i_size when the
   * write went past it, then unlock and release @page.  The inode is
   * marked dirty only after the page lock has been dropped.
   *
   * Returns the number of bytes committed, as reported by
   * block_write_end().
   */
  int generic_write_end(struct file *file, struct address_space *mapping,
  			loff_t pos, unsigned len, unsigned copied,
  			struct page *page, void *fsdata)
  {
  	struct inode *inode = mapping->host;
  	int i_size_changed = 0;
  
  	copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
  
  	/*
  	 * No need to use i_size_read() here, the i_size
  	 * cannot change under us because we hold i_mutex.
  	 *
  	 * But it's important to update i_size while still holding page lock:
  	 * page writeout could otherwise come in and zero beyond i_size.
  	 */
  	if (pos+copied > inode->i_size) {
  		i_size_write(inode, pos+copied);
  		i_size_changed = 1;
  	}
  
  	unlock_page(page);
  	page_cache_release(page);
  
  	/*
  	 * Don't mark the inode dirty under page lock. First, it unnecessarily
  	 * makes the holding time of page lock longer. Second, it forces lock
  	 * ordering of page lock and transaction start for journaling
  	 * filesystems.
  	 */
  	if (i_size_changed)
  		mark_inode_dirty(inode);
  
  	return copied;
  }
  EXPORT_SYMBOL(generic_write_end);
  
  /*
   * block_is_partially_uptodate - report whether every buffer covering a
   * read range inside @page is uptodate.
   *
   * The range starts at byte @from of the page and runs for desc->count
   * bytes, clipped to the end of the page.
   *
   * Returns 1 if all buffers overlapping the range are uptodate.  Returns 0
   * if any of them is not, if the page has no buffers, or if the range
   * starts inside the first block and ends inside the last one.
   */
  int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
  					unsigned long from)
  {
  	struct inode *inode = page->mapping->host;
  	unsigned blk_lo, blk_hi, blocksize;
  	unsigned to;
  	struct buffer_head *cur, *first;
  
  	if (!page_has_buffers(page))
  		return 0;
  
  	blocksize = 1 << inode->i_blkbits;
  
  	/* End of the range, clipped to the end of the page. */
  	to = from + min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count);
  	if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize)
  		return 0;
  
  	first = page_buffers(page);
  	cur = first;
  	blk_lo = 0;
  	do {
  		blk_hi = blk_lo + blocksize;
  		if (blk_hi > from && blk_lo < to) {
  			if (!buffer_uptodate(cur))
  				return 0;
  			/* This buffer reaches the end of the range. */
  			if (blk_hi >= to)
  				return 1;
  		}
  		blk_lo = blk_hi;
  		cur = cur->b_this_page;
  	} while (cur != first);
  
  	return 1;
  }
  EXPORT_SYMBOL(block_is_partially_uptodate);
  
  /*
   * Generic "read page" function for block devices that have the normal
   * get_block functionality. This is most of the block device filesystems.
   * Reads the page asynchronously --- the unlock_buffer() and
   * set/clear_buffer_uptodate() functions propagate buffer state into the
   * page struct once IO has completed.
   *
   * The page must be locked by the caller.  Always returns 0; a failing
   * @get_block is reported by setting the page's error flag.
   */
  int block_read_full_page(struct page *page, get_block_t *get_block)
  {
  	struct inode *inode = page->mapping->host;
  	sector_t iblock, lblock;
  	/* arr[] collects the buffers that need a read from disk. */
  	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
  	unsigned int blocksize;
  	int nr, i;
  	int fully_mapped = 1;
  
  	BUG_ON(!PageLocked(page));
  	blocksize = 1 << inode->i_blkbits;
  	if (!page_has_buffers(page))
  		create_empty_buffers(page, blocksize, 0);
  	head = page_buffers(page);
  
  	/* iblock: first block of this page; lblock: i_size rounded up. */
  	iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
  	lblock = (i_size_read(inode)+blocksize-1) >> inode->i_blkbits;
  	bh = head;
  	nr = 0;
  	i = 0;
  
  	/* Stage one: map the buffers and pick those that need reading. */
  	do {
  		if (buffer_uptodate(bh))
  			continue;
  
  		if (!buffer_mapped(bh)) {
  			int err = 0;
  
  			fully_mapped = 0;
  			if (iblock < lblock) {
  				WARN_ON(bh->b_size != blocksize);
  				err = get_block(inode, iblock, bh, 0);
  				if (err)
  					SetPageError(page);
  			}
  			if (!buffer_mapped(bh)) {
  				/* Still unmapped: hand back zeroes. */
  				zero_user(page, i * blocksize, blocksize);
  				if (!err)
  					set_buffer_uptodate(bh);
  				continue;
  			}
  			/*
  			 * get_block() might have updated the buffer
  			 * synchronously
  			 */
  			if (buffer_uptodate(bh))
  				continue;
  		}
  		arr[nr++] = bh;
  	} while (i++, iblock++, (bh = bh->b_this_page) != head);
  
  	if (fully_mapped)
  		SetPageMappedToDisk(page);
  
  	if (!nr) {
  		/*
  		 * All buffers are uptodate - we can set the page uptodate
  		 * as well. But not if get_block() returned an error.
  		 */
  		if (!PageError(page))
  			SetPageUptodate(page);
  		unlock_page(page);
  		return 0;
  	}
  
  	/* Stage two: lock the buffers */
  	for (i = 0; i < nr; i++) {
  		bh = arr[i];
  		lock_buffer(bh);
  		mark_buffer_async_read(bh);
  	}
  
  	/*
  	 * Stage 3: start the IO.  Check for uptodateness
  	 * inside the buffer lock in case another process reading
  	 * the underlying blockdev brought it uptodate (the sct fix).
  	 */
  	for (i = 0; i < nr; i++) {
  		bh = arr[i];
  		if (buffer_uptodate(bh))
  			end_buffer_async_read(bh, 1);
  		else
  			submit_bh(READ, bh);
  	}
  	return 0;
  }
  
  /* utility function for filesystems that need to do work on expanding
89e107877   Nick Piggin   fs: new cont helpers
2282
   * truncates.  Uses filesystem pagecache writes to allow the filesystem to
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2283
2284
   * deal with the hole.  
   */
89e107877   Nick Piggin   fs: new cont helpers
2285
  int generic_cont_expand_simple(struct inode *inode, loff_t size)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2286
2287
2288
  {
  	struct address_space *mapping = inode->i_mapping;
  	struct page *page;
89e107877   Nick Piggin   fs: new cont helpers
2289
  	void *fsdata;
05eb0b51f   OGAWA Hirofumi   [PATCH] fat: supp...
2290
  	unsigned long limit;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
  	int err;
  
  	err = -EFBIG;
          limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
  	if (limit != RLIM_INFINITY && size > (loff_t)limit) {
  		send_sig(SIGXFSZ, current, 0);
  		goto out;
  	}
  	if (size > inode->i_sb->s_maxbytes)
  		goto out;
89e107877   Nick Piggin   fs: new cont helpers
2301
2302
2303
2304
  	err = pagecache_write_begin(NULL, mapping, size, 0,
  				AOP_FLAG_UNINTERRUPTIBLE|AOP_FLAG_CONT_EXPAND,
  				&page, &fsdata);
  	if (err)
05eb0b51f   OGAWA Hirofumi   [PATCH] fat: supp...
2305
  		goto out;
05eb0b51f   OGAWA Hirofumi   [PATCH] fat: supp...
2306

89e107877   Nick Piggin   fs: new cont helpers
2307
2308
  	err = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata);
  	BUG_ON(err > 0);
05eb0b51f   OGAWA Hirofumi   [PATCH] fat: supp...
2309

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2310
2311
2312
  out:
  	return err;
  }
f1e3af72c   Adrian Bunk   make fs/buffer.c:...
2313
2314
  /*
   * Zero the page cache between the offset stored in *@bytes and @pos, one
   * page at a time, using pagecache_write_begin()/pagecache_write_end().
   *
   * *@bytes is re-read on every loop iteration; apart from the round-up to
   * a block boundary done here, this function does not advance it.
   * NOTE(review): presumably the filesystem's write path moves *@bytes
   * forward as pages are written - confirm against the callers.
   *
   * Returns 0 on success or a negative error from the pagecache write
   * helpers.
   */
  static int cont_expand_zero(struct file *file, struct address_space *mapping,
  			    loff_t pos, loff_t *bytes)
  {
  	struct inode *inode = mapping->host;
  	unsigned blocksize = 1 << inode->i_blkbits;
  	struct page *page;
  	void *fsdata;
  	pgoff_t index, curidx;
  	loff_t curpos;
  	unsigned zerofrom, offset, len;
  	int err = 0;
  
  	/* Page index and in-page offset of the target position. */
  	index = pos >> PAGE_CACHE_SHIFT;
  	offset = pos & ~PAGE_CACHE_MASK;
  
  	/* Whole pages that lie before the page containing pos. */
  	while (index > (curidx = (curpos = *bytes)>>PAGE_CACHE_SHIFT)) {
  		zerofrom = curpos & ~PAGE_CACHE_MASK;
  		/* Round *bytes up to the next block boundary. */
  		if (zerofrom & (blocksize-1)) {
  			*bytes |= (blocksize-1);
  			(*bytes)++;
  		}
  		len = PAGE_CACHE_SIZE - zerofrom;
  
  		err = pagecache_write_begin(file, mapping, curpos, len,
  						AOP_FLAG_UNINTERRUPTIBLE,
  						&page, &fsdata);
  		if (err)
  			goto out;
  		zero_user(page, zerofrom, len);
  		err = pagecache_write_end(file, mapping, curpos, len, len,
  						page, fsdata);
  		if (err < 0)
  			goto out;
  		BUG_ON(err != len);
  		err = 0;
  
  		balance_dirty_pages_ratelimited(mapping);
  	}
  
  	/* page covers the boundary, find the boundary offset */
  	if (index == curidx) {
  		zerofrom = curpos & ~PAGE_CACHE_MASK;
  		/* if we will expand the thing last block will be filled */
  		if (offset <= zerofrom) {
  			goto out;
  		}
  		if (zerofrom & (blocksize-1)) {
  			*bytes |= (blocksize-1);
  			(*bytes)++;
  		}
  		len = offset - zerofrom;
  
  		err = pagecache_write_begin(file, mapping, curpos, len,
  						AOP_FLAG_UNINTERRUPTIBLE,
  						&page, &fsdata);
  		if (err)
  			goto out;
  		zero_user(page, zerofrom, len);
  		err = pagecache_write_end(file, mapping, curpos, len, len,
  						page, fsdata);
  		if (err < 0)
  			goto out;
  		BUG_ON(err != len);
  		err = 0;
  	}
  out:
  	return err;
  }
  
  /*
   * For moronic filesystems that do not allow holes in file.
   * We may have to extend the file.
   */
  int cont_write_begin(struct file *file, struct address_space *mapping,
  			loff_t pos, unsigned len, unsigned flags,
  			struct page **pagep, void **fsdata,
  			get_block_t *get_block, loff_t *bytes)
  {
  	/* Low bits selecting a byte within one filesystem block. */
  	const unsigned blkmask = (1 << mapping->host->i_blkbits) - 1;
  	unsigned tail_off;
  	int rc;

  	/* Fill the gap between *bytes and pos before doing the real write. */
  	rc = cont_expand_zero(file, mapping, pos, bytes);
  	if (rc)
  		return rc;

  	/*
  	 * If this write runs past *bytes and *bytes sits in the middle of
  	 * a block, advance *bytes to the start of the next block.
  	 */
  	tail_off = *bytes & ~PAGE_CACHE_MASK;
  	if ((tail_off & blkmask) && pos + len > *bytes)
  		*bytes = (*bytes | blkmask) + 1;

  	*pagep = NULL;
  	return block_write_begin(file, mapping, pos, len,
  				flags, pagep, fsdata, get_block);
  }
  
  /*
   * Run __block_prepare_write() on [from, to) of @page using the inode that
   * owns the page.  On failure the page's uptodate flag is cleared and the
   * error code is returned; on success 0 is returned.
   */
  int block_prepare_write(struct page *page, unsigned from, unsigned to,
  			get_block_t *get_block)
  {
  	int rc;

  	rc = __block_prepare_write(page->mapping->host, page, from, to,
  					get_block);
  	if (rc != 0)
  		ClearPageUptodate(page);

  	return rc;
  }
  
  /*
   * Commit the range [from, to) of @page through __block_commit_write().
   * The helper's result is not propagated: this always returns 0.
   */
  int block_commit_write(struct page *page, unsigned from, unsigned to)
  {
  	__block_commit_write(page->mapping->host, page, from, to);

  	return 0;
  }
541716902   David Chinner   [FS] Implement bl...
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
  /*
   * block_page_mkwrite() is not allowed to change the file size as it gets
   * called from a page fault handler when a page is first dirtied. Hence we must
   * be careful to check for EOF conditions here. We set the page up correctly
   * for a written page which means we get ENOSPC checking when writing into
   * holes and correct delalloc and unwritten extent mapping on filesystems that
   * support these features.
   *
   * We are not allowed to take the i_mutex here so we have to play games to
   * protect against truncate races as the page could now be beyond EOF.  Because
   * vmtruncate() writes the inode size before removing pages, once we have the
   * page lock we can determine safely if the page is beyond EOF. If it is not
   * beyond EOF, then the page is guaranteed safe against truncation until we
   * unlock the page.
   */
  int
  block_page_mkwrite(struct vm_area_struct *vma, struct page *page,
  		   get_block_t get_block)
  {
  	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
  	unsigned long end;
  	loff_t size;
  	int ret = -EINVAL;

  	/*
  	 * i_size is sampled only after the page lock is held; see the
  	 * comment above this function for why that ordering matters.
  	 */
  	lock_page(page);
  	size = i_size_read(inode);
  	if ((page->mapping != inode->i_mapping) ||
  	    (page_offset(page) > size)) {
  		/* page got truncated out from underneath us */
  		goto out_unlock;
  	}

  	/*
  	 * page is wholly or partially inside EOF.
  	 *
  	 * The end-of-page position is computed in loff_t.  Shifting
  	 * page->index (an unsigned long) directly would wrap on 32-bit
  	 * builds once the page lies at or beyond 4GB, making a page that
  	 * straddles EOF look as if it were entirely inside the file.
  	 */
  	if (page_offset(page) + (loff_t)PAGE_CACHE_SIZE > size)
  		end = size & ~PAGE_CACHE_MASK;
  	else
  		end = PAGE_CACHE_SIZE;

  	/* Prepare and commit [0, end) exactly as a regular write would. */
  	ret = block_prepare_write(page, 0, end, get_block);
  	if (!ret)
  		ret = block_commit_write(page, 0, end);

  out_unlock:
  	unlock_page(page);
  	return ret;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2475
2476
  
  /*
03158cd7e   Nick Piggin   fs: restore nobh
2477
   * nobh_write_begin()'s prereads are special: the buffer_heads are freed
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2478
2479
   * immediately, while under the page lock.  So it needs a special end_io
   * handler which does not touch the bh after unlocking it.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2480
2481
2482
   */
  static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
  {
  	/* All of the completion work lives in the shared notouch helper. */
  	__end_buffer_read_notouch(bh, uptodate);
  }
  
  /*
03158cd7e   Nick Piggin   fs: restore nobh
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
   * Attach the singly-linked list of buffers created by nobh_write_begin, to
   * the page (converting it to circular linked list and taking care of page
   * dirty races).
   */
  static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
  {
  	struct buffer_head *bh;
  
  	BUG_ON(!PageLocked(page));
  
  	spin_lock(&page->mapping->private_lock);
  	bh = head;
  	do {
  		if (PageDirty(page))
  			set_buffer_dirty(bh);
  		if (!bh->b_this_page)
  			bh->b_this_page = head;
  		bh = bh->b_this_page;
  	} while (bh != head);
  	attach_page_buffers(page, head);
  	spin_unlock(&page->mapping->private_lock);
  }
  
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2511
2512
2513
   * On entry, the page is fully not uptodate.
   * On exit the page is fully uptodate in the areas outside (from,to)
   */
03158cd7e   Nick Piggin   fs: restore nobh
2514
2515
2516
  int nobh_write_begin(struct file *file, struct address_space *mapping,
  			loff_t pos, unsigned len, unsigned flags,
  			struct page **pagep, void **fsdata,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2517
2518
  			get_block_t *get_block)
  {
03158cd7e   Nick Piggin   fs: restore nobh
2519
  	struct inode *inode = mapping->host;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2520
2521
  	const unsigned blkbits = inode->i_blkbits;
  	const unsigned blocksize = 1 << blkbits;
a4b0672db   Nick Piggin   fs: fix nobh erro...
2522
  	struct buffer_head *head, *bh;
03158cd7e   Nick Piggin   fs: restore nobh
2523
2524
2525
  	struct page *page;
  	pgoff_t index;
  	unsigned from, to;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2526
  	unsigned block_in_page;
a4b0672db   Nick Piggin   fs: fix nobh erro...
2527
  	unsigned block_start, block_end;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2528
  	sector_t block_in_file;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2529
  	int nr_reads = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2530
2531
  	int ret = 0;
  	int is_mapped_to_disk = 1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2532

03158cd7e   Nick Piggin   fs: restore nobh
2533
2534
2535
  	index = pos >> PAGE_CACHE_SHIFT;
  	from = pos & (PAGE_CACHE_SIZE - 1);
  	to = from + len;
54566b2c1   Nick Piggin   fs: symlink write...
2536
  	page = grab_cache_page_write_begin(mapping, index, flags);
03158cd7e   Nick Piggin   fs: restore nobh
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
  	if (!page)
  		return -ENOMEM;
  	*pagep = page;
  	*fsdata = NULL;
  
  	if (page_has_buffers(page)) {
  		unlock_page(page);
  		page_cache_release(page);
  		*pagep = NULL;
  		return block_write_begin(file, mapping, pos, len, flags, pagep,
  					fsdata, get_block);
  	}
a4b0672db   Nick Piggin   fs: fix nobh erro...
2549

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2550
2551
  	if (PageMappedToDisk(page))
  		return 0;
a4b0672db   Nick Piggin   fs: fix nobh erro...
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
  	/*
  	 * Allocate buffers so that we can keep track of state, and potentially
  	 * attach them to the page if an error occurs. In the common case of
  	 * no error, they will just be freed again without ever being attached
  	 * to the page (which is all OK, because we're under the page lock).
  	 *
  	 * Be careful: the buffer linked list is a NULL terminated one, rather
  	 * than the circular one we're used to.
  	 */
  	head = alloc_page_buffers(page, blocksize, 0);
03158cd7e   Nick Piggin   fs: restore nobh
2562
2563
2564
2565
  	if (!head) {
  		ret = -ENOMEM;
  		goto out_release;
  	}
a4b0672db   Nick Piggin   fs: fix nobh erro...
2566

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2567
  	block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2568
2569
2570
2571
2572
2573
  
  	/*
  	 * We loop across all blocks in the page, whether or not they are
  	 * part of the affected region.  This is so we can discover if the
  	 * page is fully mapped-to-disk.
  	 */
a4b0672db   Nick Piggin   fs: fix nobh erro...
2574
  	for (block_start = 0, block_in_page = 0, bh = head;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2575
  		  block_start < PAGE_CACHE_SIZE;
a4b0672db   Nick Piggin   fs: fix nobh erro...
2576
  		  block_in_page++, block_start += blocksize, bh = bh->b_this_page) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2577
  		int create;
a4b0672db   Nick Piggin   fs: fix nobh erro...
2578
2579
  		block_end = block_start + blocksize;
  		bh->b_state = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2580
2581
2582
2583
  		create = 1;
  		if (block_start >= to)
  			create = 0;
  		ret = get_block(inode, block_in_file + block_in_page,
a4b0672db   Nick Piggin   fs: fix nobh erro...
2584
  					bh, create);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2585
2586
  		if (ret)
  			goto failed;
a4b0672db   Nick Piggin   fs: fix nobh erro...
2587
  		if (!buffer_mapped(bh))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2588
  			is_mapped_to_disk = 0;
a4b0672db   Nick Piggin   fs: fix nobh erro...
2589
2590
2591
2592
  		if (buffer_new(bh))
  			unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
  		if (PageUptodate(page)) {
  			set_buffer_uptodate(bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2593
  			continue;
a4b0672db   Nick Piggin   fs: fix nobh erro...
2594
2595
  		}
  		if (buffer_new(bh) || !buffer_mapped(bh)) {
eebd2aa35   Christoph Lameter   Pagecache zeroing...
2596
2597
  			zero_user_segments(page, block_start, from,
  							to, block_end);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2598
2599
  			continue;
  		}
a4b0672db   Nick Piggin   fs: fix nobh erro...
2600
  		if (buffer_uptodate(bh))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2601
2602
  			continue;	/* reiserfs does this */
  		if (block_start < from || block_end > to) {
a4b0672db   Nick Piggin   fs: fix nobh erro...
2603
2604
2605
2606
  			lock_buffer(bh);
  			bh->b_end_io = end_buffer_read_nobh;
  			submit_bh(READ, bh);
  			nr_reads++;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2607
2608
2609
2610
  		}
  	}
  
  	if (nr_reads) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2611
2612
2613
2614
2615
  		/*
  		 * The page is locked, so these buffers are protected from
  		 * any VM or truncate activity.  Hence we don't need to care
  		 * for the buffer_head refcounts.
  		 */
a4b0672db   Nick Piggin   fs: fix nobh erro...
2616
  		for (bh = head; bh; bh = bh->b_this_page) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2617
2618
2619
  			wait_on_buffer(bh);
  			if (!buffer_uptodate(bh))
  				ret = -EIO;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2620
2621
2622
2623
2624
2625
2626
  		}
  		if (ret)
  			goto failed;
  	}
  
  	if (is_mapped_to_disk)
  		SetPageMappedToDisk(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2627

03158cd7e   Nick Piggin   fs: restore nobh
2628
  	*fsdata = head; /* to be released by nobh_write_end */
a4b0672db   Nick Piggin   fs: fix nobh erro...
2629

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2630
2631
2632
  	return 0;
  
  failed:
03158cd7e   Nick Piggin   fs: restore nobh
2633
  	BUG_ON(!ret);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2634
  	/*
a4b0672db   Nick Piggin   fs: fix nobh erro...
2635
2636
2637
2638
2639
  	 * Error recovery is a bit difficult. We need to zero out blocks that
  	 * were newly allocated, and dirty them to ensure they get written out.
  	 * Buffers need to be attached to the page at this point, otherwise
  	 * the handling of potential IO errors during writeout would be hard
  	 * (could try doing synchronous writeout, but what if that fails too?)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2640
  	 */
03158cd7e   Nick Piggin   fs: restore nobh
2641
2642
  	attach_nobh_buffers(page, head);
  	page_zero_new_buffers(page, from, to);
a4b0672db   Nick Piggin   fs: fix nobh erro...
2643

03158cd7e   Nick Piggin   fs: restore nobh
2644
2645
2646
2647
  out_release:
  	unlock_page(page);
  	page_cache_release(page);
  	*pagep = NULL;
a4b0672db   Nick Piggin   fs: fix nobh erro...
2648

03158cd7e   Nick Piggin   fs: restore nobh
2649
2650
  	if (pos + len > inode->i_size)
  		vmtruncate(inode, inode->i_size);
a4b0672db   Nick Piggin   fs: fix nobh erro...
2651

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2652
2653
  	return ret;
  }
03158cd7e   Nick Piggin   fs: restore nobh
2654
  EXPORT_SYMBOL(nobh_write_begin);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2655

03158cd7e   Nick Piggin   fs: restore nobh
2656
2657
2658
  /*
   * Finish a write started by nobh_write_begin().  @fsdata is the buffer list
   * stored by nobh_write_begin() (or NULL).  Returns @copied, unless the page
   * has buffers, in which case generic_write_end() produces the result.
   */
  int nobh_write_end(struct file *file, struct address_space *mapping,
  			loff_t pos, unsigned len, unsigned copied,
  			struct page *page, void *fsdata)
  {
  	struct inode *inode = page->mapping->host;
  	struct buffer_head *pending = fsdata;
  	struct buffer_head *next;

  	BUG_ON(fsdata != NULL && page_has_buffers(page));

  	/* Short copy: attach the buffers so the buffered path takes over. */
  	if (unlikely(copied < len) && pending)
  		attach_nobh_buffers(page, pending);

  	if (page_has_buffers(page))
  		return generic_write_end(file, mapping, pos, len,
  					copied, page, fsdata);

  	SetPageUptodate(page);
  	set_page_dirty(page);

  	/* Grow i_size when the write went past the current end of file. */
  	if (pos + copied > inode->i_size) {
  		i_size_write(inode, pos + copied);
  		mark_inode_dirty(inode);
  	}

  	unlock_page(page);
  	page_cache_release(page);

  	/* The never-attached buffers are no longer needed: free the list. */
  	for (; pending != NULL; pending = next) {
  		next = pending->b_this_page;
  		free_buffer_head(pending);
  	}

  	return copied;
  }
03158cd7e   Nick Piggin   fs: restore nobh
2688
  EXPORT_SYMBOL(nobh_write_end);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
  
  /*
   * nobh_writepage() - based on block_full_write_page() except
   * that it tries to operate without attaching bufferheads to
   * the page.
   */
  int nobh_writepage(struct page *page, get_block_t *get_block,
  			struct writeback_control *wbc)
  {
  	struct inode * const inode = page->mapping->host;
  	const loff_t i_size = i_size_read(inode);
  	const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
  	unsigned offset;
  	int ret;

  	/* Pages fully inside i_size need no special treatment. */
  	if (page->index >= end_index) {
  		offset = i_size & (PAGE_CACHE_SIZE-1);

  		/* Is the page fully outside i_size? (truncate in progress) */
  		if (page->index >= end_index+1 || !offset) {
  			/*
  			 * The page may have dirty, unmapped buffers.  For
  			 * example, they may have been added in
  			 * ext3_writepage().  Make them freeable here, so the
  			 * page does not leak.
  			 */
  #if 0
  			/* Not really sure about this  - do we need this ? */
  			if (page->mapping->a_ops->invalidatepage)
  				page->mapping->a_ops->invalidatepage(page, offset);
  #endif
  			unlock_page(page);
  			return 0; /* don't care */
  		}

  		/*
  		 * The page straddles i_size.  It must be zeroed out on each
  		 * and every writepage invocation because it may be mmapped.
  		 * "A file is mapped in multiples of the page size.  For a
  		 * file that is not a multiple of the  page size, the
  		 * remaining memory is zeroed when mapped, and writes to that
  		 * region are not written out to the file."
  		 */
  		zero_user_segment(page, offset, PAGE_CACHE_SIZE);
  	}

  	/* Try the mpage path first; on -EAGAIN use the buffer-based path. */
  	ret = mpage_writepage(page, get_block, wbc);
  	if (ret != -EAGAIN)
  		return ret;

  	return __block_write_full_page(inode, page, get_block, wbc);
  }
  EXPORT_SYMBOL(nobh_writepage);
03158cd7e   Nick Piggin   fs: restore nobh
2740
2741
  /*
   * Zero the tail of the block containing byte offset @from, without
   * attaching buffer_heads to the page when that can be avoided.
   *
   * Returns 0 when @from is block aligned, when the block is a hole, or after
   * the tail was zeroed and the page dirtied; otherwise a negative errno
   * (-ENOMEM, -EIO, or whatever get_block()/->readpage() returned).  If the
   * page has (or gains) buffers, the work is passed to block_truncate_page().
   */
  int nobh_truncate_page(struct address_space *mapping,
  			loff_t from, get_block_t *get_block)
  {
  	pgoff_t index = from >> PAGE_CACHE_SHIFT;
  	unsigned offset = from & (PAGE_CACHE_SIZE-1);
  	unsigned blocksize;
  	sector_t iblock;
  	unsigned length, pos;
  	struct inode *inode = mapping->host;
  	struct page *page;
  	struct buffer_head map_bh;
  	int err;

  	blocksize = 1 << inode->i_blkbits;
  	length = offset & (blocksize - 1);

  	/* Block boundary? Nothing to do */
  	if (!length)
  		return 0;

  	/* Bytes from @from up to the end of its block. */
  	length = blocksize - length;
  	iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);

  	page = grab_cache_page(mapping, index);
  	err = -ENOMEM;
  	if (!page)
  		goto out;

  	if (page_has_buffers(page)) {
  has_buffers:
  		unlock_page(page);
  		page_cache_release(page);
  		return block_truncate_page(mapping, from, get_block);
  	}

  	/* Find the buffer that contains "offset" */
  	pos = blocksize;
  	while (offset >= pos) {
  		iblock++;
  		pos += blocksize;
  	}

  	/*
  	 * map_bh lives on the stack, so give get_block() a defined state
  	 * and size to work with.  Otherwise stale stack contents could be
  	 * read back by buffer_mapped() below when get_block() leaves the
  	 * buffer unmapped.
  	 */
  	map_bh.b_size = blocksize;
  	map_bh.b_state = 0;
  	err = get_block(inode, iblock, &map_bh, 0);
  	if (err)
  		goto unlock;
  	/* unmapped? It's a hole - nothing to do */
  	if (!buffer_mapped(&map_bh))
  		goto unlock;

  	/* Ok, it's mapped. Make sure it's up-to-date */
  	if (!PageUptodate(page)) {
  		err = mapping->a_ops->readpage(NULL, page);
  		if (err) {
  			/* Only the reference is dropped on this path. */
  			page_cache_release(page);
  			goto out;
  		}
  		lock_page(page);
  		if (!PageUptodate(page)) {
  			err = -EIO;
  			goto unlock;
  		}
  		/* Buffers may have been attached while the page was read. */
  		if (page_has_buffers(page))
  			goto has_buffers;
  	}
  	zero_user(page, offset, length);
  	set_page_dirty(page);
  	err = 0;

  unlock:
  	unlock_page(page);
  	page_cache_release(page);
  out:
  	return err;
  }
  EXPORT_SYMBOL(nobh_truncate_page);
  
  /*
   * Zero the tail of the block containing byte offset @from, using the
   * page's buffer_heads (created here if the page has none).  Returns 0 when
   * @from is block aligned, when the block is a hole, or after the tail was
   * zeroed and the buffer dirtied; otherwise a negative errno.
   */
  int block_truncate_page(struct address_space *mapping,
  			loff_t from, get_block_t *get_block)
  {
  	struct inode *inode = mapping->host;
  	const pgoff_t index = from >> PAGE_CACHE_SHIFT;
  	const unsigned offset = from & (PAGE_CACHE_SIZE-1);
  	const unsigned blocksize = 1 << inode->i_blkbits;
  	const unsigned partial = offset & (blocksize - 1);
  	unsigned length, limit;
  	sector_t iblock;
  	struct buffer_head *bh;
  	struct page *page;
  	int err;

  	/* Block boundary? Nothing to do */
  	if (partial == 0)
  		return 0;

  	length = blocksize - partial;
  	iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);

  	page = grab_cache_page(mapping, index);
  	if (page == NULL)
  		return -ENOMEM;

  	if (!page_has_buffers(page))
  		create_empty_buffers(page, blocksize, 0);

  	/* Walk to the buffer that contains "offset" */
  	bh = page_buffers(page);
  	for (limit = blocksize; offset >= limit; limit += blocksize) {
  		bh = bh->b_this_page;
  		iblock++;
  	}

  	err = 0;
  	if (!buffer_mapped(bh)) {
  		WARN_ON(bh->b_size != blocksize);
  		err = get_block(inode, iblock, bh, 0);
  		/* Lookup failure, or still unmapped (a hole): we are done. */
  		if (err || !buffer_mapped(bh))
  			goto unlock;
  	}

  	/* Ok, it's mapped. Make sure it's up-to-date */
  	if (PageUptodate(page))
  		set_buffer_uptodate(bh);

  	if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) {
  		ll_rw_block(READ, 1, &bh);
  		wait_on_buffer(bh);
  		/* Uhhuh. Read error. Complain and punt. */
  		if (!buffer_uptodate(bh)) {
  			err = -EIO;
  			goto unlock;
  		}
  	}

  	zero_user(page, offset, length);
  	mark_buffer_dirty(bh);
  	err = 0;

  unlock:
  	unlock_page(page);
  	page_cache_release(page);
  	return err;
  }
  
  /*
   * The generic ->writepage function for buffer-backed address_spaces
   */
  int block_write_full_page(struct page *page, get_block_t *get_block,
  			struct writeback_control *wbc)
  {
  	struct inode * const inode = page->mapping->host;
  	const loff_t i_size = i_size_read(inode);
  	const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
  	unsigned offset;

  	/* Only pages at or past the last index need the EOF handling. */
  	if (page->index >= end_index) {
  		offset = i_size & (PAGE_CACHE_SIZE-1);

  		/* Is the page fully outside i_size? (truncate in progress) */
  		if (page->index >= end_index+1 || !offset) {
  			/*
  			 * The page may have dirty, unmapped buffers.  For
  			 * example, they may have been added in
  			 * ext3_writepage().  Make them freeable here, so the
  			 * page does not leak.
  			 */
  			do_invalidatepage(page, 0);
  			unlock_page(page);
  			return 0; /* don't care */
  		}

  		/*
  		 * The page straddles i_size.  It must be zeroed out on each
  		 * and every writepage invocation because it may be mmapped.
  		 * "A file is mapped in multiples of the page size.  For a
  		 * file that is not a multiple of the  page size, the
  		 * remaining memory is zeroed when mapped, and writes to that
  		 * region are not written out to the file."
  		 */
  		zero_user_segment(page, offset, PAGE_CACHE_SIZE);
  	}

  	return __block_write_full_page(inode, page, get_block, wbc);
  }
  
  /*
   * Look up the block number backing logical @block of @mapping's inode by
   * calling @get_block (create == 0) on a scratch buffer_head.  get_block's
   * return value is ignored: if it does not fill in b_blocknr, the preset
   * value 0 is returned.
   */
  sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
  			    get_block_t *get_block)
  {
  	struct inode *inode = mapping->host;
  	struct buffer_head probe;

  	/* Only the fields set here are initialised before the lookup. */
  	probe.b_size = 1 << inode->i_blkbits;
  	probe.b_blocknr = 0;
  	probe.b_state = 0;

  	get_block(inode, block, &probe, 0);

  	return probe.b_blocknr;
  }
6712ecf8f   NeilBrown   Drop 'size' argum...
2938
  static void end_bio_bh_io_sync(struct bio *bio, int err)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2939
2940
  {
  	struct buffer_head *bh = bio->bi_private;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2941
2942
2943
2944
  	if (err == -EOPNOTSUPP) {
  		set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
  		set_bit(BH_Eopnotsupp, &bh->b_state);
  	}
08bafc034   Keith Mannthey   block: Supress Bu...
2945
2946
  	if (unlikely (test_bit(BIO_QUIET,&bio->bi_flags)))
  		set_bit(BH_Quiet, &bh->b_state);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2947
2948
  	bh->b_end_io(bh, test_bit(BIO_UPTODATE, &bio->bi_flags));
  	bio_put(bio);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2949
2950
2951
2952
2953
2954
2955
2956
2957
2958
  }
  
  int submit_bh(int rw, struct buffer_head * bh)
  {
  	struct bio *bio;
  	int ret = 0;
  
  	BUG_ON(!buffer_locked(bh));
  	BUG_ON(!buffer_mapped(bh));
  	BUG_ON(!bh->b_end_io);
48fd4f93a   Jens Axboe   block: submit_bh(...
2959
2960
2961
2962
2963
2964
  	/*
  	 * Mask in barrier bit for a write (could be either a WRITE or a
  	 * WRITE_SYNC
  	 */
  	if (buffer_ordered(bh) && (rw & WRITE))
  		rw |= WRITE_BARRIER;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2965
2966
  
  	/*
48fd4f93a   Jens Axboe   block: submit_bh(...
2967
  	 * Only clear out a write error when rewriting
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2968
  	 */
48fd4f93a   Jens Axboe   block: submit_bh(...
2969
  	if (test_set_buffer_req(bh) && (rw & WRITE))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2970
2971
2972
2973
2974
2975
2976
2977
2978
2979
2980
2981
2982
2983
2984
2985
2986
2987
2988
2989
2990
2991
2992
2993
2994
2995
2996
2997
2998
2999
3000
3001
3002
  		clear_buffer_write_io_error(bh);
  
  	/*
  	 * from here on down, it's all bio -- do the initial mapping,
  	 * submit_bio -> generic_make_request may further map this bio around
  	 */
  	bio = bio_alloc(GFP_NOIO, 1);
  
  	bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
  	bio->bi_bdev = bh->b_bdev;
  	bio->bi_io_vec[0].bv_page = bh->b_page;
  	bio->bi_io_vec[0].bv_len = bh->b_size;
  	bio->bi_io_vec[0].bv_offset = bh_offset(bh);
  
  	bio->bi_vcnt = 1;
  	bio->bi_idx = 0;
  	bio->bi_size = bh->b_size;
  
  	bio->bi_end_io = end_bio_bh_io_sync;
  	bio->bi_private = bh;
  
  	bio_get(bio);
  	submit_bio(rw, bio);
  
  	if (bio_flagged(bio, BIO_EOPNOTSUPP))
  		ret = -EOPNOTSUPP;
  
  	bio_put(bio);
  	return ret;
  }
  
  /**
   * ll_rw_block: low-level access to block devices (DEPRECATED)
a76622362   Jan Kara   [PATCH] Make ll_r...
3003
   * @rw: whether to %READ or %WRITE or %SWRITE or maybe %READA (readahead)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3004
3005
3006
   * @nr: number of &struct buffer_heads in the array
   * @bhs: array of pointers to &struct buffer_head
   *
a76622362   Jan Kara   [PATCH] Make ll_r...
3007
3008
3009
3010
3011
   * ll_rw_block() takes an array of pointers to &struct buffer_heads, and
   * requests an I/O operation on them, either a %READ or a %WRITE.  The third
   * %SWRITE is like %WRITE only we make sure that the *current* data in buffers
   * are sent to disk. The fourth %READA option is described in the documentation
   * for generic_make_request() which ll_rw_block() calls.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3012
3013
   *
   * This function drops any buffer that it cannot get a lock on (with the
a76622362   Jan Kara   [PATCH] Make ll_r...
3014
3015
3016
3017
3018
   * BH_Lock state bit) unless SWRITE is required, any buffer that appears to be
   * clean when doing a write request, and any buffer that appears to be
   * up-to-date when doing read request.  Further it marks as clean buffers that
   * are processed for writing (the buffer cache won't assume that they are
   * actually clean until the buffer gets unlocked).
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
   *
   * ll_rw_block sets b_end_io to simple completion handler that marks
   * the buffer up-to-date (if appropriate), unlocks the buffer and wakes
   * any waiters. 
   *
   * All of the buffers must be for the same device, and must also be a
   * multiple of the current approved size for the device.
   */
  void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
  {
  	int i;
  
  	for (i = 0; i < nr; i++) {
  		struct buffer_head *bh = bhs[i];
18ce3751c   Jens Axboe   Properly notify b...
3033
  		if (rw == SWRITE || rw == SWRITE_SYNC)
a76622362   Jan Kara   [PATCH] Make ll_r...
3034
  			lock_buffer(bh);
ca5de404f   Nick Piggin   fs: rename buffer...
3035
  		else if (!trylock_buffer(bh))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3036
  			continue;
18ce3751c   Jens Axboe   Properly notify b...
3037
  		if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3038
  			if (test_clear_buffer_dirty(bh)) {
76c3073a8   Andrew Morton   [PATCH] end_buffe...
3039
  				bh->b_end_io = end_buffer_write_sync;
e60e5c50a   OGAWA Hirofumi   [PATCH] Trivial o...
3040
  				get_bh(bh);
18ce3751c   Jens Axboe   Properly notify b...
3041
3042
3043
3044
  				if (rw == SWRITE_SYNC)
  					submit_bh(WRITE_SYNC, bh);
  				else
  					submit_bh(WRITE, bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3045
3046
3047
  				continue;
  			}
  		} else {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3048
  			if (!buffer_uptodate(bh)) {
76c3073a8   Andrew Morton   [PATCH] end_buffe...
3049
  				bh->b_end_io = end_buffer_read_sync;
e60e5c50a   OGAWA Hirofumi   [PATCH] Trivial o...
3050
  				get_bh(bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3051
3052
3053
3054
3055
  				submit_bh(rw, bh);
  				continue;
  			}
  		}
  		unlock_buffer(bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3056
3057
3058
3059
3060
3061
3062
3063
3064
3065
3066
3067
3068
3069
3070
3071
3072
  	}
  }
  
  /*
   * For a data-integrity writeout, we need to wait upon any in-progress I/O
   * and then start new I/O and then wait upon it.  The caller must have a ref on
   * the buffer_head.
   */
  int sync_dirty_buffer(struct buffer_head *bh)
  {
  	int ret = 0;
  
  	WARN_ON(atomic_read(&bh->b_count) < 1);
  	lock_buffer(bh);
  	if (test_clear_buffer_dirty(bh)) {
  		get_bh(bh);
  		bh->b_end_io = end_buffer_write_sync;
78f707bfc   Jens Axboe   block: revert par...
3073
  		ret = submit_bh(WRITE, bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3074
3075
3076
3077
3078
3079
3080
3081
3082
3083
3084
3085
3086
3087
3088
3089
3090
3091
3092
3093
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111
3112
3113
3114
3115
3116
3117
3118
3119
3120
  		wait_on_buffer(bh);
  		if (buffer_eopnotsupp(bh)) {
  			clear_buffer_eopnotsupp(bh);
  			ret = -EOPNOTSUPP;
  		}
  		if (!ret && !buffer_uptodate(bh))
  			ret = -EIO;
  	} else {
  		unlock_buffer(bh);
  	}
  	return ret;
  }
  
  /*
   * try_to_free_buffers() checks if all the buffers on this particular page
   * are unused, and releases them if so.
   *
   * Exclusion against try_to_free_buffers may be obtained by either
   * locking the page or by holding its mapping's private_lock.
   *
   * If the page is dirty but all the buffers are clean then we need to
   * be sure to mark the page clean as well.  This is because the page
   * may be against a block device, and a later reattachment of buffers
   * to a dirty page will set *all* buffers dirty.  Which would corrupt
   * filesystem data on the same device.
   *
   * The same applies to regular filesystem pages: if all the buffers are
   * clean then we set the page clean and proceed.  To do that, we require
   * total exclusion from __set_page_dirty_buffers().  That is obtained with
   * private_lock.
   *
   * try_to_free_buffers() is non-blocking.
   */
  static inline int buffer_busy(struct buffer_head *bh)
  {
  	/* State bits that, when set, mean the buffer must not be freed. */
  	const int busy_bits = (1 << BH_Dirty) | (1 << BH_Lock);

  	/* Non-zero if the buffer is referenced, dirty or locked. */
  	return atomic_read(&bh->b_count) | (bh->b_state & busy_bits);
  }
  
  static int
  drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
  {
  	struct buffer_head *head = page_buffers(page);
  	struct buffer_head *bh;
  
  	bh = head;
  	do {
de7d5a3b6   Andrew Morton   [PATCH] drop_buff...
3121
  		if (buffer_write_io_error(bh) && page->mapping)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3122
3123
3124
3125
3126
3127
3128
3129
  			set_bit(AS_EIO, &page->mapping->flags);
  		if (buffer_busy(bh))
  			goto failed;
  		bh = bh->b_this_page;
  	} while (bh != head);
  
  	do {
  		struct buffer_head *next = bh->b_this_page;
535ee2fbf   Jan Kara   buffer_head: fix ...
3130
  		if (bh->b_assoc_map)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3131
3132
3133
3134
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
  			__remove_assoc_queue(bh);
  		bh = next;
  	} while (bh != head);
  	*buffers_to_free = head;
  	__clear_page_buffers(page);
  	return 1;
  failed:
  	return 0;
  }
  
  int try_to_free_buffers(struct page *page)
  {
  	struct address_space * const mapping = page->mapping;
  	struct buffer_head *buffers_to_free = NULL;
  	int ret = 0;
  
  	BUG_ON(!PageLocked(page));
ecdfc9787   Linus Torvalds   Resurrect 'try_to...
3148
  	if (PageWriteback(page))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3149
3150
3151
3152
3153
3154
3155
3156
3157
  		return 0;
  
  	if (mapping == NULL) {		/* can this still happen? */
  		ret = drop_buffers(page, &buffers_to_free);
  		goto out;
  	}
  
  	spin_lock(&mapping->private_lock);
  	ret = drop_buffers(page, &buffers_to_free);
ecdfc9787   Linus Torvalds   Resurrect 'try_to...
3158
3159
3160
3161
3162
3163
3164
3165
3166
3167
  
  	/*
  	 * If the filesystem writes its buffers by hand (eg ext3)
  	 * then we can have clean buffers against a dirty page.  We
  	 * clean the page here; otherwise the VM will never notice
  	 * that the filesystem did any IO at all.
  	 *
  	 * Also, during truncate, discard_buffer will have marked all
  	 * the page's buffers clean.  We discover that here and clean
  	 * the page also.
87df7241b   Nick Piggin   [PATCH] Fix try_t...
3168
3169
3170
3171
  	 *
  	 * private_lock must be held over this entire operation in order
  	 * to synchronise against __set_page_dirty_buffers and prevent the
  	 * dirty bit from being lost.
ecdfc9787   Linus Torvalds   Resurrect 'try_to...
3172
3173
3174
  	 */
  	if (ret)
  		cancel_dirty_page(page, PAGE_CACHE_SIZE);
87df7241b   Nick Piggin   [PATCH] Fix try_t...
3175
  	spin_unlock(&mapping->private_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3176
3177
3178
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188
  out:
  	if (buffers_to_free) {
  		struct buffer_head *bh = buffers_to_free;
  
  		do {
  			struct buffer_head *next = bh->b_this_page;
  			free_buffer_head(bh);
  			bh = next;
  		} while (bh != buffers_to_free);
  	}
  	return ret;
  }
  EXPORT_SYMBOL(try_to_free_buffers);
3978d7179   NeilBrown   [PATCH] Make addr...
3189
  void block_sync_page(struct page *page)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3190
3191
3192
3193
3194
3195
3196
  {
  	struct address_space *mapping;
  
  	smp_mb();
  	mapping = page_mapping(page);
  	if (mapping)
  		blk_run_backing_dev(mapping->backing_dev_info, page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3197
3198
3199
3200
3201
3202
3203
3204
3205
  }
  
  /*
   * There are no bdflush tunables left.  But distributions are
   * still running obsolete flush daemons, so we terminate them here.
   *
   * Use of bdflush() is deprecated and will be removed in a future kernel.
   * The `pdflush' kernel threads fully replace bdflush daemons and this call.
   */
bdc480e3b   Heiko Carstens   [CVE-2009-0029] S...
3206
  SYSCALL_DEFINE2(bdflush, int, func, long, data)
  {
  	/* Number of deprecation warnings printed so far (capped at 5). */
  	static int msg_count;

  	/* Only CAP_SYS_ADMIN callers may use this call at all. */
  	if (!capable(CAP_SYS_ADMIN))
  		return -EPERM;

  	/* Warn about the obsolete call, but only the first few times. */
  	if (msg_count < 5) {
  		msg_count++;
  		printk(KERN_INFO
  			"warning: process `%s' used the obsolete bdflush"
  			" system call\n", current->comm);
  		printk(KERN_INFO "Fix your initscripts?\n");
  	}

  	/* func == 1: terminate the calling (obsolete daemon) process. */
  	if (func == 1)
  		do_exit(0);

  	/* Every other request is accepted and ignored; @data is unused. */
  	return 0;
  }
  
  /*
   * Buffer-head allocation
   */
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
3231
  /*
   * Slab cache for struct buffer_head.  Created in buffer_init();
   * alloc_buffer_head() allocates from it and free_buffer_head()
   * returns objects to it.
   */
  static struct kmem_cache *bh_cachep;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3232
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
  
  /*
   * Once the number of bh's in the machine exceeds this level, we start
   * stripping them in writeback.
   */
  /* Threshold on the total bh count; computed once in buffer_init(). */
  static int max_buffer_heads;
  
  /*
   * Set by recalc_bh_state(): non-zero when the summed per-CPU bh counts
   * exceed max_buffer_heads, zero otherwise.
   */
  int buffer_heads_over_limit;
  
  /* Per-CPU buffer_head bookkeeping. */
  struct bh_accounting {
  	int nr;			/* Number of live bh's */
  	int ratelimit;		/* Limit cacheline bouncing */
  };
  
  /* One zero-initialised accounting record per CPU. */
  static DEFINE_PER_CPU(struct bh_accounting, bh_accounting) = {0, 0};
  
  static void recalc_bh_state(void)
  {
  	int i;
  	int tot = 0;
  
  	if (__get_cpu_var(bh_accounting).ratelimit++ < 4096)
  		return;
  	__get_cpu_var(bh_accounting).ratelimit = 0;
8a1434268   Eric Dumazet   [PATCH] HOTPLUG_C...
3256
  	for_each_online_cpu(i)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3257
3258
3259
3260
  		tot += per_cpu(bh_accounting, i).nr;
  	buffer_heads_over_limit = (tot > max_buffer_heads);
  }
  	
dd0fc66fb   Al Viro   [PATCH] gfp flags...
3261
  /*
   * Allocate a buffer_head from bh_cachep with the given gfp flags.
   * Returns the new buffer_head, or NULL if the slab allocation failed.
   */
  struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
  {
  	struct buffer_head *bh = kmem_cache_alloc(bh_cachep, gfp_flags);
  	struct bh_accounting *acct;

  	if (!bh)
  		return NULL;

  	INIT_LIST_HEAD(&bh->b_assoc_buffers);

  	/* Count the new bh on this CPU and maybe refresh the limit flag. */
  	acct = &get_cpu_var(bh_accounting);
  	acct->nr++;
  	recalc_bh_state();
  	put_cpu_var(bh_accounting);

  	return bh;
  }
  EXPORT_SYMBOL(alloc_buffer_head);
  
  void free_buffer_head(struct buffer_head *bh)
  {
  	BUG_ON(!list_empty(&bh->b_assoc_buffers));
  	kmem_cache_free(bh_cachep, bh);
736c7b808   Coywolf Qi Hunt   [PATCH] alloc_buf...
3278
  	get_cpu_var(bh_accounting).nr--;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3279
  	recalc_bh_state();
736c7b808   Coywolf Qi Hunt   [PATCH] alloc_buf...
3280
  	put_cpu_var(bh_accounting);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3281
3282
  }
  EXPORT_SYMBOL(free_buffer_head);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3283
3284
3285
3286
3287
3288
3289
3290
3291
  static void buffer_exit_cpu(int cpu)
  {
  	int i;
  	struct bh_lru *b = &per_cpu(bh_lrus, cpu);
  
  	for (i = 0; i < BH_LRU_SIZE; i++) {
  		brelse(b->bhs[i]);
  		b->bhs[i] = NULL;
  	}
8a1434268   Eric Dumazet   [PATCH] HOTPLUG_C...
3292
3293
3294
  	get_cpu_var(bh_accounting).nr += per_cpu(bh_accounting, cpu).nr;
  	per_cpu(bh_accounting, cpu).nr = 0;
  	put_cpu_var(bh_accounting);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3295
3296
3297
3298
3299
  }
  
  static int buffer_cpu_notify(struct notifier_block *self,
  			      unsigned long action, void *hcpu)
  {
8bb784428   Rafael J. Wysocki   Add suspend-relat...
3300
  	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3301
3302
3303
  		buffer_exit_cpu((unsigned long)hcpu);
  	return NOTIFY_OK;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3304

389d1b083   Aneesh Kumar K.V   Add buffer head r...
3305
  /**
a6b91919e   Randy Dunlap   fs: fix kernel-do...
3306
   * bh_uptodate_or_lock - Test whether the buffer is uptodate
389d1b083   Aneesh Kumar K.V   Add buffer head r...
3307
3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318
3319
3320
3321
3322
3323
3324
   * @bh: struct buffer_head
   *
   * Return true if the buffer is up-to-date and false,
   * with the buffer locked, if not.
   */
  int bh_uptodate_or_lock(struct buffer_head *bh)
  {
  	/* Fast path: already uptodate, no locking needed. */
  	if (buffer_uptodate(bh))
  		return 1;

  	/* Re-check under the buffer lock. */
  	lock_buffer(bh);
  	if (!buffer_uptodate(bh))
  		return 0;	/* still not uptodate: return with bh locked */

  	/* Became uptodate while we waited for the lock. */
  	unlock_buffer(bh);
  	return 1;
  }
  EXPORT_SYMBOL(bh_uptodate_or_lock);
  
  /**
a6b91919e   Randy Dunlap   fs: fix kernel-do...
3325
   * bh_submit_read - Submit a locked buffer for reading
389d1b083   Aneesh Kumar K.V   Add buffer head r...
3326
3327
3328
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340
3341
3342
3343
3344
3345
3346
3347
   * @bh: struct buffer_head
   *
   * Returns zero on success and -EIO on error.
   */
  int bh_submit_read(struct buffer_head *bh)
  {
  	/* The caller must pass the buffer in locked. */
  	BUG_ON(!buffer_locked(bh));

  	if (!buffer_uptodate(bh)) {
  		/* Start a read, wait for it, and report how it ended. */
  		get_bh(bh);
  		bh->b_end_io = end_buffer_read_sync;
  		submit_bh(READ, bh);
  		wait_on_buffer(bh);
  		return buffer_uptodate(bh) ? 0 : -EIO;
  	}

  	/* Nothing to read: just drop the lock taken by the caller. */
  	unlock_buffer(bh);
  	return 0;
  }
  EXPORT_SYMBOL(bh_submit_read);
b98938c37   Christoph Lameter   bufferhead: rever...
3348
  static void
51cc50685   Alexey Dobriyan   SL*B: drop kmem c...
3349
  init_buffer_head(void *data)
b98938c37   Christoph Lameter   bufferhead: rever...
3350
3351
3352
3353
3354
3355
  {
  	struct buffer_head *bh = data;
  
  	memset(bh, 0, sizeof(*bh));
  	INIT_LIST_HEAD(&bh->b_assoc_buffers);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3356
3357
3358
  /*
   * Boot-time setup for the buffer-head layer: create the buffer_head
   * slab cache, compute max_buffer_heads and register the CPU notifier.
   */
  void __init buffer_init(void)
  {
  	unsigned long nrpages;
  	unsigned long limit;

  	/* SLAB_PANIC is passed, so the result is not checked here. */
  	bh_cachep = kmem_cache_create("buffer_head",
  			sizeof(struct buffer_head), 0,
  				(SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
  				SLAB_MEM_SPREAD),
  				init_buffer_head);

  	/*
  	 * Limit the bh occupancy to 10% of ZONE_NORMAL
  	 *
  	 * The arithmetic is done in unsigned long, and as a plain divide
  	 * by 10 (same result as "* 10 / 100" without the intermediate
  	 * multiply), so very large page counts cannot overflow.  The
  	 * result is clamped because max_buffer_heads is an int.
  	 */
  	nrpages = nr_free_buffer_pages();
  	nrpages /= 10;
  	limit = nrpages * (PAGE_SIZE / sizeof(struct buffer_head));
  	max_buffer_heads = (limit > INT_MAX) ? INT_MAX : (int)limit;

  	hotcpu_notifier(buffer_cpu_notify, 0);
  }
  
  /* Symbols from this file exported for use by modules. */
  EXPORT_SYMBOL(__bforget);
  EXPORT_SYMBOL(__brelse);
  EXPORT_SYMBOL(__wait_on_buffer);
  EXPORT_SYMBOL(block_commit_write);
  EXPORT_SYMBOL(block_prepare_write);
  EXPORT_SYMBOL(block_page_mkwrite);
  EXPORT_SYMBOL(block_read_full_page);
  EXPORT_SYMBOL(block_sync_page);
  EXPORT_SYMBOL(block_truncate_page);
  EXPORT_SYMBOL(block_write_full_page);
  EXPORT_SYMBOL(cont_write_begin);
  EXPORT_SYMBOL(end_buffer_read_sync);
  EXPORT_SYMBOL(end_buffer_write_sync);
  EXPORT_SYMBOL(file_fsync);
  EXPORT_SYMBOL(fsync_bdev);
  EXPORT_SYMBOL(generic_block_bmap);
  EXPORT_SYMBOL(generic_cont_expand_simple);
  EXPORT_SYMBOL(init_buffer);
  EXPORT_SYMBOL(invalidate_bdev);
  EXPORT_SYMBOL(ll_rw_block);
  EXPORT_SYMBOL(mark_buffer_dirty);
  EXPORT_SYMBOL(submit_bh);
  EXPORT_SYMBOL(sync_dirty_buffer);
  EXPORT_SYMBOL(unlock_buffer);