Blame view

fs/buffer.c 90.4 KB
457c89965   Thomas Gleixner   treewide: Add SPD...
1
  // SPDX-License-Identifier: GPL-2.0-only
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
  /*
   *  linux/fs/buffer.c
   *
   *  Copyright (C) 1991, 1992, 2002  Linus Torvalds
   */
  
  /*
   * Start bdflush() with kernel_thread not syscall - Paul Gortmaker, 12/95
   *
   * Removed a lot of unnecessary code and simplified things now that
   * the buffer cache isn't our primary cache - Andrew Tridgell 12/96
   *
   * Speed up hash, lru, and free list operations.  Use gfp() for allocating
   * hash table, use SLAB cache for buffer heads. SMP threading.  -DaveM
   *
   * Added 32k buffer block sizes - these are required on older ARM systems. - RMK
   *
   * async buffer flushing, 1999 Andrea Arcangeli <andrea@suse.de>
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
21
  #include <linux/kernel.h>
f361bf4a6   Ingo Molnar   sched/headers: Pr...
22
  #include <linux/sched/signal.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
23
24
  #include <linux/syscalls.h>
  #include <linux/fs.h>
ae259a9c8   Christoph Hellwig   fs: introduce iom...
25
  #include <linux/iomap.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
26
27
28
  #include <linux/mm.h>
  #include <linux/percpu.h>
  #include <linux/slab.h>
16f7e0fe2   Randy Dunlap   [PATCH] capable/c...
29
  #include <linux/capability.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
30
31
32
33
  #include <linux/blkdev.h>
  #include <linux/file.h>
  #include <linux/quotaops.h>
  #include <linux/highmem.h>
630d9c472   Paul Gortmaker   fs: reduce the us...
34
  #include <linux/export.h>
bafc0dba1   Tejun Heo   buffer, writeback...
35
  #include <linux/backing-dev.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
36
37
38
39
  #include <linux/writeback.h>
  #include <linux/hash.h>
  #include <linux/suspend.h>
  #include <linux/buffer_head.h>
55e829af0   Andrew Morton   [PATCH] io-accoun...
40
  #include <linux/task_io_accounting_ops.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
41
  #include <linux/bio.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
42
43
44
  #include <linux/cpu.h>
  #include <linux/bitops.h>
  #include <linux/mpage.h>
fb1c8f93d   Ingo Molnar   [PATCH] spinlock ...
45
  #include <linux/bit_spinlock.h>
29f3ad7d8   Jan Kara   fs: Provide funct...
46
  #include <linux/pagevec.h>
f745c6f5f   Shakeel Butt   fs, mm: account b...
47
  #include <linux/sched/mm.h>
5305cb830   Tejun Heo   block: add block_...
48
  #include <trace/events/block.h>
31fb992ce   Eric Biggers   fs/buffer.c: supp...
49
  #include <linux/fscrypt.h>
43edfc892   Laura Abbott   FROMLIST: fs/buff...
50
  #include <linux/xarray.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
51

2b211dc04   Ben Dooks   fs/buffer.c: incl...
52
  #include "internal.h"
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
53
  static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
2a222ca99   Mike Christie   fs: have submit_b...
54
  static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
8e8f92988   Jens Axboe   fs: add support f...
55
  			 enum rw_hint hint, struct writeback_control *wbc);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
56
57
  
  #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
f0059afd3   Tejun Heo   buffer: make touc...
58
59
  inline void touch_buffer(struct buffer_head *bh)
  {
5305cb830   Tejun Heo   block: add block_...
60
  	trace_block_touch_buffer(bh);
f0059afd3   Tejun Heo   buffer: make touc...
61
62
63
  	mark_page_accessed(bh->b_page);
  }
  EXPORT_SYMBOL(touch_buffer);
fc9b52cd8   Harvey Harrison   fs: remove fastca...
64
  void __lock_buffer(struct buffer_head *bh)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
65
  {
743162013   NeilBrown   sched: Remove pro...
66
  	wait_on_bit_lock_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
67
68
  }
  EXPORT_SYMBOL(__lock_buffer);
fc9b52cd8   Harvey Harrison   fs: remove fastca...
69
  void unlock_buffer(struct buffer_head *bh)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
70
  {
51b07fc3c   Nick Piggin   fs: buffer lock u...
71
  	clear_bit_unlock(BH_Lock, &bh->b_state);
4e857c58e   Peter Zijlstra   arch: Mass conver...
72
  	smp_mb__after_atomic();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
73
74
  	wake_up_bit(&bh->b_state, BH_Lock);
  }
1fe72eaa0   H Hartley Sweeten   fs/buffer.c: clea...
75
  EXPORT_SYMBOL(unlock_buffer);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
76
77
  
  /*
b45972265   Mel Gorman   mm: vmscan: take ...
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
   * Reports whether the page has dirty or writeback buffers. If all the
   * buffers are unlocked and clean then the PageDirty information is stale.
   * If any of the buffers are locked, it is assumed they are locked for IO.
   */
  void buffer_check_dirty_writeback(struct page *page,
  				     bool *dirty, bool *writeback)
  {
  	struct buffer_head *first, *cur;

  	/* Start from "clean, no writeback"; the checks below only upgrade. */
  	*dirty = false;
  	*writeback = false;

  	BUG_ON(!PageLocked(page));

  	/* A page without buffers has nothing further to inspect. */
  	if (!page_has_buffers(page))
  		return;

  	if (PageWriteback(page))
  		*writeback = true;

  	/* Visit each buffer on the page's circular b_this_page ring once. */
  	first = page_buffers(page);
  	cur = first;
  	for (;;) {
  		if (buffer_locked(cur))
  			*writeback = true;
  		if (buffer_dirty(cur))
  			*dirty = true;

  		cur = cur->b_this_page;
  		if (cur == first)
  			break;
  	}
  }
  EXPORT_SYMBOL(buffer_check_dirty_writeback);
  
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
112
113
114
115
116
117
   * Block until a buffer comes unlocked.  This doesn't stop it
   * from becoming locked again - you have to lock it yourself
   * if you want to preserve its state.
   */
  void __wait_on_buffer(struct buffer_head * bh)
  {
743162013   NeilBrown   sched: Remove pro...
118
  	wait_on_bit_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
119
  }
1fe72eaa0   H Hartley Sweeten   fs/buffer.c: clea...
120
  EXPORT_SYMBOL(__wait_on_buffer);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
121

b744c2ac4   Robert Elliott   fs: merge I/O err...
122
  static void buffer_io_error(struct buffer_head *bh, char *msg)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
123
  {
432f16e64   Robert Elliott   fs: clarify rate ...
124
125
  	if (!test_bit(BH_Quiet, &bh->b_state))
  		printk_ratelimited(KERN_ERR
a1c6f0573   Dmitry Monakhov   fs: use block_dev...
126
127
128
  			"Buffer I/O error on dev %pg, logical block %llu%s
  ",
  			bh->b_bdev, (unsigned long long)bh->b_blocknr, msg);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
129
130
131
  }
  
  /*
68671f35f   Dmitry Monakhov   mm: add end_buffe...
132
133
134
135
136
137
   * End-of-IO handler helper function which does not touch the bh after
   * unlocking it.
   * Note: unlock_buffer() sort-of does touch the bh after unlocking it, but
   * a race there is benign: unlock_buffer() only uses the bh's address for
   * hashing after unlocking the buffer, so it doesn't actually touch the bh
   * itself.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
138
   */
68671f35f   Dmitry Monakhov   mm: add end_buffe...
139
  static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
140
141
142
143
  {
  	if (uptodate) {
  		set_buffer_uptodate(bh);
  	} else {
70246286e   Christoph Hellwig   block: get rid of...
144
  		/* This happens, due to failed read-ahead attempts. */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
145
146
147
  		clear_buffer_uptodate(bh);
  	}
  	unlock_buffer(bh);
68671f35f   Dmitry Monakhov   mm: add end_buffe...
148
149
150
151
152
153
154
155
156
  }
  
  /*
   * Default synchronous end-of-IO handler.  Just mark it up-to-date and
   * unlock the buffer. This is what ll_rw_block uses too.
   */
  void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
  {
  	__end_buffer_read_notouch(bh, uptodate);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
157
158
  	put_bh(bh);
  }
1fe72eaa0   H Hartley Sweeten   fs/buffer.c: clea...
159
  EXPORT_SYMBOL(end_buffer_read_sync);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
160
161
162
  
  void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
163
164
165
  	if (uptodate) {
  		set_buffer_uptodate(bh);
  	} else {
432f16e64   Robert Elliott   fs: clarify rate ...
166
  		buffer_io_error(bh, ", lost sync page write");
87354e5de   Jeff Layton   buffer: set error...
167
  		mark_buffer_write_io_error(bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
168
169
170
171
172
  		clear_buffer_uptodate(bh);
  	}
  	unlock_buffer(bh);
  	put_bh(bh);
  }
1fe72eaa0   H Hartley Sweeten   fs/buffer.c: clea...
173
  EXPORT_SYMBOL(end_buffer_write_sync);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
174
175
  
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
176
177
178
179
180
   * Various filesystems appear to want __find_get_block to be non-blocking.
   * But it's the page lock which protects the buffers.  To get around this,
   * we get exclusion from try_to_free_buffers with the blockdev mapping's
   * private_lock.
   *
b93b01631   Matthew Wilcox   page cache: use x...
181
   * Hack idea: for the blockdev mapping, private_lock contention
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
182
   * may be quite high.  This code could TryLock the page, and if that
b93b01631   Matthew Wilcox   page cache: use x...
183
   * succeeds, there is no need to take private_lock.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
184
185
   */
  static struct buffer_head *
385fd4c59   Coywolf Qi Hunt   [PATCH] __find_ge...
186
  __find_get_block_slow(struct block_device *bdev, sector_t block)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
187
188
189
190
191
192
193
194
195
  {
  	struct inode *bd_inode = bdev->bd_inode;
  	struct address_space *bd_mapping = bd_inode->i_mapping;
  	struct buffer_head *ret = NULL;
  	pgoff_t index;
  	struct buffer_head *bh;
  	struct buffer_head *head;
  	struct page *page;
  	int all_mapped = 1;
43636c804   Tetsuo Handa   fs: ratelimit __f...
196
  	static DEFINE_RATELIMIT_STATE(last_warned, HZ, 1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
197

09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
198
  	index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
2457aec63   Mel Gorman   mm: non-atomicall...
199
  	page = find_get_page_flags(bd_mapping, index, FGP_ACCESSED);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
200
201
202
203
204
205
206
207
208
  	if (!page)
  		goto out;
  
  	spin_lock(&bd_mapping->private_lock);
  	if (!page_has_buffers(page))
  		goto out_unlock;
  	head = page_buffers(page);
  	bh = head;
  	do {
97f76d3d1   Nikanth Karthikesan   vfs: check bh->b_...
209
210
211
  		if (!buffer_mapped(bh))
  			all_mapped = 0;
  		else if (bh->b_blocknr == block) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
212
213
214
215
  			ret = bh;
  			get_bh(bh);
  			goto out_unlock;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
216
217
218
219
220
221
222
223
  		bh = bh->b_this_page;
  	} while (bh != head);
  
  	/* we might be here because some of the buffers on this page are
  	 * not mapped.  This is due to various races between
  	 * file io on the block device and getblk.  It gets dealt with
  	 * elsewhere, don't buffer_error if we had some unmapped buffers
  	 */
43636c804   Tetsuo Handa   fs: ratelimit __f...
224
225
226
227
228
229
230
231
232
233
  	ratelimit_set_flags(&last_warned, RATELIMIT_MSG_ON_RELEASE);
  	if (all_mapped && __ratelimit(&last_warned)) {
  		printk("__find_get_block_slow() failed. block=%llu, "
  		       "b_blocknr=%llu, b_state=0x%08lx, b_size=%zu, "
  		       "device %pg blocksize: %d
  ",
  		       (unsigned long long)block,
  		       (unsigned long long)bh->b_blocknr,
  		       bh->b_state, bh->b_size, bdev,
  		       1 << bd_inode->i_blkbits);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
234
235
236
  	}
  out_unlock:
  	spin_unlock(&bd_mapping->private_lock);
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
237
  	put_page(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
238
239
240
  out:
  	return ret;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
241
242
  static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
243
  	unsigned long flags;
a39722034   Nick Piggin   [PATCH] page_upto...
244
  	struct buffer_head *first;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
245
246
247
248
249
250
251
252
253
254
255
  	struct buffer_head *tmp;
  	struct page *page;
  	int page_uptodate = 1;
  
  	BUG_ON(!buffer_async_read(bh));
  
  	page = bh->b_page;
  	if (uptodate) {
  		set_buffer_uptodate(bh);
  	} else {
  		clear_buffer_uptodate(bh);
432f16e64   Robert Elliott   fs: clarify rate ...
256
  		buffer_io_error(bh, ", async page read");
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
257
258
259
260
261
262
263
264
  		SetPageError(page);
  	}
  
  	/*
  	 * Be _very_ careful from here on. Bad things can happen if
  	 * two buffer heads end IO at almost the same time and both
  	 * decide that the page is now completely done.
  	 */
a39722034   Nick Piggin   [PATCH] page_upto...
265
  	first = page_buffers(page);
f1e67e355   Thomas Gleixner   fs/buffer: Make B...
266
  	spin_lock_irqsave(&first->b_uptodate_lock, flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
267
268
269
270
271
272
273
274
275
276
277
278
  	clear_buffer_async_read(bh);
  	unlock_buffer(bh);
  	tmp = bh;
  	do {
  		if (!buffer_uptodate(tmp))
  			page_uptodate = 0;
  		if (buffer_async_read(tmp)) {
  			BUG_ON(!buffer_locked(tmp));
  			goto still_busy;
  		}
  		tmp = tmp->b_this_page;
  	} while (tmp != bh);
f1e67e355   Thomas Gleixner   fs/buffer: Make B...
279
  	spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
280
281
282
283
284
285
286
287
288
289
290
  
  	/*
  	 * If none of the buffers had errors and they are all
  	 * uptodate then we can set the page uptodate.
  	 */
  	if (page_uptodate && !PageError(page))
  		SetPageUptodate(page);
  	unlock_page(page);
  	return;
  
  still_busy:
f1e67e355   Thomas Gleixner   fs/buffer: Make B...
291
  	spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
292
293
  	return;
  }
31fb992ce   Eric Biggers   fs/buffer.c: supp...
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
  /*
   * Context handed to the decrypt_bh() work item: it carries the buffer head
   * whose data still has to be decrypted after the read I/O has completed.
   */
  struct decrypt_bh_ctx {
  	struct work_struct work;	/* work item; decrypt_bh() recovers the ctx from it */
  	struct buffer_head *bh;		/* buffer whose data is to be decrypted */
  };
  
  /*
   * Work function for a queued struct decrypt_bh_ctx: decrypt the blocks
   * covered by the buffer in its page, complete the async read (reported as
   * uptodate only if decryption returned 0), then free the context.
   */
  static void decrypt_bh(struct work_struct *work)
  {
  	struct decrypt_bh_ctx *ctx;
  	struct buffer_head *bh;
  	int err;

  	ctx = container_of(work, struct decrypt_bh_ctx, work);
  	bh = ctx->bh;

  	err = fscrypt_decrypt_pagecache_blocks(bh->b_page, bh->b_size,
  					       bh_offset(bh));

  	/* Hand the result on to the regular async-read completion path. */
  	end_buffer_async_read(bh, !err);

  	kfree(ctx);
  }
  
  /*
   * I/O completion handler for block_read_full_page() - pages
   * which come unlocked at the end of I/O.
   */
  static void end_buffer_async_read_io(struct buffer_head *bh, int uptodate)
  {
  	/* Decrypt if needed */
4f74d15fe   Eric Biggers   ext4: add inline ...
319
320
  	if (uptodate &&
  	    fscrypt_inode_uses_fs_layer_crypto(bh->b_page->mapping->host)) {
31fb992ce   Eric Biggers   fs/buffer.c: supp...
321
322
323
324
325
326
327
328
329
330
331
332
  		struct decrypt_bh_ctx *ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC);
  
  		if (ctx) {
  			INIT_WORK(&ctx->work, decrypt_bh);
  			ctx->bh = bh;
  			fscrypt_enqueue_decrypt_work(&ctx->work);
  			return;
  		}
  		uptodate = 0;
  	}
  	end_buffer_async_read(bh, uptodate);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
333
334
335
336
  /*
   * Completion handler for block_write_full_page() - pages which are unlocked
   * during I/O, and which have PageWriteback cleared upon I/O completion.
   */
35c80d5f4   Chris Mason   Add block_write_f...
337
  void end_buffer_async_write(struct buffer_head *bh, int uptodate)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
338
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
339
  	unsigned long flags;
a39722034   Nick Piggin   [PATCH] page_upto...
340
  	struct buffer_head *first;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
341
342
343
344
345
346
347
348
349
  	struct buffer_head *tmp;
  	struct page *page;
  
  	BUG_ON(!buffer_async_write(bh));
  
  	page = bh->b_page;
  	if (uptodate) {
  		set_buffer_uptodate(bh);
  	} else {
432f16e64   Robert Elliott   fs: clarify rate ...
350
  		buffer_io_error(bh, ", lost async page write");
87354e5de   Jeff Layton   buffer: set error...
351
  		mark_buffer_write_io_error(bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
352
353
354
  		clear_buffer_uptodate(bh);
  		SetPageError(page);
  	}
a39722034   Nick Piggin   [PATCH] page_upto...
355
  	first = page_buffers(page);
f1e67e355   Thomas Gleixner   fs/buffer: Make B...
356
  	spin_lock_irqsave(&first->b_uptodate_lock, flags);
a39722034   Nick Piggin   [PATCH] page_upto...
357

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
358
359
360
361
362
363
364
365
366
367
  	clear_buffer_async_write(bh);
  	unlock_buffer(bh);
  	tmp = bh->b_this_page;
  	while (tmp != bh) {
  		if (buffer_async_write(tmp)) {
  			BUG_ON(!buffer_locked(tmp));
  			goto still_busy;
  		}
  		tmp = tmp->b_this_page;
  	}
f1e67e355   Thomas Gleixner   fs/buffer: Make B...
368
  	spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
369
370
371
372
  	end_page_writeback(page);
  	return;
  
  still_busy:
f1e67e355   Thomas Gleixner   fs/buffer: Make B...
373
  	spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
374
375
  	return;
  }
1fe72eaa0   H Hartley Sweeten   fs/buffer.c: clea...
376
  EXPORT_SYMBOL(end_buffer_async_write);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
  
  /*
   * If a page's buffers are under async readin (end_buffer_async_read
   * completion) then there is a possibility that another thread of
   * control could lock one of the buffers after it has completed
   * but while some of the other buffers have not completed.  This
   * locked buffer would confuse end_buffer_async_read() into not unlocking
   * the page.  So the absence of BH_Async_Read tells end_buffer_async_read()
   * that this buffer is not under async I/O.
   *
   * The page comes unlocked when it has no locked buffer_async buffers
   * left.
   *
   * PageLocked prevents anyone from starting new async I/O reads against
   * any of the buffers.
   *
   * PageWriteback is used to prevent simultaneous writeout of the same
   * page.
   *
   * PageLocked prevents anyone from starting writeback of a page which is
   * under read I/O (PageWriteback is only ever set against a locked page).
   */
  static void mark_buffer_async_read(struct buffer_head *bh)
  {
31fb992ce   Eric Biggers   fs/buffer.c: supp...
401
  	bh->b_end_io = end_buffer_async_read_io;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
402
403
  	set_buffer_async_read(bh);
  }
1fe72eaa0   H Hartley Sweeten   fs/buffer.c: clea...
404
405
  static void mark_buffer_async_write_endio(struct buffer_head *bh,
  					  bh_end_io_t *handler)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
406
  {
35c80d5f4   Chris Mason   Add block_write_f...
407
  	bh->b_end_io = handler;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
408
409
  	set_buffer_async_write(bh);
  }
35c80d5f4   Chris Mason   Add block_write_f...
410
411
412
413
414
  
  /*
   * Mark @bh as being under async write, with end_buffer_async_write() as
   * its I/O completion handler (installed via
   * mark_buffer_async_write_endio()).
   */
  void mark_buffer_async_write(struct buffer_head *bh)
  {
  	mark_buffer_async_write_endio(bh, end_buffer_async_write);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
  EXPORT_SYMBOL(mark_buffer_async_write);
  
  
  /*
   * fs/buffer.c contains helper functions for buffer-backed address space's
   * fsync functions.  A common requirement for buffer-based filesystems is
   * that certain data from the backing blockdev needs to be written out for
   * a successful fsync().  For example, ext2 indirect blocks need to be
   * written back and waited upon before fsync() returns.
   *
   * The functions mark_buffer_dirty_inode(), sync_mapping_buffers(),
   * inode_has_buffers() and invalidate_inode_buffers() are provided for the
   * management of a list of dependent buffers at ->i_mapping->private_list.
   *
   * Locking is a little subtle: try_to_free_buffers() will remove buffers
   * from their controlling inode's queue when they are being freed.  But
   * try_to_free_buffers() will be operating against the *blockdev* mapping
   * at the time, not against the S_ISREG file which depends on those buffers.
   * So the locking for private_list is via the private_lock in the address_space
   * which backs the buffers.  Which is different from the address_space 
   * against which the buffers are listed.  So for a particular address_space,
   * mapping->private_lock does *not* protect mapping->private_list!  In fact,
   * mapping->private_list will always be protected by the backing blockdev's
   * ->private_lock.
   *
   * Which introduces a requirement: all buffers on an address_space's
   * ->private_list must be from the same address_space: the blockdev's.
   *
   * address_spaces which do not place buffers at ->private_list via these
   * utility functions are free to use private_lock and private_list for
   * whatever they want.  The only requirement is that list_empty(private_list)
   * be true at clear_inode() time.
   *
   * FIXME: clear_inode should not call invalidate_inode_buffers().  The
   * filesystems should do that.  invalidate_inode_buffers() should just go
   * BUG_ON(!list_empty).
   *
   * FIXME: mark_buffer_dirty_inode() is a data-plane operation.  It should
   * take an address_space, not an inode.  And it should be called
   * mark_buffer_dirty_fsync() to clearly define why those buffers are being
   * queued up.
   *
   * FIXME: mark_buffer_dirty_inode() doesn't need to add the buffer to the
   * list if it is already on a list.  Because if the buffer is on a list,
   * it *must* already be on the right one.  If not, the filesystem is being
   * silly.  This will save a ton of locking.  But first we have to ensure
   * that buffers are taken *off* the old inode's list when they are freed
   * (presumably in truncate).  That requires careful auditing of all
   * filesystems (do it inside bforget()).  It could also be done by bringing
   * b_inode back.
   */
  
  /*
   * The buffer's backing address_space's private_lock must be held
   */
dbacefc9c   Thomas Petazzoni   fs/buffer.c: unin...
470
  static void __remove_assoc_queue(struct buffer_head *bh)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
471
472
  {
  	list_del_init(&bh->b_assoc_buffers);
58ff407be   Jan Kara   [PATCH] Fix IO er...
473
  	WARN_ON(!bh->b_assoc_map);
58ff407be   Jan Kara   [PATCH] Fix IO er...
474
  	bh->b_assoc_map = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
  }
  
  /*
   * Report whether @inode has any buffers queued on its
   * i_data.private_list.  Returns 1 if the list is non-empty, 0 otherwise.
   */
  int inode_has_buffers(struct inode *inode)
  {
  	struct list_head *assoc = &inode->i_data.private_list;

  	if (list_empty(assoc))
  		return 0;
  	return 1;
  }
  
  /*
   * osync is designed to support O_SYNC io.  It waits synchronously for
   * all already-submitted IO to complete, but does not queue any new
   * writes to the disk.
   *
   * To do O_SYNC writes, just queue the buffer writes with ll_rw_block as
   * you dirty the buffers, and then use osync_buffers_list to wait for
   * completion.  Any other dirty buffers which are not yet queued for
   * write will not be flushed to disk by the osync.
   *
   * @lock: spinlock held while walking @list
   * @list: list of buffer heads linked through b_assoc_buffers
   *
   * Returns 0, or -EIO if any buffer that was waited on was not uptodate
   * once the wait finished.
   */
  static int osync_buffers_list(spinlock_t *lock, struct list_head *list)
  {
  	struct buffer_head *bh;
  	struct list_head *p;
  	int err = 0;

  	spin_lock(lock);
  repeat:
  	/* Walk from the tail of the list towards the head. */
  	list_for_each_prev(p, list) {
  		bh = BH_ENTRY(p);
  		if (buffer_locked(bh)) {
  			/*
  			 * Take a reference before dropping the lock for the
  			 * wait; it is paired with the brelse() below.
  			 */
  			get_bh(bh);
  			spin_unlock(lock);
  			wait_on_buffer(bh);
  			if (!buffer_uptodate(bh))
  				err = -EIO;
  			brelse(bh);
  			spin_lock(lock);
  			/*
  			 * The lock was dropped, so restart the walk instead
  			 * of continuing from p (presumably because the list
  			 * may have changed in the meantime).
  			 */
  			goto repeat;
  		}
  	}
  	spin_unlock(lock);
  	return err;
  }
08fdc8a01   Mateusz Guzik   buffer.c: call th...
516
  void emergency_thaw_bdev(struct super_block *sb)
c2d754385   Eric Sandeen   filesystem freeze...
517
  {
01a05b337   Al Viro   new helper: itera...
518
  	while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb))
a1c6f0573   Dmitry Monakhov   fs: use block_dev...
519
520
  		printk(KERN_WARNING "Emergency Thaw on %pg
  ", sb->s_bdev);
01a05b337   Al Viro   new helper: itera...
521
  }
c2d754385   Eric Sandeen   filesystem freeze...
522

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
523
  /**
78a4a50a8   Randy Dunlap   docbook: fix file...
524
   * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers
67be2dd1b   Martin Waitz   [PATCH] DocBook: ...
525
   * @mapping: the mapping which wants those buffers written
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
526
527
528
529
   *
   * Starts I/O against the buffers at mapping->private_list, and waits upon
   * that I/O.
   *
67be2dd1b   Martin Waitz   [PATCH] DocBook: ...
530
531
532
   * Basically, this is a convenience function for fsync().
   * @mapping is a file or directory which needs those buffers to be written for
   * a successful fsync().
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
533
534
535
   */
  int sync_mapping_buffers(struct address_space *mapping)
  {
252aa6f5b   Rafael Aquini   mm: redefine addr...
536
  	struct address_space *buffer_mapping = mapping->private_data;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
  
  	if (buffer_mapping == NULL || list_empty(&mapping->private_list))
  		return 0;
  
  	return fsync_buffers_list(&buffer_mapping->private_lock,
  					&mapping->private_list);
  }
  EXPORT_SYMBOL(sync_mapping_buffers);
  
  /*
   * Called when we've recently written block `bblock', and it is known that
   * `bblock' was for a buffer_boundary() buffer.  This means that the block at
   * `bblock + 1' is probably a dirty indirect block.  Hunt it down and, if it's
   * dirty, schedule it for IO.  So that indirects merge nicely with their data.
   */
  void write_boundary_block(struct block_device *bdev,
  			sector_t bblock, unsigned blocksize)
  {
  	struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize);
  	if (bh) {
  		if (buffer_dirty(bh))
dfec8a14f   Mike Christie   fs: have ll_rw_bl...
558
  			ll_rw_block(REQ_OP_WRITE, 0, 1, &bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
559
560
561
562
563
564
565
566
567
568
  		put_bh(bh);
  	}
  }
  
  void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
  {
  	struct address_space *mapping = inode->i_mapping;
  	struct address_space *buffer_mapping = bh->b_page->mapping;
  
  	mark_buffer_dirty(bh);
252aa6f5b   Rafael Aquini   mm: redefine addr...
569
570
  	if (!mapping->private_data) {
  		mapping->private_data = buffer_mapping;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
571
  	} else {
252aa6f5b   Rafael Aquini   mm: redefine addr...
572
  		BUG_ON(mapping->private_data != buffer_mapping);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
573
  	}
535ee2fbf   Jan Kara   buffer_head: fix ...
574
  	if (!bh->b_assoc_map) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
575
576
577
  		spin_lock(&buffer_mapping->private_lock);
  		list_move_tail(&bh->b_assoc_buffers,
  				&mapping->private_list);
58ff407be   Jan Kara   [PATCH] Fix IO er...
578
  		bh->b_assoc_map = mapping;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
579
580
581
582
583
584
  		spin_unlock(&buffer_mapping->private_lock);
  	}
  }
  EXPORT_SYMBOL(mark_buffer_dirty_inode);
  
  /*
ec82e1c1c   Matthew Wilcox   fs: Convert buffe...
585
   * Mark the page dirty, and set it dirty in the page cache, and mark the inode
787d2214c   Nick Piggin   fs: introduce som...
586
587
588
589
   * dirty.
   *
   * If warn is true, then emit a warning if the page is not uptodate and has
   * not been truncated.
c4843a759   Greg Thelen   memcg: add per cg...
590
   *
81f8c3a46   Johannes Weiner   mm: memcontrol: g...
591
   * The caller must hold lock_page_memcg().
787d2214c   Nick Piggin   fs: introduce som...
592
   */
f82b37641   Matthew Wilcox   export __set_page...
593
  void __set_page_dirty(struct page *page, struct address_space *mapping,
62cccb8c8   Johannes Weiner   mm: simplify lock...
594
  			     int warn)
787d2214c   Nick Piggin   fs: introduce som...
595
  {
227d53b39   KOSAKI Motohiro   mm: __set_page_di...
596
  	unsigned long flags;
b93b01631   Matthew Wilcox   page cache: use x...
597
  	xa_lock_irqsave(&mapping->i_pages, flags);
787d2214c   Nick Piggin   fs: introduce som...
598
599
  	if (page->mapping) {	/* Race with truncate? */
  		WARN_ON_ONCE(warn && !PageUptodate(page));
62cccb8c8   Johannes Weiner   mm: simplify lock...
600
  		account_page_dirtied(page, mapping);
ec82e1c1c   Matthew Wilcox   fs: Convert buffe...
601
602
  		__xa_set_mark(&mapping->i_pages, page_index(page),
  				PAGECACHE_TAG_DIRTY);
787d2214c   Nick Piggin   fs: introduce som...
603
  	}
b93b01631   Matthew Wilcox   page cache: use x...
604
  	xa_unlock_irqrestore(&mapping->i_pages, flags);
787d2214c   Nick Piggin   fs: introduce som...
605
  }
f82b37641   Matthew Wilcox   export __set_page...
606
  EXPORT_SYMBOL_GPL(__set_page_dirty);
787d2214c   Nick Piggin   fs: introduce som...
607
608
  
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
   * Add a page to the dirty page list.
   *
   * It is a sad fact of life that this function is called from several places
   * deeply under spinlocking.  It may not sleep.
   *
   * If the page has buffers, the uptodate buffers are set dirty, to preserve
   * dirty-state coherency between the page and the buffers.  If the page does
   * not have buffers then when they are later attached they will all be set
   * dirty.
   *
   * The buffers are dirtied before the page is dirtied.  There's a small race
   * window in which a writepage caller may see the page cleanness but not the
   * buffer dirtiness.  That's fine.  If this code were to set the page dirty
   * before the buffers, a concurrent writepage caller could clear the page dirty
   * bit, see a bunch of clean buffers and we'd end up with dirty buffers/clean
   * page on the dirty page list.
   *
   * We use private_lock to lock against try_to_free_buffers while using the
   * page's buffer list.  Also use this to protect against clean buffers being
   * added to the page after it was set dirty.
   *
   * FIXME: may need to call ->reservepage here as well.  That's rather up to the
   * address_space though.
   */
  /* Returns nonzero if the page was not already dirty (newly_dirty), else 0. */
  int __set_page_dirty_buffers(struct page *page)
  {
a8e7d49aa   Linus Torvalds   Fix race in creat...
635
  	int newly_dirty;
787d2214c   Nick Piggin   fs: introduce som...
636
  	struct address_space *mapping = page_mapping(page);
ebf7a227d   Nick Piggin   [PATCH] mm: bug i...
637
638
639
  
  	/* No mapping: only the page flag is set, nothing else is touched. */
  	if (unlikely(!mapping))
  		return !TestSetPageDirty(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
640
641
642
643
644
645
646
647
648
649
650
  
  	spin_lock(&mapping->private_lock);
  	if (page_has_buffers(page)) {
  		struct buffer_head *head = page_buffers(page);
  		struct buffer_head *bh = head;
  
  		/* b_this_page is walked as a ring until it returns to head. */
  		do {
  			set_buffer_dirty(bh);
  			bh = bh->b_this_page;
  		} while (bh != head);
  	}
c4843a759   Greg Thelen   memcg: add per cg...
651
  	/*
81f8c3a46   Johannes Weiner   mm: memcontrol: g...
652
653
  	 * Lock out page->mem_cgroup migration to keep PageDirty
  	 * synchronized with per-memcg dirty page counters.
c4843a759   Greg Thelen   memcg: add per cg...
654
  	 */
62cccb8c8   Johannes Weiner   mm: simplify lock...
655
  	lock_page_memcg(page);
a8e7d49aa   Linus Torvalds   Fix race in creat...
656
  	newly_dirty = !TestSetPageDirty(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
657
  	spin_unlock(&mapping->private_lock);
a8e7d49aa   Linus Torvalds   Fix race in creat...
658
  	if (newly_dirty)
62cccb8c8   Johannes Weiner   mm: simplify lock...
659
  		__set_page_dirty(page, mapping, 1);
c4843a759   Greg Thelen   memcg: add per cg...
660

62cccb8c8   Johannes Weiner   mm: simplify lock...
661
  	unlock_page_memcg(page);
c4843a759   Greg Thelen   memcg: add per cg...
662
663
664
  
  	/* The inode is marked dirty only after both locks were dropped. */
  	if (newly_dirty)
  		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
a8e7d49aa   Linus Torvalds   Fix race in creat...
665
  	return newly_dirty;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
  }
  EXPORT_SYMBOL(__set_page_dirty_buffers);
  
  /*
   * Write out and wait upon a list of buffers.
   *
   * We have conflicting pressures: we want to make sure that all
   * initially dirty buffers get waited on, but that any subsequently
   * dirtied buffers don't.  After all, we don't want fsync to last
   * forever if somebody is actively writing to the file.
   *
   * Do this in two main stages: first we copy dirty buffers to a
   * temporary inode list, queueing the writes as we go.  Then we clean
   * up, waiting for those writes to complete.
   * 
   * During this second stage, any subsequent updates to the file may end
   * up refiling the buffer on the original inode's dirty list again, so
   * there is a chance we will end up with a buffer queued for write but
   * not yet completed on that list.  So, as a final cleanup we go through
   * the osync code to catch these locked, dirty buffers without requeuing
   * any newly dirty buffers for write.
   *
   * @lock: spinlock held while the lists are manipulated; it is dropped
   *        around every write submission and every wait
   * @list: list of buffers to write out and wait upon
   *
   * Returns -EIO if any buffer waited on in stage two is not uptodate
   * afterwards, otherwise the return value of osync_buffers_list().
   */
  static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
  {
  	struct buffer_head *bh;
  	struct list_head tmp;
7eaceacca   Jens Axboe   block: remove per...
692
  	struct address_space *mapping;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
693
  	int err = 0, err2;
4ee2491ed   Jens Axboe   fs: make fsync_bu...
694
  	struct blk_plug plug;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
695
696
  
  	INIT_LIST_HEAD(&tmp);
4ee2491ed   Jens Axboe   fs: make fsync_bu...
697
  	blk_start_plug(&plug);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
698
699
700
701
  
  	/* Stage one: move dirty or locked buffers to tmp, write the dirty ones. */
  	spin_lock(lock);
  	while (!list_empty(list)) {
  		bh = BH_ENTRY(list->next);
535ee2fbf   Jan Kara   buffer_head: fix ...
702
  		mapping = bh->b_assoc_map;
58ff407be   Jan Kara   [PATCH] Fix IO er...
703
  		__remove_assoc_queue(bh);
535ee2fbf   Jan Kara   buffer_head: fix ...
704
705
706
  		/* Avoid race with mark_buffer_dirty_inode() which does
  		 * a lockless check and we rely on seeing the dirty bit */
  		smp_mb();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
707
708
  		if (buffer_dirty(bh) || buffer_locked(bh)) {
  			list_add(&bh->b_assoc_buffers, &tmp);
535ee2fbf   Jan Kara   buffer_head: fix ...
709
  			bh->b_assoc_map = mapping;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
710
711
712
713
714
  			if (buffer_dirty(bh)) {
  				get_bh(bh);
  				spin_unlock(lock);
  				/*
  				 * Ensure any pending I/O completes so that
9cb569d60   Christoph Hellwig   remove SWRITE* I/...
715
716
717
718
  				 * write_dirty_buffer() actually writes the
  				 * current contents - it is a noop if I/O is
  				 * still in flight on potentially older
  				 * contents.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
719
  				 */
70fd76140   Christoph Hellwig   block,fs: use REQ...
720
  				write_dirty_buffer(bh, REQ_SYNC);
9cf6b720f   Jens Axboe   block: fsync_buff...
721
722
723
724
725
726
727
  
  				/*
  				 * Kick off IO for the previous mapping. Note
  				 * that we will not run the very last mapping,
  				 * wait_on_buffer() will do that for us
  				 * through sync_buffer().
  				 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
728
729
730
731
732
  				brelse(bh);
  				spin_lock(lock);
  			}
  		}
  	}
4ee2491ed   Jens Axboe   fs: make fsync_bu...
733
734
735
  	spin_unlock(lock);
  	blk_finish_plug(&plug);
  	spin_lock(lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
736
737
  	/* Stage two: take buffers from the tail of tmp and wait on each one. */
  	while (!list_empty(&tmp)) {
  		bh = BH_ENTRY(tmp.prev);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
738
  		get_bh(bh);
535ee2fbf   Jan Kara   buffer_head: fix ...
739
740
741
742
743
744
745
  		mapping = bh->b_assoc_map;
  		__remove_assoc_queue(bh);
  		/* Avoid race with mark_buffer_dirty_inode() which does
  		 * a lockless check and we rely on seeing the dirty bit */
  		smp_mb();
  		if (buffer_dirty(bh)) {
  			list_add(&bh->b_assoc_buffers,
e3892296d   Jan Kara   vfs: fix NULL poi...
746
  				 &mapping->private_list);
535ee2fbf   Jan Kara   buffer_head: fix ...
747
748
  			bh->b_assoc_map = mapping;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
  		spin_unlock(lock);
  		wait_on_buffer(bh);
  		if (!buffer_uptodate(bh))
  			err = -EIO;
  		brelse(bh);
  		spin_lock(lock);
  	}
  	
  	/* Final cleanup pass; an -EIO from stage two takes precedence. */
  	spin_unlock(lock);
  	err2 = osync_buffers_list(lock, list);
  	if (err)
  		return err;
  	else
  		return err2;
  }
  
  /*
   * Invalidate any and all dirty buffers on a given inode.  We are
   * probably unmounting the fs, but that doesn't mean we have already
   * done a sync().  Just drop the buffers from the inode list.
   *
   * NOTE: we take the inode's blockdev's mapping's private_lock.  Which
   * assumes that all the buffers are against the blockdev.  Not true
   * for reiserfs.
   *
   * @inode: inode whose i_data.private_list is emptied.  Nothing is done
   *         when inode_has_buffers() reports false.
   */
  void invalidate_inode_buffers(struct inode *inode)
  {
  	if (inode_has_buffers(inode)) {
  		struct address_space *mapping = &inode->i_data;
  		struct list_head *list = &mapping->private_list;
252aa6f5b   Rafael Aquini   mm: redefine addr...
779
  		struct address_space *buffer_mapping = mapping->private_data;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
780
781
782
783
784
785
786
  
  		/* The lock used is the private_lock of the private_data mapping. */
  		spin_lock(&buffer_mapping->private_lock);
  		while (!list_empty(list))
  			__remove_assoc_queue(BH_ENTRY(list->next));
  		spin_unlock(&buffer_mapping->private_lock);
  	}
  }
52b19ac99   Jan Kara   udf: Fix BUG_ON()...
787
  EXPORT_SYMBOL(invalidate_inode_buffers);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
788
789
790
791
792
793
794
795
796
797
798
799
800
801
  
  /*
   * Remove any clean buffers from the inode's buffer list.  This is called
   * when we're trying to free the inode itself.  Those buffers can pin it.
   *
   * Returns 1 if all buffers were removed (or inode_has_buffers() was false),
   * 0 if a dirty buffer was found.  The scan stops at the first dirty buffer
   * at the front of the list, so that buffer and everything queued behind it
   * stay on the list.
   */
  int remove_inode_buffers(struct inode *inode)
  {
  	int ret = 1;
  
  	if (inode_has_buffers(inode)) {
  		struct address_space *mapping = &inode->i_data;
  		struct list_head *list = &mapping->private_list;
252aa6f5b   Rafael Aquini   mm: redefine addr...
802
  		struct address_space *buffer_mapping = mapping->private_data;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
  
  		spin_lock(&buffer_mapping->private_lock);
  		while (!list_empty(list)) {
  			struct buffer_head *bh = BH_ENTRY(list->next);
  			if (buffer_dirty(bh)) {
  				ret = 0;
  				break;
  			}
  			__remove_assoc_queue(bh);
  		}
  		spin_unlock(&buffer_mapping->private_lock);
  	}
  	return ret;
  }
  
  /*
   * Create the appropriate buffers when given a page for data area and
   * the size of each buffer.  Use the bh->b_this_page linked list to
   * follow the buffers created.  Return NULL if unable to create more
   * buffers.
   *
   * The retry flag is used to differentiate async IO (paging, swapping)
   * which may not fail from ordinary buffer allocations.  When @retry is
   * true the buffer heads are allocated with __GFP_NOFAIL.
   *
   * The allocations run with the memcg obtained from @page installed as the
   * active memcg; the previous active memcg is restored before returning.
   *
   * The returned chain is NULL-terminated (not yet a ring) and is ordered by
   * ascending page offset, because buffers are allocated from the highest
   * offset downwards and each one is pushed onto the front.
   */
  struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
640ab98fb   Jens Axboe   buffer: have allo...
828
  		bool retry)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
829
830
  {
  	struct buffer_head *bh, *head;
f745c6f5f   Shakeel Butt   fs, mm: account b...
831
  	gfp_t gfp = GFP_NOFS | __GFP_ACCOUNT;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
832
  	long offset;
b87d8cefe   Roman Gushchin   mm, memcg: rework...
833
  	struct mem_cgroup *memcg, *old_memcg;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
834

640ab98fb   Jens Axboe   buffer: have allo...
835
836
  	if (retry)
  		gfp |= __GFP_NOFAIL;
f745c6f5f   Shakeel Butt   fs, mm: account b...
837
  	memcg = get_mem_cgroup_from_page(page);
b87d8cefe   Roman Gushchin   mm, memcg: rework...
838
  	old_memcg = set_active_memcg(memcg);
f745c6f5f   Shakeel Butt   fs, mm: account b...
839

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
840
841
842
  	head = NULL;
  	offset = PAGE_SIZE;
  	while ((offset -= size) >= 0) {
640ab98fb   Jens Axboe   buffer: have allo...
843
  		bh = alloc_buffer_head(gfp);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
844
845
  		if (!bh)
  			goto no_grow;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
846
847
848
  		bh->b_this_page = head;
  		bh->b_blocknr = -1;
  		head = bh;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
849
850
851
852
  		bh->b_size = size;
  
  		/* Link the buffer to its page */
  		set_bh_page(bh, page, offset);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
853
  	}
f745c6f5f   Shakeel Butt   fs, mm: account b...
854
  out:
b87d8cefe   Roman Gushchin   mm, memcg: rework...
855
  	set_active_memcg(old_memcg);
f745c6f5f   Shakeel Butt   fs, mm: account b...
856
  	mem_cgroup_put(memcg);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
857
858
859
860
861
862
863
864
865
866
867
868
  	return head;
  /*
   * In case anything failed, we just free everything we got.
   * head is NULL once the loop below finishes, so the shared exit path
   * at out: returns NULL.
   */
  no_grow:
  	if (head) {
  		do {
  			bh = head;
  			head = head->b_this_page;
  			free_buffer_head(bh);
  		} while (head);
  	}
f745c6f5f   Shakeel Butt   fs, mm: account b...
869
  	goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
870
871
872
873
874
875
876
877
878
879
880
881
882
883
  }
  EXPORT_SYMBOL_GPL(alloc_page_buffers);
  
  /*
   * Turn the NULL-terminated b_this_page chain starting at @head into a ring
   * (the last buffer is pointed back at @head) and pass @head to
   * attach_page_private() for @page.
   */
  static inline void
  link_dev_buffers(struct page *page, struct buffer_head *head)
  {
  	struct buffer_head *bh, *tail;
  
  	/* Find the last buffer of the chain. */
  	bh = head;
  	do {
  		tail = bh;
  		bh = bh->b_this_page;
  	} while (bh);
  	tail->b_this_page = head;
45dcfc273   Guoqing Jiang   fs/buffer.c: use ...
884
  	attach_page_private(page, head);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
885
  }
bbec0270b   Linus Torvalds   blkdev_max_block:...
886
887
888
889
890
891
892
893
894
895
896
  /*
   * Number of blocks of @size bytes covered by the i_size of the inode behind
   * @bdev (i_size shifted right by blksize_bits(@size)).  When i_size reads
   * as zero the all-ones sector_t value is returned instead.
   */
  static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
  {
  	loff_t bytes = i_size_read(bdev->bd_inode);
  	unsigned int shift;

  	if (!bytes)
  		return ~((sector_t)0);

  	shift = blksize_bits(size);
  	return bytes >> shift;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
897
898
899
  /*
   * Initialise the state of a blockdev page's buffers.
   *
   * Walks the buffer ring of @page.  Each buffer that is not yet mapped has
   * its end_io and private fields cleared, is bound to @bdev, receives the
   * block number for its position (counting up from @block), is marked
   * uptodate when the page is uptodate, and is marked mapped only when its
   * block number is below the end block from blkdev_max_block().
   *
   * Returns that end block.
   */ 
676ce6d5c   Hugh Dickins   block: replace __...
900
  static sector_t
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
901
902
903
904
905
906
  init_page_buffers(struct page *page, struct block_device *bdev,
  			sector_t block, int size)
  {
  	struct buffer_head *head = page_buffers(page);
  	struct buffer_head *bh = head;
  	int uptodate = PageUptodate(page);
bbec0270b   Linus Torvalds   blkdev_max_block:...
907
  	sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode), size);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
908
909
910
  
  	do {
  		if (!buffer_mapped(bh)) {
01950a349   Eric Biggers   fs/buffer.c: fold...
911
912
  			bh->b_end_io = NULL;
  			bh->b_private = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
913
914
915
916
  			bh->b_bdev = bdev;
  			bh->b_blocknr = block;
  			if (uptodate)
  				set_buffer_uptodate(bh);
080399aaa   Jeff Moyer   block: don't mark...
917
918
  			if (block < end_block)
  				set_buffer_mapped(bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
919
920
921
922
  		}
  		block++;
  		bh = bh->b_this_page;
  	} while (bh != head);
676ce6d5c   Hugh Dickins   block: replace __...
923
924
925
926
927
  
  	/*
  	 * Caller needs to validate requested block against end of device.
  	 */
  	return end_block;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
928
929
930
931
932
  }
  
  /*
   * Create the page-cache page that contains the requested block.
   *
676ce6d5c   Hugh Dickins   block: replace __...
933
   * This is used purely for blockdev mappings.
   *
   * Returns 1 if @block is below the end block reported by
   * init_page_buffers(), -ENXIO if it is not, and 0 if the page already had
   * buffers of a different size that try_to_free_buffers() could not free.
   * The page is unlocked and its reference dropped on every path.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
934
   */
676ce6d5c   Hugh Dickins   block: replace __...
935
  static int
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
936
  grow_dev_page(struct block_device *bdev, sector_t block,
3b5e6454a   Gioh Kim   fs/buffer.c: supp...
937
  	      pgoff_t index, int size, int sizebits, gfp_t gfp)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
938
939
940
941
  {
  	struct inode *inode = bdev->bd_inode;
  	struct page *page;
  	struct buffer_head *bh;
676ce6d5c   Hugh Dickins   block: replace __...
942
  	sector_t end_block;
c4b4c2a78   Zhiqiang Liu   buffer: remove us...
943
  	int ret = 0;
84235de39   Johannes Weiner   fs: buffer: move ...
944
  	gfp_t gfp_mask;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
945

c62d25556   Michal Hocko   mm, fs: introduce...
946
  	gfp_mask = mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS) | gfp;
3b5e6454a   Gioh Kim   fs/buffer.c: supp...
947

84235de39   Johannes Weiner   fs: buffer: move ...
948
949
950
951
952
953
954
955
956
  	/*
  	 * XXX: __getblk_slow() can not really deal with failure and
  	 * will endlessly loop on improvised global reclaim.  Prefer
  	 * looping in the allocator rather than here, at least that
  	 * code knows what it's doing.
  	 */
  	gfp_mask |= __GFP_NOFAIL;
  
  	page = find_or_create_page(inode->i_mapping, index, gfp_mask);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
957

e827f9235   Eric Sesterhenn   BUG_ON() Conversi...
958
  	BUG_ON(!PageLocked(page));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
959
960
961
962
  
  	/* Buffers of the right size are reused; others must be freed first. */
  	if (page_has_buffers(page)) {
  		bh = page_buffers(page);
  		if (bh->b_size == size) {
676ce6d5c   Hugh Dickins   block: replace __...
963
  			end_block = init_page_buffers(page, bdev,
f2d5a9443   Anton Altaparmakov   Fix nasty 32-bit ...
964
965
  						(sector_t)index << sizebits,
  						size);
676ce6d5c   Hugh Dickins   block: replace __...
966
  			goto done;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
967
968
969
970
971
972
973
974
  		}
  		if (!try_to_free_buffers(page))
  			goto failed;
  	}
  
  	/*
  	 * Allocate some buffers for this page
  	 */
94dc24c0c   Jens Axboe   buffer: grow_dev_...
975
  	bh = alloc_page_buffers(page, size, true);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
976
977
978
979
980
981
982
983
  
  	/*
  	 * Link the page to the buffers and initialise them.  Take the
  	 * lock to be atomic wrt __find_get_block(), which does not
  	 * run under the page lock.
  	 */
  	spin_lock(&inode->i_mapping->private_lock);
  	link_dev_buffers(page, bh);
f2d5a9443   Anton Altaparmakov   Fix nasty 32-bit ...
984
985
  	end_block = init_page_buffers(page, bdev, (sector_t)index << sizebits,
  			size);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
986
  	spin_unlock(&inode->i_mapping->private_lock);
676ce6d5c   Hugh Dickins   block: replace __...
987
988
  done:
  	ret = (block < end_block) ? 1 : -ENXIO;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
989
  failed:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
990
  	unlock_page(page);
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
991
  	put_page(page);
676ce6d5c   Hugh Dickins   block: replace __...
992
  	return ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
993
994
995
996
997
  }
  
  /*
   * Create buffers for the specified block device block's page.  If
   * that page was dirty, the buffers are set dirty also.
   *
   * Computes the page-cache index for @block and hands off to
   * grow_dev_page().  Returns -EIO when the block number does not survive
   * the conversion to a pgoff_t index, otherwise whatever grow_dev_page()
   * returns.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
998
   */
858119e15   Arjan van de Ven   [PATCH] Unlinline...
999
  static int
3b5e6454a   Gioh Kim   fs/buffer.c: supp...
1000
  grow_buffers(struct block_device *bdev, sector_t block, int size, gfp_t gfp)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1001
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1002
1003
1004
1005
1006
1007
1008
1009
1010
  	pgoff_t index;
  	int sizebits;
  
  	/* Smallest shift for which (size << sizebits) reaches PAGE_SIZE. */
  	sizebits = -1;
  	do {
  		sizebits++;
  	} while ((size << sizebits) < PAGE_SIZE);
  
  	index = block >> sizebits;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1011

e56579338   Andrew Morton   [PATCH] grow_buff...
1012
1013
1014
1015
1016
  	/*
  	 * Check for a block which wants to lie outside our maximum possible
  	 * pagecache index.  (this comparison is done using sector_t types).
  	 */
  	if (unlikely(index != block >> sizebits)) {
e56579338   Andrew Morton   [PATCH] grow_buff...
1017
  		printk(KERN_ERR "%s: requested out-of-range block %llu for "
a1c6f0573   Dmitry Monakhov   fs: use block_dev...
1018
1019
  			"device %pg
  ",
8e24eea72   Harvey Harrison   fs: replace remai...
1020
  			__func__, (unsigned long long)block,
a1c6f0573   Dmitry Monakhov   fs: use block_dev...
1021
  			bdev);
e56579338   Andrew Morton   [PATCH] grow_buff...
1022
1023
  		return -EIO;
  	}
676ce6d5c   Hugh Dickins   block: replace __...
1024

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1025
  	/* Create a page with the proper size buffers.. */
3b5e6454a   Gioh Kim   fs/buffer.c: supp...
1026
  	return grow_dev_page(bdev, block, index, size, sizebits, gfp);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1027
  }
0026ba400   Eric Biggers   fs/buffer.c: make...
1028
  /*
   * Slow path used by __getblk_gfp().  After validating @size, loop: look the
   * block up with __find_get_block() and, on a miss, call grow_buffers() and
   * try again.
   *
   * Returns the buffer_head found, or NULL when @size is invalid or
   * grow_buffers() returns a negative value.
   */
  static struct buffer_head *
3b5e6454a   Gioh Kim   fs/buffer.c: supp...
1029
1030
  __getblk_slow(struct block_device *bdev, sector_t block,
  	     unsigned size, gfp_t gfp)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1031
1032
  {
  	/* Size must be multiple of hard sectorsize */
e1defc4ff   Martin K. Petersen   block: Do away wi...
1033
  	if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1034
1035
1036
1037
  			(size < 512 || size > PAGE_SIZE))) {
  		printk(KERN_ERR "getblk(): invalid block size %d requested
  ",
  					size);
e1defc4ff   Martin K. Petersen   block: Do away wi...
1038
1039
1040
  		printk(KERN_ERR "logical block size: %d
  ",
  					bdev_logical_block_size(bdev));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1041
1042
1043
1044
  
  		dump_stack();
  		return NULL;
  	}
676ce6d5c   Hugh Dickins   block: replace __...
1045
1046
1047
  	for (;;) {
  		struct buffer_head *bh;
  		int ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1048
1049
1050
1051
  
  		bh = __find_get_block(bdev, block, size);
  		if (bh)
  			return bh;
676ce6d5c   Hugh Dickins   block: replace __...
1052

3b5e6454a   Gioh Kim   fs/buffer.c: supp...
1053
  		ret = grow_buffers(bdev, block, size, gfp);
676ce6d5c   Hugh Dickins   block: replace __...
1054
1055
  		if (ret < 0)
  			return NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1056
1057
1058
1059
1060
1061
1062
  	}
  }
  
  /*
   * The relationship between dirty buffers and dirty pages:
   *
   * Whenever a page has any dirty buffers, the page's dirty bit is set, and
ec82e1c1c   Matthew Wilcox   fs: Convert buffe...
1063
   * the page is tagged dirty in the page cache.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
   *
   * At all times, the dirtiness of the buffers represents the dirtiness of
   * subsections of the page.  If the page has buffers, the page dirty bit is
   * merely a hint about the true dirty state.
   *
   * When a page is set dirty in its entirety, all its buffers are marked dirty
   * (if the page has buffers).
   *
   * When a buffer is marked dirty, its page is dirtied, but the page's other
   * buffers are not.
   *
   * Also.  When blockdev buffers are explicitly read with bread(), they
   * individually become uptodate.  But their backing page remains not
   * uptodate - even if all of its buffers are uptodate.  A subsequent
   * block_read_full_page() against that page will discover all the uptodate
   * buffers, will set the page uptodate and will perform no I/O.
   */
  
  /**
   * mark_buffer_dirty - mark a buffer_head as needing writeout
67be2dd1b   Martin Waitz   [PATCH] DocBook: ...
1084
   * @bh: the buffer_head to mark dirty
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1085
   *
ec82e1c1c   Matthew Wilcox   fs: Convert buffe...
1086
1087
1088
   * mark_buffer_dirty() will set the dirty bit against the buffer, then set
   * its backing page dirty, then tag the page as dirty in the page cache
   * and then attach the address_space's inode to its superblock's dirty
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1089
1090
1091
   * inode list.
   *
   * mark_buffer_dirty() is atomic.  It takes bh->b_page->mapping->private_lock,
b93b01631   Matthew Wilcox   page cache: use x...
1092
   * i_pages lock and mapping->host->i_lock.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1093
   */
fc9b52cd8   Harvey Harrison   fs: remove fastca...
1094
  void mark_buffer_dirty(struct buffer_head *bh)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1095
  {
787d2214c   Nick Piggin   fs: introduce som...
1096
  	WARN_ON_ONCE(!buffer_uptodate(bh));
1be62dc19   Linus Torvalds   Be more careful a...
1097

5305cb830   Tejun Heo   block: add block_...
1098
  	trace_block_dirty_buffer(bh);
1be62dc19   Linus Torvalds   Be more careful a...
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
  	/*
  	 * Very *carefully* optimize the it-is-already-dirty case.
  	 *
  	 * Don't let the final "is it dirty" escape to before we
  	 * perhaps modified the buffer.
  	 */
  	if (buffer_dirty(bh)) {
  		smp_mb();
  		if (buffer_dirty(bh))
  			return;
  	}
a8e7d49aa   Linus Torvalds   Fix race in creat...
1110
1111
  	/* Only the caller that actually sets the buffer bit dirties the page. */
  	if (!test_set_buffer_dirty(bh)) {
  		struct page *page = bh->b_page;
c4843a759   Greg Thelen   memcg: add per cg...
1112
  		struct address_space *mapping = NULL;
c4843a759   Greg Thelen   memcg: add per cg...
1113

62cccb8c8   Johannes Weiner   mm: simplify lock...
1114
  		lock_page_memcg(page);
8e9d78ede   Linus Torvalds   Re-introduce page...
1115
  		if (!TestSetPageDirty(page)) {
c4843a759   Greg Thelen   memcg: add per cg...
1116
  			mapping = page_mapping(page);
8e9d78ede   Linus Torvalds   Re-introduce page...
1117
  			if (mapping)
62cccb8c8   Johannes Weiner   mm: simplify lock...
1118
  				__set_page_dirty(page, mapping, 0);
8e9d78ede   Linus Torvalds   Re-introduce page...
1119
  		}
62cccb8c8   Johannes Weiner   mm: simplify lock...
1120
  		unlock_page_memcg(page);
c4843a759   Greg Thelen   memcg: add per cg...
1121
1122
  		/* mapping is still NULL if the page was already dirty. */
  		if (mapping)
  			__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
a8e7d49aa   Linus Torvalds   Fix race in creat...
1123
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1124
  }
1fe72eaa0   H Hartley Sweeten   fs/buffer.c: clea...
1125
  EXPORT_SYMBOL(mark_buffer_dirty);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1126

87354e5de   Jeff Layton   buffer: set error...
1127
1128
  /*
   * Record a write I/O error against @bh: set the write_io_error flag on the
   * buffer, report -EIO to the mapping of its page and to its associated
   * mapping (each only when present), and record -EIO in s_wb_err of the
   * superblock found through bh->b_bdev->bd_super, if there is one.
   */
  void mark_buffer_write_io_error(struct buffer_head *bh)
  {
485e9605c   Jeff Layton   fs/buffer.c: reco...
1129
  	struct super_block *sb;
87354e5de   Jeff Layton   buffer: set error...
1130
1131
1132
1133
1134
1135
  	set_buffer_write_io_error(bh);
  	/* FIXME: do we need to set this in both places? */
  	if (bh->b_page && bh->b_page->mapping)
  		mapping_set_error(bh->b_page->mapping, -EIO);
  	if (bh->b_assoc_map)
  		mapping_set_error(bh->b_assoc_map, -EIO);
485e9605c   Jeff Layton   fs/buffer.c: reco...
1136
1137
1138
1139
1140
  	/* bd_super is sampled once, inside an RCU read-side section. */
  	rcu_read_lock();
  	sb = READ_ONCE(bh->b_bdev->bd_super);
  	if (sb)
  		errseq_set(&sb->s_wb_err, -EIO);
  	rcu_read_unlock();
87354e5de   Jeff Layton   buffer: set error...
1141
1142
  }
  EXPORT_SYMBOL(mark_buffer_write_io_error);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
  /*
   * Decrement a buffer_head's reference count.  If all buffers against a page
   * have zero reference count, are clean and unlocked, and if the page is clean
   * and unlocked then try_to_free_buffers() may strip the buffers from the page
   * in preparation for freeing it (sometimes, rarely, buffers are removed from
   * a page but it ends up not being freed, and buffers may later be reattached).
   *
   * If b_count already reads as zero nothing is decremented; a warning is
   * emitted instead.
   */
  void __brelse(struct buffer_head * buf)
  {
  	if (atomic_read(&buf->b_count)) {
  		put_bh(buf);
  		return;
  	}
5c752ad9f   Arjan van de Ven   Use WARN() in fs/
1156
1157
  	WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer
  ");
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1158
  }
1fe72eaa0   H Hartley Sweeten   fs/buffer.c: clea...
1159
  EXPORT_SYMBOL(__brelse);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1160
1161
1162
1163
1164
1165
1166
1167
  
  /*
   * bforget() is like brelse(), except it discards any
   * potentially dirty data.
   *
   * The dirty bit is cleared, the buffer is taken off its associated-buffers
   * list when b_assoc_map is set, and the reference is then dropped through
   * __brelse().
   */
  void __bforget(struct buffer_head *bh)
  {
  	clear_buffer_dirty(bh);
535ee2fbf   Jan Kara   buffer_head: fix ...
1168
  	if (bh->b_assoc_map) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1169
1170
1171
1172
  		struct address_space *buffer_mapping = bh->b_page->mapping;
  
  		/* The list is edited under the private_lock of the page mapping. */
  		spin_lock(&buffer_mapping->private_lock);
  		list_del_init(&bh->b_assoc_buffers);
58ff407be   Jan Kara   [PATCH] Fix IO er...
1173
  		bh->b_assoc_map = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1174
1175
1176
1177
  		spin_unlock(&buffer_mapping->private_lock);
  	}
  	__brelse(bh);
  }
1fe72eaa0   H Hartley Sweeten   fs/buffer.c: clea...
1178
  EXPORT_SYMBOL(__bforget);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
  
  /*
   * Make @bh uptodate by reading it if necessary.  The buffer is locked; if it
   * is already uptodate it is unlocked and returned.  Otherwise a read is
   * submitted with end_buffer_read_sync as the completion handler and waited
   * for.
   *
   * Returns @bh when it is uptodate; otherwise brelse() is called on it and
   * NULL is returned.
   */
  static struct buffer_head *__bread_slow(struct buffer_head *bh)
  {
  	lock_buffer(bh);
  	if (buffer_uptodate(bh)) {
  		unlock_buffer(bh);
  		return bh;
  	} else {
  		get_bh(bh);
  		bh->b_end_io = end_buffer_read_sync;
2a222ca99   Mike Christie   fs: have submit_b...
1189
  		submit_bh(REQ_OP_READ, 0, bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
  		wait_on_buffer(bh);
  		if (buffer_uptodate(bh))
  			return bh;
  	}
  	brelse(bh);
  	return NULL;
  }
  
  /*
   * Per-cpu buffer LRU implementation.  To reduce the cost of __find_get_block().
   * The bhs[] array is sorted - newest buffer is at bhs[0].  Buffers have their
   * refcount elevated by one when they're in an LRU.  A buffer can only appear
   * once in a particular CPU's LRU.  A single buffer can be present in multiple
   * CPUs' LRUs at the same time.
   *
   * This is a transparent caching front-end to sb_bread(), sb_getblk() and
   * sb_find_get_block().
   *
   * The LRUs themselves only need locking against invalidate_bh_lrus.  We use
   * a local interrupt disable for that.
   */
86cf78d73   Sebastien Buisson   fs/buffer.c: incr...
1211
  #define BH_LRU_SIZE	16
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
  
  /* One LRU: an array of BH_LRU_SIZE buffer_head pointers. */
  struct bh_lru {
  	struct buffer_head *bhs[BH_LRU_SIZE];
  };
  
  /* Per-cpu instance of the LRU, with every slot initially NULL. */
  static DEFINE_PER_CPU(struct bh_lru, bh_lrus) = {{ NULL }};
  
  /*
   * LRU critical sections: local interrupts are disabled when CONFIG_SMP is
   * set, otherwise only preemption is disabled.
   */
  #ifdef CONFIG_SMP
  #define bh_lru_lock()	local_irq_disable()
  #define bh_lru_unlock()	local_irq_enable()
  #else
  #define bh_lru_lock()	preempt_disable()
  #define bh_lru_unlock()	preempt_enable()
  #endif
  
  /*
   * Sanity check used by the LRU helpers: BUG if interrupts are disabled.
   * The check is compiled out when irqs_disabled is not defined as a macro.
   */
  static inline void check_irqs_on(void)
  {
  #if defined(irqs_disabled)
  	BUG_ON(irqs_disabled());
  #endif
  }
  
  /*
241f01fbe   Eric Biggers   fs/buffer.c: make...
1235
1236
1237
   * Install a buffer_head into this cpu's LRU.  If not already in the LRU, it is
   * inserted at the front, and the buffer_head at the back if any is evicted.
   * Or, if already in the LRU it is moved to the front.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1238
1239
1240
   */
  static void bh_lru_install(struct buffer_head *bh)
  {
241f01fbe   Eric Biggers   fs/buffer.c: make...
1241
1242
1243
  	struct buffer_head *evictee = bh;
  	struct bh_lru *b;
  	int i;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1244
1245
1246
  
  	check_irqs_on();
  	bh_lru_lock();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1247

241f01fbe   Eric Biggers   fs/buffer.c: make...
1248
1249
1250
1251
1252
1253
  	b = this_cpu_ptr(&bh_lrus);
  	for (i = 0; i < BH_LRU_SIZE; i++) {
  		/* Each swap pushes the previous occupant one slot further down. */
  		swap(evictee, b->bhs[i]);
  		/* bh came back out of a slot: it was already cached, nothing to evict. */
  		if (evictee == bh) {
  			bh_lru_unlock();
  			return;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1254
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1255
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1256

241f01fbe   Eric Biggers   fs/buffer.c: make...
1257
1258
1259
  	/* bh is new to this LRU: take a reference for it, release the evictee. */
  	get_bh(bh);
  	bh_lru_unlock();
  	brelse(evictee);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1260
1261
1262
1263
1264
  }
  
  /*
   * Look up the bh in this cpu's LRU.  If it's there, move it to the head.
   *
   * An entry matches when its block number, block device and size all equal
   * @block, @bdev and @size.  Returns the matching buffer with an extra
   * reference taken via get_bh(), or NULL when no slot matches.
   */
858119e15   Arjan van de Ven   [PATCH] Unlinline...
1265
  static struct buffer_head *
3991d3bd1   Tomasz Kvarsin   [PATCH] warning f...
1266
  lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1267
1268
  {
  	struct buffer_head *ret = NULL;
3991d3bd1   Tomasz Kvarsin   [PATCH] warning f...
1269
  	unsigned int i;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1270
1271
1272
  
  	check_irqs_on();
  	bh_lru_lock();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1273
  	for (i = 0; i < BH_LRU_SIZE; i++) {
c7b92516a   Christoph Lameter   fs: Use this_cpu_...
1274
  		struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1275

9470dd5d3   Zach Brown   fs: check bh bloc...
1276
1277
  		if (bh && bh->b_blocknr == block && bh->b_bdev == bdev &&
  		    bh->b_size == size) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1278
1279
  			/* Hit below slot 0: shift slots 0..i-1 down by one, put bh first. */
  			if (i) {
  				while (i) {
c7b92516a   Christoph Lameter   fs: Use this_cpu_...
1280
1281
  					__this_cpu_write(bh_lrus.bhs[i],
  						__this_cpu_read(bh_lrus.bhs[i - 1]));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1282
1283
  					i--;
  				}
c7b92516a   Christoph Lameter   fs: Use this_cpu_...
1284
  				__this_cpu_write(bh_lrus.bhs[0], bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
  			}
  			get_bh(bh);
  			ret = bh;
  			break;
  		}
  	}
  	bh_lru_unlock();
  	return ret;
  }
  
  /*
   * Perform a pagecache lookup for the matching buffer.  If it's there, refresh
   * it in the LRU and mark it as accessed.  If it is not present then return
   * NULL.
   *
   * The per-cpu LRU is consulted first.  Only on a miss is
   * __find_get_block_slow() called, and a buffer found that way is installed
   * in the LRU.  Note that @size takes part in the LRU lookup only; the slow
   * lookup is passed just @bdev and @block.
   */
  struct buffer_head *
3991d3bd1   Tomasz Kvarsin   [PATCH] warning f...
1301
  __find_get_block(struct block_device *bdev, sector_t block, unsigned size)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1302
1303
1304
1305
  {
  	struct buffer_head *bh = lookup_bh_lru(bdev, block, size);
  
  	if (bh == NULL) {
2457aec63   Mel Gorman   mm: non-atomicall...
1306
  		/* __find_get_block_slow will mark the page accessed */
385fd4c59   Coywolf Qi Hunt   [PATCH] __find_ge...
1307
  		bh = __find_get_block_slow(bdev, block);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1308
1309
  		if (bh)
  			bh_lru_install(bh);
2457aec63   Mel Gorman   mm: non-atomicall...
1310
  	} else
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1311
  		touch_buffer(bh);
2457aec63   Mel Gorman   mm: non-atomicall...
1312

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1313
1314
1315
1316
1317
  	return bh;
  }
  EXPORT_SYMBOL(__find_get_block);
  
  /*
3b5e6454a   Gioh Kim   fs/buffer.c: supp...
1318
   * __getblk_gfp() will locate (and, if necessary, create) the buffer_head
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1319
1320
1321
   * which corresponds to the passed block_device, block and size. The
   * returned buffer has its reference count incremented.
   *
3b5e6454a   Gioh Kim   fs/buffer.c: supp...
1322
1323
   * __getblk_gfp() will lock up the machine if grow_dev_page's
   * try_to_free_buffers() attempt is failing.  FIXME, perhaps?
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1324
1325
   */
  struct buffer_head *
3b5e6454a   Gioh Kim   fs/buffer.c: supp...
1326
1327
  __getblk_gfp(struct block_device *bdev, sector_t block,
  	     unsigned size, gfp_t gfp)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1328
1329
1330
1331
1332
  {
  	struct buffer_head *bh = __find_get_block(bdev, block, size);
  
  	might_sleep();
  	if (bh == NULL)
3b5e6454a   Gioh Kim   fs/buffer.c: supp...
1333
  		bh = __getblk_slow(bdev, block, size, gfp);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1334
1335
  	return bh;
  }
3b5e6454a   Gioh Kim   fs/buffer.c: supp...
1336
  EXPORT_SYMBOL(__getblk_gfp);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1337
1338
1339
1340
  
  /*
   * Do async read-ahead on a buffer..
   */
3991d3bd1   Tomasz Kvarsin   [PATCH] warning f...
1341
  void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1342
1343
  {
  	struct buffer_head *bh = __getblk(bdev, block, size);
a3e713b5f   Andrew Morton   [PATCH] __bread o...
1344
  	if (likely(bh)) {
70246286e   Christoph Hellwig   block: get rid of...
1345
  		ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, &bh);
a3e713b5f   Andrew Morton   [PATCH] __bread o...
1346
1347
  		brelse(bh);
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1348
1349
  }
  EXPORT_SYMBOL(__breadahead);
d87f63925   Roman Gushchin   ext4: use non-mov...
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
  /*
   * Same as __breadahead(), except that the buffer is obtained with
   * __getblk_gfp() so that the caller chooses the @gfp flags.
   */
  void __breadahead_gfp(struct block_device *bdev, sector_t block, unsigned size,
  		      gfp_t gfp)
  {
  	struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp);

  	if (unlikely(!bh))
  		return;

  	ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, &bh);
  	brelse(bh);
  }
  EXPORT_SYMBOL(__breadahead_gfp);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1360
  /**
3b5e6454a   Gioh Kim   fs/buffer.c: supp...
1361
   *  __bread_gfp() - reads a specified block and returns the bh
67be2dd1b   Martin Waitz   [PATCH] DocBook: ...
1362
   *  @bdev: the block_device to read from
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1363
1364
   *  @block: number of block
   *  @size: size (in bytes) to read
3b5e6454a   Gioh Kim   fs/buffer.c: supp...
1365
1366
   *  @gfp: page allocation flag
   *
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1367
   *  Reads a specified block, and returns buffer head that contains it.
3b5e6454a   Gioh Kim   fs/buffer.c: supp...
1368
1369
   *  The page cache can be allocated from non-movable area
   *  not to prevent page migration if you set gfp to zero.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1370
1371
1372
   *  It returns NULL if the block was unreadable.
   */
  struct buffer_head *
3b5e6454a   Gioh Kim   fs/buffer.c: supp...
1373
1374
  __bread_gfp(struct block_device *bdev, sector_t block,
  		   unsigned size, gfp_t gfp)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1375
  {
3b5e6454a   Gioh Kim   fs/buffer.c: supp...
1376
  	struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1377

a3e713b5f   Andrew Morton   [PATCH] __bread o...
1378
  	if (likely(bh) && !buffer_uptodate(bh))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1379
1380
1381
  		bh = __bread_slow(bh);
  	return bh;
  }
3b5e6454a   Gioh Kim   fs/buffer.c: supp...
1382
  EXPORT_SYMBOL(__bread_gfp);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
  
  /*
   * Empty the bh LRU of the CPU this runs on: every slot is released with
   * brelse() and reset to NULL.  @arg is unused.
   *
   * invalidate_bh_lrus() is called rarely - but not only at unmount.
   * This doesn't race because it runs in each cpu either in irq
   * or with preempt disabled.
   */
  static void invalidate_bh_lru(void *arg)
  {
  	struct bh_lru *lru = &get_cpu_var(bh_lrus);
  	struct buffer_head **slot;

  	for (slot = &lru->bhs[0]; slot < &lru->bhs[BH_LRU_SIZE]; slot++) {
  		brelse(*slot);
  		*slot = NULL;
  	}
  	put_cpu_var(bh_lrus);
  }
42be35d03   Gilad Ben-Yossef   fs: only send IPI...
1400
1401
1402
1403
1404
  
  static bool has_bh_in_lru(int cpu, void *dummy)
  {
  	struct bh_lru *b = per_cpu_ptr(&bh_lrus, cpu);
  	int i;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1405
  	
42be35d03   Gilad Ben-Yossef   fs: only send IPI...
1406
1407
  	for (i = 0; i < BH_LRU_SIZE; i++) {
  		if (b->bhs[i])
1d7066797   Saurav Girepunje   fs/buffer.c: fix ...
1408
  			return true;
42be35d03   Gilad Ben-Yossef   fs: only send IPI...
1409
  	}
1d7066797   Saurav Girepunje   fs/buffer.c: fix ...
1410
  	return false;
42be35d03   Gilad Ben-Yossef   fs: only send IPI...
1411
  }
43edfc892   Laura Abbott   FROMLIST: fs/buff...
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
  /*
   * Drop this CPU's LRU references to a set of buffer_heads.
   *
   * @arg is the xarray of buffer_heads handed over by evict_bh_lrus().  For
   * each entry, the LRU slot holding it (if there is one) is released with
   * brelse() and cleared.
   */
  static void __evict_bhs_lru(void *arg)
  {
  	struct bh_lru *b = &get_cpu_var(bh_lrus);
  	struct xarray *busy_bhs = arg;
  	struct buffer_head *bh;
  	unsigned long i, xarray_index;

  	/*
  	 * xa_for_each() loads the next entry into bh on every step, so
  	 * there is nothing to advance by hand in the loop body.
  	 */
  	xa_for_each(busy_bhs, xarray_index, bh) {
  		for (i = 0; i < BH_LRU_SIZE; i++) {
  			if (b->bhs[i] == bh) {
  				brelse(b->bhs[i]);
  				b->bhs[i] = NULL;
  				break;
  			}
  		}
  	}

  	put_cpu_var(bh_lrus);
  }
  
  /*
   * Report whether the bh LRU of @cpu holds any of the buffer_heads stored in
   * the xarray passed through @arg.
   */
  static bool page_has_bhs_in_lru(int cpu, void *arg)
  {
  	struct bh_lru *b = per_cpu_ptr(&bh_lrus, cpu);
  	struct xarray *busy_bhs = arg;
  	struct buffer_head *bh;
  	unsigned long i, xarray_index;

  	/*
  	 * xa_for_each() loads the next entry into bh on every step, so
  	 * there is nothing to advance by hand in the loop body.
  	 */
  	xa_for_each(busy_bhs, xarray_index, bh) {
  		for (i = 0; i < BH_LRU_SIZE; i++) {
  			if (b->bhs[i] == bh)
  				return true;
  		}
  	}

  	return false;
  }
f9a14399a   Peter Zijlstra   mm: optimize kill...
1453
  void invalidate_bh_lrus(void)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1454
  {
cb923159b   Sebastian Andrzej Siewior   smp: Remove alloc...
1455
  	on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1456
  }
9db5579be   Nick Piggin   rewrite rd
1457
  EXPORT_SYMBOL_GPL(invalidate_bh_lrus);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1458

43edfc892   Laura Abbott   FROMLIST: fs/buff...
1459
1460
1461
1462
1463
  /*
   * Drop the LRU references to the buffer_heads stored in @busy_bhs:
   * __evict_bhs_lru() is run, through on_each_cpu_cond(), on the CPUs for
   * which page_has_bhs_in_lru() returns true.
   */
  static void evict_bh_lrus(struct xarray *busy_bhs)
  {
  	on_each_cpu_cond(page_has_bhs_in_lru,	/* which CPUs */
  			 __evict_bhs_lru,	/* what to run there */
  			 busy_bhs, 1);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1464
1465
1466
1467
  void set_bh_page(struct buffer_head *bh,
  		struct page *page, unsigned long offset)
  {
  	bh->b_page = page;
e827f9235   Eric Sesterhenn   BUG_ON() Conversi...
1468
  	BUG_ON(offset >= PAGE_SIZE);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
  	if (PageHighMem(page))
  		/*
  		 * This catches illegal uses and preserves the offset:
  		 */
  		bh->b_data = (char *)(0 + offset);
  	else
  		bh->b_data = page_address(page) + offset;
  }
  EXPORT_SYMBOL(set_bh_page);
  
  /*
   * Called when truncating a buffer on a page completely.
   */
e7470ee89   Mel Gorman   fs: buffer: do no...
1482
1483
1484
1485
1486
  
  /* Bits that are cleared during an invalidate */
  #define BUFFER_FLAGS_DISCARD \
  	(1 << BH_Mapped | 1 << BH_New | 1 << BH_Req | \
  	 1 << BH_Delay | 1 << BH_Unwritten)
858119e15   Arjan van de Ven   [PATCH] Unlinline...
1487
  static void discard_buffer(struct buffer_head * bh)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1488
  {
e7470ee89   Mel Gorman   fs: buffer: do no...
1489
  	unsigned long b_state, b_state_old;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1490
1491
1492
  	lock_buffer(bh);
  	clear_buffer_dirty(bh);
  	bh->b_bdev = NULL;
e7470ee89   Mel Gorman   fs: buffer: do no...
1493
1494
1495
1496
1497
1498
1499
1500
  	b_state = bh->b_state;
  	for (;;) {
  		b_state_old = cmpxchg(&bh->b_state, b_state,
  				      (b_state & ~BUFFER_FLAGS_DISCARD));
  		if (b_state_old == b_state)
  			break;
  		b_state = b_state_old;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1501
1502
1503
1504
  	unlock_buffer(bh);
  }
  
  /**
814e1d25a   Wang Sheng-Hui   cleanup: vfs: sma...
1505
   * block_invalidatepage - invalidate part or all of a buffer-backed page
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1506
1507
   *
   * @page: the page which is affected
d47992f86   Lukas Czerner   mm: change invali...
1508
1509
   * @offset: start of the range to invalidate
   * @length: length of the range to invalidate
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1510
1511
   *
   * block_invalidatepage() is called when all or part of the page has become
814e1d25a   Wang Sheng-Hui   cleanup: vfs: sma...
1512
   * invalidated by a truncate operation.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1513
1514
1515
1516
1517
1518
1519
   *
   * block_invalidatepage() does not have to release all buffers, but it must
   * ensure that no dirty buffer is left outside @offset and that no I/O
   * is underway against any of the blocks which are outside the truncation
   * point.  Because the caller is about to free (and possibly reuse) those
   * blocks on-disk.
   */
d47992f86   Lukas Czerner   mm: change invali...
1520
1521
  void block_invalidatepage(struct page *page, unsigned int offset,
  			  unsigned int length)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1522
1523
1524
  {
  	struct buffer_head *head, *bh, *next;
  	unsigned int curr_off = 0;
d47992f86   Lukas Czerner   mm: change invali...
1525
  	unsigned int stop = length + offset;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1526
1527
1528
1529
  
  	BUG_ON(!PageLocked(page));
  	if (!page_has_buffers(page))
  		goto out;
d47992f86   Lukas Czerner   mm: change invali...
1530
1531
1532
  	/*
  	 * Check for overflow
  	 */
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
1533
  	BUG_ON(stop > PAGE_SIZE || stop < length);
d47992f86   Lukas Czerner   mm: change invali...
1534

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1535
1536
1537
1538
1539
1540
1541
  	head = page_buffers(page);
  	bh = head;
  	do {
  		unsigned int next_off = curr_off + bh->b_size;
  		next = bh->b_this_page;
  
  		/*
d47992f86   Lukas Czerner   mm: change invali...
1542
1543
1544
1545
1546
1547
  		 * Are we still fully in range ?
  		 */
  		if (next_off > stop)
  			goto out;
  
  		/*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
  		 * is this block fully invalidated?
  		 */
  		if (offset <= curr_off)
  			discard_buffer(bh);
  		curr_off = next_off;
  		bh = next;
  	} while (bh != head);
  
  	/*
  	 * We release buffers only if the entire page is being invalidated.
  	 * The get_block cached value has been unconditionally invalidated,
  	 * so real IO is not possible anymore.
  	 */
3172485f4   Jeff Moyer   block_invalidatep...
1561
  	if (length == PAGE_SIZE)
2ff28e22b   NeilBrown   [PATCH] Make addr...
1562
  		try_to_release_page(page, 0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1563
  out:
2ff28e22b   NeilBrown   [PATCH] Make addr...
1564
  	return;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1565
1566
  }
  EXPORT_SYMBOL(block_invalidatepage);
d47992f86   Lukas Czerner   mm: change invali...
1567

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1568
1569
1570
1571
1572
1573
1574
1575
1576
  /*
   * We attach and possibly dirty the buffers atomically wrt
   * __set_page_dirty_buffers() via private_lock.  try_to_free_buffers
   * is already excluded via the page lock.
   */
  void create_empty_buffers(struct page *page,
  			unsigned long blocksize, unsigned long b_state)
  {
  	struct buffer_head *bh, *head, *tail;
640ab98fb   Jens Axboe   buffer: have allo...
1577
  	head = alloc_page_buffers(page, blocksize, true);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
  	bh = head;
  	do {
  		bh->b_state |= b_state;
  		tail = bh;
  		bh = bh->b_this_page;
  	} while (bh);
  	tail->b_this_page = head;
  
  	spin_lock(&page->mapping->private_lock);
  	if (PageUptodate(page) || PageDirty(page)) {
  		bh = head;
  		do {
  			if (PageDirty(page))
  				set_buffer_dirty(bh);
  			if (PageUptodate(page))
  				set_buffer_uptodate(bh);
  			bh = bh->b_this_page;
  		} while (bh != head);
  	}
45dcfc273   Guoqing Jiang   fs/buffer.c: use ...
1597
  	attach_page_private(page, head);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1598
1599
1600
  	spin_unlock(&page->mapping->private_lock);
  }
  EXPORT_SYMBOL(create_empty_buffers);
29f3ad7d8   Jan Kara   fs: Provide funct...
1601
1602
1603
1604
1605
  /**
   * clean_bdev_aliases: clean a range of buffers in block device
   * @bdev: Block device to clean buffers in
   * @block: Start of a range of blocks to clean
   * @len: Number of blocks to clean
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1606
   *
29f3ad7d8   Jan Kara   fs: Provide funct...
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
   * We are taking a range of blocks for data and we don't want writeback of any
   * buffer-cache aliases starting from return from this function and until the
   * moment when something will explicitly mark the buffer dirty (hopefully that
   * will not happen until we will free that block ;-) We don't even need to mark
   * it not-uptodate - nobody can expect anything from a newly allocated buffer
   * anyway. We used to use unmap_buffer() for such invalidation, but that was
   * wrong. We definitely don't want to mark the alias unmapped, for example - it
   * would confuse anyone who might pick it with bread() afterwards...
   *
   * Also..  Note that bforget() doesn't lock the buffer.  So there can be
   * writeout I/O going on against recently-freed buffers.  We don't wait on that
   * I/O in bforget() - it's more efficient to wait on the I/O only if we really
   * need to.  That happens here.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1620
   */
29f3ad7d8   Jan Kara   fs: Provide funct...
1621
  void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1622
  {
29f3ad7d8   Jan Kara   fs: Provide funct...
1623
1624
1625
1626
1627
  	struct inode *bd_inode = bdev->bd_inode;
  	struct address_space *bd_mapping = bd_inode->i_mapping;
  	struct pagevec pvec;
  	pgoff_t index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
  	pgoff_t end;
c10f778dd   Jan Kara   fs: fix performan...
1628
  	int i, count;
29f3ad7d8   Jan Kara   fs: Provide funct...
1629
1630
  	struct buffer_head *bh;
  	struct buffer_head *head;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1631

29f3ad7d8   Jan Kara   fs: Provide funct...
1632
  	end = (block + len - 1) >> (PAGE_SHIFT - bd_inode->i_blkbits);
866798201   Mel Gorman   mm, pagevec: remo...
1633
  	pagevec_init(&pvec);
397162ffa   Jan Kara   mm: remove nr_pag...
1634
  	while (pagevec_lookup_range(&pvec, bd_mapping, &index, end)) {
c10f778dd   Jan Kara   fs: fix performan...
1635
1636
  		count = pagevec_count(&pvec);
  		for (i = 0; i < count; i++) {
29f3ad7d8   Jan Kara   fs: Provide funct...
1637
  			struct page *page = pvec.pages[i];
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1638

29f3ad7d8   Jan Kara   fs: Provide funct...
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
  			if (!page_has_buffers(page))
  				continue;
  			/*
  			 * We use page lock instead of bd_mapping->private_lock
  			 * to pin buffers here since we can afford to sleep and
  			 * it scales better than a global spinlock lock.
  			 */
  			lock_page(page);
  			/* Recheck when the page is locked which pins bhs */
  			if (!page_has_buffers(page))
  				goto unlock_page;
  			head = page_buffers(page);
  			bh = head;
  			do {
6c006a9d9   Chandan Rajendra   clean_bdev_aliase...
1653
  				if (!buffer_mapped(bh) || (bh->b_blocknr < block))
29f3ad7d8   Jan Kara   fs: Provide funct...
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
  					goto next;
  				if (bh->b_blocknr >= block + len)
  					break;
  				clear_buffer_dirty(bh);
  				wait_on_buffer(bh);
  				clear_buffer_req(bh);
  next:
  				bh = bh->b_this_page;
  			} while (bh != head);
  unlock_page:
  			unlock_page(page);
  		}
  		pagevec_release(&pvec);
  		cond_resched();
c10f778dd   Jan Kara   fs: fix performan...
1668
1669
1670
  		/* End of range already reached? */
  		if (index > end || !index)
  			break;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1671
1672
  	}
  }
29f3ad7d8   Jan Kara   fs: Provide funct...
1673
  EXPORT_SYMBOL(clean_bdev_aliases);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1674
1675
  
  /*
   * Return log2 of @blocksize, i.e. the shift that turns a byte offset into a
   * block number.
   *
   * Size is a power-of-two in the range 512..PAGE_SIZE,
   * and the case we care about most is PAGE_SIZE.
   *
   * So this *could* possibly be written with those
   * constraints in mind (relevant mostly if some
   * architecture has a slow bit-scan instruction)
   */
  static inline int block_size_bits(unsigned int blocksize)
  {
  	return ilog2(blocksize);
  }
  
  static struct buffer_head *create_page_buffers(struct page *page, struct inode *inode, unsigned int b_state)
  {
  	BUG_ON(!PageLocked(page));
  
  	if (!page_has_buffers(page))
6aa7de059   Mark Rutland   locking/atomics: ...
1693
1694
  		create_empty_buffers(page, 1 << READ_ONCE(inode->i_blkbits),
  				     b_state);
45bce8f3e   Linus Torvalds   fs/buffer.c: make...
1695
1696
1697
1698
  	return page_buffers(page);
  }
  
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
   * NOTE! All mapped/uptodate combinations are valid:
   *
   *	Mapped	Uptodate	Meaning
   *
   *	No	No		"unknown" - must do get_block()
   *	No	Yes		"hole" - zero-filled
   *	Yes	No		"allocated" - allocated on disk, not read in
   *	Yes	Yes		"valid" - allocated and up-to-date in memory.
   *
   * "Dirty" is valid only with the last case (mapped+uptodate).
   */
  
  /*
   * While block_write_full_page is writing back the dirty buffers under
   * the page lock, whoever dirtied the buffers may decide to clean them
   * again at any time.  We handle that by only looking at the buffer
   * state inside lock_buffer().
   *
   * If block_write_full_page() is called for regular writeback
   * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a
   * locked buffer.   This only can happen if someone has written the buffer
   * directly, with submit_bh().  At the address_space level PageWriteback
   * prevents this contention from occurring.
6e34eeddf   Theodore Ts'o   block_write_full_...
1722
1723
   *
   * If block_write_full_page() is called with wbc->sync_mode ==
70fd76140   Christoph Hellwig   block,fs: use REQ...
1724
   * WB_SYNC_ALL, the writes are posted using REQ_SYNC; this
721a9602e   Jens Axboe   block: kill off R...
1725
   * causes the writes to be flagged as synchronous writes.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1726
   */
b4bba3890   Benjamin Marzinski   fs: export __bloc...
1727
  int __block_write_full_page(struct inode *inode, struct page *page,
35c80d5f4   Chris Mason   Add block_write_f...
1728
1729
  			get_block_t *get_block, struct writeback_control *wbc,
  			bh_end_io_t *handler)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1730
1731
1732
1733
  {
  	int err;
  	sector_t block;
  	sector_t last_block;
f0fbd5fc0   Andrew Morton   [PATCH] __block_w...
1734
  	struct buffer_head *bh, *head;
45bce8f3e   Linus Torvalds   fs/buffer.c: make...
1735
  	unsigned int blocksize, bbits;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1736
  	int nr_underway = 0;
7637241e6   Jens Axboe   writeback: add wb...
1737
  	int write_flags = wbc_to_write_flags(wbc);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1738

45bce8f3e   Linus Torvalds   fs/buffer.c: make...
1739
  	head = create_page_buffers(page, inode,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1740
  					(1 << BH_Dirty)|(1 << BH_Uptodate));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
  
  	/*
  	 * Be very careful.  We have no exclusion from __set_page_dirty_buffers
  	 * here, and the (potentially unmapped) buffers may become dirty at
  	 * any time.  If a buffer becomes dirty here after we've inspected it
  	 * then we just miss that fact, and the page stays dirty.
  	 *
  	 * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
  	 * handle that here by just cleaning them.
  	 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1751
  	bh = head;
45bce8f3e   Linus Torvalds   fs/buffer.c: make...
1752
1753
  	blocksize = bh->b_size;
  	bbits = block_size_bits(blocksize);
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
1754
  	block = (sector_t)page->index << (PAGE_SHIFT - bbits);
45bce8f3e   Linus Torvalds   fs/buffer.c: make...
1755
  	last_block = (i_size_read(inode) - 1) >> bbits;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
  
  	/*
  	 * Get all the dirty buffers mapped to disk addresses and
  	 * handle any aliases from the underlying blockdev's mapping.
  	 */
  	do {
  		if (block > last_block) {
  			/*
  			 * mapped buffers outside i_size will occur, because
  			 * this page can be outside i_size when there is a
  			 * truncate in progress.
  			 */
  			/*
  			 * The buffer was zeroed by block_write_full_page()
  			 */
  			clear_buffer_dirty(bh);
  			set_buffer_uptodate(bh);
29a814d2e   Alex Tomas   vfs: add hooks fo...
1773
1774
  		} else if ((!buffer_mapped(bh) || buffer_delay(bh)) &&
  			   buffer_dirty(bh)) {
b0cf2321c   Badari Pulavarty   [PATCH] pass b_si...
1775
  			WARN_ON(bh->b_size != blocksize);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1776
1777
1778
  			err = get_block(inode, block, bh, 1);
  			if (err)
  				goto recover;
29a814d2e   Alex Tomas   vfs: add hooks fo...
1779
  			clear_buffer_delay(bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1780
1781
1782
  			if (buffer_new(bh)) {
  				/* blockdev mappings never come here */
  				clear_buffer_new(bh);
e64855c6c   Jan Kara   fs: Add helper to...
1783
  				clean_bdev_bh_alias(bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1784
1785
1786
1787
1788
1789
1790
  			}
  		}
  		bh = bh->b_this_page;
  		block++;
  	} while (bh != head);
  
  	do {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1791
1792
1793
1794
1795
  		if (!buffer_mapped(bh))
  			continue;
  		/*
  		 * If it's a fully non-blocking write attempt and we cannot
  		 * lock the buffer then redirty the page.  Note that this can
5b0830cb9   Jens Axboe   writeback: get ri...
1796
1797
1798
  		 * potentially cause a busy-wait loop from writeback threads
  		 * and kswapd activity, but those code paths have their own
  		 * higher-level throttling.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1799
  		 */
1b430beee   Wu Fengguang   writeback: remove...
1800
  		if (wbc->sync_mode != WB_SYNC_NONE) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1801
  			lock_buffer(bh);
ca5de404f   Nick Piggin   fs: rename buffer...
1802
  		} else if (!trylock_buffer(bh)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1803
1804
1805
1806
  			redirty_page_for_writepage(wbc, page);
  			continue;
  		}
  		if (test_clear_buffer_dirty(bh)) {
35c80d5f4   Chris Mason   Add block_write_f...
1807
  			mark_buffer_async_write_endio(bh, handler);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
  		} else {
  			unlock_buffer(bh);
  		}
  	} while ((bh = bh->b_this_page) != head);
  
  	/*
  	 * The page and its buffers are protected by PageWriteback(), so we can
  	 * drop the bh refcounts early.
  	 */
  	BUG_ON(PageWriteback(page));
  	set_page_writeback(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1819
1820
1821
1822
  
  	do {
  		struct buffer_head *next = bh->b_this_page;
  		if (buffer_async_write(bh)) {
8e8f92988   Jens Axboe   fs: add support f...
1823
1824
  			submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
  					inode->i_write_hint, wbc);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1825
1826
  			nr_underway++;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1827
1828
  		bh = next;
  	} while (bh != head);
05937baae   Andrew Morton   [PATCH] __block_w...
1829
  	unlock_page(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1830
1831
1832
1833
1834
1835
1836
1837
1838
  
  	err = 0;
  done:
  	if (nr_underway == 0) {
  		/*
  		 * The page was marked dirty, but the buffers were
  		 * clean.  Someone wrote them back by hand with
  		 * ll_rw_block/submit_bh.  A rare case.
  		 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1839
  		end_page_writeback(page);
3d67f2d7c   Nick Piggin   fs: buffer don't ...
1840

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1841
1842
1843
1844
  		/*
  		 * The page and buffer_heads can be released at any time from
  		 * here on.
  		 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
  	}
  	return err;
  
  recover:
  	/*
  	 * ENOSPC, or some other error.  We may already have added some
  	 * blocks to the file, so we need to write these out to avoid
  	 * exposing stale data.
  	 * The page is currently locked and not marked for writeback
  	 */
  	bh = head;
  	/* Recovery: lock and submit the mapped buffers */
  	do {
29a814d2e   Alex Tomas   vfs: add hooks fo...
1858
1859
  		if (buffer_mapped(bh) && buffer_dirty(bh) &&
  		    !buffer_delay(bh)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1860
  			lock_buffer(bh);
35c80d5f4   Chris Mason   Add block_write_f...
1861
  			mark_buffer_async_write_endio(bh, handler);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
  		} else {
  			/*
  			 * The buffer may have been set dirty during
  			 * attachment to a dirty page.
  			 */
  			clear_buffer_dirty(bh);
  		}
  	} while ((bh = bh->b_this_page) != head);
  	SetPageError(page);
  	BUG_ON(PageWriteback(page));
7e4c3690b   Andrew Morton   block_write_full_...
1872
  	mapping_set_error(page->mapping, err);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1873
  	set_page_writeback(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1874
1875
1876
1877
  	do {
  		struct buffer_head *next = bh->b_this_page;
  		if (buffer_async_write(bh)) {
  			clear_buffer_dirty(bh);
8e8f92988   Jens Axboe   fs: add support f...
1878
1879
  			submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
  					inode->i_write_hint, wbc);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1880
1881
  			nr_underway++;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1882
1883
  		bh = next;
  	} while (bh != head);
ffda9d302   Nick Piggin   [PATCH] fs: fix _...
1884
  	unlock_page(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1885
1886
  	goto done;
  }
b4bba3890   Benjamin Marzinski   fs: export __bloc...
1887
  EXPORT_SYMBOL(__block_write_full_page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1888

afddba49d   Nick Piggin   fs: introduce wri...
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
  /*
   * If a page has any new buffers, zero them out here, and mark them uptodate
   * and dirty so they'll be written out (in order to prevent uninitialised
   * block data from leaking). And clear the new bit.
   *
   * Only new buffers overlapping the byte range [@from, @to) of the page are
   * touched, and only the overlapping part is zeroed, and that only when the
   * page is not uptodate.  The page must be locked.
   */
  void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
  {
  	struct buffer_head *first, *bh;
  	unsigned int bh_start = 0;

  	BUG_ON(!PageLocked(page));
  	if (!page_has_buffers(page))
  		return;

  	first = page_buffers(page);
  	bh = first;
  	do {
  		unsigned int bh_end = bh_start + bh->b_size;

  		if (buffer_new(bh) && bh_end > from && bh_start < to) {
  			if (!PageUptodate(page)) {
  				unsigned int zero_from, zero_len;

  				zero_from = max(from, bh_start);
  				zero_len = min(to, bh_end) - zero_from;

  				zero_user(page, zero_from, zero_len);
  				set_buffer_uptodate(bh);
  			}

  			clear_buffer_new(bh);
  			mark_buffer_dirty(bh);
  		}

  		bh_start = bh_end;
  		bh = bh->b_this_page;
  	} while (bh != first);
  }
  EXPORT_SYMBOL(page_zero_new_buffers);
ae259a9c8   Christoph Hellwig   fs: introduce iom...
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
  /*
   * Translate @iomap into buffer_head state for file block @block of @inode.
   *
   * b_bdev is always taken from the iomap.  What else is set depends on the
   * iomap type: a hole is at most marked new, a delalloc extent is marked
   * uptodate, mapped and delay, and an unwritten or mapped extent gets its
   * b_blocknr computed and is marked mapped.  Other types change nothing more.
   */
  static void
  iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh,
  		struct iomap *iomap)
  {
  	loff_t offset = block << inode->i_blkbits;
  	int type;

  	bh->b_bdev = iomap->bdev;

  	/*
  	 * Block points to offset in file we need to map, iomap contains
  	 * the offset at which the map starts.  The map must extend past
  	 * the start of the current block; one that ends before it is a
  	 * bug.
  	 */
  	BUG_ON(offset >= iomap->offset + iomap->length);

  	type = iomap->type;

  	if (type == IOMAP_HOLE) {
  		/*
  		 * If the buffer is not up to date or beyond the current EOF,
  		 * we need to mark it as new to ensure sub-block zeroing is
  		 * executed if necessary.
  		 */
  		if (!buffer_uptodate(bh) ||
  		    (offset >= i_size_read(inode)))
  			set_buffer_new(bh);
  		return;
  	}

  	if (type == IOMAP_DELALLOC) {
  		if (!buffer_uptodate(bh) ||
  		    (offset >= i_size_read(inode)))
  			set_buffer_new(bh);
  		set_buffer_uptodate(bh);
  		set_buffer_mapped(bh);
  		set_buffer_delay(bh);
  		return;
  	}

  	if (type != IOMAP_UNWRITTEN && type != IOMAP_MAPPED)
  		return;

  	if (type == IOMAP_UNWRITTEN) {
  		/*
  		 * For unwritten regions, we always need to ensure that regions
  		 * in the block we are not writing to are zeroed. Mark the
  		 * buffer as new to ensure this.
  		 */
  		set_buffer_new(bh);
  		set_buffer_unwritten(bh);
  	}

  	/* Common to unwritten and mapped extents. */
  	if ((iomap->flags & IOMAP_F_NEW) ||
  	    offset >= i_size_read(inode))
  		set_buffer_new(bh);
  	bh->b_blocknr = (iomap->addr + offset - iomap->offset) >>
  			inode->i_blkbits;
  	set_buffer_mapped(bh);
  }
  
  /*
   * Prepare the buffers of a locked page for a write to the byte range
   * [pos, pos + len), which must lie within the page.
   *
   * Buffers overlapping the range that are not yet mapped are mapped through
   * @get_block (called with create == 1) or, when @get_block is NULL, through
   * iomap_to_bh() using @iomap.  Freshly allocated ("new") buffers get the
   * parts outside the write range zeroed, or are simply marked uptodate and
   * dirty when the page as a whole is already uptodate.  Buffers that are only
   * partially overwritten and are not uptodate are read from disk.
   *
   * Returns 0 on success, the error from @get_block, or -EIO when a submitted
   * read did not leave the buffer uptodate.  On any error
   * page_zero_new_buffers() is called for the range before returning.
   */
  int __block_write_begin_int(struct page *page, loff_t pos, unsigned len,
  		get_block_t *get_block, struct iomap *iomap)
  {
  	const unsigned from = pos & (PAGE_SIZE - 1);
  	const unsigned to = from + len;
  	struct inode *inode = page->mapping->host;
  	/* only the first and the last block of the range can be partial */
  	struct buffer_head *pending[2], **pending_top = pending;
  	struct buffer_head *head, *bh;
  	unsigned blocksize, bbits;
  	unsigned block_start, block_end;
  	sector_t block;
  	int err = 0;

  	BUG_ON(!PageLocked(page));
  	BUG_ON(from > PAGE_SIZE);
  	BUG_ON(to > PAGE_SIZE);
  	BUG_ON(from > to);

  	head = create_page_buffers(page, inode, 0);
  	blocksize = head->b_size;
  	bbits = block_size_bits(blocksize);

  	block = (sector_t)page->index << (PAGE_SHIFT - bbits);

  	/*
  	 * Walk the circular buffer list once.  "!block_start" lets the very
  	 * first iteration through even though bh == head at that point.
  	 */
  	for (bh = head, block_start = 0; bh != head || !block_start;
  	     block++, block_start = block_end, bh = bh->b_this_page) {
  		int partial;

  		block_end = block_start + blocksize;

  		/* Buffer lies completely outside the range being written. */
  		if (block_end <= from || block_start >= to) {
  			if (PageUptodate(page) && !buffer_uptodate(bh))
  				set_buffer_uptodate(bh);
  			continue;
  		}

  		/* Does the write cover only part of this buffer? */
  		partial = block_start < from || block_end > to;

  		if (buffer_new(bh))
  			clear_buffer_new(bh);

  		if (!buffer_mapped(bh)) {
  			WARN_ON(bh->b_size != blocksize);
  			if (get_block) {
  				err = get_block(inode, block, bh, 1);
  				if (err)
  					break;
  			} else {
  				iomap_to_bh(inode, block, bh, iomap);
  			}

  			if (buffer_new(bh)) {
  				clean_bdev_bh_alias(bh);
  				if (PageUptodate(page)) {
  					clear_buffer_new(bh);
  					set_buffer_uptodate(bh);
  					mark_buffer_dirty(bh);
  				} else if (partial) {
  					/* zero the head and tail the write skips */
  					zero_user_segments(page,
  						to, block_end,
  						block_start, from);
  				}
  				continue;
  			}
  		}

  		if (PageUptodate(page)) {
  			if (!buffer_uptodate(bh))
  				set_buffer_uptodate(bh);
  			continue;
  		}

  		/* Partially overwritten and stale: the rest must be read in. */
  		if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
  		    !buffer_unwritten(bh) && partial) {
  			ll_rw_block(REQ_OP_READ, 0, 1, &bh);
  			*pending_top++ = bh;
  		}
  	}

  	/*
  	 * If we issued read requests - let them complete.
  	 */
  	while (pending_top > pending) {
  		struct buffer_head *done = *--pending_top;

  		wait_on_buffer(done);
  		if (!buffer_uptodate(done))
  			err = -EIO;
  	}

  	if (unlikely(err))
  		page_zero_new_buffers(page, from, to);
  	return err;
  }
ae259a9c8   Christoph Hellwig   fs: introduce iom...
2066
2067
2068
2069
2070
2071
  
  /*
   * Prepare the buffers of @page for a write of @len bytes at file position
   * @pos, mapping blocks through @get_block.
   *
   * Thin wrapper around __block_write_begin_int() that passes a NULL iomap;
   * its return value is passed through unchanged.
   */
  int __block_write_begin(struct page *page, loff_t pos, unsigned len,
  		get_block_t *get_block)
  {
  	return __block_write_begin_int(page, pos, len, get_block, NULL);
  }
ebdec241d   Christoph Hellwig   fs: kill block_pr...
2072
  EXPORT_SYMBOL(__block_write_begin);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2073
2074
2075
2076
2077
2078
2079
2080
  
  /*
   * Commit a write to the byte range [from, to) of @page: every buffer that
   * overlaps the range is marked uptodate and dirty, and the "new" flag is
   * cleared on all buffers of the page.  Always returns 0.
   */
  static int __block_commit_write(struct inode *inode, struct page *page,
  		unsigned from, unsigned to)
  {
  	struct buffer_head *head = page_buffers(page);
  	struct buffer_head *bh = head;
  	const unsigned blocksize = head->b_size;
  	unsigned block_start = 0;
  	int all_uptodate = 1;

  	do {
  		const unsigned block_end = block_start + blocksize;
  		const int outside = block_end <= from || block_start >= to;

  		if (!outside) {
  			set_buffer_uptodate(bh);
  			mark_buffer_dirty(bh);
  		} else if (!buffer_uptodate(bh)) {
  			/* an untouched buffer is stale: page stays !uptodate */
  			all_uptodate = 0;
  		}
  		clear_buffer_new(bh);

  		block_start = block_end;
  		bh = bh->b_this_page;
  	} while (bh != head);

  	/*
  	 * A partial write may have happened to leave every buffer uptodate.
  	 * Noticing that here lets us mark the page uptodate and spare the
  	 * next read() a pointless readpage().
  	 */
  	if (all_uptodate)
  		SetPageUptodate(page);
  	return 0;
  }
  
  /*
155130a4f   Christoph Hellwig   get rid of block_...
2112
2113
2114
   * block_write_begin takes care of the basic task of block allocation and
   * bringing partial write blocks uptodate first.
   *
7bb46a673   npiggin@suse.de   fs: introduce new...
2115
   * The filesystem needs to handle block truncation upon failure.
afddba49d   Nick Piggin   fs: introduce wri...
2116
   */
155130a4f   Christoph Hellwig   get rid of block_...
2117
2118
  int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
  		unsigned flags, struct page **pagep, get_block_t *get_block)
afddba49d   Nick Piggin   fs: introduce wri...
2119
  {
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
2120
  	pgoff_t index = pos >> PAGE_SHIFT;
afddba49d   Nick Piggin   fs: introduce wri...
2121
  	struct page *page;
6e1db88d5   Christoph Hellwig   introduce __block...
2122
  	int status;
afddba49d   Nick Piggin   fs: introduce wri...
2123

6e1db88d5   Christoph Hellwig   introduce __block...
2124
2125
2126
  	page = grab_cache_page_write_begin(mapping, index, flags);
  	if (!page)
  		return -ENOMEM;
afddba49d   Nick Piggin   fs: introduce wri...
2127

6e1db88d5   Christoph Hellwig   introduce __block...
2128
  	status = __block_write_begin(page, pos, len, get_block);
afddba49d   Nick Piggin   fs: introduce wri...
2129
  	if (unlikely(status)) {
6e1db88d5   Christoph Hellwig   introduce __block...
2130
  		unlock_page(page);
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
2131
  		put_page(page);
6e1db88d5   Christoph Hellwig   introduce __block...
2132
  		page = NULL;
afddba49d   Nick Piggin   fs: introduce wri...
2133
  	}
6e1db88d5   Christoph Hellwig   introduce __block...
2134
  	*pagep = page;
afddba49d   Nick Piggin   fs: introduce wri...
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
  	return status;
  }
  EXPORT_SYMBOL(block_write_begin);
  
  /*
   * Finish a buffered write of @copied (out of @len requested) bytes at @pos
   * into @page and commit the affected buffers.
   *
   * Returns the number of bytes actually committed, which is 0 when a short
   * copy hit a page that is not uptodate.
   */
  int block_write_end(struct file *file, struct address_space *mapping,
  			loff_t pos, unsigned len, unsigned copied,
  			struct page *page, void *fsdata)
  {
  	const unsigned start = pos & (PAGE_SIZE - 1);

  	if (unlikely(copied < len)) {
  		/*
  		 * Buffers that were completely written are uptodate now, so
  		 * a later readpage cannot clobber them.  A buffer that was
  		 * only partly filled by a short copy is not uptodate though,
  		 * and a readpage could wipe out what we did write.
  		 *
  		 * Keep it simple: a short write into a page that is not
  		 * uptodate counts as a zero-length write, so the caller has
  		 * to redo the whole thing.
  		 */
  		if (!PageUptodate(page))
  			copied = 0;

  		page_zero_new_buffers(page, start + copied, start + len);
  	}
  	flush_dcache_page(page);

  	/* This could be a short (even 0-length) commit */
  	__block_commit_write(mapping->host, page, start, start + copied);

  	return copied;
  }
  EXPORT_SYMBOL(block_write_end);
  
  /*
   * Complete a buffered write: commit the buffers through block_write_end(),
   * extend i_size when the write went past it, then unlock and release the
   * page.  Returns the number of bytes committed.
   */
  int generic_write_end(struct file *file, struct address_space *mapping,
  			loff_t pos, unsigned len, unsigned copied,
  			struct page *page, void *fsdata)
  {
  	struct inode *inode = mapping->host;
  	const loff_t old_size = inode->i_size;
  	loff_t new_end;
  	bool i_size_changed;

  	copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
  	new_end = pos + copied;

  	/*
  	 * i_size_read() is not needed: i_rwsem is held, so i_size cannot
  	 * change under us.
  	 *
  	 * The update must happen before the page is unlocked, though;
  	 * otherwise page writeout could come in and zero beyond i_size.
  	 */
  	i_size_changed = new_end > inode->i_size;
  	if (i_size_changed)
  		i_size_write(inode, new_end);

  	unlock_page(page);
  	put_page(page);

  	if (old_size < pos)
  		pagecache_isize_extended(inode, old_size, pos);

  	/*
  	 * The inode is deliberately dirtied only after the page lock has been
  	 * dropped: doing it earlier would lengthen the page lock hold time
  	 * for no reason and would force a lock ordering between the page lock
  	 * and transaction start on journaling filesystems.
  	 */
  	if (i_size_changed)
  		mark_inode_dirty(inode);
  	return copied;
  }
  EXPORT_SYMBOL(generic_write_end);
  
  /*
8ab22b9ab   Hisashi Hifumi   vfs: pagecache us...
2213
2214
2215
2216
2217
2218
   * block_is_partially_uptodate checks whether buffers within a page are
   * uptodate or not.
   *
   * Returns true if all buffers which correspond to a file portion
   * we want to read are uptodate.
   */
c186afb4d   Al Viro   switch ->is_parti...
2219
2220
  int block_is_partially_uptodate(struct page *page, unsigned long from,
  					unsigned long count)
8ab22b9ab   Hisashi Hifumi   vfs: pagecache us...
2221
  {
8ab22b9ab   Hisashi Hifumi   vfs: pagecache us...
2222
2223
2224
2225
2226
2227
2228
  	unsigned block_start, block_end, blocksize;
  	unsigned to;
  	struct buffer_head *bh, *head;
  	int ret = 1;
  
  	if (!page_has_buffers(page))
  		return 0;
45bce8f3e   Linus Torvalds   fs/buffer.c: make...
2229
2230
  	head = page_buffers(page);
  	blocksize = head->b_size;
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
2231
  	to = min_t(unsigned, PAGE_SIZE - from, count);
8ab22b9ab   Hisashi Hifumi   vfs: pagecache us...
2232
  	to = from + to;
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
2233
  	if (from < blocksize && to > PAGE_SIZE - blocksize)
8ab22b9ab   Hisashi Hifumi   vfs: pagecache us...
2234
  		return 0;
8ab22b9ab   Hisashi Hifumi   vfs: pagecache us...
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
  	bh = head;
  	block_start = 0;
  	do {
  		block_end = block_start + blocksize;
  		if (block_end > from && block_start < to) {
  			if (!buffer_uptodate(bh)) {
  				ret = 0;
  				break;
  			}
  			if (block_end >= to)
  				break;
  		}
  		block_start = block_end;
  		bh = bh->b_this_page;
  	} while (bh != head);
  
  	return ret;
  }
  EXPORT_SYMBOL(block_is_partially_uptodate);
  
  /*
   * Generic "read page" function for block devices that have the normal
   * get_block functionality, which covers most block device filesystems.
   * The page is read asynchronously: unlock_buffer() and
   * set/clear_buffer_uptodate() propagate the buffer state into the page
   * struct once the IO has completed.
   *
   * Always returns 0; a get_block() failure is recorded on the page with
   * SetPageError().
   */
  int block_read_full_page(struct page *page, get_block_t *get_block)
  {
  	struct inode *inode = page->mapping->host;
  	struct buffer_head *to_read[MAX_BUF_PER_PAGE];
  	struct buffer_head *head, *bh;
  	unsigned int blocksize, bbits;
  	sector_t iblock, lblock;
  	int nr = 0, i = 0;
  	int fully_mapped = 1;

  	head = create_page_buffers(page, inode, 0);
  	blocksize = head->b_size;
  	bbits = block_size_bits(blocksize);

  	iblock = (sector_t)page->index << (PAGE_SHIFT - bbits);
  	lblock = (i_size_read(inode) + blocksize - 1) >> bbits;

  	/*
  	 * Stage one: collect the buffers that need IO.  The loop advance lives
  	 * in the while condition, so every "continue" still steps i, iblock
  	 * and bh.
  	 */
  	bh = head;
  	do {
  		if (buffer_uptodate(bh))
  			continue;

  		if (!buffer_mapped(bh)) {
  			int err = 0;

  			fully_mapped = 0;
  			if (iblock < lblock) {
  				WARN_ON(bh->b_size != blocksize);
  				err = get_block(inode, iblock, bh, 0);
  				if (err)
  					SetPageError(page);
  			}
  			if (!buffer_mapped(bh)) {
  				/* still unmapped: hand back zeroes */
  				zero_user(page, i * blocksize, blocksize);
  				if (!err)
  					set_buffer_uptodate(bh);
  				continue;
  			}
  			/*
  			 * get_block() might have updated the buffer
  			 * synchronously
  			 */
  			if (buffer_uptodate(bh))
  				continue;
  		}
  		to_read[nr++] = bh;
  	} while (i++, iblock++, (bh = bh->b_this_page) != head);

  	if (fully_mapped)
  		SetPageMappedToDisk(page);

  	if (!nr) {
  		/*
  		 * All buffers are uptodate - we can set the page uptodate
  		 * as well. But not if get_block() returned an error.
  		 */
  		if (!PageError(page))
  			SetPageUptodate(page);
  		unlock_page(page);
  		return 0;
  	}

  	/* Stage two: lock the buffers */
  	for (i = 0; i < nr; i++) {
  		bh = to_read[i];
  		lock_buffer(bh);
  		mark_buffer_async_read(bh);
  	}

  	/*
  	 * Stage 3: start the IO.  Check for uptodateness
  	 * inside the buffer lock in case another process reading
  	 * the underlying blockdev brought it uptodate (the sct fix).
  	 */
  	for (i = 0; i < nr; i++) {
  		bh = to_read[i];
  		if (!buffer_uptodate(bh))
  			submit_bh(REQ_OP_READ, 0, bh);
  		else
  			end_buffer_async_read(bh, 1);
  	}
  	return 0;
  }
  EXPORT_SYMBOL(block_read_full_page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2345
2346
  
  /* utility function for filesystems that need to do work on expanding
89e107877   Nick Piggin   fs: new cont helpers
2347
   * truncates.  Uses filesystem pagecache writes to allow the filesystem to
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2348
2349
   * deal with the hole.  
   */
89e107877   Nick Piggin   fs: new cont helpers
2350
  int generic_cont_expand_simple(struct inode *inode, loff_t size)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2351
2352
2353
  {
  	struct address_space *mapping = inode->i_mapping;
  	struct page *page;
89e107877   Nick Piggin   fs: new cont helpers
2354
  	void *fsdata;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2355
  	int err;
c08d3b0e3   npiggin@suse.de   truncate: use new...
2356
2357
  	err = inode_newsize_ok(inode, size);
  	if (err)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2358
  		goto out;
89e107877   Nick Piggin   fs: new cont helpers
2359
  	err = pagecache_write_begin(NULL, mapping, size, 0,
c718a9751   Tetsuo Handa   fs: semove set bu...
2360
  				    AOP_FLAG_CONT_EXPAND, &page, &fsdata);
89e107877   Nick Piggin   fs: new cont helpers
2361
  	if (err)
05eb0b51f   OGAWA Hirofumi   [PATCH] fat: supp...
2362
  		goto out;
05eb0b51f   OGAWA Hirofumi   [PATCH] fat: supp...
2363

89e107877   Nick Piggin   fs: new cont helpers
2364
2365
  	err = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata);
  	BUG_ON(err > 0);
05eb0b51f   OGAWA Hirofumi   [PATCH] fat: supp...
2366

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2367
2368
2369
  out:
  	return err;
  }
1fe72eaa0   H Hartley Sweeten   fs/buffer.c: clea...
2370
  EXPORT_SYMBOL(generic_cont_expand_simple);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2371

f1e3af72c   Adrian Bunk   make fs/buffer.c:...
2372
2373
  /*
   * Zero @len bytes at file position @curpos (offset @zerofrom inside its
   * page) through the pagecache write path.
   *
   * When @zerofrom is not block aligned, *@bytes is first rounded up to the
   * next @blocksize boundary.  Returns 0 on success or the negative errno of
   * pagecache_write_begin()/pagecache_write_end().
   */
  static int __cont_zero_range(struct file *file, struct address_space *mapping,
  			     loff_t *bytes, unsigned int blocksize,
  			     loff_t curpos, unsigned zerofrom, unsigned len)
  {
  	struct page *page;
  	void *fsdata;
  	int err;

  	if (zerofrom & (blocksize-1)) {
  		*bytes |= (blocksize-1);
  		(*bytes)++;
  	}

  	err = pagecache_write_begin(file, mapping, curpos, len, 0,
  				    &page, &fsdata);
  	if (err)
  		return err;
  	zero_user(page, zerofrom, len);
  	err = pagecache_write_end(file, mapping, curpos, len, len,
  				  page, fsdata);
  	if (err < 0)
  		return err;
  	BUG_ON(err != len);
  	return 0;
  }

  /*
   * Zero-fill the file from *@bytes up to @pos, one page at a time, so that
   * the region below @pos contains no hole.
   *
   * *@bytes is re-read for every page; apart from the block round-up in
   * __cont_zero_range() this function does not advance it itself.
   * Returns 0 on success, a negative errno from the write path, or -EINTR
   * when a fatal signal is pending.
   */
  static int cont_expand_zero(struct file *file, struct address_space *mapping,
  			    loff_t pos, loff_t *bytes)
  {
  	const unsigned int blocksize = i_blocksize(mapping->host);
  	const pgoff_t index = pos >> PAGE_SHIFT;
  	const unsigned offset = pos & ~PAGE_MASK;
  	pgoff_t curidx;
  	loff_t curpos;
  	unsigned zerofrom;
  	int err;

  	/* whole pages (or page tails) that lie entirely below pos's page */
  	for (;;) {
  		curpos = *bytes;
  		curidx = curpos >> PAGE_SHIFT;
  		if (index <= curidx)
  			break;

  		zerofrom = curpos & ~PAGE_MASK;
  		err = __cont_zero_range(file, mapping, bytes, blocksize,
  					curpos, zerofrom,
  					PAGE_SIZE - zerofrom);
  		if (err)
  			return err;

  		balance_dirty_pages_ratelimited(mapping);

  		if (fatal_signal_pending(current))
  			return -EINTR;
  	}

  	/* page covers the boundary, find the boundary offset */
  	if (index != curidx)
  		return 0;

  	zerofrom = curpos & ~PAGE_MASK;
  	/* if we will expand the thing last block will be filled */
  	if (offset <= zerofrom)
  		return 0;

  	return __cont_zero_range(file, mapping, bytes, blocksize,
  				 curpos, zerofrom, offset - zerofrom);
  }
  
  /*
   * For moronic filesystems that do not allow holes in file.
   * We may have to extend the file.
   */
282dc1788   Christoph Hellwig   get rid of cont_w...
2448
  int cont_write_begin(struct file *file, struct address_space *mapping,
89e107877   Nick Piggin   fs: new cont helpers
2449
2450
2451
2452
2453
  			loff_t pos, unsigned len, unsigned flags,
  			struct page **pagep, void **fsdata,
  			get_block_t *get_block, loff_t *bytes)
  {
  	struct inode *inode = mapping->host;
93407472a   Fabian Frederick   fs: add i_blocksi...
2454
2455
  	unsigned int blocksize = i_blocksize(inode);
  	unsigned int zerofrom;
89e107877   Nick Piggin   fs: new cont helpers
2456
2457
2458
2459
  	int err;
  
  	err = cont_expand_zero(file, mapping, pos, bytes);
  	if (err)
155130a4f   Christoph Hellwig   get rid of block_...
2460
  		return err;
89e107877   Nick Piggin   fs: new cont helpers
2461

09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
2462
  	zerofrom = *bytes & ~PAGE_MASK;
89e107877   Nick Piggin   fs: new cont helpers
2463
2464
2465
  	if (pos+len > *bytes && zerofrom & (blocksize-1)) {
  		*bytes |= (blocksize-1);
  		(*bytes)++;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2466
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2467

155130a4f   Christoph Hellwig   get rid of block_...
2468
  	return block_write_begin(mapping, pos, len, flags, pagep, get_block);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2469
  }
1fe72eaa0   H Hartley Sweeten   fs/buffer.c: clea...
2470
  EXPORT_SYMBOL(cont_write_begin);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2471

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2472
2473
2474
2475
2476
2477
  /*
   * Commit the byte range [from, to) of @page via __block_commit_write(),
   * using the inode that owns the page's mapping.  Always returns 0.
   */
  int block_commit_write(struct page *page, unsigned from, unsigned to)
  {
  	__block_commit_write(page->mapping->host, page, from, to);
  	return 0;
  }
  EXPORT_SYMBOL(block_commit_write);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2479

541716902   David Chinner   [FS] Implement bl...
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
  /*
   * block_page_mkwrite() is not allowed to change the file size as it gets
   * called from a page fault handler when a page is first dirtied. Hence we must
   * be careful to check for EOF conditions here. We set the page up correctly
   * for a written page which means we get ENOSPC checking when writing into
   * holes and correct delalloc and unwritten extent mapping on filesystems that
   * support these features.
   *
   * We are not allowed to take the i_mutex here so we have to play games to
   * protect against truncate races as the page could now be beyond EOF.  Because
7bb46a673   npiggin@suse.de   fs: introduce new...
2490
   * truncate writes the inode size before removing pages, once we have the
541716902   David Chinner   [FS] Implement bl...
2491
2492
2493
   * page lock we can determine safely if the page is beyond EOF. If it is not
   * beyond EOF, then the page is guaranteed safe against truncation until we
   * unlock the page.
ea13a8646   Jan Kara   vfs: Block mmappe...
2494
   *
14da92001   Jan Kara   fs: Protect write...
2495
   * Direct callers of this function should protect against filesystem freezing
5c5000296   Ross Zwisler   vfs: remove unuse...
2496
   * using sb_start_pagefault() - sb_end_pagefault() functions.
541716902   David Chinner   [FS] Implement bl...
2497
   */
5c5000296   Ross Zwisler   vfs: remove unuse...
2498
  int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
24da4fab5   Jan Kara   vfs: Create __blo...
2499
  			 get_block_t get_block)
541716902   David Chinner   [FS] Implement bl...
2500
  {
c2ec175c3   Nick Piggin   mm: page_mkwrite ...
2501
  	struct page *page = vmf->page;
496ad9aa8   Al Viro   new helper: file_...
2502
  	struct inode *inode = file_inode(vma->vm_file);
541716902   David Chinner   [FS] Implement bl...
2503
2504
  	unsigned long end;
  	loff_t size;
24da4fab5   Jan Kara   vfs: Create __blo...
2505
  	int ret;
541716902   David Chinner   [FS] Implement bl...
2506
2507
2508
2509
  
  	lock_page(page);
  	size = i_size_read(inode);
  	if ((page->mapping != inode->i_mapping) ||
183363380   Nick Piggin   fix some conversi...
2510
  	    (page_offset(page) > size)) {
24da4fab5   Jan Kara   vfs: Create __blo...
2511
2512
2513
  		/* We overload EFAULT to mean page got truncated */
  		ret = -EFAULT;
  		goto out_unlock;
541716902   David Chinner   [FS] Implement bl...
2514
2515
2516
  	}
  
  	/* page is wholly or partially inside EOF */
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
2517
2518
  	if (((page->index + 1) << PAGE_SHIFT) > size)
  		end = size & ~PAGE_MASK;
541716902   David Chinner   [FS] Implement bl...
2519
  	else
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
2520
  		end = PAGE_SIZE;
541716902   David Chinner   [FS] Implement bl...
2521

ebdec241d   Christoph Hellwig   fs: kill block_pr...
2522
  	ret = __block_write_begin(page, 0, end, get_block);
541716902   David Chinner   [FS] Implement bl...
2523
2524
  	if (!ret)
  		ret = block_commit_write(page, 0, end);
24da4fab5   Jan Kara   vfs: Create __blo...
2525
2526
  	if (unlikely(ret < 0))
  		goto out_unlock;
ea13a8646   Jan Kara   vfs: Block mmappe...
2527
  	set_page_dirty(page);
1d1d1a767   Darrick J. Wong   mm: only enforce ...
2528
  	wait_for_stable_page(page);
24da4fab5   Jan Kara   vfs: Create __blo...
2529
2530
2531
  	return 0;
  out_unlock:
  	unlock_page(page);
541716902   David Chinner   [FS] Implement bl...
2532
  	return ret;
24da4fab5   Jan Kara   vfs: Create __blo...
2533
  }
1fe72eaa0   H Hartley Sweeten   fs/buffer.c: clea...
2534
  EXPORT_SYMBOL(block_page_mkwrite);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2535
2536
  
  /*
   * The prereads issued by nobh_write_begin() are special: their buffer_heads
   * are freed right away, while the page lock is still held.  They therefore
   * need an end_io handler that does not touch the bh after unlocking it.
   */
  static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
  {
  	__end_buffer_read_notouch(bh, uptodate);
  }
  
  /*
03158cd7e   Nick Piggin   fs: restore nobh
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
   * Attach the singly-linked list of buffers created by nobh_write_begin, to
   * the page (converting it to circular linked list and taking care of page
   * dirty races).
   */
  static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
  {
  	struct buffer_head *bh;
  
  	BUG_ON(!PageLocked(page));
  
  	spin_lock(&page->mapping->private_lock);
  	bh = head;
  	do {
  		if (PageDirty(page))
  			set_buffer_dirty(bh);
  		if (!bh->b_this_page)
  			bh->b_this_page = head;
  		bh = bh->b_this_page;
  	} while (bh != head);
45dcfc273   Guoqing Jiang   fs/buffer.c: use ...
2566
  	attach_page_private(page, head);
03158cd7e   Nick Piggin   fs: restore nobh
2567
2568
2569
2570
  	spin_unlock(&page->mapping->private_lock);
  }
  
  /*
ea0f04e59   Christoph Hellwig   get rid of nobh_w...
2571
2572
   * On entry, the page is fully not uptodate.
   * On exit the page is fully uptodate in the areas outside (from,to)
7bb46a673   npiggin@suse.de   fs: introduce new...
2573
   * The filesystem needs to handle block truncation upon failure.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2574
   */
ea0f04e59   Christoph Hellwig   get rid of nobh_w...
2575
  int nobh_write_begin(struct address_space *mapping,
03158cd7e   Nick Piggin   fs: restore nobh
2576
2577
  			loff_t pos, unsigned len, unsigned flags,
  			struct page **pagep, void **fsdata,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2578
2579
  			get_block_t *get_block)
  {
03158cd7e   Nick Piggin   fs: restore nobh
2580
  	struct inode *inode = mapping->host;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2581
2582
  	const unsigned blkbits = inode->i_blkbits;
  	const unsigned blocksize = 1 << blkbits;
a4b0672db   Nick Piggin   fs: fix nobh erro...
2583
  	struct buffer_head *head, *bh;
03158cd7e   Nick Piggin   fs: restore nobh
2584
2585
2586
  	struct page *page;
  	pgoff_t index;
  	unsigned from, to;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2587
  	unsigned block_in_page;
a4b0672db   Nick Piggin   fs: fix nobh erro...
2588
  	unsigned block_start, block_end;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2589
  	sector_t block_in_file;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2590
  	int nr_reads = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2591
2592
  	int ret = 0;
  	int is_mapped_to_disk = 1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2593

09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
2594
2595
  	index = pos >> PAGE_SHIFT;
  	from = pos & (PAGE_SIZE - 1);
03158cd7e   Nick Piggin   fs: restore nobh
2596
  	to = from + len;
54566b2c1   Nick Piggin   fs: symlink write...
2597
  	page = grab_cache_page_write_begin(mapping, index, flags);
03158cd7e   Nick Piggin   fs: restore nobh
2598
2599
2600
2601
2602
2603
  	if (!page)
  		return -ENOMEM;
  	*pagep = page;
  	*fsdata = NULL;
  
  	if (page_has_buffers(page)) {
309f77ad9   Namhyung Kim   fs/buffer.c: call...
2604
2605
2606
2607
  		ret = __block_write_begin(page, pos, len, get_block);
  		if (unlikely(ret))
  			goto out_release;
  		return ret;
03158cd7e   Nick Piggin   fs: restore nobh
2608
  	}
a4b0672db   Nick Piggin   fs: fix nobh erro...
2609

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2610
2611
  	if (PageMappedToDisk(page))
  		return 0;
a4b0672db   Nick Piggin   fs: fix nobh erro...
2612
2613
2614
2615
2616
2617
2618
2619
2620
  	/*
  	 * Allocate buffers so that we can keep track of state, and potentially
  	 * attach them to the page if an error occurs. In the common case of
  	 * no error, they will just be freed again without ever being attached
  	 * to the page (which is all OK, because we're under the page lock).
  	 *
  	 * Be careful: the buffer linked list is a NULL terminated one, rather
  	 * than the circular one we're used to.
  	 */
640ab98fb   Jens Axboe   buffer: have allo...
2621
  	head = alloc_page_buffers(page, blocksize, false);
03158cd7e   Nick Piggin   fs: restore nobh
2622
2623
2624
2625
  	if (!head) {
  		ret = -ENOMEM;
  		goto out_release;
  	}
a4b0672db   Nick Piggin   fs: fix nobh erro...
2626

09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
2627
  	block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2628
2629
2630
2631
2632
2633
  
  	/*
  	 * We loop across all blocks in the page, whether or not they are
  	 * part of the affected region.  This is so we can discover if the
  	 * page is fully mapped-to-disk.
  	 */
a4b0672db   Nick Piggin   fs: fix nobh erro...
2634
  	for (block_start = 0, block_in_page = 0, bh = head;
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
2635
  		  block_start < PAGE_SIZE;
a4b0672db   Nick Piggin   fs: fix nobh erro...
2636
  		  block_in_page++, block_start += blocksize, bh = bh->b_this_page) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2637
  		int create;
a4b0672db   Nick Piggin   fs: fix nobh erro...
2638
2639
  		block_end = block_start + blocksize;
  		bh->b_state = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2640
2641
2642
2643
  		create = 1;
  		if (block_start >= to)
  			create = 0;
  		ret = get_block(inode, block_in_file + block_in_page,
a4b0672db   Nick Piggin   fs: fix nobh erro...
2644
  					bh, create);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2645
2646
  		if (ret)
  			goto failed;
a4b0672db   Nick Piggin   fs: fix nobh erro...
2647
  		if (!buffer_mapped(bh))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2648
  			is_mapped_to_disk = 0;
a4b0672db   Nick Piggin   fs: fix nobh erro...
2649
  		if (buffer_new(bh))
e64855c6c   Jan Kara   fs: Add helper to...
2650
  			clean_bdev_bh_alias(bh);
a4b0672db   Nick Piggin   fs: fix nobh erro...
2651
2652
  		if (PageUptodate(page)) {
  			set_buffer_uptodate(bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2653
  			continue;
a4b0672db   Nick Piggin   fs: fix nobh erro...
2654
2655
  		}
  		if (buffer_new(bh) || !buffer_mapped(bh)) {
eebd2aa35   Christoph Lameter   Pagecache zeroing...
2656
2657
  			zero_user_segments(page, block_start, from,
  							to, block_end);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2658
2659
  			continue;
  		}
a4b0672db   Nick Piggin   fs: fix nobh erro...
2660
  		if (buffer_uptodate(bh))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2661
2662
  			continue;	/* reiserfs does this */
  		if (block_start < from || block_end > to) {
a4b0672db   Nick Piggin   fs: fix nobh erro...
2663
2664
  			lock_buffer(bh);
  			bh->b_end_io = end_buffer_read_nobh;
2a222ca99   Mike Christie   fs: have submit_b...
2665
  			submit_bh(REQ_OP_READ, 0, bh);
a4b0672db   Nick Piggin   fs: fix nobh erro...
2666
  			nr_reads++;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2667
2668
2669
2670
  		}
  	}
  
  	if (nr_reads) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2671
2672
2673
2674
2675
  		/*
  		 * The page is locked, so these buffers are protected from
  		 * any VM or truncate activity.  Hence we don't need to care
  		 * for the buffer_head refcounts.
  		 */
a4b0672db   Nick Piggin   fs: fix nobh erro...
2676
  		for (bh = head; bh; bh = bh->b_this_page) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2677
2678
2679
  			wait_on_buffer(bh);
  			if (!buffer_uptodate(bh))
  				ret = -EIO;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2680
2681
2682
2683
2684
2685
2686
  		}
  		if (ret)
  			goto failed;
  	}
  
  	if (is_mapped_to_disk)
  		SetPageMappedToDisk(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2687

03158cd7e   Nick Piggin   fs: restore nobh
2688
  	*fsdata = head; /* to be released by nobh_write_end */
a4b0672db   Nick Piggin   fs: fix nobh erro...
2689

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2690
2691
2692
  	return 0;
  
  failed:
03158cd7e   Nick Piggin   fs: restore nobh
2693
  	BUG_ON(!ret);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2694
  	/*
a4b0672db   Nick Piggin   fs: fix nobh erro...
2695
2696
2697
2698
2699
  	 * Error recovery is a bit difficult. We need to zero out blocks that
  	 * were newly allocated, and dirty them to ensure they get written out.
  	 * Buffers need to be attached to the page at this point, otherwise
  	 * the handling of potential IO errors during writeout would be hard
  	 * (could try doing synchronous writeout, but what if that fails too?)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2700
  	 */
03158cd7e   Nick Piggin   fs: restore nobh
2701
2702
  	attach_nobh_buffers(page, head);
  	page_zero_new_buffers(page, from, to);
a4b0672db   Nick Piggin   fs: fix nobh erro...
2703

03158cd7e   Nick Piggin   fs: restore nobh
2704
2705
  out_release:
  	unlock_page(page);
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
2706
  	put_page(page);
03158cd7e   Nick Piggin   fs: restore nobh
2707
  	*pagep = NULL;
a4b0672db   Nick Piggin   fs: fix nobh erro...
2708

7bb46a673   npiggin@suse.de   fs: introduce new...
2709
2710
  	return ret;
  }
03158cd7e   Nick Piggin   fs: restore nobh
2711
  EXPORT_SYMBOL(nobh_write_begin);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2712

03158cd7e   Nick Piggin   fs: restore nobh
2713
2714
2715
  /*
   * nobh_write_end - finish a write that was set up by nobh_write_begin().
   *
   * @fsdata carries the NULL-terminated list of buffer heads (if any) that
   * were allocated but never attached to @page.  On a short copy they are
   * attached to the page, and any page that has buffers is completed through
   * generic_write_end().  Otherwise the page is marked uptodate and dirty,
   * i_size is extended when the write reached past it, the page is unlocked
   * and released, and the unattached buffer heads are freed.
   *
   * Returns generic_write_end()'s result on the buffer path, else @copied.
   */
  int nobh_write_end(struct file *file, struct address_space *mapping,
  			loff_t pos, unsigned len, unsigned copied,
  			struct page *page, void *fsdata)
  {
  	struct inode *inode = page->mapping->host;
  	struct buffer_head *pending = fsdata;
  	struct buffer_head *next;

  	/* Private buffers and attached buffers must never coexist. */
  	BUG_ON(fsdata != NULL && page_has_buffers(page));

  	/* Short copy: hand the private buffers over to the page. */
  	if (unlikely(copied < len) && pending)
  		attach_nobh_buffers(page, pending);

  	if (page_has_buffers(page))
  		return generic_write_end(file, mapping, pos, len,
  					copied, page, fsdata);

  	SetPageUptodate(page);
  	set_page_dirty(page);
  	if (pos + copied > inode->i_size) {
  		i_size_write(inode, pos + copied);
  		mark_inode_dirty(inode);
  	}

  	unlock_page(page);
  	put_page(page);

  	/* Release the buffer heads that were never attached. */
  	for (; pending; pending = next) {
  		next = pending->b_this_page;
  		free_buffer_head(pending);
  	}

  	return copied;
  }
03158cd7e   Nick Piggin   fs: restore nobh
2746
  EXPORT_SYMBOL(nobh_write_end);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
  
  /*
   * nobh_writepage() - writepage implementation that mirrors
   * block_write_full_page() but first tries mpage_writepage(), so that
   * buffer heads are only involved when that attempt returns -EAGAIN.
   *
   * Pages wholly beyond i_size are unlocked and reported as success; the
   * page that straddles i_size has its tail zeroed before being written.
   */
  int nobh_writepage(struct page *page, get_block_t *get_block,
  			struct writeback_control *wbc)
  {
  	struct inode * const inode = page->mapping->host;
  	loff_t i_size = i_size_read(inode);
  	const pgoff_t end_index = i_size >> PAGE_SHIFT;
  	int ret;

  	/* Only pages not fully inside i_size need the checks below. */
  	if (page->index >= end_index) {
  		unsigned offset = i_size & (PAGE_SIZE-1);

  		/* Fully outside i_size (truncate in progress)? */
  		if (page->index >= end_index+1 || !offset) {
  			unlock_page(page);
  			return 0; /* don't care */
  		}

  		/*
  		 * The page straddles i_size.  Zero the part past EOF on
  		 * every writepage call because the page may be mmapped:
  		 * "A file is mapped in multiples of the page size.  For a
  		 * file that is not a multiple of the page size, the
  		 * remaining memory is zeroed when mapped, and writes to
  		 * that region are not written out to the file."
  		 */
  		zero_user_segment(page, offset, PAGE_SIZE);
  	}

  	ret = mpage_writepage(page, get_block, wbc);
  	if (ret != -EAGAIN)
  		return ret;

  	/* Fall back to the buffer-head based writeout. */
  	return __block_write_full_page(inode, page, get_block, wbc,
  				       end_buffer_async_write);
  }
  EXPORT_SYMBOL(nobh_writepage);
03158cd7e   Nick Piggin   fs: restore nobh
2789
2790
  /*
   * nobh_truncate_page - zero the tail of the block that contains @from
   * without attaching buffer heads to the page.
   *
   * Nothing is done when @from sits on a block boundary or when the block
   * is a hole.  If the page already has buffers (initially, or after it was
   * read in), the work is delegated to block_truncate_page().
   *
   * Returns 0 on success or a negative errno.
   */
  int nobh_truncate_page(struct address_space *mapping,
  			loff_t from, get_block_t *get_block)
  {
  	struct inode *inode = mapping->host;
  	pgoff_t index = from >> PAGE_SHIFT;
  	unsigned offset = from & (PAGE_SIZE-1);
  	unsigned blocksize = i_blocksize(inode);
  	unsigned length = offset & (blocksize - 1);
  	unsigned pos;
  	sector_t iblock;
  	struct buffer_head map_bh;
  	struct page *page;
  	int err;

  	/* Block boundary? Nothing to do */
  	if (!length)
  		return 0;

  	/* Number of bytes from @from up to the end of its block. */
  	length = blocksize - length;

  	page = grab_cache_page(mapping, index);
  	if (!page)
  		return -ENOMEM;

  	if (page_has_buffers(page))
  		goto has_buffers;

  	/* Find the block that contains "offset" */
  	iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits);
  	for (pos = blocksize; offset >= pos; pos += blocksize)
  		iblock++;

  	/* Only the fields get_block is given here are initialised. */
  	map_bh.b_size = blocksize;
  	map_bh.b_state = 0;
  	err = get_block(inode, iblock, &map_bh, 0);

  	/* Lookup failure, or an unmapped block (a hole): nothing to zero. */
  	if (err || !buffer_mapped(&map_bh))
  		goto unlock;

  	/* Ok, it's mapped. Make sure it's up-to-date */
  	if (!PageUptodate(page)) {
  		err = mapping->a_ops->readpage(NULL, page);
  		if (err) {
  			/*
  			 * Only the reference is dropped on this path; the
  			 * page is not unlocked here.
  			 */
  			put_page(page);
  			return err;
  		}
  		lock_page(page);
  		if (!PageUptodate(page)) {
  			err = -EIO;
  			goto unlock;
  		}
  		if (page_has_buffers(page))
  			goto has_buffers;
  	}

  	zero_user(page, offset, length);
  	set_page_dirty(page);
  	err = 0;

  unlock:
  	unlock_page(page);
  	put_page(page);
  	return err;

  has_buffers:
  	/* The page carries buffers: let the buffer-based helper do it. */
  	unlock_page(page);
  	put_page(page);
  	return block_truncate_page(mapping, from, get_block);
  }
  EXPORT_SYMBOL(nobh_truncate_page);
  
  /*
   * block_truncate_page - zero the tail of the block that contains @from,
   * using the page's buffer heads (created here if the page has none).
   *
   * Nothing is done when @from sits on a block boundary or when the block
   * is a hole.  A buffer that is not uptodate (and is neither delayed nor
   * unwritten) is read in first.
   *
   * Returns 0 on success or a negative errno.
   */
  int block_truncate_page(struct address_space *mapping,
  			loff_t from, get_block_t *get_block)
  {
  	struct inode *inode = mapping->host;
  	pgoff_t index = from >> PAGE_SHIFT;
  	unsigned offset = from & (PAGE_SIZE-1);
  	unsigned blocksize = i_blocksize(inode);
  	unsigned length = offset & (blocksize - 1);
  	unsigned pos;
  	sector_t iblock;
  	struct page *page;
  	struct buffer_head *bh;
  	int err = 0;

  	/* Block boundary? Nothing to do */
  	if (!length)
  		return 0;

  	/* Number of bytes from @from up to the end of its block. */
  	length = blocksize - length;

  	page = grab_cache_page(mapping, index);
  	if (!page)
  		return -ENOMEM;

  	if (!page_has_buffers(page))
  		create_empty_buffers(page, blocksize, 0);

  	/* Find the buffer that contains "offset" */
  	iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits);
  	bh = page_buffers(page);
  	for (pos = blocksize; offset >= pos; pos += blocksize) {
  		bh = bh->b_this_page;
  		iblock++;
  	}

  	if (!buffer_mapped(bh)) {
  		WARN_ON(bh->b_size != blocksize);
  		err = get_block(inode, iblock, bh, 0);
  		/* Lookup failure, or still unmapped (a hole): done. */
  		if (err || !buffer_mapped(bh))
  			goto unlock;
  	}

  	/* Ok, it's mapped. Make sure it's up-to-date */
  	if (PageUptodate(page))
  		set_buffer_uptodate(bh);

  	if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) {
  		ll_rw_block(REQ_OP_READ, 0, 1, &bh);
  		wait_on_buffer(bh);
  		/* Uhhuh. Read error. Complain and punt. */
  		if (!buffer_uptodate(bh)) {
  			err = -EIO;
  			goto unlock;
  		}
  	}

  	zero_user(page, offset, length);
  	mark_buffer_dirty(bh);
  	err = 0;

  unlock:
  	unlock_page(page);
  	put_page(page);
  	return err;
  }
1fe72eaa0   H Hartley Sweeten   fs/buffer.c: clea...
2936
  EXPORT_SYMBOL(block_truncate_page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2937
2938
2939
2940
  
  /*
   * The generic ->writepage function for buffer-backed address_spaces
   */
1b938c082   Matthew Wilcox   fs/buffer.c: remo...
2941
2942
  int block_write_full_page(struct page *page, get_block_t *get_block,
  			struct writeback_control *wbc)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2943
2944
2945
  {
  	struct inode * const inode = page->mapping->host;
  	loff_t i_size = i_size_read(inode);
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
2946
  	const pgoff_t end_index = i_size >> PAGE_SHIFT;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2947
  	unsigned offset;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2948
2949
2950
  
  	/* Is the page fully inside i_size? */
  	if (page->index < end_index)
35c80d5f4   Chris Mason   Add block_write_f...
2951
  		return __block_write_full_page(inode, page, get_block, wbc,
1b938c082   Matthew Wilcox   fs/buffer.c: remo...
2952
  					       end_buffer_async_write);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2953
2954
  
  	/* Is the page fully outside i_size? (truncate in progress) */
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
2955
  	offset = i_size & (PAGE_SIZE-1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2956
  	if (page->index >= end_index+1 || !offset) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2957
2958
2959
2960
2961
2962
  		unlock_page(page);
  		return 0; /* don't care */
  	}
  
  	/*
  	 * The page straddles i_size.  It must be zeroed out on each and every
2a61aa401   Adam Buchbinder   Fix misspellings ...
2963
  	 * writepage invocation because it may be mmapped.  "A file is mapped
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2964
2965
2966
2967
  	 * in multiples of the page size.  For a file that is not a multiple of
  	 * the  page size, the remaining memory is zeroed when mapped, and
  	 * writes to that region are not written out to the file."
  	 */
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
2968
  	zero_user_segment(page, offset, PAGE_SIZE);
1b938c082   Matthew Wilcox   fs/buffer.c: remo...
2969
2970
  	return __block_write_full_page(inode, page, get_block, wbc,
  							end_buffer_async_write);
35c80d5f4   Chris Mason   Add block_write_f...
2971
  }
1fe72eaa0   H Hartley Sweeten   fs/buffer.c: clea...
2972
  EXPORT_SYMBOL(block_write_full_page);
35c80d5f4   Chris Mason   Add block_write_f...
2973

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2974
2975
2976
  /*
   * generic_block_bmap - look up the block number that backs logical
   * block @block of @mapping's inode, using @get_block without allocation.
   *
   * The return value of @get_block is not examined; the probe buffer head
   * starts out zeroed apart from b_size, and its b_blocknr is returned.
   */
  sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
  			    get_block_t *get_block)
  {
  	struct inode *host = mapping->host;
  	struct buffer_head probe = {
  		.b_size = i_blocksize(host),
  	};

  	get_block(host, block, &probe, 0);

  	return probe.b_blocknr;
  }
1fe72eaa0   H Hartley Sweeten   fs/buffer.c: clea...
2984
  EXPORT_SYMBOL(generic_block_bmap);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2985

4246a0b63   Christoph Hellwig   block: add a bi_e...
2986
  static void end_bio_bh_io_sync(struct bio *bio)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2987
2988
  {
  	struct buffer_head *bh = bio->bi_private;
b7c44ed9d   Jens Axboe   block: manipulate...
2989
  	if (unlikely(bio_flagged(bio, BIO_QUIET)))
08bafc034   Keith Mannthey   block: Supress Bu...
2990
  		set_bit(BH_Quiet, &bh->b_state);
4e4cbee93   Christoph Hellwig   block: switch bio...
2991
  	bh->b_end_io(bh, !bio->bi_status);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2992
  	bio_put(bio);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2993
  }
2a222ca99   Mike Christie   fs: have submit_b...
2994
  static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
8e8f92988   Jens Axboe   fs: add support f...
2995
  			 enum rw_hint write_hint, struct writeback_control *wbc)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2996
2997
  {
  	struct bio *bio;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2998
2999
3000
3001
  
  	BUG_ON(!buffer_locked(bh));
  	BUG_ON(!buffer_mapped(bh));
  	BUG_ON(!bh->b_end_io);
8fb0e3424   Aneesh Kumar K.V   vfs: Add BUG_ON f...
3002
3003
  	BUG_ON(buffer_delay(bh));
  	BUG_ON(buffer_unwritten(bh));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3004

48fd4f93a   Jens Axboe   block: submit_bh(...
3005
  	/*
48fd4f93a   Jens Axboe   block: submit_bh(...
3006
  	 * Only clear out a write error when rewriting
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3007
  	 */
2a222ca99   Mike Christie   fs: have submit_b...
3008
  	if (test_set_buffer_req(bh) && (op == REQ_OP_WRITE))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3009
  		clear_buffer_write_io_error(bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3010
  	bio = bio_alloc(GFP_NOIO, 1);
4f74d15fe   Eric Biggers   ext4: add inline ...
3011
  	fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO);
4f024f379   Kent Overstreet   block: Abstract o...
3012
  	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
74d46992e   Christoph Hellwig   block: replace bi...
3013
  	bio_set_dev(bio, bh->b_bdev);
8e8f92988   Jens Axboe   fs: add support f...
3014
  	bio->bi_write_hint = write_hint;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3015

6cf66b4ca   Kent Overstreet   fs: use helper bi...
3016
3017
  	bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
  	BUG_ON(bio->bi_iter.bi_size != bh->b_size);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3018
3019
3020
  
  	bio->bi_end_io = end_bio_bh_io_sync;
  	bio->bi_private = bh;
877f962c5   Theodore Ts'o   buffer: add BH_Pr...
3021
  	if (buffer_meta(bh))
2a222ca99   Mike Christie   fs: have submit_b...
3022
  		op_flags |= REQ_META;
877f962c5   Theodore Ts'o   buffer: add BH_Pr...
3023
  	if (buffer_prio(bh))
2a222ca99   Mike Christie   fs: have submit_b...
3024
3025
  		op_flags |= REQ_PRIO;
  	bio_set_op_attrs(bio, op, op_flags);
877f962c5   Theodore Ts'o   buffer: add BH_Pr...
3026

83c9c5471   Ming Lei   fs: move guard_bi...
3027
3028
  	/* Take care of bh's that straddle the end of the device */
  	guard_bio_eod(bio);
fd42df305   Dennis Zhou   blkcg: associate ...
3029
3030
  	if (wbc) {
  		wbc_init_bio(wbc, bio);
34e51a5e1   Tejun Heo   blkcg, writeback:...
3031
  		wbc_account_cgroup_owner(wbc, bh->b_page, bh->b_size);
fd42df305   Dennis Zhou   blkcg: associate ...
3032
  	}
4e49ea4a3   Mike Christie   block/fs/drivers:...
3033
  	submit_bio(bio);
f6454b049   Julia Lawall   block: fix return...
3034
  	return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3035
  }
bafc0dba1   Tejun Heo   buffer, writeback...
3036

020c2833d   Eric Biggers   fs: remove _submi...
3037
  int submit_bh(int op, int op_flags, struct buffer_head *bh)
bafc0dba1   Tejun Heo   buffer, writeback...
3038
  {
8e8f92988   Jens Axboe   fs: add support f...
3039
  	return submit_bh_wbc(op, op_flags, bh, 0, NULL);
713685111   Darrick J. Wong   mm: make snapshot...
3040
  }
1fe72eaa0   H Hartley Sweeten   fs/buffer.c: clea...
3041
  EXPORT_SYMBOL(submit_bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3042
3043
3044
  
  /**
   * ll_rw_block: low-level access to block devices (DEPRECATED)
dfec8a14f   Mike Christie   fs: have ll_rw_bl...
3045
   * @op: whether to %READ or %WRITE
ef295ecf0   Christoph Hellwig   block: better op ...
3046
   * @op_flags: req_flag_bits
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3047
3048
3049
   * @nr: number of &struct buffer_heads in the array
   * @bhs: array of pointers to &struct buffer_head
   *
a76622362   Jan Kara   [PATCH] Make ll_r...
3050
   * ll_rw_block() takes an array of pointers to &struct buffer_heads, and
70246286e   Christoph Hellwig   block: get rid of...
3051
3052
3053
   * requests an I/O operation on them, either a %REQ_OP_READ or a %REQ_OP_WRITE.
   * @op_flags contains flags modifying the detailed I/O behavior, most notably
   * %REQ_RAHEAD.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3054
3055
   *
   * This function drops any buffer that it cannot get a lock on (with the
9cb569d60   Christoph Hellwig   remove SWRITE* I/...
3056
3057
3058
3059
3060
   * BH_Lock state bit), any buffer that appears to be clean when doing a write
   * request, and any buffer that appears to be up-to-date when doing read
   * request.  Further it marks as clean buffers that are processed for
   * writing (the buffer cache won't assume that they are actually clean
   * until the buffer gets unlocked).
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3061
3062
   *
   * ll_rw_block sets b_end_io to simple completion handler that marks
e227867f1   Masanari Iida   treewide: Fix typ...
3063
   * the buffer up-to-date (if appropriate), unlocks the buffer and wakes
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3064
3065
3066
3067
3068
   * any waiters. 
   *
   * All of the buffers must be for the same device, and must also be a
   * multiple of the current approved size for the device.
   */
dfec8a14f   Mike Christie   fs: have ll_rw_bl...
3069
  void ll_rw_block(int op, int op_flags,  int nr, struct buffer_head *bhs[])
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3070
3071
3072
3073
3074
  {
  	int i;
  
  	for (i = 0; i < nr; i++) {
  		struct buffer_head *bh = bhs[i];
9cb569d60   Christoph Hellwig   remove SWRITE* I/...
3075
  		if (!trylock_buffer(bh))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3076
  			continue;
dfec8a14f   Mike Christie   fs: have ll_rw_bl...
3077
  		if (op == WRITE) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3078
  			if (test_clear_buffer_dirty(bh)) {
76c3073a8   Andrew Morton   [PATCH] end_buffe...
3079
  				bh->b_end_io = end_buffer_write_sync;
e60e5c50a   OGAWA Hirofumi   [PATCH] Trivial o...
3080
  				get_bh(bh);
dfec8a14f   Mike Christie   fs: have ll_rw_bl...
3081
  				submit_bh(op, op_flags, bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3082
3083
3084
  				continue;
  			}
  		} else {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3085
  			if (!buffer_uptodate(bh)) {
76c3073a8   Andrew Morton   [PATCH] end_buffe...
3086
  				bh->b_end_io = end_buffer_read_sync;
e60e5c50a   OGAWA Hirofumi   [PATCH] Trivial o...
3087
  				get_bh(bh);
dfec8a14f   Mike Christie   fs: have ll_rw_bl...
3088
  				submit_bh(op, op_flags, bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3089
3090
3091
3092
  				continue;
  			}
  		}
  		unlock_buffer(bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3093
3094
  	}
  }
1fe72eaa0   H Hartley Sweeten   fs/buffer.c: clea...
3095
  EXPORT_SYMBOL(ll_rw_block);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3096

2a222ca99   Mike Christie   fs: have submit_b...
3097
  void write_dirty_buffer(struct buffer_head *bh, int op_flags)
9cb569d60   Christoph Hellwig   remove SWRITE* I/...
3098
3099
3100
3101
3102
3103
3104
3105
  {
  	lock_buffer(bh);
  	if (!test_clear_buffer_dirty(bh)) {
  		unlock_buffer(bh);
  		return;
  	}
  	bh->b_end_io = end_buffer_write_sync;
  	get_bh(bh);
2a222ca99   Mike Christie   fs: have submit_b...
3106
  	submit_bh(REQ_OP_WRITE, op_flags, bh);
9cb569d60   Christoph Hellwig   remove SWRITE* I/...
3107
3108
  }
  EXPORT_SYMBOL(write_dirty_buffer);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3109
3110
3111
3112
3113
  /*
   * For a data-integrity writeout, we need to wait upon any in-progress I/O
   * and then start new I/O and then wait upon it.  The caller must have a ref on
   * the buffer_head.
   */
2a222ca99   Mike Christie   fs: have submit_b...
3114
  int __sync_dirty_buffer(struct buffer_head *bh, int op_flags)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3115
3116
3117
3118
3119
3120
  {
  	int ret = 0;
  
  	WARN_ON(atomic_read(&bh->b_count) < 1);
  	lock_buffer(bh);
  	if (test_clear_buffer_dirty(bh)) {
377254b2c   Xianting Tian   fs: prevent BUG_O...
3121
3122
3123
3124
3125
3126
3127
3128
  		/*
  		 * The bh should be mapped, but it might not be if the
  		 * device was hot-removed. Not much we can do but fail the I/O.
  		 */
  		if (!buffer_mapped(bh)) {
  			unlock_buffer(bh);
  			return -EIO;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3129
3130
  		get_bh(bh);
  		bh->b_end_io = end_buffer_write_sync;
2a222ca99   Mike Christie   fs: have submit_b...
3131
  		ret = submit_bh(REQ_OP_WRITE, op_flags, bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3132
  		wait_on_buffer(bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3133
3134
3135
3136
3137
3138
3139
  		if (!ret && !buffer_uptodate(bh))
  			ret = -EIO;
  	} else {
  		unlock_buffer(bh);
  	}
  	return ret;
  }
87e99511e   Christoph Hellwig   kill BH_Ordered flag
3140
3141
3142
3143
  EXPORT_SYMBOL(__sync_dirty_buffer);
  
  int sync_dirty_buffer(struct buffer_head *bh)
  {
70fd76140   Christoph Hellwig   block,fs: use REQ...
3144
  	return __sync_dirty_buffer(bh, REQ_SYNC);
87e99511e   Christoph Hellwig   kill BH_Ordered flag
3145
  }
1fe72eaa0   H Hartley Sweeten   fs/buffer.c: clea...
3146
  EXPORT_SYMBOL(sync_dirty_buffer);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3147
3148
3149
3150
3151
3152
3153
3154
3155
3156
3157
3158
3159
3160
3161
3162
3163
3164
3165
3166
3167
3168
3169
3170
3171
3172
3173
3174
3175
3176
3177
3178
  
  /*
   * try_to_free_buffers() checks if all the buffers on this particular page
   * are unused, and releases them if so.
   *
   * Exclusion against try_to_free_buffers may be obtained by either
   * locking the page or by holding its mapping's private_lock.
   *
   * If the page is dirty but all the buffers are clean then we need to
   * be sure to mark the page clean as well.  This is because the page
   * may be against a block device, and a later reattachment of buffers
   * to a dirty page will set *all* buffers dirty.  Which would corrupt
   * filesystem data on the same device.
   *
   * The same applies to regular filesystem pages: if all the buffers are
   * clean then we set the page clean and proceed.  To do that, we require
   * total exclusion from __set_page_dirty_buffers().  That is obtained with
   * private_lock.
   *
   * try_to_free_buffers() is non-blocking.
   */
  /*
   * A buffer counts as busy when it has a non-zero reference count or has
   * its dirty or lock state bit set.  Returns non-zero when busy.
   */
  static inline int buffer_busy(struct buffer_head *bh)
  {
  	const unsigned long busy_bits = (1 << BH_Dirty) | (1 << BH_Lock);

  	return atomic_read(&bh->b_count) | (bh->b_state & busy_bits);
  }
  
  static int
  drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
  {
  	struct buffer_head *head = page_buffers(page);
  	struct buffer_head *bh;
43edfc892   Laura Abbott   FROMLIST: fs/buff...
3179
3180
3181
3182
3183
  	struct xarray busy_bhs;
  	int bh_count = 0;
  	int xa_ret, ret = 0;
  
  	xa_init(&busy_bhs);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3184
3185
3186
  
  	bh = head;
  	do {
43edfc892   Laura Abbott   FROMLIST: fs/buff...
3187
3188
3189
3190
3191
3192
  		if (buffer_busy(bh)) {
  			xa_ret = xa_err(xa_store(&busy_bhs, bh_count++,
  						 bh, GFP_ATOMIC));
  			if (xa_ret)
  				goto out;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3193
3194
  		bh = bh->b_this_page;
  	} while (bh != head);
43edfc892   Laura Abbott   FROMLIST: fs/buff...
3195
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
  	if (bh_count) {
  		/*
  		 * Check if the busy failure was due to an outstanding
  		 * LRU reference
  		 */
  		evict_bh_lrus(&busy_bhs);
  		do {
  			if (buffer_busy(bh))
  				goto out;
  		} while (bh != head);
  	}
  
  	ret = 1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3208
3209
  	do {
  		struct buffer_head *next = bh->b_this_page;
535ee2fbf   Jan Kara   buffer_head: fix ...
3210
  		if (bh->b_assoc_map)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3211
3212
3213
3214
  			__remove_assoc_queue(bh);
  		bh = next;
  	} while (bh != head);
  	*buffers_to_free = head;
45dcfc273   Guoqing Jiang   fs/buffer.c: use ...
3215
  	detach_page_private(page);
43edfc892   Laura Abbott   FROMLIST: fs/buff...
3216
3217
3218
3219
  out:
  	xa_destroy(&busy_bhs);
  
  	return ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3220
3221
3222
3223
3224
3225
3226
3227
3228
  }
  
  int try_to_free_buffers(struct page *page)
  {
  	struct address_space * const mapping = page->mapping;
  	struct buffer_head *buffers_to_free = NULL;
  	int ret = 0;
  
  	BUG_ON(!PageLocked(page));
ecdfc9787   Linus Torvalds   Resurrect 'try_to...
3229
  	if (PageWriteback(page))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3230
3231
3232
3233
3234
3235
3236
3237
3238
  		return 0;
  
  	if (mapping == NULL) {		/* can this still happen? */
  		ret = drop_buffers(page, &buffers_to_free);
  		goto out;
  	}
  
  	spin_lock(&mapping->private_lock);
  	ret = drop_buffers(page, &buffers_to_free);
ecdfc9787   Linus Torvalds   Resurrect 'try_to...
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
  
  	/*
  	 * If the filesystem writes its buffers by hand (eg ext3)
  	 * then we can have clean buffers against a dirty page.  We
  	 * clean the page here; otherwise the VM will never notice
  	 * that the filesystem did any IO at all.
  	 *
  	 * Also, during truncate, discard_buffer will have marked all
  	 * the page's buffers clean.  We discover that here and clean
  	 * the page also.
87df7241b   Nick Piggin   [PATCH] Fix try_t...
3249
3250
3251
3252
  	 *
  	 * private_lock must be held over this entire operation in order
  	 * to synchronise against __set_page_dirty_buffers and prevent the
  	 * dirty bit from being lost.
ecdfc9787   Linus Torvalds   Resurrect 'try_to...
3253
  	 */
11f81becc   Tejun Heo   page_writeback: r...
3254
3255
  	if (ret)
  		cancel_dirty_page(page);
87df7241b   Nick Piggin   [PATCH] Fix try_t...
3256
  	spin_unlock(&mapping->private_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3257
3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
  out:
  	if (buffers_to_free) {
  		struct buffer_head *bh = buffers_to_free;
  
  		do {
  			struct buffer_head *next = bh->b_this_page;
  			free_buffer_head(bh);
  			bh = next;
  		} while (bh != buffers_to_free);
  	}
  	return ret;
  }
  EXPORT_SYMBOL(try_to_free_buffers);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3270
3271
3272
3273
3274
  /*
   * There are no bdflush tunables left.  But distributions are
   * still running obsolete flush daemons, so we terminate them here.
   *
   * Use of bdflush() is deprecated and will be removed in a future kernel.
5b0830cb9   Jens Axboe   writeback: get ri...
3275
   * The `flush-X' kernel threads fully replace bdflush daemons and this call.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3276
   */
bdc480e3b   Heiko Carstens   [CVE-2009-0029] S...
3277
  SYSCALL_DEFINE2(bdflush, int, func, long, data)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3278
3279
3280
3281
3282
3283
3284
3285
3286
3287
3288
3289
3290
3291
3292
3293
3294
3295
3296
3297
3298
3299
3300
3301
  {
  	static int msg_count;
  
  	if (!capable(CAP_SYS_ADMIN))
  		return -EPERM;
  
  	if (msg_count < 5) {
  		msg_count++;
  		printk(KERN_INFO
  			"warning: process `%s' used the obsolete bdflush"
  			" system call
  ", current->comm);
  		printk(KERN_INFO "Fix your initscripts?
  ");
  	}
  
  	if (func == 1)
  		do_exit(0);
  	return 0;
  }
  
  /*
   * Buffer-head allocation
   */
a0a9b0433   Shai Fultheim   fs: Move bh_cache...
3302
  /*
   * Slab cache that every struct buffer_head is allocated from and freed to;
   * created once in buffer_init().
   */
  static struct kmem_cache *bh_cachep __read_mostly;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3303
3304
3305
3306
3307
  
  /*
   * Once the number of bh's in the machine exceeds this level, we start
   * stripping them in writeback.
   */
43be594a6   Zhang Yanfei   fs/buffer.c: chan...
3308
  static unsigned long max_buffer_heads;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318
3319
3320
3321
3322
  
  /*
   * Recomputed by recalc_bh_state(): 1 when the summed per-CPU live-bh counts
   * exceed max_buffer_heads, 0 otherwise.
   */
  int buffer_heads_over_limit;
  
  /*
   * Per-CPU buffer_head bookkeeping.  nr is adjusted on every allocation and
   * free; ratelimit counts calls to recalc_bh_state() so the cross-CPU
   * summation only runs occasionally.
   */
  struct bh_accounting {
  	int nr;			/* Number of live bh's */
  	int ratelimit;		/* Limit cacheline bouncing */
  };
  
  /* One accounting slot per CPU, starting at zero. */
  static DEFINE_PER_CPU(struct bh_accounting, bh_accounting) = {0, 0};
  
  static void recalc_bh_state(void)
  {
  	int i;
  	int tot = 0;
ee1be8626   Christoph Lameter   fs: Use this_cpu_...
3323
  	if (__this_cpu_inc_return(bh_accounting.ratelimit) - 1 < 4096)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3324
  		return;
c7b92516a   Christoph Lameter   fs: Use this_cpu_...
3325
  	__this_cpu_write(bh_accounting.ratelimit, 0);
8a1434268   Eric Dumazet   [PATCH] HOTPLUG_C...
3326
  	for_each_online_cpu(i)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3327
3328
3329
  		tot += per_cpu(bh_accounting, i).nr;
  	buffer_heads_over_limit = (tot > max_buffer_heads);
  }
c7b92516a   Christoph Lameter   fs: Use this_cpu_...
3330

dd0fc66fb   Al Viro   [PATCH] gfp flags...
3331
  /*
   * Allocate a zero-filled buffer_head from bh_cachep using @gfp_flags.
   *
   * On success the association list head and the uptodate spinlock are
   * initialised and this CPU's live-bh count is incremented.  Returns the new
   * buffer_head, or NULL if the slab allocation failed.
   */
  struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
  {
  	struct buffer_head *bh = kmem_cache_zalloc(bh_cachep, gfp_flags);

  	if (!bh)
  		return NULL;

  	INIT_LIST_HEAD(&bh->b_assoc_buffers);
  	spin_lock_init(&bh->b_uptodate_lock);

  	/* The accounting below uses __this_cpu ops: stay on this CPU. */
  	preempt_disable();
  	__this_cpu_inc(bh_accounting.nr);
  	recalc_bh_state();
  	preempt_enable();

  	return bh;
  }
  EXPORT_SYMBOL(alloc_buffer_head);
  
  /*
   * Return @bh to bh_cachep and decrement this CPU's live-bh count.
   *
   * The buffer must no longer sit on an association list; a non-empty
   * b_assoc_buffers triggers BUG_ON().
   */
  void free_buffer_head(struct buffer_head *bh)
  {
  	BUG_ON(!list_empty(&bh->b_assoc_buffers));

  	kmem_cache_free(bh_cachep, bh);

  	/* The accounting below uses __this_cpu ops: stay on this CPU. */
  	preempt_disable();
  	__this_cpu_dec(bh_accounting.nr);
  	recalc_bh_state();
  	preempt_enable();
  }
  EXPORT_SYMBOL(free_buffer_head);
fc4d24c9b   Sebastian Andrzej Siewior   fs/buffer: Conver...
3356
  static int buffer_exit_cpu_dead(unsigned int cpu)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3357
3358
3359
3360
3361
3362
3363
3364
  {
  	int i;
  	struct bh_lru *b = &per_cpu(bh_lrus, cpu);
  
  	for (i = 0; i < BH_LRU_SIZE; i++) {
  		brelse(b->bhs[i]);
  		b->bhs[i] = NULL;
  	}
c7b92516a   Christoph Lameter   fs: Use this_cpu_...
3365
  	this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr);
8a1434268   Eric Dumazet   [PATCH] HOTPLUG_C...
3366
  	per_cpu(bh_accounting, cpu).nr = 0;
fc4d24c9b   Sebastian Andrzej Siewior   fs/buffer: Conver...
3367
  	return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3368
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3369

389d1b083   Aneesh Kumar K.V   Add buffer head r...
3370
  /**
   * bh_uptodate_or_lock - Test whether the buffer is uptodate
   * @bh: struct buffer_head
   *
   * Return true if the buffer is up-to-date and false,
   * with the buffer locked, if not.
   */
  int bh_uptodate_or_lock(struct buffer_head *bh)
  {
  	/* Fast path: nothing to lock when the data is already valid. */
  	if (buffer_uptodate(bh))
  		return 1;

  	lock_buffer(bh);

  	/* Still stale under the lock: hand the locked buffer to the caller. */
  	if (!buffer_uptodate(bh))
  		return 0;

  	/* It became uptodate while we waited for the lock. */
  	unlock_buffer(bh);
  	return 1;
  }
  EXPORT_SYMBOL(bh_uptodate_or_lock);
  
  /**
a6b91919e   Randy Dunlap   fs: fix kernel-do...
3390
   * bh_submit_read - Submit a locked buffer for reading
389d1b083   Aneesh Kumar K.V   Add buffer head r...
3391
3392
3393
3394
3395
3396
3397
3398
3399
3400
3401
3402
3403
3404
3405
   * @bh: struct buffer_head
   *
   * Returns zero on success and -EIO on error.
   */
  int bh_submit_read(struct buffer_head *bh)
  {
  	BUG_ON(!buffer_locked(bh));
  
  	if (buffer_uptodate(bh)) {
  		unlock_buffer(bh);
  		return 0;
  	}
  
  	get_bh(bh);
  	bh->b_end_io = end_buffer_read_sync;
2a222ca99   Mike Christie   fs: have submit_b...
3406
  	submit_bh(REQ_OP_READ, 0, bh);
389d1b083   Aneesh Kumar K.V   Add buffer head r...
3407
3408
3409
3410
3411
3412
  	wait_on_buffer(bh);
  	if (buffer_uptodate(bh))
  		return 0;
  	return -EIO;
  }
  EXPORT_SYMBOL(bh_submit_read);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3413
3414
  /*
   * Boot-time setup for the buffer-head layer: create the buffer_head slab
   * cache, derive max_buffer_heads from the amount of free buffer pages and
   * register the CPU-dead hotplug callback.
   */
  void __init buffer_init(void)
  {
  	unsigned long bhs_per_page = PAGE_SIZE / sizeof(struct buffer_head);
  	unsigned long budget_pages;
  	int err;

  	bh_cachep = kmem_cache_create("buffer_head",
  				      sizeof(struct buffer_head), 0,
  				      SLAB_RECLAIM_ACCOUNT | SLAB_PANIC |
  				      SLAB_MEM_SPREAD,
  				      NULL);

  	/*
  	 * Limit the bh occupancy to 10% of ZONE_NORMAL
  	 */
  	budget_pages = (nr_free_buffer_pages() * 10) / 100;
  	max_buffer_heads = budget_pages * bhs_per_page;

  	err = cpuhp_setup_state_nocalls(CPUHP_FS_BUFF_DEAD, "fs/buffer:dead",
  					NULL, buffer_exit_cpu_dead);
  	WARN_ON(err < 0);
  }