Blame view

fs/buffer.c 89.1 KB
457c89965   Thomas Gleixner   treewide: Add SPD...
1
  // SPDX-License-Identifier: GPL-2.0-only
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
  /*
   *  linux/fs/buffer.c
   *
   *  Copyright (C) 1991, 1992, 2002  Linus Torvalds
   */
  
  /*
   * Start bdflush() with kernel_thread not syscall - Paul Gortmaker, 12/95
   *
   * Removed a lot of unnecessary code and simplified things now that
   * the buffer cache isn't our primary cache - Andrew Tridgell 12/96
   *
   * Speed up hash, lru, and free list operations.  Use gfp() for allocating
   * hash table, use SLAB cache for buffer heads. SMP threading.  -DaveM
   *
   * Added 32k buffer block sizes - these are required for older ARM systems. - RMK
   *
   * async buffer flushing, 1999 Andrea Arcangeli <andrea@suse.de>
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
21
  #include <linux/kernel.h>
f361bf4a6   Ingo Molnar   sched/headers: Pr...
22
  #include <linux/sched/signal.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
23
24
  #include <linux/syscalls.h>
  #include <linux/fs.h>
ae259a9c8   Christoph Hellwig   fs: introduce iom...
25
  #include <linux/iomap.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
26
27
28
  #include <linux/mm.h>
  #include <linux/percpu.h>
  #include <linux/slab.h>
16f7e0fe2   Randy Dunlap   [PATCH] capable/c...
29
  #include <linux/capability.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
30
31
32
33
  #include <linux/blkdev.h>
  #include <linux/file.h>
  #include <linux/quotaops.h>
  #include <linux/highmem.h>
630d9c472   Paul Gortmaker   fs: reduce the us...
34
  #include <linux/export.h>
bafc0dba1   Tejun Heo   buffer, writeback...
35
  #include <linux/backing-dev.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
36
37
38
39
  #include <linux/writeback.h>
  #include <linux/hash.h>
  #include <linux/suspend.h>
  #include <linux/buffer_head.h>
55e829af0   Andrew Morton   [PATCH] io-accoun...
40
  #include <linux/task_io_accounting_ops.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
41
  #include <linux/bio.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
42
43
44
  #include <linux/cpu.h>
  #include <linux/bitops.h>
  #include <linux/mpage.h>
fb1c8f93d   Ingo Molnar   [PATCH] spinlock ...
45
  #include <linux/bit_spinlock.h>
29f3ad7d8   Jan Kara   fs: Provide funct...
46
  #include <linux/pagevec.h>
f745c6f5f   Shakeel Butt   fs, mm: account b...
47
  #include <linux/sched/mm.h>
5305cb830   Tejun Heo   block: add block_...
48
  #include <trace/events/block.h>
31fb992ce   Eric Biggers   fs/buffer.c: supp...
49
  #include <linux/fscrypt.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
50

2b211dc04   Ben Dooks   fs/buffer.c: incl...
51
  #include "internal.h"
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
52
  static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
2a222ca99   Mike Christie   fs: have submit_b...
53
  static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
8e8f92988   Jens Axboe   fs: add support f...
54
  			 enum rw_hint hint, struct writeback_control *wbc);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
55
56
  
  #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
f0059afd3   Tejun Heo   buffer: make touc...
57
58
  inline void touch_buffer(struct buffer_head *bh)
  {
5305cb830   Tejun Heo   block: add block_...
59
  	trace_block_touch_buffer(bh);
f0059afd3   Tejun Heo   buffer: make touc...
60
61
62
  	mark_page_accessed(bh->b_page);
  }
  EXPORT_SYMBOL(touch_buffer);
fc9b52cd8   Harvey Harrison   fs: remove fastca...
63
  void __lock_buffer(struct buffer_head *bh)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
64
  {
743162013   NeilBrown   sched: Remove pro...
65
  	wait_on_bit_lock_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
66
67
  }
  EXPORT_SYMBOL(__lock_buffer);
fc9b52cd8   Harvey Harrison   fs: remove fastca...
68
  void unlock_buffer(struct buffer_head *bh)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
69
  {
51b07fc3c   Nick Piggin   fs: buffer lock u...
70
  	clear_bit_unlock(BH_Lock, &bh->b_state);
4e857c58e   Peter Zijlstra   arch: Mass conver...
71
  	smp_mb__after_atomic();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
72
73
  	wake_up_bit(&bh->b_state, BH_Lock);
  }
1fe72eaa0   H Hartley Sweeten   fs/buffer.c: clea...
74
  EXPORT_SYMBOL(unlock_buffer);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
75
76
  
  /*
b45972265   Mel Gorman   mm: vmscan: take ...
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
   * Returns if the page has dirty or writeback buffers. If all the buffers
   * are unlocked and clean then the PageDirty information is stale. If
   * any of the buffers are locked, it is assumed they are locked for IO.
   */
  void buffer_check_dirty_writeback(struct page *page,
  				     bool *dirty, bool *writeback)
  {
  	struct buffer_head *first, *cur;
  
  	/* Start from "clean and idle"; the checks below only raise flags. */
  	*dirty = false;
  	*writeback = false;
  
  	BUG_ON(!PageLocked(page));
  
  	/* A page without buffers has nothing further to report. */
  	if (!page_has_buffers(page))
  		return;
  
  	if (PageWriteback(page))
  		*writeback = true;
  
  	/* Visit every buffer on the circular b_this_page ring once. */
  	first = page_buffers(page);
  	for (cur = first; ; ) {
  		/* A locked buffer is reported as writeback. */
  		if (buffer_locked(cur))
  			*writeback = true;
  
  		if (buffer_dirty(cur))
  			*dirty = true;
  
  		cur = cur->b_this_page;
  		if (cur == first)
  			break;
  	}
  }
  EXPORT_SYMBOL(buffer_check_dirty_writeback);
  
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
111
112
113
114
115
116
   * Block until a buffer comes unlocked.  This doesn't stop it
   * from becoming locked again - you have to lock it yourself
   * if you want to preserve its state.
   */
  void __wait_on_buffer(struct buffer_head * bh)
  {
743162013   NeilBrown   sched: Remove pro...
117
  	wait_on_bit_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
118
  }
1fe72eaa0   H Hartley Sweeten   fs/buffer.c: clea...
119
  EXPORT_SYMBOL(__wait_on_buffer);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
120

b744c2ac4   Robert Elliott   fs: merge I/O err...
121
  static void buffer_io_error(struct buffer_head *bh, char *msg)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
122
  {
432f16e64   Robert Elliott   fs: clarify rate ...
123
124
  	if (!test_bit(BH_Quiet, &bh->b_state))
  		printk_ratelimited(KERN_ERR
a1c6f0573   Dmitry Monakhov   fs: use block_dev...
125
126
127
  			"Buffer I/O error on dev %pg, logical block %llu%s
  ",
  			bh->b_bdev, (unsigned long long)bh->b_blocknr, msg);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
128
129
130
  }
  
  /*
68671f35f   Dmitry Monakhov   mm: add end_buffe...
131
132
133
134
135
136
   * End-of-IO handler helper function which does not touch the bh after
   * unlocking it.
   * Note: unlock_buffer() sort-of does touch the bh after unlocking it, but
   * a race there is benign: unlock_buffer() only uses the bh's address for
   * hashing after unlocking the buffer, so it doesn't actually touch the bh
   * itself.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
137
   */
68671f35f   Dmitry Monakhov   mm: add end_buffe...
138
  static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
  {
  	if (!uptodate) {
  		/* This happens, due to failed read-ahead attempts. */
  		clear_buffer_uptodate(bh);
  	} else {
  		set_buffer_uptodate(bh);
  	}
  
  	/* Last access to the buffer in this helper. */
  	unlock_buffer(bh);
  }
  
  /*
   * Default synchronous end-of-IO handler.  Just mark it up-to-date and
   * unlock the buffer. This is what ll_rw_block uses too.
   */
  void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
  {
  	/* Record the read result in the uptodate bit and unlock the buffer. */
  	__end_buffer_read_notouch(bh, uptodate);
  
  	/* Drop a reference on the buffer. */
  	put_bh(bh);
  }
  EXPORT_SYMBOL(end_buffer_read_sync);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
159
160
161
  
  /*
   * End-of-IO handler for a synchronous write.  On success the buffer is
   * marked uptodate; on failure the error is logged and recorded on the
   * buffer and its uptodate bit is cleared.  In both cases the buffer is
   * then unlocked and a reference on it is dropped.
   */
  void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
  {
  	if (!uptodate) {
  		buffer_io_error(bh, ", lost sync page write");
  		mark_buffer_write_io_error(bh);
  		clear_buffer_uptodate(bh);
  	} else {
  		set_buffer_uptodate(bh);
  	}
  
  	unlock_buffer(bh);
  	put_bh(bh);
  }
  EXPORT_SYMBOL(end_buffer_write_sync);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
173
174
  
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
175
176
177
178
179
   * Various filesystems appear to want __find_get_block to be non-blocking.
   * But it's the page lock which protects the buffers.  To get around this,
   * we get exclusion from try_to_free_buffers with the blockdev mapping's
   * private_lock.
   *
b93b01631   Matthew Wilcox   page cache: use x...
180
   * Hack idea: for the blockdev mapping, private_lock contention
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
181
   * may be quite high.  This code could TryLock the page, and if that
b93b01631   Matthew Wilcox   page cache: use x...
182
   * succeeds, there is no need to take private_lock.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
183
184
   */
  static struct buffer_head *
385fd4c59   Coywolf Qi Hunt   [PATCH] __find_ge...
185
  __find_get_block_slow(struct block_device *bdev, sector_t block)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
186
187
188
189
190
191
192
193
194
  {
  	struct inode *bd_inode = bdev->bd_inode;
  	struct address_space *bd_mapping = bd_inode->i_mapping;
  	struct buffer_head *ret = NULL;
  	pgoff_t index;
  	struct buffer_head *bh;
  	struct buffer_head *head;
  	struct page *page;
  	int all_mapped = 1;
43636c804   Tetsuo Handa   fs: ratelimit __f...
195
  	static DEFINE_RATELIMIT_STATE(last_warned, HZ, 1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
196

09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
197
  	index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
2457aec63   Mel Gorman   mm: non-atomicall...
198
  	page = find_get_page_flags(bd_mapping, index, FGP_ACCESSED);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
199
200
201
202
203
204
205
206
207
  	if (!page)
  		goto out;
  
  	spin_lock(&bd_mapping->private_lock);
  	if (!page_has_buffers(page))
  		goto out_unlock;
  	head = page_buffers(page);
  	bh = head;
  	do {
97f76d3d1   Nikanth Karthikesan   vfs: check bh->b_...
208
209
210
  		if (!buffer_mapped(bh))
  			all_mapped = 0;
  		else if (bh->b_blocknr == block) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
211
212
213
214
  			ret = bh;
  			get_bh(bh);
  			goto out_unlock;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
215
216
217
218
219
220
221
222
  		bh = bh->b_this_page;
  	} while (bh != head);
  
  	/* we might be here because some of the buffers on this page are
  	 * not mapped.  This is due to various races between
  	 * file io on the block device and getblk.  It gets dealt with
  	 * elsewhere, don't buffer_error if we had some unmapped buffers
  	 */
43636c804   Tetsuo Handa   fs: ratelimit __f...
223
224
225
226
227
228
229
230
231
232
  	ratelimit_set_flags(&last_warned, RATELIMIT_MSG_ON_RELEASE);
  	if (all_mapped && __ratelimit(&last_warned)) {
  		printk("__find_get_block_slow() failed. block=%llu, "
  		       "b_blocknr=%llu, b_state=0x%08lx, b_size=%zu, "
  		       "device %pg blocksize: %d
  ",
  		       (unsigned long long)block,
  		       (unsigned long long)bh->b_blocknr,
  		       bh->b_state, bh->b_size, bdev,
  		       1 << bd_inode->i_blkbits);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
233
234
235
  	}
  out_unlock:
  	spin_unlock(&bd_mapping->private_lock);
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
236
  	put_page(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
237
238
239
  out:
  	return ret;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
240
241
  static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
242
  	unsigned long flags;
a39722034   Nick Piggin   [PATCH] page_upto...
243
  	struct buffer_head *first;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
244
245
246
247
248
249
250
251
252
253
254
  	struct buffer_head *tmp;
  	struct page *page;
  	int page_uptodate = 1;
  
  	BUG_ON(!buffer_async_read(bh));
  
  	page = bh->b_page;
  	if (uptodate) {
  		set_buffer_uptodate(bh);
  	} else {
  		clear_buffer_uptodate(bh);
432f16e64   Robert Elliott   fs: clarify rate ...
255
  		buffer_io_error(bh, ", async page read");
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
256
257
258
259
260
261
262
263
  		SetPageError(page);
  	}
  
  	/*
  	 * Be _very_ careful from here on. Bad things can happen if
  	 * two buffer heads end IO at almost the same time and both
  	 * decide that the page is now completely done.
  	 */
a39722034   Nick Piggin   [PATCH] page_upto...
264
  	first = page_buffers(page);
f1e67e355   Thomas Gleixner   fs/buffer: Make B...
265
  	spin_lock_irqsave(&first->b_uptodate_lock, flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
266
267
268
269
270
271
272
273
274
275
276
277
  	clear_buffer_async_read(bh);
  	unlock_buffer(bh);
  	tmp = bh;
  	do {
  		if (!buffer_uptodate(tmp))
  			page_uptodate = 0;
  		if (buffer_async_read(tmp)) {
  			BUG_ON(!buffer_locked(tmp));
  			goto still_busy;
  		}
  		tmp = tmp->b_this_page;
  	} while (tmp != bh);
f1e67e355   Thomas Gleixner   fs/buffer: Make B...
278
  	spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
279
280
281
282
283
284
285
286
287
288
289
  
  	/*
  	 * If none of the buffers had errors and they are all
  	 * uptodate then we can set the page uptodate.
  	 */
  	if (page_uptodate && !PageError(page))
  		SetPageUptodate(page);
  	unlock_page(page);
  	return;
  
  still_busy:
f1e67e355   Thomas Gleixner   fs/buffer: Make B...
290
  	spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
291
292
  	return;
  }
31fb992ce   Eric Biggers   fs/buffer.c: supp...
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
  /*
   * Context for deferring decryption of a just-read buffer to a work item.
   * Allocated in end_buffer_async_read_io() and freed by decrypt_bh().
   */
  struct decrypt_bh_ctx {
  	struct work_struct work;	/* work item whose handler is decrypt_bh() */
  	struct buffer_head *bh;		/* buffer to decrypt and then complete */
  };
  
  /*
   * Work handler: decrypt the blocks covered by the context's buffer, then
   * complete the async read (successful only if decryption returned 0) and
   * free the context.
   */
  static void decrypt_bh(struct work_struct *work)
  {
  	struct decrypt_bh_ctx *ctx;
  	struct buffer_head *bh;
  	int err;
  
  	ctx = container_of(work, struct decrypt_bh_ctx, work);
  	bh = ctx->bh;
  
  	err = fscrypt_decrypt_pagecache_blocks(bh->b_page, bh->b_size,
  					       bh_offset(bh));
  	end_buffer_async_read(bh, !err);
  
  	kfree(ctx);
  }
  
  /*
   * I/O completion handler for block_read_full_page() - pages
   * which come unlocked at the end of I/O.
   */
  static void end_buffer_async_read_io(struct buffer_head *bh, int uptodate)
  {
  	/* Decrypt if needed */
4f74d15fe   Eric Biggers   ext4: add inline ...
318
319
  	if (uptodate &&
  	    fscrypt_inode_uses_fs_layer_crypto(bh->b_page->mapping->host)) {
31fb992ce   Eric Biggers   fs/buffer.c: supp...
320
321
322
323
324
325
326
327
328
329
330
331
  		struct decrypt_bh_ctx *ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC);
  
  		if (ctx) {
  			INIT_WORK(&ctx->work, decrypt_bh);
  			ctx->bh = bh;
  			fscrypt_enqueue_decrypt_work(&ctx->work);
  			return;
  		}
  		uptodate = 0;
  	}
  	end_buffer_async_read(bh, uptodate);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
332
333
334
335
  /*
   * Completion handler for block_write_full_page() - pages which are unlocked
   * during I/O, and which have PageWriteback cleared upon I/O completion.
   */
35c80d5f4   Chris Mason   Add block_write_f...
336
  void end_buffer_async_write(struct buffer_head *bh, int uptodate)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
337
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
338
  	unsigned long flags;
a39722034   Nick Piggin   [PATCH] page_upto...
339
  	struct buffer_head *first;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
340
341
342
343
344
345
346
347
348
  	struct buffer_head *tmp;
  	struct page *page;
  
  	BUG_ON(!buffer_async_write(bh));
  
  	page = bh->b_page;
  	if (uptodate) {
  		set_buffer_uptodate(bh);
  	} else {
432f16e64   Robert Elliott   fs: clarify rate ...
349
  		buffer_io_error(bh, ", lost async page write");
87354e5de   Jeff Layton   buffer: set error...
350
  		mark_buffer_write_io_error(bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
351
352
353
  		clear_buffer_uptodate(bh);
  		SetPageError(page);
  	}
a39722034   Nick Piggin   [PATCH] page_upto...
354
  	first = page_buffers(page);
f1e67e355   Thomas Gleixner   fs/buffer: Make B...
355
  	spin_lock_irqsave(&first->b_uptodate_lock, flags);
a39722034   Nick Piggin   [PATCH] page_upto...
356

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
357
358
359
360
361
362
363
364
365
366
  	clear_buffer_async_write(bh);
  	unlock_buffer(bh);
  	tmp = bh->b_this_page;
  	while (tmp != bh) {
  		if (buffer_async_write(tmp)) {
  			BUG_ON(!buffer_locked(tmp));
  			goto still_busy;
  		}
  		tmp = tmp->b_this_page;
  	}
f1e67e355   Thomas Gleixner   fs/buffer: Make B...
367
  	spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
368
369
370
371
  	end_page_writeback(page);
  	return;
  
  still_busy:
f1e67e355   Thomas Gleixner   fs/buffer: Make B...
372
  	spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
373
374
  	return;
  }
1fe72eaa0   H Hartley Sweeten   fs/buffer.c: clea...
375
  EXPORT_SYMBOL(end_buffer_async_write);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
  
  /*
   * If a page's buffers are under async readin (end_buffer_async_read
   * completion) then there is a possibility that another thread of
   * control could lock one of the buffers after it has completed
   * but while some of the other buffers have not completed.  This
   * locked buffer would confuse end_buffer_async_read() into not unlocking
   * the page.  So the absence of BH_Async_Read tells end_buffer_async_read()
   * that this buffer is not under async I/O.
   *
   * The page comes unlocked when it has no locked buffer_async buffers
   * left.
   *
   * PageLocked prevents anyone starting new async I/O reads against any of
   * the buffers.
   *
   * PageWriteback is used to prevent simultaneous writeout of the same
   * page.
   *
   * PageLocked prevents anyone from starting writeback of a page which is
   * under read I/O (PageWriteback is only ever set against a locked page).
   */
  static void mark_buffer_async_read(struct buffer_head *bh)
  {
31fb992ce   Eric Biggers   fs/buffer.c: supp...
400
  	bh->b_end_io = end_buffer_async_read_io;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
401
402
  	set_buffer_async_read(bh);
  }
1fe72eaa0   H Hartley Sweeten   fs/buffer.c: clea...
403
404
  /*
   * Install @handler as the completion callback of @bh and flag the buffer
   * as being under async write.
   */
  static void mark_buffer_async_write_endio(struct buffer_head *bh,
  					  bh_end_io_t *handler)
  {
  	bh->b_end_io = handler;
  
  	set_buffer_async_write(bh);
  }
35c80d5f4   Chris Mason   Add block_write_f...
409
410
411
412
413
  
  void mark_buffer_async_write(struct buffer_head *bh)
  {
  	mark_buffer_async_write_endio(bh, end_buffer_async_write);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
  EXPORT_SYMBOL(mark_buffer_async_write);
  
  
  /*
   * fs/buffer.c contains helper functions for buffer-backed address space's
   * fsync functions.  A common requirement for buffer-based filesystems is
   * that certain data from the backing blockdev needs to be written out for
   * a successful fsync().  For example, ext2 indirect blocks need to be
   * written back and waited upon before fsync() returns.
   *
   * The functions mark_buffer_inode_dirty(), fsync_inode_buffers(),
   * inode_has_buffers() and invalidate_inode_buffers() are provided for the
   * management of a list of dependent buffers at ->i_mapping->private_list.
   *
   * Locking is a little subtle: try_to_free_buffers() will remove buffers
   * from their controlling inode's queue when they are being freed.  But
   * try_to_free_buffers() will be operating against the *blockdev* mapping
   * at the time, not against the S_ISREG file which depends on those buffers.
   * So the locking for private_list is via the private_lock in the address_space
   * which backs the buffers.  Which is different from the address_space 
   * against which the buffers are listed.  So for a particular address_space,
   * mapping->private_lock does *not* protect mapping->private_list!  In fact,
   * mapping->private_list will always be protected by the backing blockdev's
   * ->private_lock.
   *
   * Which introduces a requirement: all buffers on an address_space's
   * ->private_list must be from the same address_space: the blockdev's.
   *
   * address_spaces which do not place buffers at ->private_list via these
   * utility functions are free to use private_lock and private_list for
   * whatever they want.  The only requirement is that list_empty(private_list)
   * be true at clear_inode() time.
   *
   * FIXME: clear_inode should not call invalidate_inode_buffers().  The
   * filesystems should do that.  invalidate_inode_buffers() should just go
   * BUG_ON(!list_empty).
   *
   * FIXME: mark_buffer_dirty_inode() is a data-plane operation.  It should
   * take an address_space, not an inode.  And it should be called
   * mark_buffer_dirty_fsync() to clearly define why those buffers are being
   * queued up.
   *
   * FIXME: mark_buffer_dirty_inode() doesn't need to add the buffer to the
   * list if it is already on a list.  Because if the buffer is on a list,
   * it *must* already be on the right one.  If not, the filesystem is being
   * silly.  This will save a ton of locking.  But first we have to ensure
   * that buffers are taken *off* the old inode's list when they are freed
   * (presumably in truncate).  That requires careful auditing of all
   * filesystems (do it inside bforget()).  It could also be done by bringing
   * b_inode back.
   */
  
  /*
   * The buffer's backing address_space's private_lock must be held
   */
dbacefc9c   Thomas Petazzoni   fs/buffer.c: unin...
469
  static void __remove_assoc_queue(struct buffer_head *bh)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
470
471
  {
  	list_del_init(&bh->b_assoc_buffers);
58ff407be   Jan Kara   [PATCH] Fix IO er...
472
  	WARN_ON(!bh->b_assoc_map);
58ff407be   Jan Kara   [PATCH] Fix IO er...
473
  	bh->b_assoc_map = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
  }
  
  /*
   * Returns non-zero when @inode's i_data.private_list is not empty.
   */
  int inode_has_buffers(struct inode *inode)
  {
  	const struct list_head *assoc = &inode->i_data.private_list;
  
  	return !list_empty(assoc);
  }
  
  /*
   * osync is designed to support O_SYNC io.  It waits synchronously for
   * all already-submitted IO to complete, but does not queue any new
   * writes to the disk.
   *
   * To do O_SYNC writes, just queue the buffer writes with ll_rw_block as
   * you dirty the buffers, and then use osync_inode_buffers to wait for
   * completion.  Any other dirty buffers which are not yet queued for
   * write will not be flushed to disk by the osync.
   */
  static int osync_buffers_list(spinlock_t *lock, struct list_head *list)
  {
  	struct list_head *p;
  	int err = 0;
  
  	spin_lock(lock);
  	for (;;) {
  		struct buffer_head *locked = NULL;
  
  		/* Search from the tail for the first buffer that is locked. */
  		list_for_each_prev(p, list) {
  			struct buffer_head *bh = BH_ENTRY(p);
  
  			if (buffer_locked(bh)) {
  				locked = bh;
  				break;
  			}
  		}
  
  		/* A full pass found nothing locked: we are done. */
  		if (!locked)
  			break;
  
  		/*
  		 * Hold a reference so the buffer can be waited on with the
  		 * list lock dropped, then rescan the list from the start.
  		 */
  		get_bh(locked);
  		spin_unlock(lock);
  		wait_on_buffer(locked);
  		if (!buffer_uptodate(locked))
  			err = -EIO;
  		brelse(locked);
  		spin_lock(lock);
  	}
  	spin_unlock(lock);
  
  	return err;
  }
08fdc8a01   Mateusz Guzik   buffer.c: call th...
515
  void emergency_thaw_bdev(struct super_block *sb)
c2d754385   Eric Sandeen   filesystem freeze...
516
  {
01a05b337   Al Viro   new helper: itera...
517
  	while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb))
a1c6f0573   Dmitry Monakhov   fs: use block_dev...
518
519
  		printk(KERN_WARNING "Emergency Thaw on %pg
  ", sb->s_bdev);
01a05b337   Al Viro   new helper: itera...
520
  }
c2d754385   Eric Sandeen   filesystem freeze...
521

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
522
  /**
78a4a50a8   Randy Dunlap   docbook: fix file...
523
   * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers
67be2dd1b   Martin Waitz   [PATCH] DocBook: ...
524
   * @mapping: the mapping which wants those buffers written
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
525
526
527
528
   *
   * Starts I/O against the buffers at mapping->private_list, and waits upon
   * that I/O.
   *
67be2dd1b   Martin Waitz   [PATCH] DocBook: ...
529
530
531
   * Basically, this is a convenience function for fsync().
   * @mapping is a file or directory which needs those buffers to be written for
   * a successful fsync().
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
532
533
534
   */
  int sync_mapping_buffers(struct address_space *mapping)
  {
  	struct address_space *backing = mapping->private_data;
  
  	/* No backing mapping was ever recorded: nothing to sync. */
  	if (!backing)
  		return 0;
  
  	/* No associated buffers queued. */
  	if (list_empty(&mapping->private_list))
  		return 0;
  
  	/* The list is protected by the *backing* mapping's private_lock. */
  	return fsync_buffers_list(&backing->private_lock,
  				  &mapping->private_list);
  }
  EXPORT_SYMBOL(sync_mapping_buffers);
  
  /*
   * Called when we've recently written block `bblock', and it is known that
   * `bblock' was for a buffer_boundary() buffer.  This means that the block at
   * `bblock + 1' is probably a dirty indirect block.  Hunt it down and, if it's
   * dirty, schedule it for IO.  So that indirects merge nicely with their data.
   */
  void write_boundary_block(struct block_device *bdev,
  			sector_t bblock, unsigned blocksize)
  {
  	struct buffer_head *next;
  
  	/* Look up the buffer for the block right after the boundary block. */
  	next = __find_get_block(bdev, bblock + 1, blocksize);
  	if (!next)
  		return;
  
  	/* Only a dirty buffer is submitted for write. */
  	if (buffer_dirty(next))
  		ll_rw_block(REQ_OP_WRITE, 0, 1, &next);
  
  	/* Drop the reference obtained by the lookup. */
  	put_bh(next);
  }
  
  void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
  {
  	struct address_space *mapping = inode->i_mapping;
  	struct address_space *buffer_mapping = bh->b_page->mapping;
  
  	mark_buffer_dirty(bh);
252aa6f5b   Rafael Aquini   mm: redefine addr...
568
569
  	if (!mapping->private_data) {
  		mapping->private_data = buffer_mapping;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
570
  	} else {
252aa6f5b   Rafael Aquini   mm: redefine addr...
571
  		BUG_ON(mapping->private_data != buffer_mapping);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
572
  	}
535ee2fbf   Jan Kara   buffer_head: fix ...
573
  	if (!bh->b_assoc_map) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
574
575
576
  		spin_lock(&buffer_mapping->private_lock);
  		list_move_tail(&bh->b_assoc_buffers,
  				&mapping->private_list);
58ff407be   Jan Kara   [PATCH] Fix IO er...
577
  		bh->b_assoc_map = mapping;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
578
579
580
581
582
583
  		spin_unlock(&buffer_mapping->private_lock);
  	}
  }
  EXPORT_SYMBOL(mark_buffer_dirty_inode);
  
  /*
ec82e1c1c   Matthew Wilcox   fs: Convert buffe...
584
   * Mark the page dirty, and set it dirty in the page cache, and mark the inode
787d2214c   Nick Piggin   fs: introduce som...
585
586
587
588
   * dirty.
   *
   * If warn is true, then emit a warning if the page is not uptodate and has
   * not been truncated.
c4843a759   Greg Thelen   memcg: add per cg...
589
   *
81f8c3a46   Johannes Weiner   mm: memcontrol: g...
590
   * The caller must hold lock_page_memcg().
787d2214c   Nick Piggin   fs: introduce som...
591
   */
f82b37641   Matthew Wilcox   export __set_page...
592
  void __set_page_dirty(struct page *page, struct address_space *mapping,
62cccb8c8   Johannes Weiner   mm: simplify lock...
593
  			     int warn)
787d2214c   Nick Piggin   fs: introduce som...
594
  {
227d53b39   KOSAKI Motohiro   mm: __set_page_di...
595
  	unsigned long flags;
b93b01631   Matthew Wilcox   page cache: use x...
596
  	xa_lock_irqsave(&mapping->i_pages, flags);
787d2214c   Nick Piggin   fs: introduce som...
597
598
  	if (page->mapping) {	/* Race with truncate? */
  		WARN_ON_ONCE(warn && !PageUptodate(page));
62cccb8c8   Johannes Weiner   mm: simplify lock...
599
  		account_page_dirtied(page, mapping);
ec82e1c1c   Matthew Wilcox   fs: Convert buffe...
600
601
  		__xa_set_mark(&mapping->i_pages, page_index(page),
  				PAGECACHE_TAG_DIRTY);
787d2214c   Nick Piggin   fs: introduce som...
602
  	}
b93b01631   Matthew Wilcox   page cache: use x...
603
  	xa_unlock_irqrestore(&mapping->i_pages, flags);
787d2214c   Nick Piggin   fs: introduce som...
604
  }
f82b37641   Matthew Wilcox   export __set_page...
605
  EXPORT_SYMBOL_GPL(__set_page_dirty);
787d2214c   Nick Piggin   fs: introduce som...
606
607
  
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
   * Add a page to the dirty page list.
   *
   * It is a sad fact of life that this function is called from several places
   * deeply under spinlocking.  It may not sleep.
   *
   * If the page has buffers, the uptodate buffers are set dirty, to preserve
   * dirty-state coherency between the page and the buffers.  If the page does
   * not have buffers then when they are later attached they will all be set
   * dirty.
   *
   * The buffers are dirtied before the page is dirtied.  There's a small race
   * window in which a writepage caller may see the page cleanness but not the
   * buffer dirtiness.  That's fine.  If this code were to set the page dirty
   * before the buffers, a concurrent writepage caller could clear the page dirty
   * bit, see a bunch of clean buffers and we'd end up with dirty buffers/clean
   * page on the dirty page list.
   *
   * We use private_lock to lock against try_to_free_buffers while using the
   * page's buffer list.  Also use this to protect against clean buffers being
   * added to the page after it was set dirty.
   *
   * FIXME: may need to call ->reservepage here as well.  That's rather up to the
   * address_space though.
   */
  int __set_page_dirty_buffers(struct page *page)
  {
a8e7d49aa   Linus Torvalds   Fix race in creat...
634
  	int newly_dirty;
787d2214c   Nick Piggin   fs: introduce som...
635
  	struct address_space *mapping = page_mapping(page);
ebf7a227d   Nick Piggin   [PATCH] mm: bug i...
636
637
638
  
  	if (unlikely(!mapping))
  		return !TestSetPageDirty(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
639
640
641
642
643
644
645
646
647
648
649
  
  	spin_lock(&mapping->private_lock);
  	if (page_has_buffers(page)) {
  		struct buffer_head *head = page_buffers(page);
  		struct buffer_head *bh = head;
  
  		do {
  			set_buffer_dirty(bh);
  			bh = bh->b_this_page;
  		} while (bh != head);
  	}
c4843a759   Greg Thelen   memcg: add per cg...
650
  	/*
81f8c3a46   Johannes Weiner   mm: memcontrol: g...
651
652
  	 * Lock out page->mem_cgroup migration to keep PageDirty
  	 * synchronized with per-memcg dirty page counters.
c4843a759   Greg Thelen   memcg: add per cg...
653
  	 */
62cccb8c8   Johannes Weiner   mm: simplify lock...
654
  	lock_page_memcg(page);
a8e7d49aa   Linus Torvalds   Fix race in creat...
655
  	newly_dirty = !TestSetPageDirty(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
656
  	spin_unlock(&mapping->private_lock);
a8e7d49aa   Linus Torvalds   Fix race in creat...
657
  	if (newly_dirty)
62cccb8c8   Johannes Weiner   mm: simplify lock...
658
  		__set_page_dirty(page, mapping, 1);
c4843a759   Greg Thelen   memcg: add per cg...
659

62cccb8c8   Johannes Weiner   mm: simplify lock...
660
  	unlock_page_memcg(page);
c4843a759   Greg Thelen   memcg: add per cg...
661
662
663
  
  	if (newly_dirty)
  		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
a8e7d49aa   Linus Torvalds   Fix race in creat...
664
  	return newly_dirty;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
  }
  EXPORT_SYMBOL(__set_page_dirty_buffers);
  
  /*
   * Write out and wait upon a list of buffers.
   *
   * We have conflicting pressures: we want to make sure that all
   * initially dirty buffers get waited on, but that any subsequently
   * dirtied buffers don't.  After all, we don't want fsync to last
   * forever if somebody is actively writing to the file.
   *
   * Do this in two main stages: first we copy dirty buffers to a
   * temporary inode list, queueing the writes as we go.  Then we clean
   * up, waiting for those writes to complete.
   * 
   * During this second stage, any subsequent updates to the file may end
   * up refiling the buffer on the original inode's dirty list again, so
   * there is a chance we will end up with a buffer queued for write but
   * not yet completed on that list.  So, as a final cleanup we go through
   * the osync code to catch these locked, dirty buffers without requeuing
   * any newly dirty buffers for write.
   */
  static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
  {
  	struct address_space *mapping;
  	struct buffer_head *bh;
  	struct list_head queued;
  	struct blk_plug plug;
  	int wait_err = 0;
  	int osync_err;
  
  	INIT_LIST_HEAD(&queued);
  	blk_start_plug(&plug);
  
  	/*
  	 * Stage 1: drain @list.  Dirty or locked buffers are moved onto the
  	 * private 'queued' list; dirty ones are additionally submitted for
  	 * write with @lock dropped around the submission.
  	 */
  	spin_lock(lock);
  	while (!list_empty(list)) {
  		bh = BH_ENTRY(list->next);
  		/* Saved so it can be restored after __remove_assoc_queue(). */
  		mapping = bh->b_assoc_map;
  		__remove_assoc_queue(bh);
  		/* Avoid race with mark_buffer_dirty_inode() which does
  		 * a lockless check and we rely on seeing the dirty bit */
  		smp_mb();
  		if (!buffer_dirty(bh) && !buffer_locked(bh))
  			continue;
  
  		list_add(&bh->b_assoc_buffers, &queued);
  		bh->b_assoc_map = mapping;
  		if (!buffer_dirty(bh))
  			continue;
  
  		/* Hold a reference while the lock is not held. */
  		get_bh(bh);
  		spin_unlock(lock);
  		/*
  		 * Ensure any pending I/O completes so that
  		 * write_dirty_buffer() actually writes the
  		 * current contents - it is a noop if I/O is
  		 * still in flight on potentially older
  		 * contents.
  		 */
  		write_dirty_buffer(bh, REQ_SYNC);
  
  		/*
  		 * Kick off IO for the previous mapping. Note
  		 * that we will not run the very last mapping,
  		 * wait_on_buffer() will do that for us
  		 * through sync_buffer().
  		 *
  		 * NOTE(review): no per-mapping kick is visible in this
  		 * function; the plug is finished once after the loop.
  		 */
  		brelse(bh);
  		spin_lock(lock);
  	}
  
  	/* The plug is finished with the lock released. */
  	spin_unlock(lock);
  	blk_finish_plug(&plug);
  	spin_lock(lock);
  
  	/*
  	 * Stage 2: wait for everything queued above, oldest entry first
  	 * (entries were added at the head, so take them from the tail).
  	 */
  	while (!list_empty(&queued)) {
  		bh = BH_ENTRY(queued.prev);
  		get_bh(bh);
  		mapping = bh->b_assoc_map;
  		__remove_assoc_queue(bh);
  		/* Avoid race with mark_buffer_dirty_inode() which does
  		 * a lockless check and we rely on seeing the dirty bit */
  		smp_mb();
  		if (buffer_dirty(bh)) {
  			/* Redirtied meanwhile: put it back on the mapping's list. */
  			list_add(&bh->b_assoc_buffers,
  				 &mapping->private_list);
  			bh->b_assoc_map = mapping;
  		}
  		spin_unlock(lock);
  		wait_on_buffer(bh);
  		if (!buffer_uptodate(bh))
  			wait_err = -EIO;
  		brelse(bh);
  		spin_lock(lock);
  	}
  
  	spin_unlock(lock);
  
  	/* Final pass; an error from the wait stage takes precedence. */
  	osync_err = osync_buffers_list(lock, list);
  	return wait_err ? wait_err : osync_err;
  }
  
  /*
   * Invalidate any and all dirty buffers on a given inode.  We are
   * probably unmounting the fs, but that doesn't mean we have already
   * done a sync().  Just drop the buffers from the inode list.
   *
   * NOTE: we take the inode's blockdev's mapping's private_lock.  Which
   * assumes that all the buffers are against the blockdev.  Not true
   * for reiserfs.
   */
  void invalidate_inode_buffers(struct inode *inode)
  {
  	struct address_space *buffer_mapping;
  	struct list_head *assoc;
  
  	if (!inode_has_buffers(inode))
  		return;
  
  	assoc = &inode->i_data.private_list;
  	/* The lock that is taken lives in the mapping stored in private_data. */
  	buffer_mapping = inode->i_data.private_data;
  
  	spin_lock(&buffer_mapping->private_lock);
  	/* Unhook every buffer, whatever its state. */
  	while (!list_empty(assoc))
  		__remove_assoc_queue(BH_ENTRY(assoc->next));
  	spin_unlock(&buffer_mapping->private_lock);
  }
  EXPORT_SYMBOL(invalidate_inode_buffers);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
787
788
789
790
791
792
793
794
795
796
797
798
799
800
  
  /*
   * Remove any clean buffers from the inode's buffer list.  This is called
   * when we're trying to free the inode itself.  Those buffers can pin it.
   *
   * Returns true if all buffers were removed.
   */
  int remove_inode_buffers(struct inode *inode)
  {
  	struct address_space *buffer_mapping;
  	struct list_head *assoc;
  	int all_removed = 1;
  
  	/* Nothing attached: trivially "all removed". */
  	if (!inode_has_buffers(inode))
  		return all_removed;
  
  	assoc = &inode->i_data.private_list;
  	buffer_mapping = inode->i_data.private_data;
  
  	spin_lock(&buffer_mapping->private_lock);
  	while (!list_empty(assoc)) {
  		struct buffer_head *bh = BH_ENTRY(assoc->next);
  
  		/* Stop at the first dirty buffer; it and the rest stay queued. */
  		if (buffer_dirty(bh)) {
  			all_removed = 0;
  			break;
  		}
  		__remove_assoc_queue(bh);
  	}
  	spin_unlock(&buffer_mapping->private_lock);
  
  	return all_removed;
  }
  
  /*
   * Create the appropriate buffers when given a page for data area and
   * the size of each buffer.  Use the bh->b_this_page linked list to
   * follow the buffers created.  Return NULL if unable to create more
   * buffers.
   *
   * The retry flag is used to differentiate async IO (paging, swapping)
   * which may not fail from ordinary buffer allocations.
   */
  struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
640ab98fb   Jens Axboe   buffer: have allo...
827
  		bool retry)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
828
829
  {
  	struct buffer_head *bh, *head;
f745c6f5f   Shakeel Butt   fs, mm: account b...
830
  	gfp_t gfp = GFP_NOFS | __GFP_ACCOUNT;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
831
  	long offset;
b87d8cefe   Roman Gushchin   mm, memcg: rework...
832
  	struct mem_cgroup *memcg, *old_memcg;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
833

640ab98fb   Jens Axboe   buffer: have allo...
834
835
  	if (retry)
  		gfp |= __GFP_NOFAIL;
f745c6f5f   Shakeel Butt   fs, mm: account b...
836
  	memcg = get_mem_cgroup_from_page(page);
b87d8cefe   Roman Gushchin   mm, memcg: rework...
837
  	old_memcg = set_active_memcg(memcg);
f745c6f5f   Shakeel Butt   fs, mm: account b...
838

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
839
840
841
  	head = NULL;
  	offset = PAGE_SIZE;
  	while ((offset -= size) >= 0) {
640ab98fb   Jens Axboe   buffer: have allo...
842
  		bh = alloc_buffer_head(gfp);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
843
844
  		if (!bh)
  			goto no_grow;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
845
846
847
  		bh->b_this_page = head;
  		bh->b_blocknr = -1;
  		head = bh;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
848
849
850
851
  		bh->b_size = size;
  
  		/* Link the buffer to its page */
  		set_bh_page(bh, page, offset);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
852
  	}
f745c6f5f   Shakeel Butt   fs, mm: account b...
853
  out:
b87d8cefe   Roman Gushchin   mm, memcg: rework...
854
  	set_active_memcg(old_memcg);
f745c6f5f   Shakeel Butt   fs, mm: account b...
855
  	mem_cgroup_put(memcg);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
856
857
858
859
860
861
862
863
864
865
866
867
  	return head;
  /*
   * In case anything failed, we just free everything we got.
   */
  no_grow:
  	if (head) {
  		do {
  			bh = head;
  			head = head->b_this_page;
  			free_buffer_head(bh);
  		} while (head);
  	}
f745c6f5f   Shakeel Butt   fs, mm: account b...
868
  	goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
869
870
871
872
873
874
875
876
877
878
879
880
881
882
  }
  EXPORT_SYMBOL_GPL(alloc_page_buffers);
  
  static inline void
  link_dev_buffers(struct page *page, struct buffer_head *head)
  {
  	struct buffer_head *tail = head;
  
  	/* Find the last buffer of the NULL-terminated chain... */
  	while (tail->b_this_page)
  		tail = tail->b_this_page;
  
  	/* ...close the chain into a ring and hang it off the page. */
  	tail->b_this_page = head;
  	attach_page_private(page, head);
  }
bbec0270b   Linus Torvalds   blkdev_max_block:...
885
886
887
888
889
890
891
892
893
894
895
  static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
  {
  	loff_t bytes = i_size_read(bdev->bd_inode);
  
  	/* A zero inode size yields the all-ones (largest) sector_t value. */
  	if (!bytes)
  		return ~((sector_t)0);
  
  	/* Otherwise: the inode size shifted down by blksize_bits(size). */
  	return bytes >> blksize_bits(size);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
896
897
898
  /*
   * Initialise the state of a blockdev page's buffers.
   */ 
676ce6d5c   Hugh Dickins   block: replace __...
899
  static sector_t
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
900
901
902
903
904
905
  init_page_buffers(struct page *page, struct block_device *bdev,
  			sector_t block, int size)
  {
  	struct buffer_head *head = page_buffers(page);
  	struct buffer_head *bh = head;
  	int uptodate = PageUptodate(page);
bbec0270b   Linus Torvalds   blkdev_max_block:...
906
  	sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode), size);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
907
908
909
  
  	do {
  		if (!buffer_mapped(bh)) {
01950a349   Eric Biggers   fs/buffer.c: fold...
910
911
  			bh->b_end_io = NULL;
  			bh->b_private = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
912
913
914
915
  			bh->b_bdev = bdev;
  			bh->b_blocknr = block;
  			if (uptodate)
  				set_buffer_uptodate(bh);
080399aaa   Jeff Moyer   block: don't mark...
916
917
  			if (block < end_block)
  				set_buffer_mapped(bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
918
919
920
921
  		}
  		block++;
  		bh = bh->b_this_page;
  	} while (bh != head);
676ce6d5c   Hugh Dickins   block: replace __...
922
923
924
925
926
  
  	/*
  	 * Caller needs to validate requested block against end of device.
  	 */
  	return end_block;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
927
928
929
930
931
  }
  
  /*
   * Create the page-cache page that contains the requested block.
   *
676ce6d5c   Hugh Dickins   block: replace __...
932
   * This is used purely for blockdev mappings.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
933
   */
676ce6d5c   Hugh Dickins   block: replace __...
934
  static int
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
935
  grow_dev_page(struct block_device *bdev, sector_t block,
3b5e6454a   Gioh Kim   fs/buffer.c: supp...
936
  	      pgoff_t index, int size, int sizebits, gfp_t gfp)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
937
938
939
940
  {
  	struct inode *inode = bdev->bd_inode;
  	struct page *page;
  	struct buffer_head *bh;
676ce6d5c   Hugh Dickins   block: replace __...
941
  	sector_t end_block;
c4b4c2a78   Zhiqiang Liu   buffer: remove us...
942
  	int ret = 0;
84235de39   Johannes Weiner   fs: buffer: move ...
943
  	gfp_t gfp_mask;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
944

c62d25556   Michal Hocko   mm, fs: introduce...
945
  	gfp_mask = mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS) | gfp;
3b5e6454a   Gioh Kim   fs/buffer.c: supp...
946

84235de39   Johannes Weiner   fs: buffer: move ...
947
948
949
950
951
952
953
954
955
  	/*
  	 * XXX: __getblk_slow() can not really deal with failure and
  	 * will endlessly loop on improvised global reclaim.  Prefer
  	 * looping in the allocator rather than here, at least that
  	 * code knows what it's doing.
  	 */
  	gfp_mask |= __GFP_NOFAIL;
  
  	page = find_or_create_page(inode->i_mapping, index, gfp_mask);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
956

e827f9235   Eric Sesterhenn   BUG_ON() Conversi...
957
  	BUG_ON(!PageLocked(page));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
958
959
960
961
  
  	if (page_has_buffers(page)) {
  		bh = page_buffers(page);
  		if (bh->b_size == size) {
676ce6d5c   Hugh Dickins   block: replace __...
962
  			end_block = init_page_buffers(page, bdev,
f2d5a9443   Anton Altaparmakov   Fix nasty 32-bit ...
963
964
  						(sector_t)index << sizebits,
  						size);
676ce6d5c   Hugh Dickins   block: replace __...
965
  			goto done;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
966
967
968
969
970
971
972
973
  		}
  		if (!try_to_free_buffers(page))
  			goto failed;
  	}
  
  	/*
  	 * Allocate some buffers for this page
  	 */
94dc24c0c   Jens Axboe   buffer: grow_dev_...
974
  	bh = alloc_page_buffers(page, size, true);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
975
976
977
978
979
980
981
982
  
  	/*
  	 * Link the page to the buffers and initialise them.  Take the
  	 * lock to be atomic wrt __find_get_block(), which does not
  	 * run under the page lock.
  	 */
  	spin_lock(&inode->i_mapping->private_lock);
  	link_dev_buffers(page, bh);
f2d5a9443   Anton Altaparmakov   Fix nasty 32-bit ...
983
984
  	end_block = init_page_buffers(page, bdev, (sector_t)index << sizebits,
  			size);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
985
  	spin_unlock(&inode->i_mapping->private_lock);
676ce6d5c   Hugh Dickins   block: replace __...
986
987
  done:
  	ret = (block < end_block) ? 1 : -ENXIO;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
988
  failed:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
989
  	unlock_page(page);
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
990
  	put_page(page);
676ce6d5c   Hugh Dickins   block: replace __...
991
  	return ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
992
993
994
995
996
  }
  
  /*
   * Create buffers for the specified block device block's page.  If
   * that page was dirty, the buffers are set dirty also.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
997
   */
858119e15   Arjan van de Ven   [PATCH] Unlinline...
998
  static int
3b5e6454a   Gioh Kim   fs/buffer.c: supp...
999
  grow_buffers(struct block_device *bdev, sector_t block, int size, gfp_t gfp)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1000
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1001
1002
1003
1004
1005
1006
1007
1008
1009
  	pgoff_t index;
  	int sizebits;
  
  	sizebits = -1;
  	do {
  		sizebits++;
  	} while ((size << sizebits) < PAGE_SIZE);
  
  	index = block >> sizebits;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1010

e56579338   Andrew Morton   [PATCH] grow_buff...
1011
1012
1013
1014
1015
  	/*
  	 * Check for a block which wants to lie outside our maximum possible
  	 * pagecache index.  (this comparison is done using sector_t types).
  	 */
  	if (unlikely(index != block >> sizebits)) {
e56579338   Andrew Morton   [PATCH] grow_buff...
1016
  		printk(KERN_ERR "%s: requested out-of-range block %llu for "
a1c6f0573   Dmitry Monakhov   fs: use block_dev...
1017
1018
  			"device %pg
  ",
8e24eea72   Harvey Harrison   fs: replace remai...
1019
  			__func__, (unsigned long long)block,
a1c6f0573   Dmitry Monakhov   fs: use block_dev...
1020
  			bdev);
e56579338   Andrew Morton   [PATCH] grow_buff...
1021
1022
  		return -EIO;
  	}
676ce6d5c   Hugh Dickins   block: replace __...
1023

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1024
  	/* Create a page with the proper size buffers.. */
3b5e6454a   Gioh Kim   fs/buffer.c: supp...
1025
  	return grow_dev_page(bdev, block, index, size, sizebits, gfp);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1026
  }
0026ba400   Eric Biggers   fs/buffer.c: make...
1027
  static struct buffer_head *
3b5e6454a   Gioh Kim   fs/buffer.c: supp...
1028
1029
  __getblk_slow(struct block_device *bdev, sector_t block,
  	     unsigned size, gfp_t gfp)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1030
1031
  {
  	/* Size must be multiple of hard sectorsize */
e1defc4ff   Martin K. Petersen   block: Do away wi...
1032
  	if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1033
1034
1035
1036
  			(size < 512 || size > PAGE_SIZE))) {
  		printk(KERN_ERR "getblk(): invalid block size %d requested
  ",
  					size);
e1defc4ff   Martin K. Petersen   block: Do away wi...
1037
1038
1039
  		printk(KERN_ERR "logical block size: %d
  ",
  					bdev_logical_block_size(bdev));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1040
1041
1042
1043
  
  		dump_stack();
  		return NULL;
  	}
676ce6d5c   Hugh Dickins   block: replace __...
1044
1045
1046
  	for (;;) {
  		struct buffer_head *bh;
  		int ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1047
1048
1049
1050
  
  		bh = __find_get_block(bdev, block, size);
  		if (bh)
  			return bh;
676ce6d5c   Hugh Dickins   block: replace __...
1051

3b5e6454a   Gioh Kim   fs/buffer.c: supp...
1052
  		ret = grow_buffers(bdev, block, size, gfp);
676ce6d5c   Hugh Dickins   block: replace __...
1053
1054
  		if (ret < 0)
  			return NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1055
1056
1057
1058
1059
1060
1061
  	}
  }
  
  /*
   * The relationship between dirty buffers and dirty pages:
   *
   * Whenever a page has any dirty buffers, the page's dirty bit is set, and
ec82e1c1c   Matthew Wilcox   fs: Convert buffe...
1062
   * the page is tagged dirty in the page cache.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
   *
   * At all times, the dirtiness of the buffers represents the dirtiness of
   * subsections of the page.  If the page has buffers, the page dirty bit is
   * merely a hint about the true dirty state.
   *
   * When a page is set dirty in its entirety, all its buffers are marked dirty
   * (if the page has buffers).
   *
   * When a buffer is marked dirty, its page is dirtied, but the page's other
   * buffers are not.
   *
   * Also.  When blockdev buffers are explicitly read with bread(), they
   * individually become uptodate.  But their backing page remains not
   * uptodate - even if all of its buffers are uptodate.  A subsequent
   * block_read_full_page() against that page will discover all the uptodate
   * buffers, will set the page uptodate and will perform no I/O.
   */
  
  /**
   * mark_buffer_dirty - mark a buffer_head as needing writeout
67be2dd1b   Martin Waitz   [PATCH] DocBook: ...
1083
   * @bh: the buffer_head to mark dirty
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1084
   *
ec82e1c1c   Matthew Wilcox   fs: Convert buffe...
1085
1086
1087
   * mark_buffer_dirty() will set the dirty bit against the buffer, then set
   * its backing page dirty, then tag the page as dirty in the page cache
   * and then attach the address_space's inode to its superblock's dirty
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1088
1089
1090
   * inode list.
   *
   * mark_buffer_dirty() is atomic.  It takes bh->b_page->mapping->private_lock,
b93b01631   Matthew Wilcox   page cache: use x...
1091
   * i_pages lock and mapping->host->i_lock.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1092
   */
fc9b52cd8   Harvey Harrison   fs: remove fastca...
1093
  void mark_buffer_dirty(struct buffer_head *bh)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1094
  {
787d2214c   Nick Piggin   fs: introduce som...
1095
  	WARN_ON_ONCE(!buffer_uptodate(bh));
1be62dc19   Linus Torvalds   Be more careful a...
1096

5305cb830   Tejun Heo   block: add block_...
1097
  	trace_block_dirty_buffer(bh);
1be62dc19   Linus Torvalds   Be more careful a...
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
  	/*
  	 * Very *carefully* optimize the it-is-already-dirty case.
  	 *
  	 * Don't let the final "is it dirty" escape to before we
  	 * perhaps modified the buffer.
  	 */
  	if (buffer_dirty(bh)) {
  		smp_mb();
  		if (buffer_dirty(bh))
  			return;
  	}
a8e7d49aa   Linus Torvalds   Fix race in creat...
1109
1110
  	if (!test_set_buffer_dirty(bh)) {
  		struct page *page = bh->b_page;
c4843a759   Greg Thelen   memcg: add per cg...
1111
  		struct address_space *mapping = NULL;
c4843a759   Greg Thelen   memcg: add per cg...
1112

62cccb8c8   Johannes Weiner   mm: simplify lock...
1113
  		lock_page_memcg(page);
8e9d78ede   Linus Torvalds   Re-introduce page...
1114
  		if (!TestSetPageDirty(page)) {
c4843a759   Greg Thelen   memcg: add per cg...
1115
  			mapping = page_mapping(page);
8e9d78ede   Linus Torvalds   Re-introduce page...
1116
  			if (mapping)
62cccb8c8   Johannes Weiner   mm: simplify lock...
1117
  				__set_page_dirty(page, mapping, 0);
8e9d78ede   Linus Torvalds   Re-introduce page...
1118
  		}
62cccb8c8   Johannes Weiner   mm: simplify lock...
1119
  		unlock_page_memcg(page);
c4843a759   Greg Thelen   memcg: add per cg...
1120
1121
  		if (mapping)
  			__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
a8e7d49aa   Linus Torvalds   Fix race in creat...
1122
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1123
  }
1fe72eaa0   H Hartley Sweeten   fs/buffer.c: clea...
1124
  EXPORT_SYMBOL(mark_buffer_dirty);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1125

87354e5de   Jeff Layton   buffer: set error...
1126
1127
  void mark_buffer_write_io_error(struct buffer_head *bh)
  {
485e9605c   Jeff Layton   fs/buffer.c: reco...
1128
  	struct super_block *sb;
87354e5de   Jeff Layton   buffer: set error...
1129
1130
1131
1132
1133
1134
  	set_buffer_write_io_error(bh);
  	/* FIXME: do we need to set this in both places? */
  	if (bh->b_page && bh->b_page->mapping)
  		mapping_set_error(bh->b_page->mapping, -EIO);
  	if (bh->b_assoc_map)
  		mapping_set_error(bh->b_assoc_map, -EIO);
485e9605c   Jeff Layton   fs/buffer.c: reco...
1135
1136
1137
1138
1139
  	rcu_read_lock();
  	sb = READ_ONCE(bh->b_bdev->bd_super);
  	if (sb)
  		errseq_set(&sb->s_wb_err, -EIO);
  	rcu_read_unlock();
87354e5de   Jeff Layton   buffer: set error...
1140
1141
  }
  EXPORT_SYMBOL(mark_buffer_write_io_error);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
  /*
   * Decrement a buffer_head's reference count.  If all buffers against a page
   * have zero reference count, are clean and unlocked, and if the page is clean
   * and unlocked then try_to_free_buffers() may strip the buffers from the page
   * in preparation for freeing it (sometimes, rarely, buffers are removed from
   * a page but it ends up not being freed, and buffers may later be reattached).
   */
  void __brelse(struct buffer_head * buf)
  {
  	if (atomic_read(&buf->b_count)) {
  		put_bh(buf);
  		return;
  	}
5c752ad9f   Arjan van de Ven   Use WARN() in fs/
1155
1156
  	WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer
  ");
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1157
  }
1fe72eaa0   H Hartley Sweeten   fs/buffer.c: clea...
1158
  EXPORT_SYMBOL(__brelse);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1159
1160
1161
1162
1163
1164
1165
1166
  
  /*
   * bforget() is like brelse(), except it discards any
   * potentially dirty data.
   */
  void __bforget(struct buffer_head *bh)
  {
  	clear_buffer_dirty(bh);
535ee2fbf   Jan Kara   buffer_head: fix ...
1167
  	if (bh->b_assoc_map) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1168
1169
1170
1171
  		struct address_space *buffer_mapping = bh->b_page->mapping;
  
  		spin_lock(&buffer_mapping->private_lock);
  		list_del_init(&bh->b_assoc_buffers);
58ff407be   Jan Kara   [PATCH] Fix IO er...
1172
  		bh->b_assoc_map = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1173
1174
1175
1176
  		spin_unlock(&buffer_mapping->private_lock);
  	}
  	__brelse(bh);
  }
1fe72eaa0   H Hartley Sweeten   fs/buffer.c: clea...
1177
  EXPORT_SYMBOL(__bforget);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
  
  static struct buffer_head *__bread_slow(struct buffer_head *bh)
  {
  	lock_buffer(bh);
  	if (buffer_uptodate(bh)) {
  		unlock_buffer(bh);
  		return bh;
  	} else {
  		get_bh(bh);
  		bh->b_end_io = end_buffer_read_sync;
2a222ca99   Mike Christie   fs: have submit_b...
1188
  		submit_bh(REQ_OP_READ, 0, bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
  		wait_on_buffer(bh);
  		if (buffer_uptodate(bh))
  			return bh;
  	}
  	brelse(bh);
  	return NULL;
  }
  
  /*
   * Per-cpu buffer LRU implementation.  To reduce the cost of __find_get_block().
   * The bhs[] array is sorted - newest buffer is at bhs[0].  Buffers have their
   * refcount elevated by one when they're in an LRU.  A buffer can only appear
   * once in a particular CPU's LRU.  A single buffer can be present in multiple
   * CPU's LRUs at the same time.
   *
   * This is a transparent caching front-end to sb_bread(), sb_getblk() and
   * sb_find_get_block().
   *
   * The LRUs themselves only need locking against invalidate_bh_lrus.  We use
   * a local interrupt disable for that.
   */
86cf78d73   Sebastien Buisson   fs/buffer.c: incr...
1210
  /* Number of buffer_head slots in each bh_lru (length of its bhs[] array). */
  #define BH_LRU_SIZE	16
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
  
  /* One LRU: a fixed array of BH_LRU_SIZE buffer_head pointers (NULL = empty). */
  struct bh_lru {
  	struct buffer_head *bhs[BH_LRU_SIZE];
  };
  
  /* One bh_lru instance per CPU, initially all slots NULL. */
  static DEFINE_PER_CPU(struct bh_lru, bh_lrus) = {{ NULL }};
  
  /*
   * "Locking" of the local LRU: with CONFIG_SMP local interrupts are disabled
   * and re-enabled; otherwise only preemption is disabled and re-enabled.
   */
  #ifdef CONFIG_SMP
  #define bh_lru_lock()	local_irq_disable()
  #define bh_lru_unlock()	local_irq_enable()
  #else
  #define bh_lru_lock()	preempt_disable()
  #define bh_lru_unlock()	preempt_enable()
  #endif
  
  /*
   * BUG if called with interrupts disabled.  The check is only compiled in
   * when irqs_disabled is available as a preprocessor macro.
   */
  static inline void check_irqs_on(void)
  {
  #ifdef irqs_disabled
  	BUG_ON(irqs_disabled());
  #endif
  }
  
  /*
241f01fbe   Eric Biggers   fs/buffer.c: make...
1234
1235
1236
   * Install a buffer_head into this cpu's LRU.  If not already in the LRU, it is
   * inserted at the front, and the buffer_head at the back if any is evicted.
   * Or, if already in the LRU it is moved to the front.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1237
1238
1239
   */
  static void bh_lru_install(struct buffer_head *bh)
  {
241f01fbe   Eric Biggers   fs/buffer.c: make...
1240
1241
1242
  	struct buffer_head *evictee = bh;
  	struct bh_lru *b;
  	int i;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1243
1244
1245
  
  	check_irqs_on();
  	bh_lru_lock();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1246

241f01fbe   Eric Biggers   fs/buffer.c: make...
1247
1248
1249
1250
1251
1252
  	b = this_cpu_ptr(&bh_lrus);
  	for (i = 0; i < BH_LRU_SIZE; i++) {
  		swap(evictee, b->bhs[i]);
  		if (evictee == bh) {
  			bh_lru_unlock();
  			return;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1253
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1254
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1255

241f01fbe   Eric Biggers   fs/buffer.c: make...
1256
1257
1258
  	get_bh(bh);
  	bh_lru_unlock();
  	brelse(evictee);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1259
1260
1261
1262
1263
  }
  
  /*
   * Look up the bh in this cpu's LRU.  If it's there, move it to the head.
   */
858119e15   Arjan van de Ven   [PATCH] Unlinline...
1264
  static struct buffer_head *
3991d3bd1   Tomasz Kvarsin   [PATCH] warning f...
1265
  lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1266
1267
  {
  	struct buffer_head *ret = NULL;
3991d3bd1   Tomasz Kvarsin   [PATCH] warning f...
1268
  	unsigned int i;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1269
1270
1271
  
  	check_irqs_on();
  	bh_lru_lock();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1272
  	for (i = 0; i < BH_LRU_SIZE; i++) {
c7b92516a   Christoph Lameter   fs: Use this_cpu_...
1273
  		struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1274

9470dd5d3   Zach Brown   fs: check bh bloc...
1275
1276
  		if (bh && bh->b_blocknr == block && bh->b_bdev == bdev &&
  		    bh->b_size == size) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1277
1278
  			if (i) {
  				while (i) {
c7b92516a   Christoph Lameter   fs: Use this_cpu_...
1279
1280
  					__this_cpu_write(bh_lrus.bhs[i],
  						__this_cpu_read(bh_lrus.bhs[i - 1]));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1281
1282
  					i--;
  				}
c7b92516a   Christoph Lameter   fs: Use this_cpu_...
1283
  				__this_cpu_write(bh_lrus.bhs[0], bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
  			}
  			get_bh(bh);
  			ret = bh;
  			break;
  		}
  	}
  	bh_lru_unlock();
  	return ret;
  }
  
  /*
   * Perform a pagecache lookup for the matching buffer.  If it's there, refresh
   * it in the LRU and mark it as accessed.  If it is not present then return
   * NULL
   */
  struct buffer_head *
3991d3bd1   Tomasz Kvarsin   [PATCH] warning f...
1300
  __find_get_block(struct block_device *bdev, sector_t block, unsigned size)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1301
1302
1303
1304
  {
  	struct buffer_head *bh = lookup_bh_lru(bdev, block, size);
  
  	if (bh == NULL) {
2457aec63   Mel Gorman   mm: non-atomicall...
1305
  		/* __find_get_block_slow will mark the page accessed */
385fd4c59   Coywolf Qi Hunt   [PATCH] __find_ge...
1306
  		bh = __find_get_block_slow(bdev, block);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1307
1308
  		if (bh)
  			bh_lru_install(bh);
2457aec63   Mel Gorman   mm: non-atomicall...
1309
  	} else
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1310
  		touch_buffer(bh);
2457aec63   Mel Gorman   mm: non-atomicall...
1311

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1312
1313
1314
1315
1316
  	return bh;
  }
  EXPORT_SYMBOL(__find_get_block);
  
  /*
3b5e6454a   Gioh Kim   fs/buffer.c: supp...
1317
   * __getblk_gfp() will locate (and, if necessary, create) the buffer_head
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1318
1319
1320
   * which corresponds to the passed block_device, block and size. The
   * returned buffer has its reference count incremented.
   *
3b5e6454a   Gioh Kim   fs/buffer.c: supp...
1321
1322
   * __getblk_gfp() will lock up the machine if grow_dev_page's
   * try_to_free_buffers() attempt is failing.  FIXME, perhaps?
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1323
1324
   */
  struct buffer_head *
3b5e6454a   Gioh Kim   fs/buffer.c: supp...
1325
1326
  __getblk_gfp(struct block_device *bdev, sector_t block,
  	     unsigned size, gfp_t gfp)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1327
1328
1329
1330
1331
  {
  	struct buffer_head *bh = __find_get_block(bdev, block, size);
  
  	might_sleep();
  	if (bh == NULL)
3b5e6454a   Gioh Kim   fs/buffer.c: supp...
1332
  		bh = __getblk_slow(bdev, block, size, gfp);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1333
1334
  	return bh;
  }
3b5e6454a   Gioh Kim   fs/buffer.c: supp...
1335
  EXPORT_SYMBOL(__getblk_gfp);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1336
1337
1338
1339
  
  /*
   * Do async read-ahead on a buffer.
   */
3991d3bd1   Tomasz Kvarsin   [PATCH] warning f...
1340
  /*
   * Look up (or create) the buffer for @block of @size bytes on @bdev via
   * __getblk(), submit a single REQ_RAHEAD read for it and release the
   * buffer again.  Nothing is returned; if no buffer is obtained the
   * read-ahead is silently skipped.
   */
  void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1341
1342
  {
  	struct buffer_head *bh = __getblk(bdev, block, size);
a3e713b5f   Andrew Morton   [PATCH] __bread o...
1343
  	/* A NULL bh is tolerated: there is simply nothing to read ahead. */
  	if (likely(bh)) {
70246286e   Christoph Hellwig   block: get rid of...
1344
  		/* One-element array: just this buffer. */
  		ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, &bh);
a3e713b5f   Andrew Morton   [PATCH] __bread o...
1345
1346
  		/* We do not wait for the read here; just release the bh. */
  		brelse(bh);
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1347
1348
  }
  EXPORT_SYMBOL(__breadahead);
d87f63925   Roman Gushchin   ext4: use non-mov...
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
  /*
   * Read-ahead with caller-supplied allocation flags: obtain the buffer for
   * @block through __getblk_gfp() using @gfp, submit one REQ_RAHEAD read for
   * it and release the buffer.  Does nothing if no buffer is obtained.
   */
  void __breadahead_gfp(struct block_device *bdev, sector_t block, unsigned size,
  		      gfp_t gfp)
  {
  	struct buffer_head *ra_bh;

  	ra_bh = __getblk_gfp(bdev, block, size, gfp);
  	if (!likely(ra_bh))
  		return;

  	ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, &ra_bh);
  	brelse(ra_bh);
  }
  EXPORT_SYMBOL(__breadahead_gfp);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1359
  /**
3b5e6454a   Gioh Kim   fs/buffer.c: supp...
1360
   *  __bread_gfp() - reads a specified block and returns the bh
67be2dd1b   Martin Waitz   [PATCH] DocBook: ...
1361
   *  @bdev: the block_device to read from
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1362
1363
   *  @block: number of block
   *  @size: size (in bytes) to read
3b5e6454a   Gioh Kim   fs/buffer.c: supp...
1364
1365
   *  @gfp: page allocation flag
   *
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1366
   *  Reads a specified block, and returns buffer head that contains it.
3b5e6454a   Gioh Kim   fs/buffer.c: supp...
1367
1368
   *  If @gfp is zero, the page cache page is allocated from a non-movable
   *  area so that it does not prevent page migration.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1369
1370
1371
   *  It returns NULL if the block was unreadable.
   */
  struct buffer_head *
3b5e6454a   Gioh Kim   fs/buffer.c: supp...
1372
1373
  __bread_gfp(struct block_device *bdev, sector_t block,
  		   unsigned size, gfp_t gfp)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1374
  {
3b5e6454a   Gioh Kim   fs/buffer.c: supp...
1375
  	struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1376

a3e713b5f   Andrew Morton   [PATCH] __bread o...
1377
  	if (likely(bh) && !buffer_uptodate(bh))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1378
1379
1380
  		bh = __bread_slow(bh);
  	return bh;
  }
3b5e6454a   Gioh Kim   fs/buffer.c: supp...
1381
  EXPORT_SYMBOL(__bread_gfp);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
  
  /*
   * Per-cpu worker run by invalidate_bh_lrus(): brelse() every entry of
   * this cpu's bh_lrus and reset each slot to NULL.  @arg is unused.
   *
   * invalidate_bh_lrus() is called rarely - but not only at unmount.
   * This doesn't race because it runs in each cpu either in irq
   * or with preempt disabled.
   */
  static void invalidate_bh_lru(void *arg)
  {
  	struct bh_lru *lru = &get_cpu_var(bh_lrus);
  	int slot = 0;

  	while (slot < BH_LRU_SIZE) {
  		brelse(lru->bhs[slot]);
  		lru->bhs[slot] = NULL;
  		slot++;
  	}
  	put_cpu_var(bh_lrus);
  }
42be35d03   Gilad Ben-Yossef   fs: only send IPI...
1399
1400
1401
1402
1403
  
  /*
   * Condition callback passed to on_each_cpu_cond() by invalidate_bh_lrus().
   * Returns true if the bh_lrus instance of @cpu has at least one non-NULL
   * slot, false otherwise.  @dummy is not used.
   */
  static bool has_bh_in_lru(int cpu, void *dummy)
  {
  	struct bh_lru *b = per_cpu_ptr(&bh_lrus, cpu);
  	int i;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1404
  	
42be35d03   Gilad Ben-Yossef   fs: only send IPI...
1405
1406
  	/* One populated slot is enough; stop scanning at the first hit. */
  	for (i = 0; i < BH_LRU_SIZE; i++) {
  		if (b->bhs[i])
1d7066797   Saurav Girepunje   fs/buffer.c: fix ...
1407
  			return true;
42be35d03   Gilad Ben-Yossef   fs: only send IPI...
1408
  	}
1d7066797   Saurav Girepunje   fs/buffer.c: fix ...
1409
  	return false;
42be35d03   Gilad Ben-Yossef   fs: only send IPI...
1410
  }
f9a14399a   Peter Zijlstra   mm: optimize kill...
1411
  /*
   * Empty the per-cpu buffer_head LRUs: run invalidate_bh_lru() on each cpu
   * for which has_bh_in_lru() returns true.
   */
  void invalidate_bh_lrus(void)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1412
  {
cb923159b   Sebastian Andrzej Siewior   smp: Remove alloc...
1413
  	/* No info pointer is passed; final argument is the 'wait' flag. */
  	on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1414
  }
9db5579be   Nick Piggin   rewrite rd
1415
  EXPORT_SYMBOL_GPL(invalidate_bh_lrus);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1416
1417
1418
1419
1420
  
  /*
   * Associate @bh with @page: store the page in bh->b_page and point
   * bh->b_data at byte @offset inside it.  @offset must be smaller than
   * PAGE_SIZE (BUG otherwise).  For a highmem page b_data is set to the bare
   * offset rather than to a dereferenceable address.
   */
  void set_bh_page(struct buffer_head *bh,
  		struct page *page, unsigned long offset)
  {
  	bh->b_page = page;
e827f9235   Eric Sesterhenn   BUG_ON() Conversi...
1421
  	BUG_ON(offset >= PAGE_SIZE);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
  	if (PageHighMem(page))
  		/*
  		 * This catches illegal uses and preserves the offset:
  		 */
  		bh->b_data = (char *)(0 + offset);
  	else
  		bh->b_data = page_address(page) + offset;
  }
  EXPORT_SYMBOL(set_bh_page);
  
  /*
   * Called when truncating a buffer on a page completely.
   */
e7470ee89   Mel Gorman   fs: buffer: do no...
1435
1436
1437
1438
1439
  
  /* Bits that are cleared during an invalidate */
  #define BUFFER_FLAGS_DISCARD \
  	(1 << BH_Mapped | 1 << BH_New | 1 << BH_Req | \
  	 1 << BH_Delay | 1 << BH_Unwritten)
858119e15   Arjan van de Ven   [PATCH] Unlinline...
1440
  /*
   * Invalidate a single buffer.  With the buffer locked: clear its dirty
   * bit, drop its block device pointer and atomically clear every bit in
   * BUFFER_FLAGS_DISCARD from bh->b_state.
   */
  static void discard_buffer(struct buffer_head * bh)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1441
  {
e7470ee89   Mel Gorman   fs: buffer: do no...
1442
  	unsigned long b_state, b_state_old;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1443
1444
1445
  	lock_buffer(bh);
  	clear_buffer_dirty(bh);
  	bh->b_bdev = NULL;
e7470ee89   Mel Gorman   fs: buffer: do no...
1446
1447
1448
1449
1450
1451
1452
1453
  	/*
  	 * Clear all discard flags with one cmpxchg.  If b_state changed
  	 * since it was sampled, retry from the value cmpxchg returned so
  	 * that updates to the other bits are not overwritten.
  	 */
  	b_state = bh->b_state;
  	for (;;) {
  		b_state_old = cmpxchg(&bh->b_state, b_state,
  				      (b_state & ~BUFFER_FLAGS_DISCARD));
  		if (b_state_old == b_state)
  			break;
  		b_state = b_state_old;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1454
1455
1456
1457
  	unlock_buffer(bh);
  }
  
  /**
814e1d25a   Wang Sheng-Hui   cleanup: vfs: sma...
1458
   * block_invalidatepage - invalidate part or all of a buffer-backed page
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1459
1460
   *
   * @page: the page which is affected
d47992f86   Lukas Czerner   mm: change invali...
1461
1462
   * @offset: start of the range to invalidate
   * @length: length of the range to invalidate
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1463
1464
   *
   * block_invalidatepage() is called when all or part of the page has become
814e1d25a   Wang Sheng-Hui   cleanup: vfs: sma...
1465
   * invalidated by a truncate operation.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1466
1467
1468
1469
1470
1471
1472
   *
   * block_invalidatepage() does not have to release all buffers, but it must
   * ensure that no dirty buffer is left outside @offset and that no I/O
   * is underway against any of the blocks which are outside the truncation
   * point.  Because the caller is about to free (and possibly reuse) those
   * blocks on-disk.
   */
d47992f86   Lukas Czerner   mm: change invali...
1473
1474
  void block_invalidatepage(struct page *page, unsigned int offset,
  			  unsigned int length)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1475
1476
1477
  {
  	struct buffer_head *head, *bh, *next;
  	unsigned int curr_off = 0;
d47992f86   Lukas Czerner   mm: change invali...
1478
  	unsigned int stop = length + offset;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1479
1480
1481
1482
  
  	BUG_ON(!PageLocked(page));
  	if (!page_has_buffers(page))
  		goto out;
d47992f86   Lukas Czerner   mm: change invali...
1483
1484
1485
  	/*
  	 * Check for overflow
  	 */
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
1486
  	BUG_ON(stop > PAGE_SIZE || stop < length);
d47992f86   Lukas Czerner   mm: change invali...
1487

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1488
1489
1490
1491
1492
1493
1494
  	head = page_buffers(page);
  	bh = head;
  	do {
  		unsigned int next_off = curr_off + bh->b_size;
  		next = bh->b_this_page;
  
  		/*
d47992f86   Lukas Czerner   mm: change invali...
1495
1496
1497
1498
1499
1500
  		 * Are we still fully in range ?
  		 */
  		if (next_off > stop)
  			goto out;
  
  		/*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
  		 * is this block fully invalidated?
  		 */
  		if (offset <= curr_off)
  			discard_buffer(bh);
  		curr_off = next_off;
  		bh = next;
  	} while (bh != head);
  
  	/*
  	 * We release buffers only if the entire page is being invalidated.
  	 * The get_block cached value has been unconditionally invalidated,
  	 * so real IO is not possible anymore.
  	 */
3172485f4   Jeff Moyer   block_invalidatep...
1514
  	if (length == PAGE_SIZE)
2ff28e22b   NeilBrown   [PATCH] Make addr...
1515
  		try_to_release_page(page, 0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1516
  out:
2ff28e22b   NeilBrown   [PATCH] Make addr...
1517
  	return;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1518
1519
  }
  EXPORT_SYMBOL(block_invalidatepage);
d47992f86   Lukas Czerner   mm: change invali...
1520

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1521
1522
1523
1524
1525
1526
1527
1528
1529
  /*
   * We attach and possibly dirty the buffers atomically wrt
   * __set_page_dirty_buffers() via private_lock.  try_to_free_buffers
   * is already excluded via the page lock.
   */
  void create_empty_buffers(struct page *page,
  			unsigned long blocksize, unsigned long b_state)
  {
  	struct buffer_head *bh, *head, *tail;
640ab98fb   Jens Axboe   buffer: have allo...
1530
  	/*
  	 * NOTE(review): the result is used without a NULL check, so the
  	 * 'true' argument presumably makes the allocation retry until it
  	 * succeeds - confirm against alloc_page_buffers().
  	 */
  	head = alloc_page_buffers(page, blocksize, true);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
  	/*
  	 * The chain is walked until b_this_page is NULL: OR the requested
  	 * state bits into every buffer and remember the last one so the
  	 * list can be closed into a ring.
  	 */
  	bh = head;
  	do {
  		bh->b_state |= b_state;
  		tail = bh;
  		bh = bh->b_this_page;
  	} while (bh);
  	tail->b_this_page = head;
  
  	spin_lock(&page->mapping->private_lock);
  	/* Propagate the page's dirty/uptodate state to each new buffer. */
  	if (PageUptodate(page) || PageDirty(page)) {
  		bh = head;
  		do {
  			if (PageDirty(page))
  				set_buffer_dirty(bh);
  			if (PageUptodate(page))
  				set_buffer_uptodate(bh);
  			bh = bh->b_this_page;
  		} while (bh != head);
  	}
45dcfc273   Guoqing Jiang   fs/buffer.c: use ...
1550
  	/* Attach the ring to the page while private_lock is still held. */
  	attach_page_private(page, head);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1551
1552
1553
  	spin_unlock(&page->mapping->private_lock);
  }
  EXPORT_SYMBOL(create_empty_buffers);
29f3ad7d8   Jan Kara   fs: Provide funct...
1554
1555
1556
1557
1558
  /**
   * clean_bdev_aliases: clean a range of buffers in block device
   * @bdev: Block device to clean buffers in
   * @block: Start of a range of blocks to clean
   * @len: Number of blocks to clean
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1559
   *
29f3ad7d8   Jan Kara   fs: Provide funct...
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
   * We are taking a range of blocks for data and we don't want writeback of any
   * buffer-cache aliases starting from return from this function and until the
   * moment when something will explicitly mark the buffer dirty (hopefully that
   * will not happen until we will free that block ;-) We don't even need to mark
   * it not-uptodate - nobody can expect anything from a newly allocated buffer
   * anyway. We used to use unmap_buffer() for such invalidation, but that was
   * wrong. We definitely don't want to mark the alias unmapped, for example - it
   * would confuse anyone who might pick it with bread() afterwards...
   *
   * Also..  Note that bforget() doesn't lock the buffer.  So there can be
   * writeout I/O going on against recently-freed buffers.  We don't wait on that
   * I/O in bforget() - it's more efficient to wait on the I/O only if we really
   * need to.  That happens here.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1573
   */
29f3ad7d8   Jan Kara   fs: Provide funct...
1574
  void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1575
  {
29f3ad7d8   Jan Kara   fs: Provide funct...
1576
1577
1578
1579
1580
  	struct inode *bd_inode = bdev->bd_inode;
  	struct address_space *bd_mapping = bd_inode->i_mapping;
  	struct pagevec pvec;
  	pgoff_t index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
  	pgoff_t end;
c10f778dd   Jan Kara   fs: fix performan...
1581
  	int i, count;
29f3ad7d8   Jan Kara   fs: Provide funct...
1582
1583
  	struct buffer_head *bh;
  	struct buffer_head *head;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1584

29f3ad7d8   Jan Kara   fs: Provide funct...
1585
  	end = (block + len - 1) >> (PAGE_SHIFT - bd_inode->i_blkbits);
866798201   Mel Gorman   mm, pagevec: remo...
1586
  	pagevec_init(&pvec);
397162ffa   Jan Kara   mm: remove nr_pag...
1587
  	while (pagevec_lookup_range(&pvec, bd_mapping, &index, end)) {
c10f778dd   Jan Kara   fs: fix performan...
1588
1589
  		count = pagevec_count(&pvec);
  		for (i = 0; i < count; i++) {
29f3ad7d8   Jan Kara   fs: Provide funct...
1590
  			struct page *page = pvec.pages[i];
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1591

29f3ad7d8   Jan Kara   fs: Provide funct...
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
  			if (!page_has_buffers(page))
  				continue;
  			/*
  			 * We use page lock instead of bd_mapping->private_lock
  			 * to pin buffers here since we can afford to sleep and
  			 * it scales better than a global spinlock.
  			 */
  			lock_page(page);
  			/* Recheck when the page is locked which pins bhs */
  			if (!page_has_buffers(page))
  				goto unlock_page;
  			head = page_buffers(page);
  			bh = head;
  			do {
6c006a9d9   Chandan Rajendra   clean_bdev_aliase...
1606
  				if (!buffer_mapped(bh) || (bh->b_blocknr < block))
29f3ad7d8   Jan Kara   fs: Provide funct...
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
  					goto next;
  				if (bh->b_blocknr >= block + len)
  					break;
  				clear_buffer_dirty(bh);
  				wait_on_buffer(bh);
  				clear_buffer_req(bh);
  next:
  				bh = bh->b_this_page;
  			} while (bh != head);
  unlock_page:
  			unlock_page(page);
  		}
  		pagevec_release(&pvec);
  		cond_resched();
c10f778dd   Jan Kara   fs: fix performan...
1621
1622
1623
  		/* End of range already reached? */
  		if (index > end || !index)
  			break;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1624
1625
  	}
  }
29f3ad7d8   Jan Kara   fs: Provide funct...
1626
  EXPORT_SYMBOL(clean_bdev_aliases);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1627
1628
  
  /*
45bce8f3e   Linus Torvalds   fs/buffer.c: make...
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
   * Size is a power-of-two in the range 512..PAGE_SIZE,
   * and the case we care about most is PAGE_SIZE.
   *
   * So this *could* possibly be written with those
   * constraints in mind (relevant mostly if some
   * architecture has a slow bit-scan instruction)
   */
  static inline int block_size_bits(unsigned int blocksize)
  {
  	/* log2 of the block size, used by callers as a shift count */
  	int bits = ilog2(blocksize);

  	return bits;
  }
  
  /*
   * Return the buffer_head list of @page, which must be locked (BUG
   * otherwise).  If the page has no buffers yet, empty ones of the inode's
   * block size (1 << i_blkbits) are created first, with @b_state passed on
   * to create_empty_buffers() as their initial state bits.
   */
  static struct buffer_head *create_page_buffers(struct page *page, struct inode *inode, unsigned int b_state)
  {
  	BUG_ON(!PageLocked(page));
  
  	if (!page_has_buffers(page))
6aa7de059   Mark Rutland   locking/atomics: ...
1646
1647
  		create_empty_buffers(page, 1 << READ_ONCE(inode->i_blkbits),
  				     b_state);
45bce8f3e   Linus Torvalds   fs/buffer.c: make...
1648
1649
1650
1651
  	return page_buffers(page);
  }
  
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
   * NOTE! All mapped/uptodate combinations are valid:
   *
   *	Mapped	Uptodate	Meaning
   *
   *	No	No		"unknown" - must do get_block()
   *	No	Yes		"hole" - zero-filled
   *	Yes	No		"allocated" - allocated on disk, not read in
   *	Yes	Yes		"valid" - allocated and up-to-date in memory.
   *
   * "Dirty" is valid only with the last case (mapped+uptodate).
   */
  
  /*
   * While block_write_full_page is writing back the dirty buffers under
   * the page lock, whoever dirtied the buffers may decide to clean them
   * again at any time.  We handle that by only looking at the buffer
   * state inside lock_buffer().
   *
   * If block_write_full_page() is called for regular writeback
   * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a
   * locked buffer.   This only can happen if someone has written the buffer
   * directly, with submit_bh().  At the address_space level PageWriteback
   * prevents this contention from occurring.
6e34eeddf   Theodore Ts'o   block_write_full_...
1675
1676
   *
   * If block_write_full_page() is called with wbc->sync_mode ==
70fd76140   Christoph Hellwig   block,fs: use REQ...
1677
   * WB_SYNC_ALL, the writes are posted using REQ_SYNC; this
721a9602e   Jens Axboe   block: kill off R...
1678
   * causes the writes to be flagged as synchronous writes.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1679
   */
b4bba3890   Benjamin Marzinski   fs: export __bloc...
1680
  int __block_write_full_page(struct inode *inode, struct page *page,
35c80d5f4   Chris Mason   Add block_write_f...
1681
1682
  			get_block_t *get_block, struct writeback_control *wbc,
  			bh_end_io_t *handler)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1683
1684
1685
1686
  {
  	int err;
  	sector_t block;
  	sector_t last_block;
f0fbd5fc0   Andrew Morton   [PATCH] __block_w...
1687
  	struct buffer_head *bh, *head;
45bce8f3e   Linus Torvalds   fs/buffer.c: make...
1688
  	unsigned int blocksize, bbits;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1689
  	int nr_underway = 0;
7637241e6   Jens Axboe   writeback: add wb...
1690
  	int write_flags = wbc_to_write_flags(wbc);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1691

45bce8f3e   Linus Torvalds   fs/buffer.c: make...
1692
  	head = create_page_buffers(page, inode,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1693
  					(1 << BH_Dirty)|(1 << BH_Uptodate));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
  
  	/*
  	 * Be very careful.  We have no exclusion from __set_page_dirty_buffers
  	 * here, and the (potentially unmapped) buffers may become dirty at
  	 * any time.  If a buffer becomes dirty here after we've inspected it
  	 * then we just miss that fact, and the page stays dirty.
  	 *
  	 * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
  	 * handle that here by just cleaning them.
  	 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1704
  	bh = head;
45bce8f3e   Linus Torvalds   fs/buffer.c: make...
1705
1706
  	blocksize = bh->b_size;
  	bbits = block_size_bits(blocksize);
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
1707
  	block = (sector_t)page->index << (PAGE_SHIFT - bbits);
45bce8f3e   Linus Torvalds   fs/buffer.c: make...
1708
  	last_block = (i_size_read(inode) - 1) >> bbits;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
  
  	/*
  	 * Get all the dirty buffers mapped to disk addresses and
  	 * handle any aliases from the underlying blockdev's mapping.
  	 */
  	do {
  		if (block > last_block) {
  			/*
  			 * mapped buffers outside i_size will occur, because
  			 * this page can be outside i_size when there is a
  			 * truncate in progress.
  			 */
  			/*
  			 * The buffer was zeroed by block_write_full_page()
  			 */
  			clear_buffer_dirty(bh);
  			set_buffer_uptodate(bh);
29a814d2e   Alex Tomas   vfs: add hooks fo...
1726
1727
  		} else if ((!buffer_mapped(bh) || buffer_delay(bh)) &&
  			   buffer_dirty(bh)) {
b0cf2321c   Badari Pulavarty   [PATCH] pass b_si...
1728
  			WARN_ON(bh->b_size != blocksize);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1729
1730
1731
  			err = get_block(inode, block, bh, 1);
  			if (err)
  				goto recover;
29a814d2e   Alex Tomas   vfs: add hooks fo...
1732
  			clear_buffer_delay(bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1733
1734
1735
  			if (buffer_new(bh)) {
  				/* blockdev mappings never come here */
  				clear_buffer_new(bh);
e64855c6c   Jan Kara   fs: Add helper to...
1736
  				clean_bdev_bh_alias(bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1737
1738
1739
1740
1741
1742
1743
  			}
  		}
  		bh = bh->b_this_page;
  		block++;
  	} while (bh != head);
  
  	do {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1744
1745
1746
1747
1748
  		if (!buffer_mapped(bh))
  			continue;
  		/*
  		 * If it's a fully non-blocking write attempt and we cannot
  		 * lock the buffer then redirty the page.  Note that this can
5b0830cb9   Jens Axboe   writeback: get ri...
1749
1750
1751
  		 * potentially cause a busy-wait loop from writeback threads
  		 * and kswapd activity, but those code paths have their own
  		 * higher-level throttling.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1752
  		 */
1b430beee   Wu Fengguang   writeback: remove...
1753
  		if (wbc->sync_mode != WB_SYNC_NONE) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1754
  			lock_buffer(bh);
ca5de404f   Nick Piggin   fs: rename buffer...
1755
  		} else if (!trylock_buffer(bh)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1756
1757
1758
1759
  			redirty_page_for_writepage(wbc, page);
  			continue;
  		}
  		if (test_clear_buffer_dirty(bh)) {
35c80d5f4   Chris Mason   Add block_write_f...
1760
  			mark_buffer_async_write_endio(bh, handler);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
  		} else {
  			unlock_buffer(bh);
  		}
  	} while ((bh = bh->b_this_page) != head);
  
  	/*
  	 * The page and its buffers are protected by PageWriteback(), so we can
  	 * drop the bh refcounts early.
  	 */
  	BUG_ON(PageWriteback(page));
  	set_page_writeback(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1772
1773
1774
1775
  
  	do {
  		struct buffer_head *next = bh->b_this_page;
  		if (buffer_async_write(bh)) {
8e8f92988   Jens Axboe   fs: add support f...
1776
1777
  			submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
  					inode->i_write_hint, wbc);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1778
1779
  			nr_underway++;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1780
1781
  		bh = next;
  	} while (bh != head);
05937baae   Andrew Morton   [PATCH] __block_w...
1782
  	unlock_page(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1783
1784
1785
1786
1787
1788
1789
1790
1791
  
  	err = 0;
  done:
  	if (nr_underway == 0) {
  		/*
  		 * The page was marked dirty, but the buffers were
  		 * clean.  Someone wrote them back by hand with
  		 * ll_rw_block/submit_bh.  A rare case.
  		 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1792
  		end_page_writeback(page);
3d67f2d7c   Nick Piggin   fs: buffer don't ...
1793

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1794
1795
1796
1797
  		/*
  		 * The page and buffer_heads can be released at any time from
  		 * here on.
  		 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
  	}
  	return err;
  
  recover:
  	/*
  	 * ENOSPC, or some other error.  We may already have added some
  	 * blocks to the file, so we need to write these out to avoid
  	 * exposing stale data.
  	 * The page is currently locked and not marked for writeback
  	 */
  	bh = head;
  	/* Recovery: lock and submit the mapped buffers */
  	do {
29a814d2e   Alex Tomas   vfs: add hooks fo...
1811
1812
  		if (buffer_mapped(bh) && buffer_dirty(bh) &&
  		    !buffer_delay(bh)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1813
  			lock_buffer(bh);
35c80d5f4   Chris Mason   Add block_write_f...
1814
  			mark_buffer_async_write_endio(bh, handler);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
  		} else {
  			/*
  			 * The buffer may have been set dirty during
  			 * attachment to a dirty page.
  			 */
  			clear_buffer_dirty(bh);
  		}
  	} while ((bh = bh->b_this_page) != head);
  	SetPageError(page);
  	BUG_ON(PageWriteback(page));
7e4c3690b   Andrew Morton   block_write_full_...
1825
  	mapping_set_error(page->mapping, err);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1826
  	set_page_writeback(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1827
1828
1829
1830
  	do {
  		struct buffer_head *next = bh->b_this_page;
  		if (buffer_async_write(bh)) {
  			clear_buffer_dirty(bh);
8e8f92988   Jens Axboe   fs: add support f...
1831
1832
  			submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
  					inode->i_write_hint, wbc);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1833
1834
  			nr_underway++;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1835
1836
  		bh = next;
  	} while (bh != head);
ffda9d302   Nick Piggin   [PATCH] fs: fix _...
1837
  	unlock_page(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1838
1839
  	goto done;
  }
b4bba3890   Benjamin Marzinski   fs: export __bloc...
1840
  EXPORT_SYMBOL(__block_write_full_page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1841

afddba49d   Nick Piggin   fs: introduce wri...
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
  /*
   * If a page has any new buffers, zero them out here, and mark them uptodate
   * and dirty so they'll be written out (in order to prevent uninitialised
   * block data from leaking). And clear the new bit.
   *
   * @page: locked page (BUG otherwise); returns at once if it has no buffers
   * @from: start of the byte range within the page
   * @to:   end of the byte range within the page (exclusive)
   *
   * Only buffers flagged new that overlap [@from, @to) are touched.  The
   * zeroing and the uptodate marking are skipped when the page itself is
   * already uptodate; the new bit is cleared and the buffer dirtied either
   * way.
   */
  void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
  {
  	unsigned int block_start, block_end;
  	struct buffer_head *head, *bh;
  
  	BUG_ON(!PageLocked(page));
  	if (!page_has_buffers(page))
  		return;
  
  	bh = head = page_buffers(page);
  	block_start = 0;
  	do {
  		/* [block_start, block_end) is this buffer's byte span in the page */
  		block_end = block_start + bh->b_size;
  
  		if (buffer_new(bh)) {
  			if (block_end > from && block_start < to) {
  				if (!PageUptodate(page)) {
  					unsigned start, size;
  
  					/* Zero only the part inside [from, to). */
  					start = max(from, block_start);
  					size = min(to, block_end) - start;
eebd2aa35   Christoph Lameter   Pagecache zeroing...
1868
  					zero_user(page, start, size);
afddba49d   Nick Piggin   fs: introduce wri...
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
  					set_buffer_uptodate(bh);
  				}
  
  				clear_buffer_new(bh);
  				mark_buffer_dirty(bh);
  			}
  		}
  
  		block_start = block_end;
  		bh = bh->b_this_page;
  	} while (bh != head);
  }
  EXPORT_SYMBOL(page_zero_new_buffers);
ae259a9c8   Christoph Hellwig   fs: introduce iom...
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
  static void
  iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh,
  		struct iomap *iomap)
  {
  	loff_t offset = block << inode->i_blkbits;
  
  	bh->b_bdev = iomap->bdev;
  
  	/*
  	 * Block points to offset in file we need to map, iomap contains
  	 * the offset at which the map starts. If the map ends before the
  	 * current block, then do not map the buffer and let the caller
  	 * handle it.
  	 */
  	BUG_ON(offset >= iomap->offset + iomap->length);
  
  	switch (iomap->type) {
  	case IOMAP_HOLE:
  		/*
  		 * If the buffer is not up to date or beyond the current EOF,
  		 * we need to mark it as new to ensure sub-block zeroing is
  		 * executed if necessary.
  		 */
  		if (!buffer_uptodate(bh) ||
  		    (offset >= i_size_read(inode)))
  			set_buffer_new(bh);
  		break;
  	case IOMAP_DELALLOC:
  		if (!buffer_uptodate(bh) ||
  		    (offset >= i_size_read(inode)))
  			set_buffer_new(bh);
  		set_buffer_uptodate(bh);
  		set_buffer_mapped(bh);
  		set_buffer_delay(bh);
  		break;
  	case IOMAP_UNWRITTEN:
  		/*
3d7b6b21f   Andreas Gruenbacher   iomap: mark newly...
1919
1920
1921
  		 * For unwritten regions, we always need to ensure that regions
  		 * in the block we are not writing to are zeroed. Mark the
  		 * buffer as new to ensure this.
ae259a9c8   Christoph Hellwig   fs: introduce iom...
1922
1923
1924
  		 */
  		set_buffer_new(bh);
  		set_buffer_unwritten(bh);
df561f668   Gustavo A. R. Silva   treewide: Use fal...
1925
  		fallthrough;
ae259a9c8   Christoph Hellwig   fs: introduce iom...
1926
  	case IOMAP_MAPPED:
3d7b6b21f   Andreas Gruenbacher   iomap: mark newly...
1927
1928
  		if ((iomap->flags & IOMAP_F_NEW) ||
  		    offset >= i_size_read(inode))
ae259a9c8   Christoph Hellwig   fs: introduce iom...
1929
  			set_buffer_new(bh);
19fe5f643   Andreas Gruenbacher   iomap: Switch fro...
1930
1931
  		bh->b_blocknr = (iomap->addr + offset - iomap->offset) >>
  				inode->i_blkbits;
ae259a9c8   Christoph Hellwig   fs: introduce iom...
1932
1933
1934
1935
1936
1937
1938
  		set_buffer_mapped(bh);
  		break;
  	}
  }
  
  /*
   * Prepare the buffers of a locked page for a write of @len bytes at @pos.
   *
   * Every buffer overlapping the page-relative range [from, to) that is not
   * yet mapped gets mapped, either through @get_block (called with create=1)
   * or, when @get_block is NULL, through iomap_to_bh() using @iomap.  Newly
   * allocated buffers have the bytes outside the write range zeroed; buffers
   * that are only partially covered by the write and are not uptodate are
   * read in and waited for.
   *
   * Returns 0 on success, the error from get_block(), or -EIO if one of the
   * reads did not leave its buffer uptodate.  On any error
   * page_zero_new_buffers() is called for the range before returning.
   */
  int __block_write_begin_int(struct page *page, loff_t pos, unsigned len,
  		get_block_t *get_block, struct iomap *iomap)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1939
  {
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
1940
  	/* from/to: byte offsets of the write within the page */
  	unsigned from = pos & (PAGE_SIZE - 1);
ebdec241d   Christoph Hellwig   fs: kill block_pr...
1941
  	unsigned to = from + len;
6e1db88d5   Christoph Hellwig   introduce __block...
1942
  	struct inode *inode = page->mapping->host;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1943
1944
1945
1946
1947
1948
1949
  	unsigned block_start, block_end;
  	sector_t block;
  	int err = 0;
  	unsigned blocksize, bbits;
  	/*
  	 * wait[] collects buffers with a read in flight.  Two slots suffice:
  	 * among the buffers overlapping [from, to) only the one containing
  	 * 'from' and the one containing 'to' can be partially covered.
  	 */
  	struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;
  
  	BUG_ON(!PageLocked(page));
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
1950
1951
  	BUG_ON(from > PAGE_SIZE);
  	BUG_ON(to > PAGE_SIZE);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1952
  	BUG_ON(from > to);
45bce8f3e   Linus Torvalds   fs/buffer.c: make...
1953
1954
1955
  	head = create_page_buffers(page, inode, 0);
  	blocksize = head->b_size;
  	bbits = block_size_bits(blocksize);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1956

09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
1957
  	/* file block number of the first buffer on this page */
  	block = (sector_t)page->index << (PAGE_SHIFT - bbits);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
  
  	/*
  	 * Walk the circular buffer list once.  block_start/block_end are the
  	 * page-relative byte bounds of the current buffer; "!block_start"
  	 * lets the loop enter for the first buffer even though bh == head.
  	 */
  	for(bh = head, block_start = 0; bh != head || !block_start;
  	    block++, block_start=block_end, bh = bh->b_this_page) {
  		block_end = block_start + blocksize;
  		if (block_end <= from || block_start >= to) {
  			/* buffer not touched by this write: only inherit page state */
  			if (PageUptodate(page)) {
  				if (!buffer_uptodate(bh))
  					set_buffer_uptodate(bh);
  			}
  			continue;
  		}
  		if (buffer_new(bh))
  			clear_buffer_new(bh);
  		if (!buffer_mapped(bh)) {
b0cf2321c   Badari Pulavarty   [PATCH] pass b_si...
1972
  			WARN_ON(bh->b_size != blocksize);
ae259a9c8   Christoph Hellwig   fs: introduce iom...
1973
1974
1975
1976
1977
1978
1979
  			if (get_block) {
  				err = get_block(inode, block, bh, 1);
  				if (err)
  					break;
  			} else {
  				iomap_to_bh(inode, block, bh, iomap);
  			}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1980
  			if (buffer_new(bh)) {
e64855c6c   Jan Kara   fs: Add helper to...
1981
  				clean_bdev_bh_alias(bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1982
  				if (PageUptodate(page)) {
637aff46f   Nick Piggin   fs: fix data-loss...
1983
  					/* page data is already valid for this new block */
  					clear_buffer_new(bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1984
  					set_buffer_uptodate(bh);
637aff46f   Nick Piggin   fs: fix data-loss...
1985
  					mark_buffer_dirty(bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1986
1987
  					continue;
  				}
eebd2aa35   Christoph Lameter   Pagecache zeroing...
1988
1989
1990
1991
  				/* zero the parts of the new block the write will not cover */
  				if (block_end > to || block_start < from)
  					zero_user_segments(page,
  						to, block_end,
  						block_start, from);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1992
1993
1994
1995
1996
1997
1998
1999
2000
  				continue;
  			}
  		}
  		if (PageUptodate(page)) {
  			if (!buffer_uptodate(bh))
  				set_buffer_uptodate(bh);
  			continue; 
  		}
  		/*
  		 * Existing block that is only partially overwritten and whose
  		 * contents are not yet in memory: start a read for it.
  		 */
  		if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
33a266dda   David Chinner   [PATCH] Make BH_U...
2001
  		    !buffer_unwritten(bh) &&
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2002
  		     (block_start < from || block_end > to)) {
dfec8a14f   Mike Christie   fs: have ll_rw_bl...
2003
  			ll_rw_block(REQ_OP_READ, 0, 1, &bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2004
2005
2006
2007
2008
2009
2010
2011
2012
  			*wait_bh++=bh;
  		}
  	}
  	/*
  	 * If we issued read requests - let them complete.
  	 */
  	while(wait_bh > wait) {
  		wait_on_buffer(*--wait_bh);
  		if (!buffer_uptodate(*wait_bh))
f3ddbdc62   Nick Piggin   [PATCH] fix race ...
2013
  			err = -EIO;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2014
  	}
f9f07b6c1   Jan Kara   vfs: Fix data cor...
2015
  	if (unlikely(err))
afddba49d   Nick Piggin   fs: introduce wri...
2016
  		page_zero_new_buffers(page, from, to);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2017
2018
  	return err;
  }
ae259a9c8   Christoph Hellwig   fs: introduce iom...
2019
2020
2021
2022
2023
2024
  
  /*
   * get_block_t flavour of __block_write_begin_int(): forwards all arguments
   * unchanged and passes a NULL iomap.  Returns whatever the callee returns.
   */
  int __block_write_begin(struct page *page, loff_t pos, unsigned len,
  		get_block_t *get_block)
  {
  	int ret;
  
  	ret = __block_write_begin_int(page, pos, len, get_block, NULL);
  	return ret;
  }
ebdec241d   Christoph Hellwig   fs: kill block_pr...
2025
  EXPORT_SYMBOL(__block_write_begin);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2026
2027
2028
2029
2030
2031
2032
2033
  
  /*
   * Commit the page-relative byte range [from, to) of @page after data has
   * been copied into it.
   *
   * Buffers overlapping the range are marked uptodate and dirty; BH_New is
   * cleared on every buffer of the page.  If no buffer outside the range is
   * found non-uptodate, the page itself is marked uptodate.  Always returns 0.
   */
  static int __block_commit_write(struct inode *inode, struct page *page,
  		unsigned from, unsigned to)
  {
  	struct buffer_head *head = page_buffers(page);
  	struct buffer_head *cur = head;
  	unsigned blocksize = head->b_size;
  	unsigned start = 0;
  	int partial = 0;
  
  	do {
  		unsigned end = start + blocksize;
  
  		if (end > from && start < to) {
  			/* buffer received (some of) the written data */
  			set_buffer_uptodate(cur);
  			mark_buffer_dirty(cur);
  		} else if (!buffer_uptodate(cur)) {
  			/* untouched buffer whose contents are still unknown */
  			partial = 1;
  		}
  		clear_buffer_new(cur);
  
  		start = end;
  		cur = cur->b_this_page;
  	} while (cur != head);
  
  	/*
  	 * A partial write may happen to leave every buffer uptodate.  In that
  	 * case mark the page uptodate now so that the next read() does not
  	 * have to issue a pointless readpage().
  	 */
  	if (!partial)
  		SetPageUptodate(page);
  	return 0;
  }
  
  /*
155130a4f   Christoph Hellwig   get rid of block_...
2065
2066
2067
   * block_write_begin takes care of the basic task of block allocation and
   * bringing partial write blocks uptodate first.
   *
7bb46a673   npiggin@suse.de   fs: introduce new...
2068
   * The filesystem needs to handle block truncation upon failure.
afddba49d   Nick Piggin   fs: introduce wri...
2069
   */
155130a4f   Christoph Hellwig   get rid of block_...
2070
2071
  int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
  		unsigned flags, struct page **pagep, get_block_t *get_block)
afddba49d   Nick Piggin   fs: introduce wri...
2072
  {
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
2073
  	pgoff_t index = pos >> PAGE_SHIFT;
afddba49d   Nick Piggin   fs: introduce wri...
2074
  	struct page *page;
6e1db88d5   Christoph Hellwig   introduce __block...
2075
  	int status;
afddba49d   Nick Piggin   fs: introduce wri...
2076

6e1db88d5   Christoph Hellwig   introduce __block...
2077
2078
2079
  	page = grab_cache_page_write_begin(mapping, index, flags);
  	if (!page)
  		return -ENOMEM;
afddba49d   Nick Piggin   fs: introduce wri...
2080

6e1db88d5   Christoph Hellwig   introduce __block...
2081
  	status = __block_write_begin(page, pos, len, get_block);
afddba49d   Nick Piggin   fs: introduce wri...
2082
  	if (unlikely(status)) {
6e1db88d5   Christoph Hellwig   introduce __block...
2083
  		unlock_page(page);
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
2084
  		put_page(page);
6e1db88d5   Christoph Hellwig   introduce __block...
2085
  		page = NULL;
afddba49d   Nick Piggin   fs: introduce wri...
2086
  	}
6e1db88d5   Christoph Hellwig   introduce __block...
2087
  	*pagep = page;
afddba49d   Nick Piggin   fs: introduce wri...
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
  	return status;
  }
  EXPORT_SYMBOL(block_write_begin);
  
  /*
   * Finish a write of @len bytes at @pos of which only @copied bytes were
   * actually copied into @page.  Returns the number of bytes committed, which
   * is @copied or 0 (see the short-write handling below).
   */
  int block_write_end(struct file *file, struct address_space *mapping,
  			loff_t pos, unsigned len, unsigned copied,
  			struct page *page, void *fsdata)
  {
  	struct inode *inode = mapping->host;
  	unsigned start = pos & (PAGE_SIZE - 1);
  
  	if (unlikely(copied < len)) {
  		/*
  		 * Short write.  Buffers that were written completely are
  		 * uptodate, so a later readpage cannot clobber them.  A
  		 * buffer that was only partly written is not uptodate
  		 * though, and a readpage could overwrite what we copied.
  		 *
  		 * Keep it simple: on a page that is not uptodate, treat any
  		 * short write as a zero-length write so that the caller has
  		 * to redo the whole thing.
  		 */
  		if (!PageUptodate(page))
  			copied = 0;
  
  		page_zero_new_buffers(page, start + copied, start + len);
  	}
  	flush_dcache_page(page);
  
  	/* The committed range may be short, or even empty. */
  	__block_commit_write(inode, page, start, start + copied);
  
  	return copied;
  }
  EXPORT_SYMBOL(block_write_end);
  
  /*
   * Generic ->write_end: commit the copied data via block_write_end(), grow
   * i_size if the write extended the file, then unlock and release the page.
   * Returns the number of bytes committed.
   */
  int generic_write_end(struct file *file, struct address_space *mapping,
  			loff_t pos, unsigned len, unsigned copied,
  			struct page *page, void *fsdata)
  {
  	struct inode *inode = mapping->host;
  	loff_t old_size = inode->i_size;
  	loff_t new_end;
  	bool grew;
  
  	copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
  	new_end = pos + copied;
  
  	/*
  	 * i_size is read directly instead of through i_size_read(): it cannot
  	 * change under us because we hold i_rwsem.
  	 *
  	 * The update must happen while the page is still locked, otherwise
  	 * page writeout could come in and zero beyond i_size.
  	 */
  	grew = new_end > inode->i_size;
  	if (grew)
  		i_size_write(inode, new_end);
  
  	unlock_page(page);
  	put_page(page);
  
  	if (old_size < pos)
  		pagecache_isize_extended(inode, old_size, pos);
  
  	/*
  	 * The inode is deliberately dirtied only after the page lock has been
  	 * dropped: doing it earlier would lengthen the page lock hold time for
  	 * no reason, and would force a lock ordering between the page lock and
  	 * transaction start on journaling filesystems.
  	 */
  	if (grew)
  		mark_inode_dirty(inode);
  	return copied;
  }
  EXPORT_SYMBOL(generic_write_end);
  
  /*
8ab22b9ab   Hisashi Hifumi   vfs: pagecache us...
2166
2167
2168
2169
2170
2171
   * block_is_partially_uptodate checks whether buffers within a page are
   * uptodate or not.
   *
   * Returns true if all buffers which correspond to a file portion
   * we want to read are uptodate.
   */
c186afb4d   Al Viro   switch ->is_parti...
2172
2173
  int block_is_partially_uptodate(struct page *page, unsigned long from,
  					unsigned long count)
8ab22b9ab   Hisashi Hifumi   vfs: pagecache us...
2174
  {
8ab22b9ab   Hisashi Hifumi   vfs: pagecache us...
2175
2176
2177
2178
2179
2180
2181
  	unsigned block_start, block_end, blocksize;
  	unsigned to;
  	struct buffer_head *bh, *head;
  	int ret = 1;
  
  	if (!page_has_buffers(page))
  		return 0;
45bce8f3e   Linus Torvalds   fs/buffer.c: make...
2182
2183
  	head = page_buffers(page);
  	blocksize = head->b_size;
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
2184
  	to = min_t(unsigned, PAGE_SIZE - from, count);
8ab22b9ab   Hisashi Hifumi   vfs: pagecache us...
2185
  	to = from + to;
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
2186
  	if (from < blocksize && to > PAGE_SIZE - blocksize)
8ab22b9ab   Hisashi Hifumi   vfs: pagecache us...
2187
  		return 0;
8ab22b9ab   Hisashi Hifumi   vfs: pagecache us...
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
  	bh = head;
  	block_start = 0;
  	do {
  		block_end = block_start + blocksize;
  		if (block_end > from && block_start < to) {
  			if (!buffer_uptodate(bh)) {
  				ret = 0;
  				break;
  			}
  			if (block_end >= to)
  				break;
  		}
  		block_start = block_end;
  		bh = bh->b_this_page;
  	} while (bh != head);
  
  	return ret;
  }
  EXPORT_SYMBOL(block_is_partially_uptodate);
  
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
   * Generic "read page" function for block devices that have the normal
   * get_block functionality. This is most of the block device filesystems.
   * Reads the page asynchronously --- the unlock_buffer() and
   * set/clear_buffer_uptodate() functions propagate buffer state into the
   * page struct once IO has completed.
   *
   * Always returns 0; a get_block() failure is recorded on the page with
   * SetPageError() rather than through the return value.  When no buffer
   * needs IO the page is unlocked here before returning.
   */
  int block_read_full_page(struct page *page, get_block_t *get_block)
  {
  	struct inode *inode = page->mapping->host;
  	sector_t iblock, lblock;
  	/* arr[] holds the buffers that still need to be read from disk */
  	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
45bce8f3e   Linus Torvalds   fs/buffer.c: make...
2220
  	unsigned int blocksize, bbits;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2221
2222
  	int nr, i;
  	int fully_mapped = 1;
45bce8f3e   Linus Torvalds   fs/buffer.c: make...
2223
2224
2225
  	head = create_page_buffers(page, inode, 0);
  	blocksize = head->b_size;
  	bbits = block_size_bits(blocksize);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2226

09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
2227
  	/* iblock: file block number of the first buffer on this page */
  	iblock = (sector_t)page->index << (PAGE_SHIFT - bbits);
45bce8f3e   Linus Torvalds   fs/buffer.c: make...
2228
  	/* lblock: i_size rounded up to a whole number of blocks */
  	lblock = (i_size_read(inode)+blocksize-1) >> bbits;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2229
2230
2231
2232
2233
2234
2235
2236
2237
  	bh = head;
  	nr = 0;
  	i = 0;
  
  	/*
  	 * Stage one: map the buffers and collect those that need IO.
  	 * Note that "continue" jumps to the loop condition below, so i,
  	 * iblock and bh are still advanced on every iteration.
  	 */
  	do {
  		if (buffer_uptodate(bh))
  			continue;
  
  		if (!buffer_mapped(bh)) {
c64610ba5   Andrew Morton   [PATCH] block_rea...
2238
  			int err = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2239
2240
  			fully_mapped = 0;
  			/* get_block() is only consulted for blocks below lblock */
  			if (iblock < lblock) {
b0cf2321c   Badari Pulavarty   [PATCH] pass b_si...
2241
  				WARN_ON(bh->b_size != blocksize);
c64610ba5   Andrew Morton   [PATCH] block_rea...
2242
2243
  				err = get_block(inode, iblock, bh, 0);
  				if (err)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2244
2245
2246
  					SetPageError(page);
  			}
  			if (!buffer_mapped(bh)) {
eebd2aa35   Christoph Lameter   Pagecache zeroing...
2247
  				/* still unmapped: fill this block's part of the page with zeroes */
  				zero_user(page, i * blocksize, blocksize);
c64610ba5   Andrew Morton   [PATCH] block_rea...
2248
2249
  				/* after a get_block() error the buffer is left !uptodate */
  				if (!err)
  					set_buffer_uptodate(bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
  				continue;
  			}
  			/*
  			 * get_block() might have updated the buffer
  			 * synchronously
  			 */
  			if (buffer_uptodate(bh))
  				continue;
  		}
  		arr[nr++] = bh;
  	} while (i++, iblock++, (bh = bh->b_this_page) != head);
  
  	if (fully_mapped)
  		SetPageMappedToDisk(page);
  
  	if (!nr) {
  		/*
  		 * All buffers are uptodate - we can set the page uptodate
  		 * as well. But not if get_block() returned an error.
  		 */
  		if (!PageError(page))
  			SetPageUptodate(page);
  		unlock_page(page);
  		return 0;
  	}
  
  	/* Stage two: lock the buffers */
  	for (i = 0; i < nr; i++) {
  		bh = arr[i];
  		lock_buffer(bh);
  		mark_buffer_async_read(bh);
  	}
  
  	/*
  	 * Stage 3: start the IO.  Check for uptodateness
  	 * inside the buffer lock in case another process reading
  	 * the underlying blockdev brought it uptodate (the sct fix).
  	 */
  	for (i = 0; i < nr; i++) {
  		bh = arr[i];
  		if (buffer_uptodate(bh))
  			end_buffer_async_read(bh, 1);
  		else
2a222ca99   Mike Christie   fs: have submit_b...
2293
  			submit_bh(REQ_OP_READ, 0, bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2294
2295
2296
  	}
  	return 0;
  }
1fe72eaa0   H Hartley Sweeten   fs/buffer.c: clea...
2297
  EXPORT_SYMBOL(block_read_full_page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2298
2299
  
  /* utility function for filesystems that need to do work on expanding
89e107877   Nick Piggin   fs: new cont helpers
2300
   * truncates.  Uses filesystem pagecache writes to allow the filesystem to
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2301
2302
   * deal with the hole.  
   */
89e107877   Nick Piggin   fs: new cont helpers
2303
  int generic_cont_expand_simple(struct inode *inode, loff_t size)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2304
2305
2306
  {
  	struct address_space *mapping = inode->i_mapping;
  	struct page *page;
89e107877   Nick Piggin   fs: new cont helpers
2307
  	void *fsdata;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2308
  	int err;
c08d3b0e3   npiggin@suse.de   truncate: use new...
2309
2310
  	err = inode_newsize_ok(inode, size);
  	if (err)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2311
  		goto out;
89e107877   Nick Piggin   fs: new cont helpers
2312
  	err = pagecache_write_begin(NULL, mapping, size, 0,
c718a9751   Tetsuo Handa   fs: semove set bu...
2313
  				    AOP_FLAG_CONT_EXPAND, &page, &fsdata);
89e107877   Nick Piggin   fs: new cont helpers
2314
  	if (err)
05eb0b51f   OGAWA Hirofumi   [PATCH] fat: supp...
2315
  		goto out;
05eb0b51f   OGAWA Hirofumi   [PATCH] fat: supp...
2316

89e107877   Nick Piggin   fs: new cont helpers
2317
2318
  	err = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata);
  	BUG_ON(err > 0);
05eb0b51f   OGAWA Hirofumi   [PATCH] fat: supp...
2319

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2320
2321
2322
  out:
  	return err;
  }
1fe72eaa0   H Hartley Sweeten   fs/buffer.c: clea...
2323
  EXPORT_SYMBOL(generic_cont_expand_simple);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2324

f1e3af72c   Adrian Bunk   make fs/buffer.c:...
2325
2326
  /*
   * Zero @len bytes of the page containing @curpos, starting at page offset
   * @zerofrom, through a pagecache write_begin/write_end pair.
   *
   * Returns 0 on success, or the non-zero result of pagecache_write_begin() /
   * the negative result of pagecache_write_end().  A short write_end is a bug.
   */
  static int cont_zero_page_range(struct file *file,
  				struct address_space *mapping, loff_t curpos,
  				unsigned zerofrom, unsigned len)
  {
  	struct page *page;
  	void *fsdata;
  	int err;
  
  	err = pagecache_write_begin(file, mapping, curpos, len, 0,
  				    &page, &fsdata);
  	if (err)
  		return err;
  
  	zero_user(page, zerofrom, len);
  
  	err = pagecache_write_end(file, mapping, curpos, len, len,
  					page, fsdata);
  	if (err < 0)
  		return err;
  	BUG_ON(err != len);
  	return 0;
  }
  
  /*
   * Zero-fill the file from the position recorded in *@bytes up to @pos, one
   * page at a time, so that a following write at @pos does not leave a hole.
   *
   * Returns 0 on success, -EINTR if a fatal signal is pending, or the error
   * from the pagecache write helpers.
   */
  static int cont_expand_zero(struct file *file, struct address_space *mapping,
  			    loff_t pos, loff_t *bytes)
  {
  	struct inode *inode = mapping->host;
  	unsigned int blocksize = i_blocksize(inode);
  	pgoff_t index = pos >> PAGE_SHIFT;
  	unsigned offset = pos & ~PAGE_MASK;
  	pgoff_t curidx;
  	loff_t curpos;
  	unsigned zerofrom, len;
  	int err;
  
  	/* whole pages (or page tails) that lie before the page holding @pos */
  	for (;;) {
  		curpos = *bytes;
  		curidx = curpos >> PAGE_SHIFT;
  		if (index <= curidx)
  			break;
  
  		zerofrom = curpos & ~PAGE_MASK;
  		if (zerofrom & (blocksize-1)) {
  			/* round *bytes up to the next block boundary */
  			*bytes |= (blocksize-1);
  			(*bytes)++;
  		}
  		len = PAGE_SIZE - zerofrom;
  
  		err = cont_zero_page_range(file, mapping, curpos, zerofrom, len);
  		if (err)
  			return err;
  
  		balance_dirty_pages_ratelimited(mapping);
  
  		if (fatal_signal_pending(current))
  			return -EINTR;
  	}
  
  	/* page covers the boundary, find the boundary offset */
  	if (index != curidx)
  		return 0;
  
  	zerofrom = curpos & ~PAGE_MASK;
  	/* if we will expand the thing last block will be filled */
  	if (offset <= zerofrom)
  		return 0;
  
  	if (zerofrom & (blocksize-1)) {
  		*bytes |= (blocksize-1);
  		(*bytes)++;
  	}
  	len = offset - zerofrom;
  
  	return cont_zero_page_range(file, mapping, curpos, zerofrom, len);
  }
  
  /*
   * For moronic filesystems that do not allow holes in file.
   * We may have to extend the file.
   */
282dc1788   Christoph Hellwig   get rid of cont_w...
2401
  int cont_write_begin(struct file *file, struct address_space *mapping,
89e107877   Nick Piggin   fs: new cont helpers
2402
2403
2404
2405
2406
  			loff_t pos, unsigned len, unsigned flags,
  			struct page **pagep, void **fsdata,
  			get_block_t *get_block, loff_t *bytes)
  {
  	struct inode *inode = mapping->host;
93407472a   Fabian Frederick   fs: add i_blocksi...
2407
2408
  	unsigned int blocksize = i_blocksize(inode);
  	unsigned int zerofrom;
89e107877   Nick Piggin   fs: new cont helpers
2409
2410
2411
2412
  	int err;
  
  	err = cont_expand_zero(file, mapping, pos, bytes);
  	if (err)
155130a4f   Christoph Hellwig   get rid of block_...
2413
  		return err;
89e107877   Nick Piggin   fs: new cont helpers
2414

09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
2415
  	zerofrom = *bytes & ~PAGE_MASK;
89e107877   Nick Piggin   fs: new cont helpers
2416
2417
2418
  	if (pos+len > *bytes && zerofrom & (blocksize-1)) {
  		*bytes |= (blocksize-1);
  		(*bytes)++;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2419
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2420

155130a4f   Christoph Hellwig   get rid of block_...
2421
  	return block_write_begin(mapping, pos, len, flags, pagep, get_block);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2422
  }
1fe72eaa0   H Hartley Sweeten   fs/buffer.c: clea...
2423
  EXPORT_SYMBOL(cont_write_begin);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2424

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2425
2426
2427
2428
2429
2430
  int block_commit_write(struct page *page, unsigned from, unsigned to)
  {
  	struct inode *inode = page->mapping->host;
  	__block_commit_write(inode,page,from,to);
  	return 0;
  }
1fe72eaa0   H Hartley Sweeten   fs/buffer.c: clea...
2431
  EXPORT_SYMBOL(block_commit_write);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2432

541716902   David Chinner   [FS] Implement bl...
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
  /*
   * block_page_mkwrite() is not allowed to change the file size as it gets
   * called from a page fault handler when a page is first dirtied. Hence we must
   * be careful to check for EOF conditions here. We set the page up correctly
   * for a written page which means we get ENOSPC checking when writing into
   * holes and correct delalloc and unwritten extent mapping on filesystems that
   * support these features.
   *
   * We are not allowed to take the i_mutex here so we have to play games to
   * protect against truncate races as the page could now be beyond EOF.  Because
7bb46a673   npiggin@suse.de   fs: introduce new...
2443
   * truncate writes the inode size before removing pages, once we have the
541716902   David Chinner   [FS] Implement bl...
2444
2445
2446
   * page lock we can determine safely if the page is beyond EOF. If it is not
   * beyond EOF, then the page is guaranteed safe against truncation until we
   * unlock the page.
ea13a8646   Jan Kara   vfs: Block mmappe...
2447
   *
14da92001   Jan Kara   fs: Protect write...
2448
   * Direct callers of this function should protect against filesystem freezing
5c5000296   Ross Zwisler   vfs: remove unuse...
2449
   * using sb_start_pagefault() - sb_end_pagefault() functions.
541716902   David Chinner   [FS] Implement bl...
2450
   */
5c5000296   Ross Zwisler   vfs: remove unuse...
2451
  int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
24da4fab5   Jan Kara   vfs: Create __blo...
2452
  			 get_block_t get_block)
541716902   David Chinner   [FS] Implement bl...
2453
  {
c2ec175c3   Nick Piggin   mm: page_mkwrite ...
2454
  	struct page *page = vmf->page;
496ad9aa8   Al Viro   new helper: file_...
2455
  	struct inode *inode = file_inode(vma->vm_file);
541716902   David Chinner   [FS] Implement bl...
2456
2457
  	unsigned long end;
  	loff_t size;
24da4fab5   Jan Kara   vfs: Create __blo...
2458
  	int ret;
541716902   David Chinner   [FS] Implement bl...
2459
2460
2461
2462
  
  	lock_page(page);
  	size = i_size_read(inode);
  	if ((page->mapping != inode->i_mapping) ||
183363380   Nick Piggin   fix some conversi...
2463
  	    (page_offset(page) > size)) {
24da4fab5   Jan Kara   vfs: Create __blo...
2464
2465
2466
  		/* We overload EFAULT to mean page got truncated */
  		ret = -EFAULT;
  		goto out_unlock;
541716902   David Chinner   [FS] Implement bl...
2467
2468
2469
  	}
  
  	/* page is wholly or partially inside EOF */
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
2470
2471
  	if (((page->index + 1) << PAGE_SHIFT) > size)
  		end = size & ~PAGE_MASK;
541716902   David Chinner   [FS] Implement bl...
2472
  	else
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
2473
  		end = PAGE_SIZE;
541716902   David Chinner   [FS] Implement bl...
2474

ebdec241d   Christoph Hellwig   fs: kill block_pr...
2475
  	ret = __block_write_begin(page, 0, end, get_block);
541716902   David Chinner   [FS] Implement bl...
2476
2477
  	if (!ret)
  		ret = block_commit_write(page, 0, end);
24da4fab5   Jan Kara   vfs: Create __blo...
2478
2479
  	if (unlikely(ret < 0))
  		goto out_unlock;
ea13a8646   Jan Kara   vfs: Block mmappe...
2480
  	set_page_dirty(page);
1d1d1a767   Darrick J. Wong   mm: only enforce ...
2481
  	wait_for_stable_page(page);
24da4fab5   Jan Kara   vfs: Create __blo...
2482
2483
2484
  	return 0;
  out_unlock:
  	unlock_page(page);
541716902   David Chinner   [FS] Implement bl...
2485
  	return ret;
24da4fab5   Jan Kara   vfs: Create __blo...
2486
  }
1fe72eaa0   H Hartley Sweeten   fs/buffer.c: clea...
2487
  EXPORT_SYMBOL(block_page_mkwrite);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2488
2489
  
  /*
03158cd7e   Nick Piggin   fs: restore nobh
2490
   * nobh_write_begin()'s prereads are special: the buffer_heads are freed
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2491
2492
   * immediately, while under the page lock.  So it needs a special end_io
   * handler which does not touch the bh after unlocking it.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2493
2494
2495
   */
  static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
  {
  	/* all completion work is delegated to the "notouch" helper */
  	__end_buffer_read_notouch(bh, uptodate);
  }
  
  /*
03158cd7e   Nick Piggin   fs: restore nobh
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
   * Attach the singly-linked list of buffers created by nobh_write_begin, to
   * the page (converting it to circular linked list and taking care of page
   * dirty races).
   */
  static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
  {
  	struct buffer_head *cur = head;
  
  	BUG_ON(!PageLocked(page));
  
  	spin_lock(&page->mapping->private_lock);
  	do {
  		struct buffer_head *next;
  
  		/* buffers of a dirty page must themselves be dirty */
  		if (PageDirty(page))
  			set_buffer_dirty(cur);
  
  		/* the NULL terminator marks the tail: close the ring here */
  		next = cur->b_this_page;
  		if (!next) {
  			next = head;
  			cur->b_this_page = head;
  		}
  		cur = next;
  	} while (cur != head);
  	attach_page_private(page, head);
  	spin_unlock(&page->mapping->private_lock);
  }
  
  /*
ea0f04e59   Christoph Hellwig   get rid of nobh_w...
2524
2525
   * On entry, the page is fully not uptodate.
   * On exit the page is fully uptodate in the areas outside (from,to)
7bb46a673   npiggin@suse.de   fs: introduce new...
2526
   * The filesystem needs to handle block truncation upon failure.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2527
   */
ea0f04e59   Christoph Hellwig   get rid of nobh_w...
2528
  int nobh_write_begin(struct address_space *mapping,
03158cd7e   Nick Piggin   fs: restore nobh
2529
2530
  			loff_t pos, unsigned len, unsigned flags,
  			struct page **pagep, void **fsdata,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2531
2532
  			get_block_t *get_block)
  {
03158cd7e   Nick Piggin   fs: restore nobh
2533
  	struct inode *inode = mapping->host;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2534
2535
  	const unsigned blkbits = inode->i_blkbits;
  	const unsigned blocksize = 1 << blkbits;
a4b0672db   Nick Piggin   fs: fix nobh erro...
2536
  	struct buffer_head *head, *bh;
03158cd7e   Nick Piggin   fs: restore nobh
2537
2538
2539
  	struct page *page;
  	pgoff_t index;
  	unsigned from, to;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2540
  	unsigned block_in_page;
a4b0672db   Nick Piggin   fs: fix nobh erro...
2541
  	unsigned block_start, block_end;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2542
  	sector_t block_in_file;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2543
  	int nr_reads = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2544
2545
  	int ret = 0;
  	int is_mapped_to_disk = 1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2546

09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
2547
2548
  	index = pos >> PAGE_SHIFT;
  	from = pos & (PAGE_SIZE - 1);
03158cd7e   Nick Piggin   fs: restore nobh
2549
  	to = from + len;
54566b2c1   Nick Piggin   fs: symlink write...
2550
  	page = grab_cache_page_write_begin(mapping, index, flags);
03158cd7e   Nick Piggin   fs: restore nobh
2551
2552
2553
2554
2555
2556
  	if (!page)
  		return -ENOMEM;
  	*pagep = page;
  	*fsdata = NULL;
  
  	if (page_has_buffers(page)) {
309f77ad9   Namhyung Kim   fs/buffer.c: call...
2557
2558
2559
2560
  		ret = __block_write_begin(page, pos, len, get_block);
  		if (unlikely(ret))
  			goto out_release;
  		return ret;
03158cd7e   Nick Piggin   fs: restore nobh
2561
  	}
a4b0672db   Nick Piggin   fs: fix nobh erro...
2562

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2563
2564
  	if (PageMappedToDisk(page))
  		return 0;
a4b0672db   Nick Piggin   fs: fix nobh erro...
2565
2566
2567
2568
2569
2570
2571
2572
2573
  	/*
  	 * Allocate buffers so that we can keep track of state, and potentially
  	 * attach them to the page if an error occurs. In the common case of
  	 * no error, they will just be freed again without ever being attached
  	 * to the page (which is all OK, because we're under the page lock).
  	 *
  	 * Be careful: the buffer linked list is a NULL terminated one, rather
  	 * than the circular one we're used to.
  	 */
640ab98fb   Jens Axboe   buffer: have allo...
2574
  	head = alloc_page_buffers(page, blocksize, false);
03158cd7e   Nick Piggin   fs: restore nobh
2575
2576
2577
2578
  	if (!head) {
  		ret = -ENOMEM;
  		goto out_release;
  	}
a4b0672db   Nick Piggin   fs: fix nobh erro...
2579

09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
2580
  	block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2581
2582
2583
2584
2585
2586
  
  	/*
  	 * We loop across all blocks in the page, whether or not they are
  	 * part of the affected region.  This is so we can discover if the
  	 * page is fully mapped-to-disk.
  	 */
a4b0672db   Nick Piggin   fs: fix nobh erro...
2587
  	for (block_start = 0, block_in_page = 0, bh = head;
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
2588
  		  block_start < PAGE_SIZE;
a4b0672db   Nick Piggin   fs: fix nobh erro...
2589
  		  block_in_page++, block_start += blocksize, bh = bh->b_this_page) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2590
  		int create;
a4b0672db   Nick Piggin   fs: fix nobh erro...
2591
2592
  		block_end = block_start + blocksize;
  		bh->b_state = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2593
2594
2595
2596
  		create = 1;
  		if (block_start >= to)
  			create = 0;
  		ret = get_block(inode, block_in_file + block_in_page,
a4b0672db   Nick Piggin   fs: fix nobh erro...
2597
  					bh, create);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2598
2599
  		if (ret)
  			goto failed;
a4b0672db   Nick Piggin   fs: fix nobh erro...
2600
  		if (!buffer_mapped(bh))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2601
  			is_mapped_to_disk = 0;
a4b0672db   Nick Piggin   fs: fix nobh erro...
2602
  		if (buffer_new(bh))
e64855c6c   Jan Kara   fs: Add helper to...
2603
  			clean_bdev_bh_alias(bh);
a4b0672db   Nick Piggin   fs: fix nobh erro...
2604
2605
  		if (PageUptodate(page)) {
  			set_buffer_uptodate(bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2606
  			continue;
a4b0672db   Nick Piggin   fs: fix nobh erro...
2607
2608
  		}
  		if (buffer_new(bh) || !buffer_mapped(bh)) {
eebd2aa35   Christoph Lameter   Pagecache zeroing...
2609
2610
  			zero_user_segments(page, block_start, from,
  							to, block_end);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2611
2612
  			continue;
  		}
a4b0672db   Nick Piggin   fs: fix nobh erro...
2613
  		if (buffer_uptodate(bh))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2614
2615
  			continue;	/* reiserfs does this */
  		if (block_start < from || block_end > to) {
a4b0672db   Nick Piggin   fs: fix nobh erro...
2616
2617
  			lock_buffer(bh);
  			bh->b_end_io = end_buffer_read_nobh;
2a222ca99   Mike Christie   fs: have submit_b...
2618
  			submit_bh(REQ_OP_READ, 0, bh);
a4b0672db   Nick Piggin   fs: fix nobh erro...
2619
  			nr_reads++;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2620
2621
2622
2623
  		}
  	}
  
  	if (nr_reads) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2624
2625
2626
2627
2628
  		/*
  		 * The page is locked, so these buffers are protected from
  		 * any VM or truncate activity.  Hence we don't need to care
  		 * for the buffer_head refcounts.
  		 */
a4b0672db   Nick Piggin   fs: fix nobh erro...
2629
  		for (bh = head; bh; bh = bh->b_this_page) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2630
2631
2632
  			wait_on_buffer(bh);
  			if (!buffer_uptodate(bh))
  				ret = -EIO;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2633
2634
2635
2636
2637
2638
2639
  		}
  		if (ret)
  			goto failed;
  	}
  
  	if (is_mapped_to_disk)
  		SetPageMappedToDisk(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2640

03158cd7e   Nick Piggin   fs: restore nobh
2641
  	*fsdata = head; /* to be released by nobh_write_end */
a4b0672db   Nick Piggin   fs: fix nobh erro...
2642

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2643
2644
2645
  	return 0;
  
  failed:
03158cd7e   Nick Piggin   fs: restore nobh
2646
  	BUG_ON(!ret);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2647
  	/*
a4b0672db   Nick Piggin   fs: fix nobh erro...
2648
2649
2650
2651
2652
  	 * Error recovery is a bit difficult. We need to zero out blocks that
  	 * were newly allocated, and dirty them to ensure they get written out.
  	 * Buffers need to be attached to the page at this point, otherwise
  	 * the handling of potential IO errors during writeout would be hard
  	 * (could try doing synchronous writeout, but what if that fails too?)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2653
  	 */
03158cd7e   Nick Piggin   fs: restore nobh
2654
2655
  	attach_nobh_buffers(page, head);
  	page_zero_new_buffers(page, from, to);
a4b0672db   Nick Piggin   fs: fix nobh erro...
2656

03158cd7e   Nick Piggin   fs: restore nobh
2657
2658
  out_release:
  	unlock_page(page);
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
2659
  	put_page(page);
03158cd7e   Nick Piggin   fs: restore nobh
2660
  	*pagep = NULL;
a4b0672db   Nick Piggin   fs: fix nobh erro...
2661

7bb46a673   npiggin@suse.de   fs: introduce new...
2662
2663
  	return ret;
  }
03158cd7e   Nick Piggin   fs: restore nobh
2664
  EXPORT_SYMBOL(nobh_write_begin);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2665

03158cd7e   Nick Piggin   fs: restore nobh
2666
2667
2668
  /*
   * nobh_write_end() - finish a write that was started by nobh_write_begin().
   *
   * @fsdata is the NULL-terminated chain of buffer_heads handed out by
   * nobh_write_begin() (or NULL).  On a short copy the chain is attached to
   * the page so that the buffer-based completion path can deal with it.
   * Whenever the page ends up with buffers attached, the work is delegated
   * to generic_write_end() and its result is returned.
   *
   * Otherwise the page is marked uptodate and dirty, i_size is extended if
   * the write went past it, the page is unlocked and released, and the
   * never-attached buffer_heads are freed.  Returns @copied in that case.
   */
  int nobh_write_end(struct file *file, struct address_space *mapping,
  			loff_t pos, unsigned len, unsigned copied,
  			struct page *page, void *fsdata)
  {
  	struct inode *inode = page->mapping->host;
  	struct buffer_head *head = fsdata;
  	loff_t end = pos + copied;

  	/* A private chain and attached buffers must never coexist here. */
  	BUG_ON(fsdata != NULL && page_has_buffers(page));

  	if (head && unlikely(copied < len))
  		attach_nobh_buffers(page, head);

  	if (page_has_buffers(page))
  		return generic_write_end(file, mapping, pos, len,
  					copied, page, fsdata);

  	SetPageUptodate(page);
  	set_page_dirty(page);
  	if (end > inode->i_size) {
  		i_size_write(inode, end);
  		mark_inode_dirty(inode);
  	}

  	unlock_page(page);
  	put_page(page);

  	/* The chain is NULL terminated, not circular. */
  	while (head) {
  		struct buffer_head *next = head->b_this_page;

  		free_buffer_head(head);
  		head = next;
  	}

  	return copied;
  }
03158cd7e   Nick Piggin   fs: restore nobh
2699
  EXPORT_SYMBOL(nobh_write_end);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
  
  /*
   * nobh_writepage() - based on block_full_write_page() except
   * that it tries to operate without attaching bufferheads to
   * the page.
   */
  /*
   * Write out @page via mpage_writepage(), falling back to
   * __block_write_full_page() when that returns -EAGAIN.
   *
   * A page lying entirely beyond i_size is just unlocked (returns 0); a page
   * straddling i_size has its tail zeroed before being written.
   */
  int nobh_writepage(struct page *page, get_block_t *get_block,
  			struct writeback_control *wbc)
  {
  	struct inode * const inode = page->mapping->host;
  	loff_t i_size = i_size_read(inode);
  	const pgoff_t end_index = i_size >> PAGE_SHIFT;
  	int ret;

  	/* Pages fully inside i_size need no special treatment. */
  	if (page->index >= end_index) {
  		unsigned offset = i_size & (PAGE_SIZE-1);

  		/* Fully outside i_size? (truncate in progress) */
  		if (page->index >= end_index+1 || !offset) {
  			unlock_page(page);
  			return 0; /* don't care */
  		}

  		/*
  		 * The page straddles i_size.  It must be zeroed out on each
  		 * and every writepage invocation because it may be mmapped:
  		 * bytes past EOF in the last page are zero when mapped and
  		 * writes to that region are not written out to the file.
  		 */
  		zero_user_segment(page, offset, PAGE_SIZE);
  	}

  	ret = mpage_writepage(page, get_block, wbc);
  	if (ret == -EAGAIN)
  		ret = __block_write_full_page(inode, page, get_block, wbc,
  					      end_buffer_async_write);
  	return ret;
  }
  EXPORT_SYMBOL(nobh_writepage);
03158cd7e   Nick Piggin   fs: restore nobh
2742
2743
  /*
   * nobh_truncate_page() - zero from @from to the end of the block that
   * contains it, without attaching buffer_heads to the page.
   *
   * Nothing is done when @from sits on a block boundary or when the block is
   * a hole.  If the page turns out to have buffers attached (either up front
   * or after reading it in), the work is handed to block_truncate_page().
   *
   * Returns 0 on success, -ENOMEM if the page cannot be obtained, -EIO if the
   * page is still not uptodate after ->readpage, or the error returned by
   * @get_block / ->readpage / block_truncate_page().
   */
  int nobh_truncate_page(struct address_space *mapping,
  			loff_t from, get_block_t *get_block)
  {
  	pgoff_t index = from >> PAGE_SHIFT;
  	unsigned offset = from & (PAGE_SIZE-1);
  	struct inode *inode = mapping->host;
  	unsigned blocksize = i_blocksize(inode);
  	unsigned length, pos;
  	sector_t iblock;
  	struct page *page;
  	struct buffer_head map_bh;
  	int err;

  	length = offset & (blocksize - 1);

  	/* Block boundary? Nothing to do */
  	if (!length)
  		return 0;

  	length = blocksize - length;
  	iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits);

  	page = grab_cache_page(mapping, index);
  	if (!page)
  		return -ENOMEM;

  	if (page_has_buffers(page))
  		goto has_buffers;

  	/* Find the block that contains "offset" */
  	for (pos = blocksize; offset >= pos; pos += blocksize)
  		iblock++;

  	map_bh.b_size = blocksize;
  	map_bh.b_state = 0;
  	err = get_block(inode, iblock, &map_bh, 0);
  	if (err)
  		goto unlock;
  	/* unmapped? It's a hole - nothing to do (err is 0 here) */
  	if (!buffer_mapped(&map_bh))
  		goto unlock;

  	/* Ok, it's mapped. Make sure it's up-to-date */
  	if (!PageUptodate(page)) {
  		err = mapping->a_ops->readpage(NULL, page);
  		if (err) {
  			/* Only the reference is dropped; no unlock on this path. */
  			put_page(page);
  			return err;
  		}
  		lock_page(page);
  		if (!PageUptodate(page)) {
  			err = -EIO;
  			goto unlock;
  		}
  		if (page_has_buffers(page))
  			goto has_buffers;
  	}

  	zero_user(page, offset, length);
  	set_page_dirty(page);
  	err = 0;

  unlock:
  	unlock_page(page);
  	put_page(page);
  	return err;

  has_buffers:
  	/* Buffers are attached: let the buffer-based variant do the job. */
  	unlock_page(page);
  	put_page(page);
  	return block_truncate_page(mapping, from, get_block);
  }
  EXPORT_SYMBOL(nobh_truncate_page);
  
  /*
   * block_truncate_page() - zero from @from to the end of the block that
   * contains it, using the page's buffer_heads.
   *
   * Buffers are created for the page if it has none.  The buffer covering
   * @from is mapped with @get_block (create == 0) if needed; a hole is left
   * alone.  A buffer that is not uptodate (and neither delayed nor unwritten)
   * is read in first.  Finally the tail is zeroed and the buffer dirtied.
   *
   * Returns 0 on success or when there is nothing to do, -ENOMEM if the page
   * cannot be obtained, -EIO on read failure, or the error from @get_block.
   */
  int block_truncate_page(struct address_space *mapping,
  			loff_t from, get_block_t *get_block)
  {
  	pgoff_t index = from >> PAGE_SHIFT;
  	unsigned offset = from & (PAGE_SIZE-1);
  	struct inode *inode = mapping->host;
  	unsigned blocksize = i_blocksize(inode);
  	unsigned length, pos;
  	sector_t iblock;
  	struct page *page;
  	struct buffer_head *bh;
  	int err = 0;

  	length = offset & (blocksize - 1);

  	/* Block boundary? Nothing to do */
  	if (!length)
  		return 0;

  	length = blocksize - length;
  	iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits);

  	page = grab_cache_page(mapping, index);
  	if (!page)
  		return -ENOMEM;

  	if (!page_has_buffers(page))
  		create_empty_buffers(page, blocksize, 0);

  	/* Find the buffer that contains "offset" */
  	bh = page_buffers(page);
  	for (pos = blocksize; offset >= pos; pos += blocksize) {
  		bh = bh->b_this_page;
  		iblock++;
  	}

  	if (!buffer_mapped(bh)) {
  		WARN_ON(bh->b_size != blocksize);
  		err = get_block(inode, iblock, bh, 0);
  		if (err)
  			goto unlock;
  		/* unmapped? It's a hole - nothing to do */
  		if (!buffer_mapped(bh))
  			goto unlock;
  	}

  	/* Ok, it's mapped. Make sure it's up-to-date */
  	if (PageUptodate(page))
  		set_buffer_uptodate(bh);

  	if (!(buffer_uptodate(bh) || buffer_delay(bh) || buffer_unwritten(bh))) {
  		ll_rw_block(REQ_OP_READ, 0, 1, &bh);
  		wait_on_buffer(bh);
  		/* Uhhuh. Read error. Complain and punt. */
  		if (!buffer_uptodate(bh)) {
  			err = -EIO;
  			goto unlock;
  		}
  	}

  	zero_user(page, offset, length);
  	mark_buffer_dirty(bh);
  	err = 0;

  unlock:
  	unlock_page(page);
  	put_page(page);
  	return err;
  }
1fe72eaa0   H Hartley Sweeten   fs/buffer.c: clea...
2889
  EXPORT_SYMBOL(block_truncate_page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2890
2891
2892
2893
  
  /*
   * The generic ->writepage function for buffer-backed address_spaces
   */
1b938c082   Matthew Wilcox   fs/buffer.c: remo...
2894
2895
  /*
   * Write @page through __block_write_full_page() with
   * end_buffer_async_write as the completion handler.
   *
   * A page entirely beyond i_size is unlocked and 0 is returned; a page that
   * straddles i_size has the part past EOF zeroed before it is written.
   */
  int block_write_full_page(struct page *page, get_block_t *get_block,
  			struct writeback_control *wbc)
  {
  	struct inode * const inode = page->mapping->host;
  	loff_t i_size = i_size_read(inode);
  	const pgoff_t end_index = i_size >> PAGE_SHIFT;

  	/* Only pages not fully inside i_size need the checks below. */
  	if (page->index >= end_index) {
  		unsigned offset = i_size & (PAGE_SIZE-1);

  		/* Fully outside i_size? (truncate in progress) */
  		if (page->index >= end_index+1 || !offset) {
  			unlock_page(page);
  			return 0; /* don't care */
  		}

  		/*
  		 * The page straddles i_size.  It must be zeroed out on each
  		 * and every writepage invocation because it may be mmapped:
  		 * bytes past EOF in the last page are zero when mapped and
  		 * writes to that region are not written out to the file.
  		 */
  		zero_user_segment(page, offset, PAGE_SIZE);
  	}

  	return __block_write_full_page(inode, page, get_block, wbc,
  				       end_buffer_async_write);
  }
1fe72eaa0   H Hartley Sweeten   fs/buffer.c: clea...
2925
  EXPORT_SYMBOL(block_write_full_page);
35c80d5f4   Chris Mason   Add block_write_f...
2926

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2927
2928
2929
  /*
   * Look up file block @block of @mapping's inode with @get_block
   * (create == 0) and return the block number it stored in b_blocknr.
   *
   * The return value of @get_block is not examined; the temporary
   * buffer_head starts out zeroed apart from b_size, so b_blocknr is 0
   * unless @get_block filled it in.
   */
  sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
  			    get_block_t *get_block)
  {
  	struct inode *host = mapping->host;
  	struct buffer_head probe = {
  		.b_size = i_blocksize(host),
  	};

  	get_block(host, block, &probe, 0);

  	return probe.b_blocknr;
  }
1fe72eaa0   H Hartley Sweeten   fs/buffer.c: clea...
2937
  EXPORT_SYMBOL(generic_block_bmap);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2938

4246a0b63   Christoph Hellwig   block: add a bi_e...
2939
  static void end_bio_bh_io_sync(struct bio *bio)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2940
2941
  {
  	struct buffer_head *bh = bio->bi_private;
b7c44ed9d   Jens Axboe   block: manipulate...
2942
  	if (unlikely(bio_flagged(bio, BIO_QUIET)))
08bafc034   Keith Mannthey   block: Supress Bu...
2943
  		set_bit(BH_Quiet, &bh->b_state);
4e4cbee93   Christoph Hellwig   block: switch bio...
2944
  	bh->b_end_io(bh, !bio->bi_status);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2945
  	bio_put(bio);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2946
  }
2a222ca99   Mike Christie   fs: have submit_b...
2947
  static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
8e8f92988   Jens Axboe   fs: add support f...
2948
  			 enum rw_hint write_hint, struct writeback_control *wbc)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2949
2950
  {
  	struct bio *bio;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2951
2952
2953
2954
  
  	BUG_ON(!buffer_locked(bh));
  	BUG_ON(!buffer_mapped(bh));
  	BUG_ON(!bh->b_end_io);
8fb0e3424   Aneesh Kumar K.V   vfs: Add BUG_ON f...
2955
2956
  	BUG_ON(buffer_delay(bh));
  	BUG_ON(buffer_unwritten(bh));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2957

48fd4f93a   Jens Axboe   block: submit_bh(...
2958
  	/*
48fd4f93a   Jens Axboe   block: submit_bh(...
2959
  	 * Only clear out a write error when rewriting
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2960
  	 */
2a222ca99   Mike Christie   fs: have submit_b...
2961
  	if (test_set_buffer_req(bh) && (op == REQ_OP_WRITE))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2962
  		clear_buffer_write_io_error(bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2963
  	bio = bio_alloc(GFP_NOIO, 1);
4f74d15fe   Eric Biggers   ext4: add inline ...
2964
  	fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO);
4f024f379   Kent Overstreet   block: Abstract o...
2965
  	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
74d46992e   Christoph Hellwig   block: replace bi...
2966
  	bio_set_dev(bio, bh->b_bdev);
8e8f92988   Jens Axboe   fs: add support f...
2967
  	bio->bi_write_hint = write_hint;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2968

6cf66b4ca   Kent Overstreet   fs: use helper bi...
2969
2970
  	bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
  	BUG_ON(bio->bi_iter.bi_size != bh->b_size);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2971
2972
2973
  
  	bio->bi_end_io = end_bio_bh_io_sync;
  	bio->bi_private = bh;
877f962c5   Theodore Ts'o   buffer: add BH_Pr...
2974
  	if (buffer_meta(bh))
2a222ca99   Mike Christie   fs: have submit_b...
2975
  		op_flags |= REQ_META;
877f962c5   Theodore Ts'o   buffer: add BH_Pr...
2976
  	if (buffer_prio(bh))
2a222ca99   Mike Christie   fs: have submit_b...
2977
2978
  		op_flags |= REQ_PRIO;
  	bio_set_op_attrs(bio, op, op_flags);
877f962c5   Theodore Ts'o   buffer: add BH_Pr...
2979

83c9c5471   Ming Lei   fs: move guard_bi...
2980
2981
  	/* Take care of bh's that straddle the end of the device */
  	guard_bio_eod(bio);
fd42df305   Dennis Zhou   blkcg: associate ...
2982
2983
  	if (wbc) {
  		wbc_init_bio(wbc, bio);
34e51a5e1   Tejun Heo   blkcg, writeback:...
2984
  		wbc_account_cgroup_owner(wbc, bh->b_page, bh->b_size);
fd42df305   Dennis Zhou   blkcg: associate ...
2985
  	}
4e49ea4a3   Mike Christie   block/fs/drivers:...
2986
  	submit_bio(bio);
f6454b049   Julia Lawall   block: fix return...
2987
  	return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2988
  }
bafc0dba1   Tejun Heo   buffer, writeback...
2989

020c2833d   Eric Biggers   fs: remove _submi...
2990
  int submit_bh(int op, int op_flags, struct buffer_head *bh)
bafc0dba1   Tejun Heo   buffer, writeback...
2991
  {
8e8f92988   Jens Axboe   fs: add support f...
2992
  	return submit_bh_wbc(op, op_flags, bh, 0, NULL);
713685111   Darrick J. Wong   mm: make snapshot...
2993
  }
1fe72eaa0   H Hartley Sweeten   fs/buffer.c: clea...
2994
  EXPORT_SYMBOL(submit_bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2995
2996
2997
  
  /**
   * ll_rw_block: low-level access to block devices (DEPRECATED)
dfec8a14f   Mike Christie   fs: have ll_rw_bl...
2998
   * @op: whether to %READ or %WRITE
ef295ecf0   Christoph Hellwig   block: better op ...
2999
   * @op_flags: req_flag_bits
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3000
3001
3002
   * @nr: number of &struct buffer_heads in the array
   * @bhs: array of pointers to &struct buffer_head
   *
a76622362   Jan Kara   [PATCH] Make ll_r...
3003
   * ll_rw_block() takes an array of pointers to &struct buffer_heads, and
70246286e   Christoph Hellwig   block: get rid of...
3004
3005
3006
   * requests an I/O operation on them, either a %REQ_OP_READ or a %REQ_OP_WRITE.
   * @op_flags contains flags modifying the detailed I/O behavior, most notably
   * %REQ_RAHEAD.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3007
3008
   *
   * This function drops any buffer that it cannot get a lock on (with the
9cb569d60   Christoph Hellwig   remove SWRITE* I/...
3009
3010
3011
3012
3013
   * BH_Lock state bit), any buffer that appears to be clean when doing a write
   * request, and any buffer that appears to be up-to-date when doing read
   * request.  Further it marks as clean buffers that are processed for
   * writing (the buffer cache won't assume that they are actually clean
   * until the buffer gets unlocked).
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3014
3015
   *
   * ll_rw_block sets b_end_io to simple completion handler that marks
e227867f1   Masanari Iida   treewide: Fix typ...
3016
   * the buffer up-to-date (if appropriate), unlocks the buffer and wakes
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3017
3018
3019
3020
3021
   * any waiters. 
   *
   * All of the buffers must be for the same device, and must also be a
   * multiple of the current approved size for the device.
   */
dfec8a14f   Mike Christie   fs: have ll_rw_bl...
3022
  void ll_rw_block(int op, int op_flags,  int nr, struct buffer_head *bhs[])
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3023
3024
3025
3026
3027
  {
  	int i;
  
  	for (i = 0; i < nr; i++) {
  		struct buffer_head *bh = bhs[i];
9cb569d60   Christoph Hellwig   remove SWRITE* I/...
3028
  		if (!trylock_buffer(bh))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3029
  			continue;
dfec8a14f   Mike Christie   fs: have ll_rw_bl...
3030
  		if (op == WRITE) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3031
  			if (test_clear_buffer_dirty(bh)) {
76c3073a8   Andrew Morton   [PATCH] end_buffe...
3032
  				bh->b_end_io = end_buffer_write_sync;
e60e5c50a   OGAWA Hirofumi   [PATCH] Trivial o...
3033
  				get_bh(bh);
dfec8a14f   Mike Christie   fs: have ll_rw_bl...
3034
  				submit_bh(op, op_flags, bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3035
3036
3037
  				continue;
  			}
  		} else {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3038
  			if (!buffer_uptodate(bh)) {
76c3073a8   Andrew Morton   [PATCH] end_buffe...
3039
  				bh->b_end_io = end_buffer_read_sync;
e60e5c50a   OGAWA Hirofumi   [PATCH] Trivial o...
3040
  				get_bh(bh);
dfec8a14f   Mike Christie   fs: have ll_rw_bl...
3041
  				submit_bh(op, op_flags, bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3042
3043
3044
3045
  				continue;
  			}
  		}
  		unlock_buffer(bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3046
3047
  	}
  }
1fe72eaa0   H Hartley Sweeten   fs/buffer.c: clea...
3048
  EXPORT_SYMBOL(ll_rw_block);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3049

2a222ca99   Mike Christie   fs: have submit_b...
3050
  void write_dirty_buffer(struct buffer_head *bh, int op_flags)
9cb569d60   Christoph Hellwig   remove SWRITE* I/...
3051
3052
3053
3054
3055
3056
3057
3058
  {
  	lock_buffer(bh);
  	if (!test_clear_buffer_dirty(bh)) {
  		unlock_buffer(bh);
  		return;
  	}
  	bh->b_end_io = end_buffer_write_sync;
  	get_bh(bh);
2a222ca99   Mike Christie   fs: have submit_b...
3059
  	submit_bh(REQ_OP_WRITE, op_flags, bh);
9cb569d60   Christoph Hellwig   remove SWRITE* I/...
3060
3061
  }
  EXPORT_SYMBOL(write_dirty_buffer);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3062
3063
3064
3065
3066
  /*
   * For a data-integrity writeout, we need to wait upon any in-progress I/O
   * and then start new I/O and then wait upon it.  The caller must have a ref on
   * the buffer_head.
   */
2a222ca99   Mike Christie   fs: have submit_b...
3067
  int __sync_dirty_buffer(struct buffer_head *bh, int op_flags)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3068
3069
3070
3071
3072
3073
  {
  	int ret = 0;
  
  	WARN_ON(atomic_read(&bh->b_count) < 1);
  	lock_buffer(bh);
  	if (test_clear_buffer_dirty(bh)) {
377254b2c   Xianting Tian   fs: prevent BUG_O...
3074
3075
3076
3077
3078
3079
3080
3081
  		/*
  		 * The bh should be mapped, but it might not be if the
  		 * device was hot-removed. Not much we can do but fail the I/O.
  		 */
  		if (!buffer_mapped(bh)) {
  			unlock_buffer(bh);
  			return -EIO;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3082
3083
  		get_bh(bh);
  		bh->b_end_io = end_buffer_write_sync;
2a222ca99   Mike Christie   fs: have submit_b...
3084
  		ret = submit_bh(REQ_OP_WRITE, op_flags, bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3085
  		wait_on_buffer(bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3086
3087
3088
3089
3090
3091
3092
  		if (!ret && !buffer_uptodate(bh))
  			ret = -EIO;
  	} else {
  		unlock_buffer(bh);
  	}
  	return ret;
  }
87e99511e   Christoph Hellwig   kill BH_Ordered flag
3093
3094
3095
3096
  EXPORT_SYMBOL(__sync_dirty_buffer);
  
  int sync_dirty_buffer(struct buffer_head *bh)
  {
70fd76140   Christoph Hellwig   block,fs: use REQ...
3097
  	return __sync_dirty_buffer(bh, REQ_SYNC);
87e99511e   Christoph Hellwig   kill BH_Ordered flag
3098
  }
1fe72eaa0   H Hartley Sweeten   fs/buffer.c: clea...
3099
  EXPORT_SYMBOL(sync_dirty_buffer);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3100
3101
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111
3112
3113
3114
3115
3116
3117
3118
3119
3120
3121
3122
3123
3124
3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
  
  /*
   * try_to_free_buffers() checks if all the buffers on this particular page
   * are unused, and releases them if so.
   *
   * Exclusion against try_to_free_buffers may be obtained by either
   * locking the page or by holding its mapping's private_lock.
   *
   * If the page is dirty but all the buffers are clean then we need to
   * be sure to mark the page clean as well.  This is because the page
   * may be against a block device, and a later reattachment of buffers
   * to a dirty page will set *all* buffers dirty.  Which would corrupt
   * filesystem data on the same device.
   *
   * The same applies to regular filesystem pages: if all the buffers are
   * clean then we set the page clean and proceed.  To do that, we require
   * total exclusion from __set_page_dirty_buffers().  That is obtained with
   * private_lock.
   *
   * try_to_free_buffers() is non-blocking.
   */
  /*
   * Non-zero when @bh has a non-zero b_count or has BH_Dirty or BH_Lock set
   * in b_state.
   */
  static inline int buffer_busy(struct buffer_head *bh)
  {
  	const unsigned long busy_bits = (1 << BH_Dirty) | (1 << BH_Lock);

  	return atomic_read(&bh->b_count) | (bh->b_state & busy_bits);
  }
  
  /*
   * Detach the buffer ring from @page if none of its buffers is busy.
   *
   * On success every buffer is taken off its association queue (when it has
   * a b_assoc_map), the ring head is stored in *@buffers_to_free, the page's
   * private data is detached, and 1 is returned.  If any buffer is busy,
   * nothing is modified and 0 is returned.
   */
  static int
  drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
  {
  	struct buffer_head *head = page_buffers(page);
  	struct buffer_head *bh = head;

  	/* Pass 1: bail out if anything in the (circular) ring is busy. */
  	do {
  		if (buffer_busy(bh))
  			return 0;
  		bh = bh->b_this_page;
  	} while (bh != head);

  	/* Pass 2: bh is back at head; unhook each buffer. */
  	do {
  		struct buffer_head *next = bh->b_this_page;

  		if (bh->b_assoc_map)
  			__remove_assoc_queue(bh);
  		bh = next;
  	} while (bh != head);

  	*buffers_to_free = head;
  	detach_page_private(page);

  	return 1;
  }
  
  int try_to_free_buffers(struct page *page)
  {
  	struct address_space * const mapping = page->mapping;
  	struct buffer_head *buffers_to_free = NULL;
  	int ret = 0;
  
  	BUG_ON(!PageLocked(page));
ecdfc9787   Linus Torvalds   Resurrect 'try_to...
3160
  	if (PageWriteback(page))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3161
3162
3163
3164
3165
3166
3167
3168
3169
  		return 0;
  
  	if (mapping == NULL) {		/* can this still happen? */
  		ret = drop_buffers(page, &buffers_to_free);
  		goto out;
  	}
  
  	spin_lock(&mapping->private_lock);
  	ret = drop_buffers(page, &buffers_to_free);
ecdfc9787   Linus Torvalds   Resurrect 'try_to...
3170
3171
3172
3173
3174
3175
3176
3177
3178
3179
  
  	/*
  	 * If the filesystem writes its buffers by hand (eg ext3)
  	 * then we can have clean buffers against a dirty page.  We
  	 * clean the page here; otherwise the VM will never notice
  	 * that the filesystem did any IO at all.
  	 *
  	 * Also, during truncate, discard_buffer will have marked all
  	 * the page's buffers clean.  We discover that here and clean
  	 * the page also.
87df7241b   Nick Piggin   [PATCH] Fix try_t...
3180
3181
3182
3183
  	 *
  	 * private_lock must be held over this entire operation in order
  	 * to synchronise against __set_page_dirty_buffers and prevent the
  	 * dirty bit from being lost.
ecdfc9787   Linus Torvalds   Resurrect 'try_to...
3184
  	 */
11f81becc   Tejun Heo   page_writeback: r...
3185
3186
  	if (ret)
  		cancel_dirty_page(page);
87df7241b   Nick Piggin   [PATCH] Fix try_t...
3187
  	spin_unlock(&mapping->private_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200
  out:
  	if (buffers_to_free) {
  		struct buffer_head *bh = buffers_to_free;
  
  		do {
  			struct buffer_head *next = bh->b_this_page;
  			free_buffer_head(bh);
  			bh = next;
  		} while (bh != buffers_to_free);
  	}
  	return ret;
  }
  EXPORT_SYMBOL(try_to_free_buffers);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3201
3202
3203
3204
3205
  /*
   * There are no bdflush tunables left.  But distributions are
   * still running obsolete flush daemons, so we terminate them here.
   *
   * Use of bdflush() is deprecated and will be removed in a future kernel.
5b0830cb9   Jens Axboe   writeback: get ri...
3206
   * The `flush-X' kernel threads fully replace bdflush daemons and this call.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3207
   */
bdc480e3b   Heiko Carstens   [CVE-2009-0029] S...
3208
  /*
   * bdflush() - obsolete system call kept only so old flush daemons go away.
   * @func: request code; the value 1 makes the calling task exit.
   * @data: unused.
   *
   * Callers without CAP_SYS_ADMIN get -EPERM.  The first five permitted
   * calls log a deprecation notice naming the calling process.  Returns 0
   * unless @func is 1, in which case do_exit(0) is called instead.
   */
  SYSCALL_DEFINE2(bdflush, int, func, long, data)
  {
  	/* Number of deprecation notices printed so far (capped at 5). */
  	static int msg_count;
  
  	if (!capable(CAP_SYS_ADMIN))
  		return -EPERM;
  
  	if (msg_count < 5) {
  		msg_count++;
  		printk(KERN_INFO
  			"warning: process `%s' used the obsolete bdflush"
  			" system call\n", current->comm);
  		printk(KERN_INFO "Fix your initscripts?\n");
  	}
  
  	/* func == 1 is the request that terminates the calling daemon. */
  	if (func == 1)
  		do_exit(0);
  	return 0;
  }
  
  /*
   * Buffer-head allocation
   */
a0a9b0433   Shai Fultheim   fs: Move bh_cache...
3233
  /*
   * Slab cache backing every struct buffer_head: created in buffer_init(),
   * allocated from in alloc_buffer_head(), returned to in free_buffer_head().
   */
  static struct kmem_cache *bh_cachep __read_mostly;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3234
3235
3236
3237
3238
  
  /*
   * Once the number of bh's in the machine exceeds this level, we start
   * stripping them in writeback.
   */
43be594a6   Zhang Yanfei   fs/buffer.c: chan...
3239
  /* Computed in buffer_init(); compared against the live count in recalc_bh_state(). */
  static unsigned long max_buffer_heads;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
  
  /*
   * Written by recalc_bh_state(): non-zero when the summed per-CPU bh counts
   * exceed max_buffer_heads, zero otherwise.
   */
  int buffer_heads_over_limit;
  
  /*
   * Per-CPU buffer_head bookkeeping.  Each CPU owns one instance (defined
   * right after this struct).
   */
  struct bh_accounting {
  	int nr;			/* Number of live bh's: ++ in alloc_buffer_head(), -- in free_buffer_head() */
  	int ratelimit;		/* Limit cacheline bouncing: counts recalc_bh_state() calls between cross-CPU sums */
  };
  
  /* One accounting record per CPU, both counters starting at zero. */
  static DEFINE_PER_CPU(struct bh_accounting, bh_accounting) = {0, 0};
  
  static void recalc_bh_state(void)
  {
  	int i;
  	int tot = 0;
ee1be8626   Christoph Lameter   fs: Use this_cpu_...
3254
  	if (__this_cpu_inc_return(bh_accounting.ratelimit) - 1 < 4096)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3255
  		return;
c7b92516a   Christoph Lameter   fs: Use this_cpu_...
3256
  	__this_cpu_write(bh_accounting.ratelimit, 0);
8a1434268   Eric Dumazet   [PATCH] HOTPLUG_C...
3257
  	for_each_online_cpu(i)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3258
3259
3260
  		tot += per_cpu(bh_accounting, i).nr;
  	buffer_heads_over_limit = (tot > max_buffer_heads);
  }
c7b92516a   Christoph Lameter   fs: Use this_cpu_...
3261

dd0fc66fb   Al Viro   [PATCH] gfp flags...
3262
  /*
   * Allocate a zeroed buffer_head from bh_cachep.
   * @gfp_flags: allocation flags handed straight to kmem_cache_zalloc().
   *
   * On success the association list head and the uptodate spinlock are
   * initialised and this CPU's live-bh count is bumped.  Returns the new
   * buffer_head, or NULL if the slab allocation failed.
   */
  struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
  {
  	struct buffer_head *bh;
  
  	bh = kmem_cache_zalloc(bh_cachep, gfp_flags);
  	if (!bh)
  		return NULL;
  
  	INIT_LIST_HEAD(&bh->b_assoc_buffers);
  	spin_lock_init(&bh->b_uptodate_lock);
  
  	/* The __this_cpu_* accounting below runs with preemption off. */
  	preempt_disable();
  	__this_cpu_inc(bh_accounting.nr);
  	recalc_bh_state();
  	preempt_enable();
  
  	return bh;
  }
  EXPORT_SYMBOL(alloc_buffer_head);
  
  /*
   * Return a buffer_head to bh_cachep and drop this CPU's live-bh count.
   * @bh: buffer_head to release; its b_assoc_buffers list must be empty,
   *      otherwise BUG_ON() fires.
   */
  void free_buffer_head(struct buffer_head *bh)
  {
  	struct list_head *assoc = &bh->b_assoc_buffers;
  
  	BUG_ON(!list_empty(assoc));
  
  	kmem_cache_free(bh_cachep, bh);
  
  	/* Per-CPU accounting is updated with preemption disabled. */
  	preempt_disable();
  	__this_cpu_dec(bh_accounting.nr);
  	recalc_bh_state();
  	preempt_enable();
  }
  EXPORT_SYMBOL(free_buffer_head);
fc4d24c9b   Sebastian Andrzej Siewior   fs/buffer: Conver...
3287
  static int buffer_exit_cpu_dead(unsigned int cpu)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3288
3289
3290
3291
3292
3293
3294
3295
  {
  	int i;
  	struct bh_lru *b = &per_cpu(bh_lrus, cpu);
  
  	for (i = 0; i < BH_LRU_SIZE; i++) {
  		brelse(b->bhs[i]);
  		b->bhs[i] = NULL;
  	}
c7b92516a   Christoph Lameter   fs: Use this_cpu_...
3296
  	this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr);
8a1434268   Eric Dumazet   [PATCH] HOTPLUG_C...
3297
  	per_cpu(bh_accounting, cpu).nr = 0;
fc4d24c9b   Sebastian Andrzej Siewior   fs/buffer: Conver...
3298
  	return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3299
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3300

389d1b083   Aneesh Kumar K.V   Add buffer head r...
3301
  /**
a6b91919e   Randy Dunlap   fs: fix kernel-do...
3302
   * bh_uptodate_or_lock - Test whether the buffer is uptodate
389d1b083   Aneesh Kumar K.V   Add buffer head r...
3303
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318
3319
3320
   * @bh: struct buffer_head
   *
   * Return true if the buffer is up-to-date and false,
   * with the buffer locked, if not.
   */
  int bh_uptodate_or_lock(struct buffer_head *bh)
  {
  	/* Fast path: already up to date, no locking needed. */
  	if (buffer_uptodate(bh))
  		return 1;
  
  	lock_buffer(bh);
  
  	/* Re-check under the lock; if still stale, return with it held. */
  	if (!buffer_uptodate(bh))
  		return 0;
  
  	/* Became up to date while we waited for the lock. */
  	unlock_buffer(bh);
  	return 1;
  }
  EXPORT_SYMBOL(bh_uptodate_or_lock);
  
  /**
a6b91919e   Randy Dunlap   fs: fix kernel-do...
3321
   * bh_submit_read - Submit a locked buffer for reading
389d1b083   Aneesh Kumar K.V   Add buffer head r...
3322
3323
3324
3325
3326
3327
3328
3329
3330
3331
3332
3333
3334
3335
3336
   * @bh: struct buffer_head
   *
   * Returns zero on success and -EIO on error.
   */
  int bh_submit_read(struct buffer_head *bh)
  {
  	BUG_ON(!buffer_locked(bh));
  
  	if (buffer_uptodate(bh)) {
  		unlock_buffer(bh);
  		return 0;
  	}
  
  	get_bh(bh);
  	bh->b_end_io = end_buffer_read_sync;
2a222ca99   Mike Christie   fs: have submit_b...
3337
  	submit_bh(REQ_OP_READ, 0, bh);
389d1b083   Aneesh Kumar K.V   Add buffer head r...
3338
3339
3340
3341
3342
3343
  	wait_on_buffer(bh);
  	if (buffer_uptodate(bh))
  		return 0;
  	return -EIO;
  }
  EXPORT_SYMBOL(bh_submit_read);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3344
3345
  /*
   * Boot-time setup for the buffer_head layer: create the slab cache,
   * compute max_buffer_heads, and register the CPU-dead hotplug callback.
   */
  void __init buffer_init(void)
  {
  	unsigned long budget_pages;
  	unsigned long bh_per_page;
  	int err;
  
  	bh_cachep = kmem_cache_create("buffer_head",
  				      sizeof(struct buffer_head), 0,
  				      (SLAB_RECLAIM_ACCOUNT | SLAB_PANIC |
  				       SLAB_MEM_SPREAD),
  				      NULL);
  
  	/*
  	 * Cap buffer heads at what fits in 10% of the pages reported by
  	 * nr_free_buffer_pages().
  	 */
  	budget_pages = (nr_free_buffer_pages() * 10) / 100;
  	bh_per_page = PAGE_SIZE / sizeof(struct buffer_head);
  	max_buffer_heads = budget_pages * bh_per_page;
  
  	/* Only a teardown callback is registered; there is no startup one. */
  	err = cpuhp_setup_state_nocalls(CPUHP_FS_BUFF_DEAD, "fs/buffer:dead",
  					NULL, buffer_exit_cpu_dead);
  	WARN_ON(err < 0);
  }