Blame view

fs/ext3/inode.c 106 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
  /*
   *  linux/fs/ext3/inode.c
   *
   * Copyright (C) 1992, 1993, 1994, 1995
   * Remy Card (card@masi.ibp.fr)
   * Laboratoire MASI - Institut Blaise Pascal
   * Universite Pierre et Marie Curie (Paris VI)
   *
   *  from
   *
   *  linux/fs/minix/inode.c
   *
   *  Copyright (C) 1991, 1992  Linus Torvalds
   *
   *  Goal-directed block allocation by Stephen Tweedie
e9ad5620b   Dave Kleikamp   [PATCH] ext3: Mor...
16
   *	(sct@redhat.com), 1993, 1998
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
17
18
19
   *  Big-endian to little-endian byte-swapping/bitmaps by
   *        David S. Miller (davem@caip.rutgers.edu), 1995
   *  64-bit file support on 64-bit platforms by Jakub Jelinek
e9ad5620b   Dave Kleikamp   [PATCH] ext3: Mor...
20
   *	(jj@sunsite.ms.mff.cuni.cz)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
21
22
23
   *
   *  Assorted race fixes, rewrite of ext3_get_block() by Al Viro, 2000
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
24
25
26
27
  #include <linux/fs.h>
  #include <linux/time.h>
  #include <linux/ext3_jbd.h>
  #include <linux/jbd.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
28
29
30
31
32
33
34
35
  #include <linux/highuid.h>
  #include <linux/pagemap.h>
  #include <linux/quotaops.h>
  #include <linux/string.h>
  #include <linux/buffer_head.h>
  #include <linux/writeback.h>
  #include <linux/mpage.h>
  #include <linux/uio.h>
caa38fb0f   Jens Axboe   [PATCH] ext3: mak...
36
  #include <linux/bio.h>
68c9d702b   Josef Bacik   generic block bas...
37
  #include <linux/fiemap.h>
b5ed3112b   Duane Griffin   ext3: ensure fast...
38
  #include <linux/namei.h>
785c4bcc0   Lukas Czerner   ext3: Add fixed t...
39
  #include <trace/events/ext3.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
40
41
42
43
  #include "xattr.h"
  #include "acl.h"
  
  static int ext3_writepage_trans_blocks(struct inode *inode);
ee3e77f18   Jan Kara   ext3: Improve tru...
44
  static int ext3_block_truncate_page(struct inode *inode, loff_t from);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
45
46
47
48
  
  /*
   * Test whether an inode is a fast symlink.
   */
d6859bfca   Andrew Morton   [PATCH] ext3: cle...
49
  static int ext3_inode_is_fast_symlink(struct inode *inode)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
50
51
52
  {
  	int ea_blocks = EXT3_I(inode)->i_file_acl ?
  		(inode->i_sb->s_blocksize >> 9) : 0;
d6859bfca   Andrew Morton   [PATCH] ext3: cle...
53
  	return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
54
  }
d6859bfca   Andrew Morton   [PATCH] ext3: cle...
55
56
  /*
   * The ext3 forget function must perform a revoke if we are freeing data
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
57
   * which has been journaled.  Metadata (eg. indirect blocks) must be
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
58
   * revoked in all cases.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
59
60
61
62
63
   *
   * "bh" may be NULL: a metadata block may have been freed from memory
   * but there may still be a record of it in the journal, and that record
   * still needs to be revoked.
   */
d6859bfca   Andrew Morton   [PATCH] ext3: cle...
64
  int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode,
  			struct buffer_head *bh, ext3_fsblk_t blocknr)
  {
  	int err;

  	might_sleep();

  	trace_ext3_forget(inode, is_metadata, blocknr);
  	BUFFER_TRACE(bh, "enter");

  	jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, "
  		  "data mode %lx\n",
  		  bh, is_metadata, inode->i_mode,
  		  test_opt(inode->i_sb, DATA_FLAGS));

  	/*
  	 * Never use the revoke function if we are doing full data
  	 * journaling: there is no need to, and a V1 superblock won't
  	 * support it.  Otherwise, only skip the revoke on un-journaled
  	 * data blocks.
  	 */
  	if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ||
  	    (!is_metadata && !ext3_should_journal_data(inode))) {
  		/* "bh" may be NULL, in which case there is nothing to forget. */
  		if (bh) {
  			BUFFER_TRACE(bh, "call journal_forget");
  			return ext3_journal_forget(handle, bh);
  		}
  		return 0;
  	}

  	/*
  	 * data!=journal && (is_metadata || should_journal_data(inode))
  	 */
  	BUFFER_TRACE(bh, "call ext3_journal_revoke");
  	err = ext3_journal_revoke(handle, blocknr, bh);
  	if (err)
  		ext3_abort(inode->i_sb, __func__,
  			   "error %d when attempting revoke", err);
  	BUFFER_TRACE(bh, "exit");
  	return err;
  }
  
  /*
d6859bfca   Andrew Morton   [PATCH] ext3: cle...
106
   * Work out how many blocks we need to proceed with the next chunk of a
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
107
108
   * truncate transaction.
   */
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
109
  static unsigned long blocks_for_truncate(struct inode *inode)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
  {
  	unsigned long needed;
  
  	needed = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9);
  
  	/* Give ourselves just enough room to cope with inodes in which
  	 * i_blocks is corrupt: we've seen disk corruptions in the past
  	 * which resulted in random data in an inode which looked enough
  	 * like a regular file for ext3 to try to delete it.  Things
  	 * will go a bit crazy if that happens, but at least we should
  	 * try not to panic the whole kernel. */
  	if (needed < 2)
  		needed = 2;
  
  	/* But we need to bound the transaction so we don't overflow the
  	 * journal. */
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
126
  	if (needed > EXT3_MAX_TRANS_DATA)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
127
  		needed = EXT3_MAX_TRANS_DATA;
1f54587be   Jan Kara   [PATCH] quota: ex...
128
  	return EXT3_DATA_TRANS_BLOCKS(inode->i_sb) + needed;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
129
  }
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
130
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
131
132
133
134
135
136
137
   * Truncate transactions can be complex and absolutely huge.  So we need to
   * be able to restart the transaction at a convenient checkpoint to make
   * sure we don't overflow the journal.
   *
   * start_transaction gets us a new handle for a truncate transaction,
   * and extend_transaction tries to extend the existing one a bit.  If
   * extend fails, we need to propagate the failure up and restart the
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
138
   * transaction in the top-level truncate loop. --sct
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
139
   */
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
140
  static handle_t *start_transaction(struct inode *inode)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
  {
  	handle_t *result;
  
  	result = ext3_journal_start(inode, blocks_for_truncate(inode));
  	if (!IS_ERR(result))
  		return result;
  
  	ext3_std_error(inode->i_sb, PTR_ERR(result));
  	return result;
  }
  
  /*
   * Attempt to grow the current truncate transaction.
   *
   * Returns 0 when the handle has (or was given) enough room to continue,
   * and 1 when no room could be made and the transaction has to be
   * restarted by the caller.
   */
  static int try_to_extend_transaction(handle_t *handle, struct inode *inode)
  {
  	/* Still above the reserve threshold: no extension needed. */
  	if (handle->h_buffer_credits > EXT3_RESERVE_TRANS_BLOCKS)
  		return 0;

  	/* A non-zero result from the extend call means it failed. */
  	return ext3_journal_extend(handle, blocks_for_truncate(inode)) ? 1 : 0;
  }
  
  /*
   * Restart the transaction associated with *handle.  This does a commit,
   * so before we call here everything must be consistently dirtied against
   * this transaction.
   */
00171d3c7   Jan Kara   ext3: Fix possibl...
172
  static int truncate_restart_transaction(handle_t *handle, struct inode *inode)
  {
  	int ret;

  	jbd_debug(2, "restarting handle %p\n", handle);
  	/*
  	 * Drop truncate_mutex to avoid deadlock with ext3_get_blocks_handle
  	 * At this moment, get_block can be called only for blocks inside
  	 * i_size since page cache has been already dropped and writes are
  	 * blocked by i_mutex. So we can safely drop the truncate_mutex.
  	 */
  	mutex_unlock(&EXT3_I(inode)->truncate_mutex);
  	ret = ext3_journal_restart(handle, blocks_for_truncate(inode));
  	/* Re-take the mutex so the caller sees the same locking state. */
  	mutex_lock(&EXT3_I(inode)->truncate_mutex);
  	return ret;
  }
  
  /*
ac14a95b5   Al Viro   convert ext3 to -...
190
   * Called at inode eviction from icache
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
191
   */
ac14a95b5   Al Viro   convert ext3 to -...
192
  void ext3_evict_inode (struct inode *inode)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
193
  {
b22570d9a   Jan Kara   ext3: Fix data co...
194
  	struct ext3_inode_info *ei = EXT3_I(inode);
ac14a95b5   Al Viro   convert ext3 to -...
195
  	struct ext3_block_alloc_info *rsv;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
196
  	handle_t *handle;
ac14a95b5   Al Viro   convert ext3 to -...
197
  	int want_delete = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
198

785c4bcc0   Lukas Czerner   ext3: Add fixed t...
199
  	trace_ext3_evict_inode(inode);
ac14a95b5   Al Viro   convert ext3 to -...
200
  	if (!inode->i_nlink && !is_bad_inode(inode)) {
871a29315   Christoph Hellwig   dquot: cleanup dq...
201
  		dquot_initialize(inode);
ac14a95b5   Al Viro   convert ext3 to -...
202
203
  		want_delete = 1;
  	}
907f4554e   Christoph Hellwig   dquot: move dquot...
204

b22570d9a   Jan Kara   ext3: Fix data co...
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
  	/*
  	 * When journalling data dirty buffers are tracked only in the journal.
  	 * So although mm thinks everything is clean and ready for reaping the
  	 * inode might still have some pages to write in the running
  	 * transaction or waiting to be checkpointed. Thus calling
  	 * journal_invalidatepage() (via truncate_inode_pages()) to discard
  	 * these buffers can cause data loss. Also even if we did not discard
  	 * these buffers, we would have no way to find them after the inode
  	 * is reaped and thus user could see stale data if he tries to read
  	 * them before the transaction is checkpointed. So be careful and
  	 * force everything to disk here... We use ei->i_datasync_tid to
  	 * store the newest transaction containing inode's data.
  	 *
  	 * Note that directories do not have this problem because they don't
  	 * use page cache.
bcdd0c160   Dan Carpenter   ext3: NULL derefe...
220
221
222
  	 *
  	 * The s_journal check handles the case when ext3_get_journal() fails
  	 * and puts the journal inode.
b22570d9a   Jan Kara   ext3: Fix data co...
223
224
  	 */
  	if (inode->i_nlink && ext3_should_journal_data(inode) &&
bcdd0c160   Dan Carpenter   ext3: NULL derefe...
225
  	    EXT3_SB(inode->i_sb)->s_journal &&
b22570d9a   Jan Kara   ext3: Fix data co...
226
227
228
229
230
231
232
233
  	    (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) {
  		tid_t commit_tid = atomic_read(&ei->i_datasync_tid);
  		journal_t *journal = EXT3_SB(inode->i_sb)->s_journal;
  
  		log_start_commit(journal, commit_tid);
  		log_wait_commit(journal, commit_tid);
  		filemap_write_and_wait(&inode->i_data);
  	}
fef266580   Mark Fasheh   [PATCH] update fi...
234
  	truncate_inode_pages(&inode->i_data, 0);
ac14a95b5   Al Viro   convert ext3 to -...
235
  	ext3_discard_reservation(inode);
b22570d9a   Jan Kara   ext3: Fix data co...
236
237
  	rsv = ei->i_block_alloc_info;
  	ei->i_block_alloc_info = NULL;
ac14a95b5   Al Viro   convert ext3 to -...
238
239
240
241
  	if (unlikely(rsv))
  		kfree(rsv);
  
  	if (!want_delete)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
242
243
244
245
  		goto no_delete;
  
  	handle = start_transaction(inode);
  	if (IS_ERR(handle)) {
d6859bfca   Andrew Morton   [PATCH] ext3: cle...
246
247
248
249
250
  		/*
  		 * If we're going to skip the normal cleanup, we still need to
  		 * make sure that the in-core orphan linked list is properly
  		 * cleaned up.
  		 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
251
252
253
254
255
256
257
258
259
260
  		ext3_orphan_del(NULL, inode);
  		goto no_delete;
  	}
  
  	if (IS_SYNC(inode))
  		handle->h_sync = 1;
  	inode->i_size = 0;
  	if (inode->i_blocks)
  		ext3_truncate(inode);
  	/*
40680f2fa   Jan Kara   ext3: Convert ext...
261
262
263
264
  	 * Kill off the orphan record created when the inode lost the last
  	 * link.  Note that ext3_orphan_del() has to be able to cope with the
  	 * deletion of a non-existent orphan - ext3_truncate() could
  	 * have removed the record.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
265
266
  	 */
  	ext3_orphan_del(handle, inode);
b22570d9a   Jan Kara   ext3: Fix data co...
267
  	ei->i_dtime = get_seconds();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
268

ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
269
  	/*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
270
271
272
273
  	 * One subtle ordering requirement: if anything has gone wrong
  	 * (transaction abort, IO errors, whatever), then we can still
  	 * do these next steps (the fs will already have been marked as
  	 * having errors), but we can't free the inode if the mark_dirty
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
274
  	 * fails.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
275
  	 */
ac14a95b5   Al Viro   convert ext3 to -...
276
277
278
279
280
281
282
283
284
  	if (ext3_mark_inode_dirty(handle, inode)) {
  		/* If that failed, just dquot_drop() and be done with that */
  		dquot_drop(inode);
  		end_writeback(inode);
  	} else {
  		ext3_xattr_delete_inode(handle, inode);
  		dquot_free_inode(inode);
  		dquot_drop(inode);
  		end_writeback(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
285
  		ext3_free_inode(handle, inode);
ac14a95b5   Al Viro   convert ext3 to -...
286
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
287
288
289
  	ext3_journal_stop(handle);
  	return;
  no_delete:
ac14a95b5   Al Viro   convert ext3 to -...
290
291
  	end_writeback(inode);
  	dquot_drop(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
292
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
293
294
295
296
297
298
299
300
301
302
303
  /*
   * One step of a block-mapping chain, as filled in by add_chain():
   * the location of a block pointer, a snapshot of its value, and the
   * buffer that hosts it.
   */
  typedef struct {
  	__le32	*p;		/* address the block number was read from */
  	__le32	key;		/* copy of *p taken when the step was recorded */
  	struct buffer_head *bh;	/* buffer holding *p; callers pass NULL for the inode's own i_data */
  } Indirect;
  
  /* Record one chain step: where the pointer lives, its value, its buffer. */
  static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v)
  {
  	p->p = v;
  	p->key = *v;	/* snapshot, compared again later by verify_chain() */
  	p->bh = bh;
  }
d6859bfca   Andrew Morton   [PATCH] ext3: cle...
304
  static int verify_chain(Indirect *from, Indirect *to)
  {
  	Indirect *step;

  	/*
  	 * Every recorded key in [from, to] must still equal the value at
  	 * its source location; any mismatch means the chain has changed.
  	 */
  	for (step = from; step <= to; step++) {
  		if (step->key != *step->p)
  			return 0;
  	}
  	return 1;
  }
  
  /**
   *	ext3_block_to_path - parse the block number into array of offsets
   *	@inode: inode in question (we are only interested in its superblock)
   *	@i_block: block number to be parsed
   *	@offsets: array to store the offsets in
   *      @boundary: set this non-zero if the referred-to block is likely to be
   *             followed (on disk) by an indirect block.
   *
   *	To store the locations of file's data ext3 uses a data structure common
   *	for UNIX filesystems - tree of pointers anchored in the inode, with
   *	data blocks at leaves and indirect blocks in intermediate nodes.
   *	This function translates the block number into path in that tree -
   *	return value is the path length and @offsets[n] is the offset of
   *	pointer to (n+1)th node in the nth one. If @i_block is out of range
   *	(negative or too large) warning is printed and zero returned.
   *
   *	Note: function doesn't find node addresses, so no IO is needed. All
   *	we need to know is the capacity of indirect blocks (taken from the
   *	inode->i_sb).
   */
  
  /*
   * Portability note: the last comparison (check that we fit into triple
   * indirect block) is spelled differently, because otherwise on an
   * architecture with 32-bit longs and 8Kb pages we might get into trouble
   * if our filesystem had 8Kb blocks. We might use long long, but that would
   * kill us on x86. Oh, well, at least the sign propagation does not matter -
   * i_block would have to be negative in the very beginning, so we would not
   * get there at all.
   */
  
  static int ext3_block_to_path(struct inode *inode,
  			long i_block, int offsets[4], int *boundary)
  {
  	int ptrs = EXT3_ADDR_PER_BLOCK(inode->i_sb);
  	int ptrs_bits = EXT3_ADDR_PER_BLOCK_BITS(inode->i_sb);
  	const long direct_blocks = EXT3_NDIR_BLOCKS,
  		indirect_blocks = ptrs,
  		double_blocks = (1 << (ptrs_bits * 2));
  	int depth = 0;
  	int last = 0;

  	if (i_block < 0) {
  		ext3_warning (inode->i_sb, "ext3_block_to_path", "block < 0");
  		goto out;
  	}

  	/* Direct pointer held in the inode. */
  	if (i_block < direct_blocks) {
  		offsets[0] = i_block;
  		depth = 1;
  		last = direct_blocks;
  		goto out;
  	}

  	/* Single indirect: index relative to the end of the direct range. */
  	i_block -= direct_blocks;
  	if (i_block < indirect_blocks) {
  		offsets[0] = EXT3_IND_BLOCK;
  		offsets[1] = i_block;
  		depth = 2;
  		last = ptrs;
  		goto out;
  	}

  	/* Double indirect. */
  	i_block -= indirect_blocks;
  	if (i_block < double_blocks) {
  		offsets[0] = EXT3_DIND_BLOCK;
  		offsets[1] = i_block >> ptrs_bits;
  		offsets[2] = i_block & (ptrs - 1);
  		depth = 3;
  		last = ptrs;
  		goto out;
  	}

  	/*
  	 * Triple indirect.  The range check is written as a shift compare
  	 * (see the portability note above this function).
  	 */
  	i_block -= double_blocks;
  	if ((i_block >> (ptrs_bits * 2)) < ptrs) {
  		offsets[0] = EXT3_TIND_BLOCK;
  		offsets[1] = i_block >> (ptrs_bits * 2);
  		offsets[2] = (i_block >> ptrs_bits) & (ptrs - 1);
  		offsets[3] = i_block & (ptrs - 1);
  		depth = 4;
  		last = ptrs;
  	} else {
  		ext3_warning(inode->i_sb, "ext3_block_to_path", "block > big");
  	}

  out:
  	/* Uses i_block as adjusted above, i.e. relative to its level. */
  	if (boundary)
  		*boundary = last - 1 - (i_block & (ptrs - 1));
  	return depth;
  }
  
  /**
   *	ext3_get_branch - read the chain of indirect blocks leading to data
   *	@inode: inode in question
   *	@depth: depth of the chain (1 - direct pointer, etc.)
   *	@offsets: offsets of pointers in inode/indirect blocks
   *	@chain: place to store the result
   *	@err: here we store the error value
   *
   *	Function fills the array of triples <key, p, bh> and returns %NULL
   *	if everything went OK or the pointer to the last filled triple
   *	(incomplete one) otherwise. Upon the return chain[i].key contains
   *	the number of (i+1)-th block in the chain (as it is stored in memory,
   *	i.e. little-endian 32-bit), chain[i].p contains the address of that
   *	number (it points into struct inode for i==0 and into the bh->b_data
   *	for i>0) and chain[i].bh points to the buffer_head of i-th indirect
   *	block for i>0 and NULL for i==0. In other words, it holds the block
   *	numbers of the chain, addresses they were taken from (and where we can
   *	verify that chain did not change) and buffer_heads hosting these
   *	numbers.
   *
   *	Function stops when it stumbles upon zero pointer (absent block)
   *		(pointer to last triple returned, *@err == 0)
   *	or when it gets an IO error reading an indirect block
   *		(ditto, *@err == -EIO)
   *	or when it notices that chain had been changed while it was reading
   *		(ditto, *@err == -EAGAIN)
   *	or when it reads all @depth-1 indirect blocks successfully and finds
   *	the whole chain, all way to the data (returns %NULL, *err == 0).
   */
  static Indirect *ext3_get_branch(struct inode *inode, int depth, int *offsets,
  				 Indirect chain[4], int *err)
  {
  	struct super_block *sb = inode->i_sb;
  	Indirect *last = chain;
  	struct buffer_head *bh;

  	*err = 0;

  	/* First step lives in the inode: i_data is not going away, no lock needed */
  	add_chain(chain, NULL, EXT3_I(inode)->i_data + *offsets);
  	if (!last->key)
  		return last;		/* absent block, *err stays 0 */

  	for (--depth; depth != 0; --depth) {
  		bh = sb_bread(sb, le32_to_cpu(last->key));
  		if (!bh) {
  			/* Could not read the indirect block. */
  			*err = -EIO;
  			return last;
  		}

  		/* Reader: pointers */
  		if (!verify_chain(chain, last)) {
  			/* Chain was modified while we were reading it. */
  			brelse(bh);
  			*err = -EAGAIN;
  			return last;
  		}

  		++offsets;
  		++last;
  		add_chain(last, bh, (__le32 *)bh->b_data + *offsets);
  		/* Reader: end */

  		if (!last->key)
  			return last;	/* absent block, *err stays 0 */
  	}

  	/* Whole chain resolved down to the data block. */
  	return NULL;
  }
  
  /**
   *	ext3_find_near - find a place for allocation with sufficient locality
   *	@inode: owner
   *	@ind: descriptor of indirect block.
   *
1cc8dcf56   Benoit Boissinot   ext*: spelling fi...
451
   *	This function returns the preferred place for block allocation.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
452
453
454
455
456
   *	It is used when heuristic for sequential allocation fails.
   *	Rules are:
   *	  + if there is a block to the left of our position - allocate near it.
   *	  + if pointer will live in indirect block - allocate near that block.
   *	  + if pointer will live in inode - allocate in the same
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
457
   *	    cylinder group.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
458
459
460
461
462
463
464
465
   *
   * In the latter case we colour the starting block by the callers PID to
   * prevent it from clashing with concurrent allocations for a different inode
   * in the same block group.   The PID is used here so that functionally related
   * files will be close-by on-disk.
   *
   *	Caller must make sure that @ind is valid and will stay that way.
   */
43d23f903   Mingming Cao   [PATCH] ext3_fsbl...
466
  static ext3_fsblk_t ext3_find_near(struct inode *inode, Indirect *ind)
  {
  	struct ext3_inode_info *ei = EXT3_I(inode);
  	__le32 *first;
  	__le32 *cur;
  	ext3_fsblk_t bg_start;
  	ext3_grpblk_t colour;

  	/* The pointer array is the indirect block's data or the inode's i_data. */
  	if (ind->bh)
  		first = (__le32 *) ind->bh->b_data;
  	else
  		first = ei->i_data;

  	/* Walk left from our slot looking for an already-mapped block. */
  	cur = ind->p - 1;
  	while (cur >= first) {
  		if (*cur)
  			return le32_to_cpu(*cur);
  		cur--;
  	}

  	/* Nothing to the left: fall back to the indirect block's own location. */
  	if (ind->bh)
  		return ind->bh->b_blocknr;

  	/*
  	 * The pointer lives in the inode itself, so aim for the inode's
  	 * block group, offset by a colour derived from the caller's PID.
  	 */
  	bg_start = ext3_group_first_block_no(inode->i_sb, ei->i_block_group);
  	colour = (current->pid % 16) *
  			(EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16);
  	return bg_start + colour;
  }
  
  /**
1cc8dcf56   Benoit Boissinot   ext*: spelling fi...
495
   *	ext3_find_goal - find a preferred place for allocation.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
496
497
   *	@inode: owner
   *	@block:  block we want
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
498
   *	@partial: pointer to the last triple within a chain
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
499
   *
1cc8dcf56   Benoit Boissinot   ext*: spelling fi...
500
   *	Normally this function finds the preferred place for block allocation,
fb01bfdac   Akinobu Mita   ext[234]: remove ...
501
   *	returns it.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
502
   */
43d23f903   Mingming Cao   [PATCH] ext3_fsbl...
503
  static ext3_fsblk_t ext3_find_goal(struct inode *inode, long block,
fb01bfdac   Akinobu Mita   ext[234]: remove ...
504
  				   Indirect *partial)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
505
  {
d6859bfca   Andrew Morton   [PATCH] ext3: cle...
506
507
508
  	struct ext3_block_alloc_info *block_i;
  
  	block_i =  EXT3_I(inode)->i_block_alloc_info;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
509
510
511
512
513
514
515
  
  	/*
  	 * try the heuristic for sequential allocation,
  	 * failing that at least try to get decent locality.
  	 */
  	if (block_i && (block == block_i->last_alloc_logical_block + 1)
  		&& (block_i->last_alloc_physical_block != 0)) {
fe55c4523   Mingming Cao   [PATCH] ext3: rem...
516
  		return block_i->last_alloc_physical_block + 1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
517
  	}
fe55c4523   Mingming Cao   [PATCH] ext3: rem...
518
  	return ext3_find_near(inode, partial);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
519
  }
d6859bfca   Andrew Morton   [PATCH] ext3: cle...
520

b47b24781   Mingming Cao   [PATCH] ext3_get_...
521
  /**
a4c18ad2e   Namhyung Kim   ext3: Update kern...
522
   *	ext3_blks_to_allocate - Look up the block map and count the number
b47b24781   Mingming Cao   [PATCH] ext3_get_...
523
524
   *	of direct blocks need to be allocated for the given branch.
   *
e9ad5620b   Dave Kleikamp   [PATCH] ext3: Mor...
525
   *	@branch: chain of indirect blocks
b47b24781   Mingming Cao   [PATCH] ext3_get_...
526
527
528
529
530
531
532
   *	@k: number of blocks need for indirect blocks
   *	@blks: number of data blocks to be mapped.
   *	@blocks_to_boundary:  the offset in the indirect block
   *
   *	return the total number of blocks to be allocated, including the
   *	direct and indirect blocks.
   */
d6859bfca   Andrew Morton   [PATCH] ext3: cle...
533
  static int ext3_blks_to_allocate(Indirect *branch, int k, unsigned long blks,
  		int blocks_to_boundary)
  {
  	unsigned long count = 0;

  	if (k > 0) {
  		/*
  		 * Simple case: [t,d]indirect block(s) are not allocated yet,
  		 * so no block on that path is allocated either.  Allocation
  		 * across the boundary is not handled, so cap the request at
  		 * blocks_to_boundary + 1.
  		 */
  		if (blks < blocks_to_boundary + 1)
  			count += blks;
  		else
  			count += blocks_to_boundary + 1;
  		return count;
  	}

  	/*
  	 * Count the first slot unconditionally, then extend over the
  	 * following slots while they are zero, stopping at the requested
  	 * length or at the boundary.
  	 */
  	for (count = 1;
  	     count < blks && count <= blocks_to_boundary &&
  	     le32_to_cpu(branch[0].p[count]) == 0;
  	     count++)
  		;
  	return count;
  }
  
  /**
a4c18ad2e   Namhyung Kim   ext3: Update kern...
560
561
562
563
   *	ext3_alloc_blocks - multiple allocate blocks needed for a branch
   *	@handle: handle for this transaction
   *	@inode: owner
   *	@goal: preferred place for allocation
b47b24781   Mingming Cao   [PATCH] ext3_get_...
564
565
   *	@indirect_blks: the number of blocks need to allocate for indirect
   *			blocks
a4c18ad2e   Namhyung Kim   ext3: Update kern...
566
   *	@blks:	number of blocks need to allocated for direct blocks
b47b24781   Mingming Cao   [PATCH] ext3_get_...
567
568
   *	@new_blocks: on return it will store the new block numbers for
   *	the indirect blocks(if needed) and the first direct block,
a4c18ad2e   Namhyung Kim   ext3: Update kern...
569
570
571
   *	@err: here we store the error value
   *
   *	return the number of direct blocks allocated
b47b24781   Mingming Cao   [PATCH] ext3_get_...
572
573
   */
  static int ext3_alloc_blocks(handle_t *handle, struct inode *inode,
  			ext3_fsblk_t goal, int indirect_blks, int blks,
  			ext3_fsblk_t new_blocks[4], int *err)
  {
  	int remaining, i;
  	unsigned long got = 0;
  	int filled = 0;
  	ext3_fsblk_t next_block = 0;

  	/*
  	 * Try to get all requested blocks in one go, best effort.  The
  	 * minimum needed to build a branch is the not-yet-allocated
  	 * indirect blocks plus the first direct block.
  	 */
  	remaining = blks + indirect_blks;

  	for (;;) {
  		got = remaining;
  		/* one request covers both indirect and direct blocks */
  		next_block = ext3_new_blocks(handle,inode,goal,&got,err);
  		if (*err)
  			goto failed_out;

  		remaining -= got;

  		/* hand the first blocks of this run to the indirect slots */
  		for (; filled < indirect_blks && got; filled++, got--)
  			new_blocks[filled] = next_block++;

  		/* anything left over belongs to the direct blocks */
  		if (got > 0)
  			break;
  	}

  	/* record the first direct block after the indirect ones */
  	new_blocks[filled] = next_block;

  	*err = 0;
  	/* number of blocks obtained for direct blocks */
  	return got;

  failed_out:
  	/* give back the indirect blocks collected so far */
  	for (i = 0; i < filled; i++)
  		ext3_free_blocks(handle, inode, new_blocks[i], 1);
  	return 0;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
623
624
625
  
  /**
   *	ext3_alloc_branch - allocate and set up a chain of blocks.
a4c18ad2e   Namhyung Kim   ext3: Update kern...
626
   *	@handle: handle for this transaction
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
627
   *	@inode: owner
b47b24781   Mingming Cao   [PATCH] ext3_get_...
628
629
   *	@indirect_blks: number of allocated indirect blocks
   *	@blks: number of allocated direct blocks
a4c18ad2e   Namhyung Kim   ext3: Update kern...
630
   *	@goal: preferred place for allocation
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
631
632
633
   *	@offsets: offsets (in the blocks) to store the pointers to next.
   *	@branch: place to store the chain in.
   *
b47b24781   Mingming Cao   [PATCH] ext3_get_...
634
   *	This function allocates blocks, zeroes out all but the last one,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
635
636
637
638
639
640
   *	links them into chain and (if we are synchronous) writes them to disk.
   *	In other words, it prepares a branch that can be spliced onto the
   *	inode. It stores the information about that chain in the branch[], in
   *	the same format as ext3_get_branch() would do. We are calling it after
   *	we had read the existing part of chain and partial points to the last
   *	triple of that (one with zero ->key). Upon the exit we have the same
5b1168792   Glauber de Oliveira Costa   [PATCH] Locking p...
641
   *	picture as after the successful ext3_get_block(), except that in one
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
642
643
644
645
646
647
648
649
650
   *	place chain is disconnected - *branch->p is still zero (we did not
   *	set the last link), but branch->key contains the number that should
   *	be placed into *branch->p to fill that gap.
   *
   *	If allocation fails we free all blocks we've allocated (and forget
   *	their buffer_heads) and return the error value the from failed
   *	ext3_alloc_block() (normally -ENOSPC). Otherwise we set the chain
   *	as described above and return 0.
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
651
  static int ext3_alloc_branch(handle_t *handle, struct inode *inode,
43d23f903   Mingming Cao   [PATCH] ext3_fsbl...
652
  			int indirect_blks, int *blks, ext3_fsblk_t goal,
b47b24781   Mingming Cao   [PATCH] ext3_get_...
653
  			int *offsets, Indirect *branch)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
654
655
  {
  	int blocksize = inode->i_sb->s_blocksize;
b47b24781   Mingming Cao   [PATCH] ext3_get_...
656
  	int i, n = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
657
  	int err = 0;
b47b24781   Mingming Cao   [PATCH] ext3_get_...
658
659
  	struct buffer_head *bh;
  	int num;
43d23f903   Mingming Cao   [PATCH] ext3_fsbl...
660
661
  	ext3_fsblk_t new_blocks[4];
  	ext3_fsblk_t current_block;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
662

b47b24781   Mingming Cao   [PATCH] ext3_get_...
663
664
665
666
  	num = ext3_alloc_blocks(handle, inode, goal, indirect_blks,
  				*blks, new_blocks, &err);
  	if (err)
  		return err;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
667

b47b24781   Mingming Cao   [PATCH] ext3_get_...
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
  	branch[0].key = cpu_to_le32(new_blocks[0]);
  	/*
  	 * metadata blocks and data blocks are allocated.
  	 */
  	for (n = 1; n <= indirect_blks;  n++) {
  		/*
  		 * Get buffer_head for parent block, zero it out
  		 * and set the pointer to new one, then send
  		 * parent to disk.
  		 */
  		bh = sb_getblk(inode->i_sb, new_blocks[n-1]);
  		branch[n].bh = bh;
  		lock_buffer(bh);
  		BUFFER_TRACE(bh, "call get_create_access");
  		err = ext3_journal_get_create_access(handle, bh);
  		if (err) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
684
  			unlock_buffer(bh);
b47b24781   Mingming Cao   [PATCH] ext3_get_...
685
686
687
  			brelse(bh);
  			goto failed;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
688

b47b24781   Mingming Cao   [PATCH] ext3_get_...
689
690
691
692
693
694
695
696
697
698
699
700
701
  		memset(bh->b_data, 0, blocksize);
  		branch[n].p = (__le32 *) bh->b_data + offsets[n];
  		branch[n].key = cpu_to_le32(new_blocks[n]);
  		*branch[n].p = branch[n].key;
  		if ( n == indirect_blks) {
  			current_block = new_blocks[n];
  			/*
  			 * End of chain, update the last new metablock of
  			 * the chain to point to the new allocated
  			 * data blocks numbers
  			 */
  			for (i=1; i < num; i++)
  				*(branch[n].p + i) = cpu_to_le32(++current_block);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
702
  		}
b47b24781   Mingming Cao   [PATCH] ext3_get_...
703
704
705
  		BUFFER_TRACE(bh, "marking uptodate");
  		set_buffer_uptodate(bh);
  		unlock_buffer(bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
706

b47b24781   Mingming Cao   [PATCH] ext3_get_...
707
708
709
710
711
712
713
714
  		BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
  		err = ext3_journal_dirty_metadata(handle, bh);
  		if (err)
  			goto failed;
  	}
  	*blks = num;
  	return err;
  failed:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
715
  	/* Allocation failed, free what we already allocated */
b47b24781   Mingming Cao   [PATCH] ext3_get_...
716
  	for (i = 1; i <= n ; i++) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
717
718
719
  		BUFFER_TRACE(branch[i].bh, "call journal_forget");
  		ext3_journal_forget(handle, branch[i].bh);
  	}
b47b24781   Mingming Cao   [PATCH] ext3_get_...
720
721
722
723
  	for (i = 0; i <indirect_blks; i++)
  		ext3_free_blocks(handle, inode, new_blocks[i], 1);
  
  	ext3_free_blocks(handle, inode, new_blocks[i], num);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
724
725
726
727
  	return err;
  }
  
  /**
d6859bfca   Andrew Morton   [PATCH] ext3: cle...
728
   * ext3_splice_branch - splice the allocated branch onto inode.
a4c18ad2e   Namhyung Kim   ext3: Update kern...
729
   * @handle: handle for this transaction
d6859bfca   Andrew Morton   [PATCH] ext3: cle...
730
731
   * @inode: owner
   * @block: (logical) number of block we are adding
d6859bfca   Andrew Morton   [PATCH] ext3: cle...
732
733
734
735
736
737
738
   * @where: location of missing link
   * @num:   number of indirect blocks we are adding
   * @blks:  number of direct blocks we are adding
   *
   * This function fills the missing link and does all housekeeping needed in
   * inode (->i_blocks, etc.). In case of success we end up with the full
   * chain to new block and return 0.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
739
   */
d6859bfca   Andrew Morton   [PATCH] ext3: cle...
740
741
  static int ext3_splice_branch(handle_t *handle, struct inode *inode,
  			long block, Indirect *where, int num, int blks)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
742
743
744
  {
  	int i;
  	int err = 0;
d6859bfca   Andrew Morton   [PATCH] ext3: cle...
745
  	struct ext3_block_alloc_info *block_i;
43d23f903   Mingming Cao   [PATCH] ext3_fsbl...
746
  	ext3_fsblk_t current_block;
fe8bc91c4   Jan Kara   ext3: Wait for pr...
747
  	struct ext3_inode_info *ei = EXT3_I(inode);
d6859bfca   Andrew Morton   [PATCH] ext3: cle...
748

fe8bc91c4   Jan Kara   ext3: Wait for pr...
749
  	block_i = ei->i_block_alloc_info;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
750
751
752
753
754
755
756
757
758
759
760
  	/*
  	 * If we're splicing into a [td]indirect block (as opposed to the
  	 * inode) then we need to get write access to the [td]indirect block
  	 * before the splice.
  	 */
  	if (where->bh) {
  		BUFFER_TRACE(where->bh, "get_write_access");
  		err = ext3_journal_get_write_access(handle, where->bh);
  		if (err)
  			goto err_out;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
761
762
763
  	/* That's it */
  
  	*where->p = where->key;
d6859bfca   Andrew Morton   [PATCH] ext3: cle...
764
765
766
767
768
  
  	/*
  	 * Update the host buffer_head or inode to point to more just allocated
  	 * direct blocks blocks
  	 */
b47b24781   Mingming Cao   [PATCH] ext3_get_...
769
  	if (num == 0 && blks > 1) {
5dea5176e   Mingming Cao   [PATCH] ext3: mul...
770
  		current_block = le32_to_cpu(where->key) + 1;
b47b24781   Mingming Cao   [PATCH] ext3_get_...
771
772
773
  		for (i = 1; i < blks; i++)
  			*(where->p + i ) = cpu_to_le32(current_block++);
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
774
775
776
777
778
779
780
  
  	/*
  	 * update the most recently allocated logical & physical block
  	 * in i_block_alloc_info, to assist find the proper goal block for next
  	 * allocation
  	 */
  	if (block_i) {
b47b24781   Mingming Cao   [PATCH] ext3_get_...
781
  		block_i->last_alloc_logical_block = block + blks - 1;
d6859bfca   Andrew Morton   [PATCH] ext3: cle...
782
  		block_i->last_alloc_physical_block =
5dea5176e   Mingming Cao   [PATCH] ext3: mul...
783
  				le32_to_cpu(where[num].key) + blks - 1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
784
785
786
787
788
789
  	}
  
  	/* We are done with atomic stuff, now do the rest of housekeeping */
  
  	inode->i_ctime = CURRENT_TIME_SEC;
  	ext3_mark_inode_dirty(handle, inode);
fe8bc91c4   Jan Kara   ext3: Wait for pr...
790
791
  	/* ext3_mark_inode_dirty already updated i_sync_tid */
  	atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
792
793
794
795
  
  	/* had we spliced it onto indirect block? */
  	if (where->bh) {
  		/*
d6859bfca   Andrew Morton   [PATCH] ext3: cle...
796
  		 * If we spliced it onto an indirect block, we haven't
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
797
798
799
800
801
802
803
804
805
806
  		 * altered the inode.  Note however that if it is being spliced
  		 * onto an indirect block at the very end of the file (the
  		 * file is growing) then we *will* alter the inode to reflect
  		 * the new i_size.  But that is not done here - it is done in
  		 * generic_commit_write->__mark_inode_dirty->ext3_dirty_inode.
  		 */
  		jbd_debug(5, "splicing indirect only
  ");
  		BUFFER_TRACE(where->bh, "call ext3_journal_dirty_metadata");
  		err = ext3_journal_dirty_metadata(handle, where->bh);
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
807
  		if (err)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
808
809
810
811
812
813
814
815
816
817
  			goto err_out;
  	} else {
  		/*
  		 * OK, we spliced it into the inode itself on a direct block.
  		 * Inode was dirtied above.
  		 */
  		jbd_debug(5, "splicing direct
  ");
  	}
  	return err;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
818
  err_out:
b47b24781   Mingming Cao   [PATCH] ext3_get_...
819
  	for (i = 1; i <= num; i++) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
820
821
  		BUFFER_TRACE(where[i].bh, "call journal_forget");
  		ext3_journal_forget(handle, where[i].bh);
d6859bfca   Andrew Morton   [PATCH] ext3: cle...
822
  		ext3_free_blocks(handle,inode,le32_to_cpu(where[i-1].key),1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
823
  	}
b47b24781   Mingming Cao   [PATCH] ext3_get_...
824
  	ext3_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
  	return err;
  }
  
  /*
   * Allocation strategy is simple: if we have to allocate something, we will
   * have to go the whole way to leaf. So let's do it before attaching anything
   * to tree, set linkage between the newborn blocks, write them if sync is
   * required, recheck the path, free and repeat if check fails, otherwise
   * set the last missing link (that will protect us from any truncate-generated
   * removals - all blocks on the path are immune now) and possibly force the
   * write on the parent block.
   * That has a nice additional property: no special recovery from the failed
   * allocations is needed - we simply release blocks and do not touch anything
   * reachable from inode.
   *
d6859bfca   Andrew Morton   [PATCH] ext3: cle...
840
   * `handle' can be NULL if create == 0.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
841
842
   *
   * The BKL may not be held on entry here.  Be sure to take it early.
89747d369   Mingming Cao   [PATCH] ext3_get_...
843
844
845
   * return > 0, # of blocks mapped or allocated.
   * return = 0, if plain lookup failed.
   * return < 0, error case.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
846
   */
d6859bfca   Andrew Morton   [PATCH] ext3: cle...
847
848
849
  int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
  		sector_t iblock, unsigned long maxblocks,
  		struct buffer_head *bh_result,
43237b549   Jan Kara   ext3: Get rid of ...
850
  		int create)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
851
852
853
854
855
  {
  	int err = -EIO;
  	int offsets[4];
  	Indirect chain[4];
  	Indirect *partial;
43d23f903   Mingming Cao   [PATCH] ext3_fsbl...
856
  	ext3_fsblk_t goal;
b47b24781   Mingming Cao   [PATCH] ext3_get_...
857
  	int indirect_blks;
89747d369   Mingming Cao   [PATCH] ext3_get_...
858
859
  	int blocks_to_boundary = 0;
  	int depth;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
860
  	struct ext3_inode_info *ei = EXT3_I(inode);
89747d369   Mingming Cao   [PATCH] ext3_get_...
861
  	int count = 0;
43d23f903   Mingming Cao   [PATCH] ext3_fsbl...
862
  	ext3_fsblk_t first_block = 0;
89747d369   Mingming Cao   [PATCH] ext3_get_...
863

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
864

785c4bcc0   Lukas Czerner   ext3: Add fixed t...
865
  	trace_ext3_get_blocks_enter(inode, iblock, maxblocks, create);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
866
  	J_ASSERT(handle != NULL || create == 0);
d6859bfca   Andrew Morton   [PATCH] ext3: cle...
867
  	depth = ext3_block_to_path(inode,iblock,offsets,&blocks_to_boundary);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
868
869
870
  
  	if (depth == 0)
  		goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
871
872
873
874
  	partial = ext3_get_branch(inode, depth, offsets, chain, &err);
  
  	/* Simplest case - block found, no allocation needed */
  	if (!partial) {
5dea5176e   Mingming Cao   [PATCH] ext3: mul...
875
  		first_block = le32_to_cpu(chain[depth - 1].key);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
876
  		clear_buffer_new(bh_result);
89747d369   Mingming Cao   [PATCH] ext3_get_...
877
878
879
  		count++;
  		/*map more blocks*/
  		while (count < maxblocks && count <= blocks_to_boundary) {
43d23f903   Mingming Cao   [PATCH] ext3_fsbl...
880
  			ext3_fsblk_t blk;
5dea5176e   Mingming Cao   [PATCH] ext3: mul...
881

e8ef7aaea   Jan Kara   ext3: fix chain v...
882
  			if (!verify_chain(chain, chain + depth - 1)) {
89747d369   Mingming Cao   [PATCH] ext3_get_...
883
884
885
886
887
888
889
890
891
892
893
  				/*
  				 * Indirect block might be removed by
  				 * truncate while we were reading it.
  				 * Handling of that case: forget what we've
  				 * got now. Flag the err as EAGAIN, so it
  				 * will reread.
  				 */
  				err = -EAGAIN;
  				count = 0;
  				break;
  			}
5dea5176e   Mingming Cao   [PATCH] ext3: mul...
894
895
896
  			blk = le32_to_cpu(*(chain[depth-1].p + count));
  
  			if (blk == first_block + count)
89747d369   Mingming Cao   [PATCH] ext3_get_...
897
898
899
900
901
902
  				count++;
  			else
  				break;
  		}
  		if (err != -EAGAIN)
  			goto got_it;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
903
904
905
  	}
  
  	/* Next simple case - plain lookup or failed read of indirect block */
fe55c4523   Mingming Cao   [PATCH] ext3: rem...
906
907
  	if (!create || err == -EIO)
  		goto cleanup;
40680f2fa   Jan Kara   ext3: Convert ext...
908
909
910
  	/*
  	 * Block out ext3_truncate while we alter the tree
  	 */
974615186   Arjan van de Ven   [PATCH] convert e...
911
  	mutex_lock(&ei->truncate_mutex);
fe55c4523   Mingming Cao   [PATCH] ext3: rem...
912
913
914
915
916
917
918
919
920
921
922
923
924
925
  
  	/*
  	 * If the indirect block is missing while we are reading
  	 * the chain(ext3_get_branch() returns -EAGAIN err), or
  	 * if the chain has been changed after we grab the semaphore,
  	 * (either because another process truncated this branch, or
  	 * another get_block allocated this branch) re-grab the chain to see if
  	 * the request block has been allocated or not.
  	 *
  	 * Since we already block the truncate/other get_block
  	 * at this point, we will have the current copy of the chain when we
  	 * splice the branch into the tree.
  	 */
  	if (err == -EAGAIN || !verify_chain(chain, partial)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
926
  		while (partial > chain) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
927
928
929
  			brelse(partial->bh);
  			partial--;
  		}
fe55c4523   Mingming Cao   [PATCH] ext3: rem...
930
931
  		partial = ext3_get_branch(inode, depth, offsets, chain, &err);
  		if (!partial) {
89747d369   Mingming Cao   [PATCH] ext3_get_...
932
  			count++;
974615186   Arjan van de Ven   [PATCH] convert e...
933
  			mutex_unlock(&ei->truncate_mutex);
fe55c4523   Mingming Cao   [PATCH] ext3: rem...
934
935
936
937
938
  			if (err)
  				goto cleanup;
  			clear_buffer_new(bh_result);
  			goto got_it;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
939
940
941
  	}
  
  	/*
fe55c4523   Mingming Cao   [PATCH] ext3: rem...
942
943
944
945
  	 * Okay, we need to do block allocation.  Lazily initialize the block
  	 * allocation info here if necessary
  	*/
  	if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
946
  		ext3_init_block_alloc_info(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
947

fb01bfdac   Akinobu Mita   ext[234]: remove ...
948
  	goal = ext3_find_goal(inode, iblock, partial);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
949

b47b24781   Mingming Cao   [PATCH] ext3_get_...
950
951
  	/* the number of blocks need to allocate for [d,t]indirect blocks */
  	indirect_blks = (chain + depth) - partial - 1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
952
953
  
  	/*
b47b24781   Mingming Cao   [PATCH] ext3_get_...
954
955
956
957
958
  	 * Next look up the indirect map to count the totoal number of
  	 * direct blocks to allocate for this branch.
  	 */
  	count = ext3_blks_to_allocate(partial, indirect_blks,
  					maxblocks, blocks_to_boundary);
b47b24781   Mingming Cao   [PATCH] ext3_get_...
959
  	err = ext3_alloc_branch(handle, inode, indirect_blks, &count, goal,
fe55c4523   Mingming Cao   [PATCH] ext3: rem...
960
  				offsets + (partial - chain), partial);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
961

fe55c4523   Mingming Cao   [PATCH] ext3: rem...
962
963
  	/*
  	 * The ext3_splice_branch call will free and forget any buffers
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
964
965
966
  	 * on the new chain if there is a failure, but that risks using
  	 * up transaction credits, especially for bitmaps where the
  	 * credits cannot be returned.  Can we handle this somehow?  We
fe55c4523   Mingming Cao   [PATCH] ext3: rem...
967
968
  	 * may need to return -EAGAIN upwards in the worst case.  --sct
  	 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
969
  	if (!err)
b47b24781   Mingming Cao   [PATCH] ext3_get_...
970
971
  		err = ext3_splice_branch(handle, inode, iblock,
  					partial, indirect_blks, count);
974615186   Arjan van de Ven   [PATCH] convert e...
972
  	mutex_unlock(&ei->truncate_mutex);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
973
974
975
976
  	if (err)
  		goto cleanup;
  
  	set_buffer_new(bh_result);
fe55c4523   Mingming Cao   [PATCH] ext3: rem...
977
978
  got_it:
  	map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
20acaa18d   Suparna Bhattacharya   [PATCH] ext3 sequ...
979
  	if (count > blocks_to_boundary)
fe55c4523   Mingming Cao   [PATCH] ext3: rem...
980
  		set_buffer_boundary(bh_result);
89747d369   Mingming Cao   [PATCH] ext3_get_...
981
  	err = count;
fe55c4523   Mingming Cao   [PATCH] ext3: rem...
982
983
984
  	/* Clean up and exit */
  	partial = chain + depth - 1;	/* the whole chain */
  cleanup:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
985
  	while (partial > chain) {
fe55c4523   Mingming Cao   [PATCH] ext3: rem...
986
  		BUFFER_TRACE(partial->bh, "call brelse");
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
987
988
989
  		brelse(partial->bh);
  		partial--;
  	}
fe55c4523   Mingming Cao   [PATCH] ext3: rem...
990
991
  	BUFFER_TRACE(bh_result, "returned");
  out:
785c4bcc0   Lukas Czerner   ext3: Add fixed t...
992
993
994
  	trace_ext3_get_blocks_exit(inode, iblock,
  				   depth ? le32_to_cpu(chain[depth-1].key) : 0,
  				   count, err);
fe55c4523   Mingming Cao   [PATCH] ext3: rem...
995
  	return err;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
996
  }
bd1939de9   Jan Kara   ext3: fix lock in...
997
998
999
1000
1001
1002
1003
1004
1005
1006
  /* Maximum number of blocks we map for direct IO at once. */
  #define DIO_MAX_BLOCKS 4096
  /*
   * Number of credits we need for writing DIO_MAX_BLOCKS:
   * We need sb + group descriptor + bitmap + inode -> 4
   * For B blocks with A block pointers per block we need:
   * 1 (triple ind.) + (B/A/A + 2) (doubly ind.) + (B/A + 2) (indirect).
   * If we plug in 4096 for B and 256 for A (for 1KB block size), we get 25.
   *
   * Spelled out with integer division:
   *   4 + 1 + (4096/256/256 + 2) + (4096/256 + 2) = 4 + 1 + 2 + 18 = 25
   */
  #define DIO_CREDITS 25
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1007

f91a2ad2e   Badari Pulavarty   [PATCH] ext3: mul...
1008
1009
  /*
   * get_block callback: map (and, if @create is set, allocate) blocks of
   * @inode starting at logical block @iblock into @bh_result.
   *
   * The number of blocks requested is derived from bh_result->b_size. When
   * allocation is requested and the caller has no running handle (direct IO
   * write), a handle is started here for the duration of the call and the
   * request is capped at DIO_MAX_BLOCKS.
   *
   * Returns 0 on success, with bh_result->b_size set to the size actually
   * mapped when at least one block was mapped, or a negative error.
   */
  static int ext3_get_block(struct inode *inode, sector_t iblock,
  			struct buffer_head *bh_result, int create)
  {
  	handle_t *handle = ext3_journal_current_handle();
  	unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
  	int own_handle = 0;
  	int ret;

  	if (create && !handle) {	/* Direct IO write... */
  		if (max_blocks > DIO_MAX_BLOCKS)
  			max_blocks = DIO_MAX_BLOCKS;
  		handle = ext3_journal_start(inode, DIO_CREDITS +
  				EXT3_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb));
  		if (IS_ERR(handle))
  			return PTR_ERR(handle);
  		own_handle = 1;
  	}

  	ret = ext3_get_blocks_handle(handle, inode, iblock,
  					max_blocks, bh_result, create);
  	if (ret > 0) {
  		/* ret is a block count: report the mapped size in bytes */
  		bh_result->b_size = (ret << inode->i_blkbits);
  		ret = 0;
  	}

  	/* only stop a handle that was started by this call */
  	if (own_handle)
  		ext3_journal_stop(handle);
  	return ret;
  }
68c9d702b   Josef Bacik   generic block bas...
1037
1038
1039
1040
1041
1042
  /*
   * ext3_fiemap - hand the request over to generic_block_fiemap(), with
   * ext3_get_block() as the block mapping callback, and return its result.
   */
  int ext3_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
  		u64 start, u64 len)
  {
  	int ret;

  	ret = generic_block_fiemap(inode, fieinfo, start, len, ext3_get_block);
  	return ret;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1043
1044
1045
  /*
   * `handle' can be NULL if create is zero
   */
d6859bfca   Andrew Morton   [PATCH] ext3: cle...
1046
1047
  struct buffer_head *ext3_getblk(handle_t *handle, struct inode *inode,
  				long block, int create, int *errp)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1048
1049
1050
1051
1052
1053
1054
1055
1056
  {
  	struct buffer_head dummy;
  	int fatal = 0, err;
  
  	J_ASSERT(handle != NULL || create == 0);
  
  	dummy.b_state = 0;
  	dummy.b_blocknr = -1000;
  	buffer_trace_init(&dummy.b_history);
89747d369   Mingming Cao   [PATCH] ext3_get_...
1057
  	err = ext3_get_blocks_handle(handle, inode, block, 1,
43237b549   Jan Kara   ext3: Get rid of ...
1058
  					&dummy, create);
3665d0e58   Badari Pulavarty   [PATCH] ext3_getb...
1059
1060
1061
1062
1063
1064
1065
  	/*
  	 * ext3_get_blocks_handle() returns number of blocks
  	 * mapped. 0 in case of a HOLE.
  	 */
  	if (err > 0) {
  		if (err > 1)
  			WARN_ON(1);
89747d369   Mingming Cao   [PATCH] ext3_get_...
1066
  		err = 0;
89747d369   Mingming Cao   [PATCH] ext3_get_...
1067
1068
1069
  	}
  	*errp = err;
  	if (!err && buffer_mapped(&dummy)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1070
1071
  		struct buffer_head *bh;
  		bh = sb_getblk(inode->i_sb, dummy.b_blocknr);
2973dfdb8   Glauber de Oliveira Costa   [PATCH] Test for ...
1072
1073
1074
1075
  		if (!bh) {
  			*errp = -EIO;
  			goto err;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1076
1077
  		if (buffer_new(&dummy)) {
  			J_ASSERT(create != 0);
c80544dc0   Stephen Hemminger   sparse pointer us...
1078
  			J_ASSERT(handle != NULL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1079

d6859bfca   Andrew Morton   [PATCH] ext3: cle...
1080
1081
1082
1083
1084
1085
1086
  			/*
  			 * Now that we do not always journal data, we should
  			 * keep in mind whether this should always journal the
  			 * new buffer as metadata.  For now, regular file
  			 * writes use ext3_get_block instead, so it's not a
  			 * problem.
  			 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1087
1088
1089
1090
  			lock_buffer(bh);
  			BUFFER_TRACE(bh, "call get_create_access");
  			fatal = ext3_journal_get_create_access(handle, bh);
  			if (!fatal && !buffer_uptodate(bh)) {
d6859bfca   Andrew Morton   [PATCH] ext3: cle...
1091
  				memset(bh->b_data,0,inode->i_sb->s_blocksize);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
  				set_buffer_uptodate(bh);
  			}
  			unlock_buffer(bh);
  			BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
  			err = ext3_journal_dirty_metadata(handle, bh);
  			if (!fatal)
  				fatal = err;
  		} else {
  			BUFFER_TRACE(bh, "not a new buffer");
  		}
  		if (fatal) {
  			*errp = fatal;
  			brelse(bh);
  			bh = NULL;
  		}
  		return bh;
  	}
2973dfdb8   Glauber de Oliveira Costa   [PATCH] Test for ...
1109
  err:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1110
1111
  	return NULL;
  }
d6859bfca   Andrew Morton   [PATCH] ext3: cle...
1112
  /*
   * ext3_bread - like ext3_getblk(), but also make sure the returned buffer
   * is uptodate, submitting a read and waiting for it when it is not.
   *
   * Returns the buffer_head, or NULL. *err is set by ext3_getblk(), and to
   * -EIO when the buffer is still not uptodate after the read.
   */
  struct buffer_head *ext3_bread(handle_t *handle, struct inode *inode,
  			       int block, int create, int *err)
  {
  	struct buffer_head *bh = ext3_getblk(handle, inode, block, create, err);

  	/* nothing to read: no buffer at all, or it is already uptodate */
  	if (!bh || bh_uptodate_or_lock(bh))
  		return bh;

  	get_bh(bh);
  	bh->b_end_io = end_buffer_read_sync;
  	submit_bh(READ | REQ_META | REQ_PRIO, bh);
  	wait_on_buffer(bh);

  	if (!buffer_uptodate(bh)) {
  		put_bh(bh);
  		*err = -EIO;
  		return NULL;
  	}
  	return bh;
  }
  
  /*
   * Walk the circular b_this_page list starting at @head and call @fn on
   * every buffer that overlaps the byte range [@from, @to). Offsets start at
   * 0 for @head and grow by head->b_size per buffer.
   *
   * For buffers outside the range, *@partial is set to 1 if the buffer is
   * not uptodate (when @partial is non-NULL).
   *
   * The walk stops at the first non-zero return of @fn, which is returned;
   * otherwise 0 is returned.
   */
  static int walk_page_buffers(	handle_t *handle,
  				struct buffer_head *head,
  				unsigned from,
  				unsigned to,
  				int *partial,
  				int (*fn)(	handle_t *handle,
  						struct buffer_head *bh))
  {
  	unsigned blocksize = head->b_size;
  	struct buffer_head *cur = head;
  	struct buffer_head *next;
  	unsigned start = 0;
  	unsigned end;
  	int ret = 0;

  	/* start == 0 marks the first pass over @head */
  	while (ret == 0 && (cur != head || start == 0)) {
  		/* fetch the successor before @fn gets to see the buffer */
  		next = cur->b_this_page;
  		end = start + blocksize;

  		if (end > from && start < to) {
  			/* ret is known to be 0 here, so take fn's result as is */
  			ret = (*fn)(handle, cur);
  		} else if (partial && !buffer_uptodate(cur)) {
  			*partial = 1;
  		}

  		start = end;
  		cur = next;
  	}
  	return ret;
  }
  
  /*
   * To preserve ordering, it is essential that the hole instantiation and
   * the data write be encapsulated in a single transaction.  We cannot
   * close off a transaction and start a new one between the ext3_get_block()
   * and the commit_write().  So doing the journal_start at the start of
   * prepare_write() is the right place.
   *
   * Also, this function can nest inside ext3_writepage() ->
   * block_write_full_page(). In that case, we *know* that ext3_writepage()
   * has generated enough buffer credits to do the whole page.  So we won't
   * block on the journal in that case, which is good, because the caller may
   * be PF_MEMALLOC.
   *
   * By accident, ext3 can be reentered when a transaction is open via
   * quota file writes.  If we were to commit the transaction while thus
   * reentered, there can be a deadlock - we would be holding a quota
   * lock, and the commit would never complete if another thread had a
   * transaction open and was blocking on the quota lock - a ranking
   * violation.
   *
   * So what we do is to rely on the fact that journal_stop/journal_start
   * will _not_ run commit under these circumstances because handle->h_ref
   * is elevated.  We'll still have enough credits for the tiny quotafile
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
1188
   * write.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1189
   */
d6859bfca   Andrew Morton   [PATCH] ext3: cle...
1190
1191
  /*
   * walk_page_buffers() callback: get journal write access to @bh.
   * Unmapped or freed buffers are skipped (returns 0). A buffer that was
   * dirty on entry has its dirty bit cleared first and is journaled as dirty
   * metadata once write access has been obtained.
   */
  static int do_journal_get_write_access(handle_t *handle,
  					struct buffer_head *bh)
  {
  	int was_dirty = buffer_dirty(bh);
  	int ret;

  	if (!buffer_mapped(bh) || buffer_freed(bh))
  		return 0;

  	/*
  	 * __block_prepare_write() could have dirtied some buffers. Clean
  	 * the dirty bit as jbd2_journal_get_write_access() could complain
  	 * otherwise about fs integrity issues. Setting of the dirty bit
  	 * by __block_prepare_write() isn't a real problem here as we clear
  	 * the bit before releasing a page lock and thus writeback cannot
  	 * ever write the buffer.
  	 */
  	if (was_dirty)
  		clear_buffer_dirty(bh);

  	ret = ext3_journal_get_write_access(handle, bh);
  	if (ret || !was_dirty)
  		return ret;

  	/* hand the previously dirty buffer over to the journal */
  	return ext3_journal_dirty_metadata(handle, bh);
  }
68eb3db08   Jan Kara   ext3: Fix data / ...
1212
1213
1214
1215
1216
1217
1218
1219
1220
  /*
   * Truncate blocks that were not used by write. We have to truncate the
   * pagecache as well so that corresponding buffers get properly unmapped.
   */
  static void ext3_truncate_failed_write(struct inode *inode)
  {
  	truncate_inode_pages(inode->i_mapping, inode->i_size);
  	ext3_truncate(inode);
  }
ee3e77f18   Jan Kara   ext3: Improve tru...
1221
1222
1223
1224
1225
1226
1227
1228
1229
  /*
   * Truncate blocks that were not used by direct IO write. We have to zero out
   * the last file block as well because direct IO might have written to it.
   */
  static void ext3_truncate_failed_direct_write(struct inode *inode)
  {
  	ext3_block_truncate_page(inode, inode->i_size);
  	ext3_truncate(inode);
  }
f4fc66a89   Nick Piggin   ext3: convert to ...
1230
1231
1232
  /*
   * Prepare a page for a buffered write of @len bytes at @pos: grab the page,
   * start a transaction and map the buffers covering the range through
   * ext3_get_block().  In data=journal mode the buffers are additionally
   * passed to do_journal_get_write_access().
   *
   * Returns 0 with *pagep set and the handle still running (the write_end
   * functions stop it), or a negative errno after releasing the page and,
   * if one was started, the handle.  -ENOSPC is retried for as long as
   * ext3_should_retry_alloc() agrees.
   */
  static int ext3_write_begin(struct file *file, struct address_space *mapping,
  				loff_t pos, unsigned len, unsigned flags,
  				struct page **pagep, void **fsdata)
  {
  	struct inode *inode = mapping->host;
  	/*
  	 * One credit on top of the writepage estimate: if blocks get
  	 * allocated and the write then fails, the inode has to be added
  	 * to the orphan list.
  	 */
  	int needed_blocks = ext3_writepage_trans_blocks(inode) + 1;
  	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
  	unsigned from = pos & (PAGE_CACHE_SIZE - 1);
  	unsigned to = from + len;
  	int retries = 0;
  	struct page *page;
  	handle_t *handle;
  	int ret;

  	trace_ext3_write_begin(inode, pos, len, flags);

  	for (;;) {
  		page = grab_cache_page_write_begin(mapping, index, flags);
  		if (!page)
  			return -ENOMEM;
  		*pagep = page;

  		handle = ext3_journal_start(inode, needed_blocks);
  		if (IS_ERR(handle)) {
  			unlock_page(page);
  			page_cache_release(page);
  			return PTR_ERR(handle);
  		}

  		ret = __block_write_begin(page, pos, len, ext3_get_block);
  		if (!ret && ext3_should_journal_data(inode))
  			ret = walk_page_buffers(handle, page_buffers(page),
  					from, to, NULL,
  					do_journal_get_write_access);
  		if (!ret)
  			return 0;

  		/*
  		 * Blocks may have been instantiated past i_size; they are
  		 * trimmed off below.  Plain i_size access is fine because
  		 * i_mutex is held.
  		 *
  		 * Put the inode on the orphan list first so that a crash
  		 * before the truncate completes is recoverable, but only
  		 * when ext3_can_truncate() agrees, to keep the orphan
  		 * processing code happy.
  		 */
  		if (pos + len > inode->i_size && ext3_can_truncate(inode))
  			ext3_orphan_add(handle, inode);
  		ext3_journal_stop(handle);
  		unlock_page(page);
  		page_cache_release(page);
  		if (pos + len > inode->i_size)
  			ext3_truncate_failed_write(inode);

  		if (ret != -ENOSPC ||
  		    !ext3_should_retry_alloc(inode->i_sb, &retries))
  			return ret;
  	}
  }
f4fc66a89   Nick Piggin   ext3: convert to ...
1294

d6859bfca   Andrew Morton   [PATCH] ext3: cle...
1295
  /*
   * Wrapper around journal_dirty_data(): on failure, report the error through
   * ext3_journal_abort_handle() before handing it back to the caller.
   */
  int ext3_journal_dirty_data(handle_t *handle, struct buffer_head *bh)
  {
  	int err;

  	err = journal_dirty_data(handle, bh);
  	if (!err)
  		return 0;

  	ext3_journal_abort_handle(__func__, __func__, bh, handle, err);
  	return err;
  }
695f6ae0d   Jan Kara   ext3: avoid false...
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
  /*
   * Per-buffer callback used by the ordered-mode writepage and write_end
   * paths.  Returns the result of ext3_journal_dirty_data(), or 0 when the
   * buffer is skipped.
   */
  static int journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh)
  {
  	/*
  	 * A write may have mapped the buffer without having copied any data
  	 * into it yet; such a buffer must not be filed into a transaction.
  	 */
  	if (!buffer_mapped(bh) || !buffer_uptodate(bh))
  		return 0;

  	return ext3_journal_dirty_data(handle, bh);
  }
f4fc66a89   Nick Piggin   ext3: convert to ...
1314
1315
  /*
   * Per-buffer callback for write_end() in data=journal mode: buffers that
   * are mapped and not freed are marked uptodate and journalled as metadata;
   * anything else is skipped with a 0 return.
   */
  static int write_end_fn(handle_t *handle, struct buffer_head *bh)
  {
  	if (buffer_mapped(bh) && !buffer_freed(bh)) {
  		set_buffer_uptodate(bh);
  		return ext3_journal_dirty_metadata(handle, bh);
  	}

  	return 0;
  }
  
  /*
695f6ae0d   Jan Kara   ext3: avoid false...
1324
1325
1326
1327
   * This is nasty and subtle: ext3_write_begin() could have allocated blocks
   * for the whole page but later we failed to copy the data in. Update inode
   * size according to what we managed to copy. The rest is going to be
   * truncated in write_end function.
f4fc66a89   Nick Piggin   ext3: convert to ...
1328
   */
695f6ae0d   Jan Kara   ext3: avoid false...
1329
  static void update_file_sizes(struct inode *inode, loff_t pos, unsigned copied)
  {
  	struct ext3_inode_info *ei = EXT3_I(inode);
  	loff_t end = pos + copied;	/* first byte past the copied data */

  	/* i_disksize is the one that matters; i_size is not written anywhere */
  	if (end > inode->i_size)
  		i_size_write(inode, end);

  	if (end > ei->i_disksize) {
  		ei->i_disksize = end;
  		mark_inode_dirty(inode);
  	}
  }
  
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1341
1342
1343
1344
1345
1346
   * We need to pick up the new inode size which generic_commit_write gave us
   * `file' can be NULL - eg, when called from page_symlink().
   *
   * ext3 never places buffers on inode->i_mapping->private_list.  metadata
   * buffers are managed internally.
   */
f4fc66a89   Nick Piggin   ext3: convert to ...
1347
1348
1349
1350
  /*
   * Finish a buffered write in data=ordered mode.
   *
   * Runs under the handle returned by ext3_journal_current_handle() and
   * stops it before returning.  Returns the number of bytes accepted by
   * block_write_end(), or a negative errno.
   *
   * The inode is taken from @mapping rather than @file because @file may be
   * NULL (e.g. when called from page_symlink()); dereferencing it here would
   * oops in that case.
   */
  static int ext3_ordered_write_end(struct file *file,
  				struct address_space *mapping,
  				loff_t pos, unsigned len, unsigned copied,
  				struct page *page, void *fsdata)
  {
  	handle_t *handle = ext3_journal_current_handle();
  	struct inode *inode = mapping->host;
  	unsigned from, to;
  	int ret = 0, ret2;

  	trace_ext3_ordered_write_end(inode, pos, len, copied);
  	copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);

  	/* Only the part of the page that was really copied gets filed. */
  	from = pos & (PAGE_CACHE_SIZE - 1);
  	to = from + copied;
  	ret = walk_page_buffers(handle, page_buffers(page),
  		from, to, NULL, journal_dirty_data_fn);

  	if (ret == 0)
  		update_file_sizes(inode, pos, copied);
  	/*
  	 * There may be allocated blocks outside of i_size because
  	 * we failed to copy some data. Prepare for truncate.
  	 */
  	if (pos + len > inode->i_size && ext3_can_truncate(inode))
  		ext3_orphan_add(handle, inode);
  	ret2 = ext3_journal_stop(handle);
  	if (!ret)
  		ret = ret2;
  	unlock_page(page);
  	page_cache_release(page);

  	/* Truncate only after the handle is stopped and the page released. */
  	if (pos + len > inode->i_size)
  		ext3_truncate_failed_write(inode);
  	return ret ? ret : copied;
  }
f4fc66a89   Nick Piggin   ext3: convert to ...
1381
1382
1383
1384
  /*
   * Finish a buffered write in data=writeback mode.
   *
   * Runs under the handle returned by ext3_journal_current_handle() and
   * stops it before returning.  Returns the number of bytes accepted by
   * block_write_end(), or a negative errno from ext3_journal_stop().
   *
   * The inode is taken from @mapping rather than @file: write_end may be
   * invoked with a NULL @file (e.g. from page_symlink()), and the sibling
   * ext3_write_begin()/ext3_journalled_write_end() already use mapping->host.
   */
  static int ext3_writeback_write_end(struct file *file,
  				struct address_space *mapping,
  				loff_t pos, unsigned len, unsigned copied,
  				struct page *page, void *fsdata)
  {
  	handle_t *handle = ext3_journal_current_handle();
  	struct inode *inode = mapping->host;
  	int ret;

  	trace_ext3_writeback_write_end(inode, pos, len, copied);
  	copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
  	update_file_sizes(inode, pos, copied);
  	/*
  	 * There may be allocated blocks outside of i_size because
  	 * we failed to copy some data. Prepare for truncate.
  	 */
  	if (pos + len > inode->i_size && ext3_can_truncate(inode))
  		ext3_orphan_add(handle, inode);
  	ret = ext3_journal_stop(handle);
  	unlock_page(page);
  	page_cache_release(page);

  	/* Truncate only after the handle is stopped and the page released. */
  	if (pos + len > inode->i_size)
  		ext3_truncate_failed_write(inode);
  	return ret ? ret : copied;
  }
f4fc66a89   Nick Piggin   ext3: convert to ...
1406
1407
1408
1409
  /*
   * Finish a buffered write in data=journal mode: journal the written
   * buffers through write_end_fn(), update i_size/i_disksize, stop the
   * current handle and release the page.  Returns the number of bytes
   * accepted or a negative errno.
   */
  static int ext3_journalled_write_end(struct file *file,
  				struct address_space *mapping,
  				loff_t pos, unsigned len, unsigned copied,
  				struct page *page, void *fsdata)
  {
  	handle_t *handle = ext3_journal_current_handle();
  	struct inode *inode = mapping->host;
  	struct ext3_inode_info *ei = EXT3_I(inode);
  	unsigned from = pos & (PAGE_CACHE_SIZE - 1);
  	unsigned to = from + len;
  	int partial = 0;
  	int ret, err;

  	trace_ext3_journalled_write_end(inode, pos, len, copied);

  	if (copied < len) {
  		/* Short copy into a page that is not uptodate: accept nothing */
  		if (!PageUptodate(page))
  			copied = 0;
  		page_zero_new_buffers(page, from + copied, to);
  		to = from + copied;
  	}

  	ret = walk_page_buffers(handle, page_buffers(page), from,
  				to, &partial, write_end_fn);
  	if (!partial)
  		SetPageUptodate(page);

  	if (pos + copied > inode->i_size)
  		i_size_write(inode, pos + copied);
  	/*
  	 * A short copy can leave allocated blocks beyond i_size; get the
  	 * inode onto the orphan list ahead of the truncate below.
  	 */
  	if (pos + len > inode->i_size && ext3_can_truncate(inode))
  		ext3_orphan_add(handle, inode);

  	ext3_set_inode_state(inode, EXT3_STATE_JDATA);
  	atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid);

  	if (inode->i_size > ei->i_disksize) {
  		ei->i_disksize = inode->i_size;
  		err = ext3_mark_inode_dirty(handle, inode);
  		if (!ret)
  			ret = err;
  	}

  	err = ext3_journal_stop(handle);
  	if (!ret)
  		ret = err;

  	unlock_page(page);
  	page_cache_release(page);

  	if (pos + len > inode->i_size)
  		ext3_truncate_failed_write(inode);

  	if (ret)
  		return ret;
  	return copied;
  }
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
1460
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1461
1462
1463
1464
1465
1466
1467
1468
   * bmap() is special.  It gets used by applications such as lilo and by
   * the swapper to find the on-disk block of a specific piece of data.
   *
   * Naturally, this is dangerous if the block concerned is still in the
   * journal.  If somebody makes a swapfile on an ext3 data-journaling
   * filesystem and enables swap, then they may get a nasty shock when the
   * data getting swapped to that swapfile suddenly gets overwritten by
   * the original zeros written out previously to the journal and
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
1469
   * awaiting writeback in the kernel's buffer cache.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1470
1471
   *
   * So, if we see any bmap calls here on a modified, data-journaled file,
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
1472
   * take extra steps to flush any blocks which might be in the cache.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1473
1474
1475
1476
1477
1478
   */
  static sector_t ext3_bmap(struct address_space *mapping, sector_t block)
  {
  	struct inode *inode = mapping->host;
  	int err = 0;

  	if (ext3_test_inode_state(inode, EXT3_STATE_JDATA)) {
  		journal_t *journal;

  		/*
  		 * Flushing the whole journal is a REALLY heavyweight
  		 * approach, but bmap on a dirty file is expected to be
  		 * extremely rare: it should only happen when lilo or swapon
  		 * is run on a freshly made file.
  		 *
  		 * (bmap requires CAP_SYS_RAWIO, so this is not an
  		 * unprivileged user DOS attack --- we'd be in trouble if
  		 * mortal users could trigger this path at will.)
  		 *
  		 * NB. EXT3_STATE_JDATA is not set on files other than
  		 * regular files.  Anyone who bmaps a directory or symlink
  		 * and gets confused because the buffer hasn't been flushed
  		 * to disk yet deserves everything they get.
  		 */
  		ext3_clear_inode_state(inode, EXT3_STATE_JDATA);
  		journal = EXT3_JOURNAL(inode);

  		journal_lock_updates(journal);
  		err = journal_flush(journal);
  		journal_unlock_updates(journal);
  	}

  	/* A failed flush is reported as block 0 */
  	return err ? 0 : generic_block_bmap(mapping, block, ext3_get_block);
  }
  
  /*
   * walk_page_buffers() callback: take an extra reference on @bh.
   * @handle is unused.  Always returns 0.
   */
  static int bget_one(handle_t *handle, struct buffer_head *bh)
  {
  	get_bh(bh);
  	return 0;
  }
  
  /*
   * walk_page_buffers() callback: drop a reference on @bh (the counterpart
   * of bget_one()).  @handle is unused.  Always returns 0.
   */
  static int bput_one(handle_t *handle, struct buffer_head *bh)
  {
  	put_bh(bh);
  	return 0;
  }
9e80d4077   Jan Kara   ext3: Avoid start...
1521
1522
1523
1524
  /*
   * walk_page_buffers() callback: returns 1 when @bh is not mapped and 0
   * when it is.  @handle is unused.
   */
  static int buffer_unmapped(handle_t *handle, struct buffer_head *bh)
  {
  	if (buffer_mapped(bh))
  		return 0;
  	return 1;
  }
695f6ae0d   Jan Kara   ext3: avoid false...
1525

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
  /*
   * Note that we always start a transaction even if we're not journalling
   * data.  This is to preserve ordering: any hole instantiation within
   * __block_write_full_page -> ext3_get_block() should be journalled
   * along with the data so we don't crash and then get metadata which
   * refers to old data.
   *
   * In all journalling modes block_write_full_page() will start the I/O.
   *
   * Problem:
   *
   *	ext3_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() ->
   *		ext3_writepage()
   *
   * Similar for:
   *
   *	ext3_file_write() -> generic_file_write() -> __alloc_pages() -> ...
   *
   * Same applies to ext3_get_block().  We will deadlock on various things like
974615186   Arjan van de Ven   [PATCH] convert e...
1545
   * lock_journal and i_truncate_mutex.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
   *
   * Setting PF_MEMALLOC here doesn't work - too many internal memory
   * allocations fail.
   *
   * 16May01: If we're reentered then journal_current_handle() will be
   *	    non-zero. We simply *return*.
   *
   * 1 July 2001: @@@ FIXME:
   *   In journalled data mode, a data buffer may be metadata against the
   *   current transaction.  But the same file is part of a shared mapping
   *   and someone does a writepage() on it.
   *
   *   We will move the buffer onto the async_data list, but *after* it has
   *   been dirtied. So there's a small window where we have dirty data on
   *   BJ_Metadata.
   *
   *   Note that this only applies to the last partial page in the file.  The
   *   bit which block_write_full_page() uses prepare/commit for.  (That's
   *   broken code anyway: it's wrong for msync()).
   *
   *   It's a rare case: affects the final partial page, for journalled data
   *   where the file is subject to both write() and writepage() in the same
   *   transaction.  To fix it we'll need a custom block_write_full_page().
   *   We'll probably need that anyway for journalling writepage() output.
   *
   * We don't honour synchronous mounts for writepage().  That would be
   * disastrous.  Any write() or metadata operation will sync the fs for
   * us.
   *
   * AKPM2: if all the page's buffers are mapped to disk and !data=journal,
   * we don't need to open a transaction here.
   */
  static int ext3_ordered_writepage(struct page *page,
d6859bfca   Andrew Morton   [PATCH] ext3: cle...
1579
  				struct writeback_control *wbc)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1580
1581
1582
1583
1584
1585
1586
1587
  {
  	struct inode *inode = page->mapping->host;
  	struct buffer_head *page_bufs;
  	handle_t *handle = NULL;
  	int ret = 0;
  	int err;
  
  	J_ASSERT(PageLocked(page));
33c104d41   Jan Kara   ext3: Don't warn ...
1588
1589
1590
1591
1592
1593
1594
  	/*
  	 * We don't want to warn for emergency remount. The condition is
  	 * ordered to avoid dereferencing inode->i_sb in non-error case to
  	 * avoid slow-downs.
  	 */
  	WARN_ON_ONCE(IS_RDONLY(inode) &&
  		     !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1595
1596
1597
1598
1599
1600
1601
  
  	/*
  	 * We give up here if we're reentered, because it might be for a
  	 * different filesystem.
  	 */
  	if (ext3_journal_current_handle())
  		goto out_fail;
785c4bcc0   Lukas Czerner   ext3: Add fixed t...
1602
  	trace_ext3_ordered_writepage(page);
9e80d4077   Jan Kara   ext3: Avoid start...
1603
1604
1605
  	if (!page_has_buffers(page)) {
  		create_empty_buffers(page, inode->i_sb->s_blocksize,
  				(1 << BH_Dirty)|(1 << BH_Uptodate));
430db323f   Jan Kara   ext3: Try to avoi...
1606
1607
1608
1609
1610
1611
1612
1613
1614
  		page_bufs = page_buffers(page);
  	} else {
  		page_bufs = page_buffers(page);
  		if (!walk_page_buffers(NULL, page_bufs, 0, PAGE_CACHE_SIZE,
  				       NULL, buffer_unmapped)) {
  			/* Provide NULL get_block() to catch bugs if buffers
  			 * weren't really mapped */
  			return block_write_full_page(page, NULL, wbc);
  		}
9e80d4077   Jan Kara   ext3: Avoid start...
1615
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1616
1617
1618
1619
1620
1621
  	handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));
  
  	if (IS_ERR(handle)) {
  		ret = PTR_ERR(handle);
  		goto out_fail;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
  	walk_page_buffers(handle, page_bufs, 0,
  			PAGE_CACHE_SIZE, NULL, bget_one);
  
  	ret = block_write_full_page(page, ext3_get_block, wbc);
  
  	/*
  	 * The page can become unlocked at any point now, and
  	 * truncate can then come in and change things.  So we
  	 * can't touch *page from now on.  But *page_bufs is
  	 * safe due to elevated refcount.
  	 */
  
  	/*
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
1635
  	 * And attach them to the current transaction.  But only if
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
  	 * block_write_full_page() succeeded.  Otherwise they are unmapped,
  	 * and generally junk.
  	 */
  	if (ret == 0) {
  		err = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE,
  					NULL, journal_dirty_data_fn);
  		if (!ret)
  			ret = err;
  	}
  	walk_page_buffers(handle, page_bufs, 0,
  			PAGE_CACHE_SIZE, NULL, bput_one);
  	err = ext3_journal_stop(handle);
  	if (!ret)
  		ret = err;
  	return ret;
  
  out_fail:
  	redirty_page_for_writepage(wbc, page);
  	unlock_page(page);
  	return ret;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1657
1658
1659
1660
1661
1662
1663
  static int ext3_writeback_writepage(struct page *page,
  				struct writeback_control *wbc)
  {
  	struct inode *inode = page->mapping->host;
  	handle_t *handle = NULL;
  	int ret = 0;
  	int err;
49792c806   Dmitry Monakhov   ext3: add writepa...
1664
  	J_ASSERT(PageLocked(page));
33c104d41   Jan Kara   ext3: Don't warn ...
1665
1666
1667
1668
1669
1670
1671
  	/*
  	 * We don't want to warn for emergency remount. The condition is
  	 * ordered to avoid dereferencing inode->i_sb in non-error case to
  	 * avoid slow-downs.
  	 */
  	WARN_ON_ONCE(IS_RDONLY(inode) &&
  		     !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS));
49792c806   Dmitry Monakhov   ext3: add writepa...
1672

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1673
1674
  	if (ext3_journal_current_handle())
  		goto out_fail;
785c4bcc0   Lukas Czerner   ext3: Add fixed t...
1675
  	trace_ext3_writeback_writepage(page);
430db323f   Jan Kara   ext3: Try to avoi...
1676
1677
1678
1679
1680
1681
1682
1683
  	if (page_has_buffers(page)) {
  		if (!walk_page_buffers(NULL, page_buffers(page), 0,
  				      PAGE_CACHE_SIZE, NULL, buffer_unmapped)) {
  			/* Provide NULL get_block() to catch bugs if buffers
  			 * weren't really mapped */
  			return block_write_full_page(page, NULL, wbc);
  		}
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1684
1685
1686
1687
1688
  	handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));
  	if (IS_ERR(handle)) {
  		ret = PTR_ERR(handle);
  		goto out_fail;
  	}
4c4d39012   Christoph Hellwig   ext3: remove vest...
1689
  	ret = block_write_full_page(page, ext3_get_block, wbc);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
  
  	err = ext3_journal_stop(handle);
  	if (!ret)
  		ret = err;
  	return ret;
  
  out_fail:
  	redirty_page_for_writepage(wbc, page);
  	unlock_page(page);
  	return ret;
  }
  
  static int ext3_journalled_writepage(struct page *page,
  				struct writeback_control *wbc)
  {
  	struct inode *inode = page->mapping->host;
  	handle_t *handle = NULL;
  	int ret = 0;
  	int err;
49792c806   Dmitry Monakhov   ext3: add writepa...
1709
  	J_ASSERT(PageLocked(page));
33c104d41   Jan Kara   ext3: Don't warn ...
1710
1711
1712
1713
1714
1715
1716
  	/*
  	 * We don't want to warn for emergency remount. The condition is
  	 * ordered to avoid dereferencing inode->i_sb in non-error case to
  	 * avoid slow-downs.
  	 */
  	WARN_ON_ONCE(IS_RDONLY(inode) &&
  		     !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS));
49792c806   Dmitry Monakhov   ext3: add writepa...
1717

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1718
1719
  	if (ext3_journal_current_handle())
  		goto no_write;
785c4bcc0   Lukas Czerner   ext3: Add fixed t...
1720
  	trace_ext3_journalled_writepage(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
  	handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));
  	if (IS_ERR(handle)) {
  		ret = PTR_ERR(handle);
  		goto no_write;
  	}
  
  	if (!page_has_buffers(page) || PageChecked(page)) {
  		/*
  		 * It's mmapped pagecache.  Add buffers and journal it.  There
  		 * doesn't seem much point in redirtying the page here.
  		 */
  		ClearPageChecked(page);
ebdec241d   Christoph Hellwig   fs: kill block_pr...
1733
1734
  		ret = __block_write_begin(page, 0, PAGE_CACHE_SIZE,
  					  ext3_get_block);
ab4eb43ce   Denis Lunev   [PATCH] ext3: jou...
1735
1736
  		if (ret != 0) {
  			ext3_journal_stop(handle);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1737
  			goto out_unlock;
ab4eb43ce   Denis Lunev   [PATCH] ext3: jou...
1738
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1739
1740
1741
1742
  		ret = walk_page_buffers(handle, page_buffers(page), 0,
  			PAGE_CACHE_SIZE, NULL, do_journal_get_write_access);
  
  		err = walk_page_buffers(handle, page_buffers(page), 0,
f4fc66a89   Nick Piggin   ext3: convert to ...
1743
  				PAGE_CACHE_SIZE, NULL, write_end_fn);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1744
1745
  		if (ret == 0)
  			ret = err;
9df93939b   Jan Kara   ext3: Use bitops ...
1746
  		ext3_set_inode_state(inode, EXT3_STATE_JDATA);
b22570d9a   Jan Kara   ext3: Fix data co...
1747
1748
  		atomic_set(&EXT3_I(inode)->i_datasync_tid,
  			   handle->h_transaction->t_tid);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
  		unlock_page(page);
  	} else {
  		/*
  		 * It may be a page full of checkpoint-mode buffers.  We don't
  		 * really know unless we go poke around in the buffer_heads.
  		 * But block_write_full_page will do the right thing.
  		 */
  		ret = block_write_full_page(page, ext3_get_block, wbc);
  	}
  	err = ext3_journal_stop(handle);
  	if (!ret)
  		ret = err;
  out:
  	return ret;
  
  no_write:
  	redirty_page_for_writepage(wbc, page);
  out_unlock:
  	unlock_page(page);
  	goto out;
  }
  
  static int ext3_readpage(struct file *file, struct page *page)
  {
785c4bcc0   Lukas Czerner   ext3: Add fixed t...
1773
  	trace_ext3_readpage(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1774
1775
1776
1777
1778
1779
1780
1781
1782
  	return mpage_readpage(page, ext3_get_block);
  }
  
  /*
   * Multi-page read: forwards @pages / @nr_pages to mpage_readpages() with
   * ext3_get_block() as the block-mapping callback.  @file is unused.
   */
  static int
  ext3_readpages(struct file *file, struct address_space *mapping,
  		struct list_head *pages, unsigned nr_pages)
  {
  	return mpage_readpages(mapping, pages, nr_pages, ext3_get_block);
  }
2ff28e22b   NeilBrown   [PATCH] Make addr...
1783
  static void ext3_invalidatepage(struct page *page, unsigned long offset)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1784
1785
  {
  	journal_t *journal = EXT3_JOURNAL(page->mapping->host);
785c4bcc0   Lukas Czerner   ext3: Add fixed t...
1786
  	trace_ext3_invalidatepage(page, offset);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1787
1788
1789
1790
1791
  	/*
  	 * If it's a full truncate we just forget about the pending dirtying
  	 */
  	if (offset == 0)
  		ClearPageChecked(page);
2ff28e22b   NeilBrown   [PATCH] Make addr...
1792
  	journal_invalidatepage(journal, page, offset);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1793
  }
27496a8c6   Al Viro   [PATCH] gfp_t: fs/*
1794
  /*
   * Try to release the buffers attached to @page.  Returns 0 when the page
   * has no buffers, otherwise whatever journal_try_to_free_buffers() says.
   * A page still flagged PageChecked triggers a warning.
   */
  static int ext3_releasepage(struct page *page, gfp_t wait)
  {
  	journal_t *journal = EXT3_JOURNAL(page->mapping->host);

  	trace_ext3_releasepage(page);
  	WARN_ON(PageChecked(page));

  	if (page_has_buffers(page))
  		return journal_try_to_free_buffers(journal, page, wait);

  	return 0;
  }
  
  /*
   * If the O_DIRECT write will extend the file then add this inode to the
   * orphan list.  So recovery will truncate it back to the original size
   * if the machine crashes during the write.
   *
   * If the O_DIRECT write is instantiating holes inside i_size and the machine
bd1939de9   Jan Kara   ext3: fix lock in...
1810
1811
   * crashes then stale disk data _may_ be exposed inside the file. But current
   * VFS code falls back into buffered path in that case so we are safe.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1812
1813
1814
1815
1816
1817
1818
1819
   */
  static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
  			const struct iovec *iov, loff_t offset,
  			unsigned long nr_segs)
  {
  	struct file *file = iocb->ki_filp;
  	struct inode *inode = file->f_mapping->host;
  	struct ext3_inode_info *ei = EXT3_I(inode);
  	size_t count = iov_length(iov, nr_segs);
  	loff_t final_size = offset + count;
  	int orphan = 0;
  	int retries = 0;
  	handle_t *handle;
  	ssize_t ret;

  	trace_ext3_direct_IO_enter(inode, offset, count, rw);

  	/*
  	 * An extending write: put the inode on the orphan list up front, in
  	 * its own short transaction, so that recovery can truncate it back
  	 * after a crash.
  	 */
  	if (rw == WRITE && final_size > inode->i_size) {
  		/* Credits for sb + inode write */
  		handle = ext3_journal_start(inode, 2);
  		if (IS_ERR(handle)) {
  			ret = PTR_ERR(handle);
  			goto out;
  		}

  		ret = ext3_orphan_add(handle, inode);
  		if (ret) {
  			ext3_journal_stop(handle);
  			goto out;
  		}

  		orphan = 1;
  		ei->i_disksize = inode->i_size;
  		ext3_journal_stop(handle);
  	}

  	do {
  		ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
  					 ext3_get_block);
  		/*
  		 * A failed extending write may have instantiated a few
  		 * blocks outside i_size. Trim these off again.
  		 */
  		if (unlikely((rw & WRITE) && ret < 0)) {
  			loff_t isize = i_size_read(inode);

  			if (final_size > isize)
  				ext3_truncate_failed_direct_write(inode);
  		}
  	} while (ret == -ENOSPC &&
  		 ext3_should_retry_alloc(inode->i_sb, &retries));

  	if (orphan) {
  		int err;

  		/* Credits for sb + inode write */
  		handle = ext3_journal_start(inode, 2);
  		if (IS_ERR(handle)) {
  			/*
  			 * Really bad luck: the data has been written but
  			 * i_size cannot be extended.  Truncate the allocated
  			 * blocks and pretend the write failed...
  			 */
  			ext3_truncate_failed_direct_write(inode);
  			ret = PTR_ERR(handle);
  			goto out;
  		}

  		if (inode->i_nlink)
  			ext3_orphan_del(handle, inode);

  		if (ret > 0) {
  			loff_t end = offset + ret;

  			if (end > inode->i_size) {
  				ei->i_disksize = end;
  				i_size_write(inode, end);
  				/*
  				 * A positive `ret' is about to be returned
  				 * for the non-zero-length I/O, which leaves
  				 * no way of reporting an error from
  				 * ext3_mark_inode_dirty() to userspace.  So
  				 * ignore it.
  				 */
  				ext3_mark_inode_dirty(handle, inode);
  			}
  		}

  		err = ext3_journal_stop(handle);
  		if (ret == 0)
  			ret = err;
  	}
  out:
  	trace_ext3_direct_IO_exit(inode, offset, count, rw, ret);
  	return ret;
  }
  
  /*
   * Pages can be marked dirty completely asynchronously from ext3's journalling
   * activity.  By filemap_sync_pte(), try_to_unmap_one(), etc.  We cannot do
   * much here because ->set_page_dirty is called under VFS locks.  The page is
   * not necessarily locked.
   *
   * We cannot just dirty the page and leave attached buffers clean, because the
   * buffers' dirty state is "definitive".  We cannot just set the buffers dirty
   * or jbddirty because all the journalling code will explode.
   *
   * So what we do is to mark the page "pending dirty" and next time writepage
   * is called, propagate that into the buffers appropriately.
   */
  static int ext3_journalled_set_page_dirty(struct page *page)
  {
  	int ret;

  	/*
  	 * Tag the page as "pending dirty" first, then dirty the page itself
  	 * without going through its buffers.
  	 */
  	SetPageChecked(page);
  	ret = __set_page_dirty_nobuffers(page);

  	return ret;
  }
f5e54d6e5   Christoph Hellwig   [PATCH] mark addr...
1920
  static const struct address_space_operations ext3_ordered_aops = {
8ab22b9ab   Hisashi Hifumi   vfs: pagecache us...
1921
1922
1923
  	.readpage		= ext3_readpage,
  	.readpages		= ext3_readpages,
  	.writepage		= ext3_ordered_writepage,
8ab22b9ab   Hisashi Hifumi   vfs: pagecache us...
1924
1925
1926
1927
1928
1929
1930
1931
  	.write_begin		= ext3_write_begin,
  	.write_end		= ext3_ordered_write_end,
  	.bmap			= ext3_bmap,
  	.invalidatepage		= ext3_invalidatepage,
  	.releasepage		= ext3_releasepage,
  	.direct_IO		= ext3_direct_IO,
  	.migratepage		= buffer_migrate_page,
  	.is_partially_uptodate  = block_is_partially_uptodate,
aa261f549   Andi Kleen   HWPOISON: Enable ...
1932
  	.error_remove_page	= generic_error_remove_page,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1933
  };
f5e54d6e5   Christoph Hellwig   [PATCH] mark addr...
1934
  static const struct address_space_operations ext3_writeback_aops = {
8ab22b9ab   Hisashi Hifumi   vfs: pagecache us...
1935
1936
1937
  	.readpage		= ext3_readpage,
  	.readpages		= ext3_readpages,
  	.writepage		= ext3_writeback_writepage,
8ab22b9ab   Hisashi Hifumi   vfs: pagecache us...
1938
1939
1940
1941
1942
1943
1944
1945
  	.write_begin		= ext3_write_begin,
  	.write_end		= ext3_writeback_write_end,
  	.bmap			= ext3_bmap,
  	.invalidatepage		= ext3_invalidatepage,
  	.releasepage		= ext3_releasepage,
  	.direct_IO		= ext3_direct_IO,
  	.migratepage		= buffer_migrate_page,
  	.is_partially_uptodate  = block_is_partially_uptodate,
aa261f549   Andi Kleen   HWPOISON: Enable ...
1946
  	.error_remove_page	= generic_error_remove_page,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1947
  };
f5e54d6e5   Christoph Hellwig   [PATCH] mark addr...
1948
  static const struct address_space_operations ext3_journalled_aops = {
8ab22b9ab   Hisashi Hifumi   vfs: pagecache us...
1949
1950
1951
  	.readpage		= ext3_readpage,
  	.readpages		= ext3_readpages,
  	.writepage		= ext3_journalled_writepage,
8ab22b9ab   Hisashi Hifumi   vfs: pagecache us...
1952
1953
1954
1955
1956
1957
1958
  	.write_begin		= ext3_write_begin,
  	.write_end		= ext3_journalled_write_end,
  	.set_page_dirty		= ext3_journalled_set_page_dirty,
  	.bmap			= ext3_bmap,
  	.invalidatepage		= ext3_invalidatepage,
  	.releasepage		= ext3_releasepage,
  	.is_partially_uptodate  = block_is_partially_uptodate,
aa261f549   Andi Kleen   HWPOISON: Enable ...
1959
  	.error_remove_page	= generic_error_remove_page,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
  };
  
  /*
   * Pick the address_space_operations table for @inode: ordered first,
   * then writeback, otherwise journalled, and install it on the inode's
   * mapping.
   */
  void ext3_set_aops(struct inode *inode)
  {
  	const struct address_space_operations *aops;

  	if (ext3_should_order_data(inode))
  		aops = &ext3_ordered_aops;
  	else if (ext3_should_writeback_data(inode))
  		aops = &ext3_writeback_aops;
  	else
  		aops = &ext3_journalled_aops;

  	inode->i_mapping->a_ops = aops;
  }
  
  /*
   * ext3_block_truncate_page() zeroes out a mapping from file offset `from'
   * up to the end of the block which corresponds to `from'.
   * This is required during truncate. We need to physically zero the tail end
   * of that block so it doesn't yield old data if the file is later grown.
   */
ee3e77f18   Jan Kara   ext3: Improve tru...
1978
  static int ext3_block_truncate_page(struct inode *inode, loff_t from)
  {
  	ext3_fsblk_t pg_index = from >> PAGE_CACHE_SHIFT;
  	unsigned pg_off = from & (PAGE_CACHE_SIZE - 1);
  	unsigned blocksize = inode->i_sb->s_blocksize;
  	unsigned iblock, zero_len, blk_end;
  	struct buffer_head *bh;
  	struct page *page;
  	handle_t *handle = NULL;
  	int ret = 0;

  	/* A block-aligned @from leaves no partial block behind to zero. */
  	if (!(from & (blocksize - 1)))
  		return 0;

  	page = grab_cache_page(inode->i_mapping, pg_index);
  	if (page == NULL)
  		return -ENOMEM;

  	/* Number of bytes from @from up to the end of its block. */
  	zero_len = blocksize - (pg_off & (blocksize - 1));
  	/* File block number of the first block in this page. */
  	iblock = pg_index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);

  	if (!page_has_buffers(page))
  		create_empty_buffers(page, blocksize, 0);

  	/* Step along the page's buffer ring to the one covering pg_off. */
  	bh = page_buffers(page);
  	for (blk_end = blocksize; blk_end <= pg_off; blk_end += blocksize) {
  		bh = bh->b_this_page;
  		iblock++;
  	}

  	if (buffer_freed(bh)) {
  		BUFFER_TRACE(bh, "freed: skip");
  		goto unlock;
  	}

  	if (!buffer_mapped(bh)) {
  		BUFFER_TRACE(bh, "unmapped");
  		/* Lookup only (create == 0); the return value is not checked. */
  		ext3_get_block(inode, iblock, bh, 0);
  		/* Still unmapped after the lookup: a hole, nothing to zero. */
  		if (!buffer_mapped(bh)) {
  			BUFFER_TRACE(bh, "still unmapped");
  			goto unlock;
  		}
  	}

  	/* The block is mapped; make sure its contents are up to date. */
  	if (PageUptodate(page))
  		set_buffer_uptodate(bh);

  	if (!bh_uptodate_or_lock(bh)) {
  		ret = bh_submit_read(bh);
  		if (ret)
  			goto unlock;	/* read error: give up */
  	}

  	/* Only data=writeback zeroes the data without a transaction. */
  	if (!ext3_should_writeback_data(inode)) {
  		/* One credit: at most a single block is journalled. */
  		handle = ext3_journal_start(inode, 1);
  		if (IS_ERR(handle)) {
  			clear_highpage(page);
  			flush_dcache_page(page);
  			ret = PTR_ERR(handle);
  			goto unlock;
  		}
  	}

  	if (ext3_should_journal_data(inode)) {
  		BUFFER_TRACE(bh, "get write access");
  		ret = ext3_journal_get_write_access(handle, bh);
  		if (ret)
  			goto stop;
  	}

  	zero_user(page, pg_off, zero_len);
  	BUFFER_TRACE(bh, "zeroed end of block");

  	/*
  	 * Journalled data is dirtied through the journal as metadata.
  	 * Otherwise the buffer is marked dirty directly, after first being
  	 * filed as ordered data when the inode is in ordered mode.
  	 */
  	if (ext3_should_journal_data(inode)) {
  		ret = ext3_journal_dirty_metadata(handle, bh);
  		goto stop;
  	}
  	if (ext3_should_order_data(inode))
  		ret = ext3_journal_dirty_data(handle, bh);
  	mark_buffer_dirty(bh);

  stop:
  	if (handle)
  		ext3_journal_stop(handle);

  unlock:
  	unlock_page(page);
  	page_cache_release(page);
  	return ret;
  }
  
  /*
   * Probably it should be a library function... search for first non-zero word
   * or memcmp with zero_page, whatever is better for particular architecture.
   * Linus?
   */
  static inline int all_zeroes(__le32 *p, __le32 *q)
  {
  	/* Scan [p, q); any non-zero word ends the search with 0. */
  	for (; p < q; p++) {
  		if (*p != 0)
  			return 0;
  	}

  	/* Empty range or only zero words. */
  	return 1;
  }
  
  /**
   *	ext3_find_shared - find the indirect blocks for partial truncation.
   *	@inode:	  inode in question
   *	@depth:	  depth of the affected branch
   *	@offsets: offsets of pointers in that branch (see ext3_block_to_path)
   *	@chain:	  place to store the pointers to partial indirect blocks
   *	@top:	  place to store the (detached) top of branch
   *
   *	This is a helper function used by ext3_truncate().
   *
   *	When we do truncate() we may have to clean the ends of several
   *	indirect blocks but leave the blocks themselves alive. Block is
25985edce   Lucas De Marchi   Fix common misspe...
2098
   *	partially truncated if some data below the new i_size is referred
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
   *	from it (and it is on the path to the first completely truncated
   *	data block, indeed).  We have to free the top of that path along
   *	with everything to the right of the path. Since no allocation
   *	past the truncation point is possible until ext3_truncate()
   *	finishes, we may safely do the latter, but top of branch may
   *	require special attention - pageout below the truncation point
   *	might try to populate it.
   *
   *	We atomically detach the top of branch from the tree, store the
   *	block number of its root in *@top, pointers to buffer_heads of
   *	partially truncated blocks - in @chain[].bh and pointers to
   *	their last elements that should not be removed - in
   *	@chain[].p. Return value is the pointer to last filled element
   *	of @chain.
   *
   *	The work left to caller to do the actual freeing of subtrees:
   *		a) free the subtree starting from *@top
   *		b) free the subtrees whose roots are stored in
   *			(@chain[i].p+1 .. end of @chain[i].bh->b_data)
   *		c) free the subtrees growing from the inode past the @chain[0].
   *			(no partially truncated stuff there).  */
d6859bfca   Andrew Morton   [PATCH] ext3: cle...
2120
2121
  static Indirect *ext3_find_shared(struct inode *inode, int depth,
  			int offsets[4], Indirect chain[4], __le32 *top)
  {
  	Indirect *partial, *keep;
  	int levels = depth;
  	int err;

  	*top = 0;

  	/* Trim trailing zero offsets, but always keep at least one level. */
  	while (levels > 1 && offsets[levels - 1] == 0)
  		levels--;

  	partial = ext3_get_branch(inode, levels, offsets, chain, &err);
  	/* No partial element reported: use the last one that was looked up. */
  	if (partial == NULL)
  		partial = &chain[levels - 1];

  	/*
  	 * If the branch acquired a continuation since it was last examined,
  	 * all of it has to survive and the (new) top is not ours to free.
  	 */
  	if (partial->key == 0 && *partial->p != 0)
  		return partial;

  	/*
  	 * Move towards the inode for as long as every pointer preceding
  	 * ->p in the element's block is zero.
  	 */
  	keep = partial;
  	while (keep > chain &&
  	       all_zeroes((__le32 *)keep->bh->b_data, keep->p))
  		keep--;

  	/*
  	 * 'keep' is now the last block that must survive.  When the rest of
  	 * the branch is entirely ours and does not grow directly from the
  	 * inode, it is enough to step ->p back by one slot.  Otherwise the
  	 * block number of the branch root is reported through *top.
  	 */
  	if (keep > chain && keep == &chain[levels - 1]) {
  		keep->p--;
  	} else {
  		*top = *keep->p;
  		/*
  		 * The pointer itself is deliberately not zeroed here: ext3
  		 * must leave the tree intact.
  		 */
  	}

  	/* Drop the buffers of the chain elements beyond 'keep'. */
  	for (; partial > keep; partial--)
  		brelse(partial->bh);

  	return partial;
  }
  
  /*
   * Zero a number of block pointers in either an inode or an indirect block.
   * If we restart the transaction we must again get write access to the
   * indirect block for further modification.
   *
   * We release `count' blocks on disk, but (last - first) may be greater
   * than `count' because there can be holes in there.
   */
d6859bfca   Andrew Morton   [PATCH] ext3: cle...
2175
  static void ext3_clear_blocks(handle_t *handle, struct inode *inode,
  		struct buffer_head *bh, ext3_fsblk_t block_to_free,
  		unsigned long count, __le32 *first, __le32 *last)
  {
  	__le32 *slot;

  	if (try_to_extend_transaction(handle, inode)) {
  		/*
  		 * The transaction is about to be restarted: push out what
  		 * has been changed so far, restart, and then take write
  		 * access to the indirect block again.
  		 */
  		if (bh != NULL) {
  			BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
  			if (ext3_journal_dirty_metadata(handle, bh) != 0)
  				return;
  		}
  		ext3_mark_inode_dirty(handle, inode);
  		truncate_restart_transaction(handle, inode);
  		if (bh != NULL) {
  			BUFFER_TRACE(bh, "retaking write access");
  			if (ext3_journal_get_write_access(handle, bh) != 0)
  				return;
  		}
  	}

  	/*
  	 * Buffers that are on the journal are in memory, so they can be
  	 * found through the hash table and handed to ext3_forget(), letting
  	 * journal_revoke() run journal_forget() on them.  Each block has
  	 * already been detached from the file at that point, so bforget()
  	 * in journal_forget() should be safe.
  	 *
  	 * AKPM: turn on bforget in journal_forget()!!!
  	 */
  	for (slot = first; slot < last; slot++) {
  		struct buffer_head *victim;
  		u32 blk = le32_to_cpu(*slot);

  		if (blk == 0)
  			continue;	/* nothing allocated in this slot */

  		*slot = 0;
  		victim = sb_find_get_block(inode->i_sb, blk);
  		ext3_forget(handle, 0, inode, victim, blk);
  	}

  	ext3_free_blocks(handle, inode, block_to_free, count);
  }
  
  /**
   * ext3_free_data - free a list of data blocks
   * @handle:	handle for this transaction
   * @inode:	inode we are dealing with
   * @this_bh:	indirect buffer_head which contains *@first and *@last
   * @first:	array of block numbers
   * @last:	points immediately past the end of array
   *
25985edce   Lucas De Marchi   Fix common misspe...
2225
   * We are freeing all blocks referred from that array (numbers are stored as
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
   * little-endian 32-bit) and updating @inode->i_blocks appropriately.
   *
   * We accumulate contiguous runs of blocks to free.  Conveniently, if these
   * blocks are contiguous then releasing them at one time will only affect one
   * or two bitmap blocks (+ group descriptor(s) and superblock) and we won't
   * actually use a lot of journal space.
   *
   * @this_bh will be %NULL if @first and @last point into the inode's direct
   * block pointers.
   */
  static void ext3_free_data(handle_t *handle, struct inode *inode,
  			   struct buffer_head *this_bh,
  			   __le32 *first, __le32 *last)
  {
  	ext3_fsblk_t run_start = 0;	/* first block number of the run */
  	unsigned long run_len = 0;	/* blocks collected in the run */
  	__le32 *run_slot = NULL;	/* slot that holds run_start */
  	ext3_fsblk_t nr;		/* block number in the current slot */
  	__le32 *p;			/* current slot in inode/indirect */

  	if (this_bh) {				/* pointers live in an indirect block */
  		BUFFER_TRACE(this_bh, "get_write_access");
  		/*
  		 * Important: without write access the indirect pointers
  		 * cannot be updated, so the blocks must not be freed.
  		 */
  		if (ext3_journal_get_write_access(handle, this_bh))
  			return;
  	}

  	for (p = first; p < last; p++) {
  		nr = le32_to_cpu(*p);
  		if (!nr)
  			continue;

  		/* Contiguous with the run being collected: just extend it. */
  		if (run_len > 0 && nr == run_start + run_len) {
  			run_len++;
  			continue;
  		}

  		/* Release the previous run, if any, before starting anew. */
  		if (run_len > 0)
  			ext3_clear_blocks(handle, inode, this_bh,
  					  run_start,
  					  run_len, run_slot, p);
  		run_start = nr;
  		run_slot = p;
  		run_len = 1;
  	}

  	/* Release whatever run is still pending after the scan. */
  	if (run_len > 0)
  		ext3_clear_blocks(handle, inode, this_bh, run_start,
  				  run_len, run_slot, p);

  	if (!this_bh)
  		return;

  	BUFFER_TRACE(this_bh, "call ext3_journal_dirty_metadata");

  	/*
  	 * A journal head is expected on the buffer at this point.  With
  	 * corrupted data an indirect block may point to itself, in which
  	 * case it was detached when the block was cleared; report that
  	 * instead of OOPSing.
  	 */
  	if (bh2jh(this_bh))
  		ext3_journal_dirty_metadata(handle, this_bh);
  	else
  		ext3_error(inode->i_sb, "ext3_free_data",
  			   "circular indirect block detected, "
  			   "inode=%lu, block=%llu",
  			   inode->i_ino,
  			   (unsigned long long)this_bh->b_blocknr);
  }
  
  /**
   *	ext3_free_branches - free an array of branches
   *	@handle: JBD handle for this transaction
   *	@inode:	inode we are dealing with
   *	@parent_bh: the buffer_head which contains *@first and *@last
   *	@first:	array of block numbers
   *	@last:	pointer immediately past the end of array
   *	@depth:	depth of the branches to free
   *
25985edce   Lucas De Marchi   Fix common misspe...
2313
   *	We are freeing all blocks referred from these branches (numbers are
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2314
2315
2316
2317
2318
2319
2320
   *	stored as little-endian 32-bit) and updating @inode->i_blocks
   *	appropriately.
   */
  static void ext3_free_branches(handle_t *handle, struct inode *inode,
  			       struct buffer_head *parent_bh,
  			       __le32 *first, __le32 *last, int depth)
  {
  	ext3_fsblk_t nr;
  	__le32 *p;

  	/* Do nothing at all if the handle has already been aborted. */
  	if (is_handle_aborted(handle))
  		return;

  	if (depth--) {
  		struct buffer_head *bh;
  		int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb);

  		/* Visit the slots right to left: last - 1 down to first. */
  		p = last;
  		while (--p >= first) {
  			nr = le32_to_cpu(*p);
  			if (!nr)
  				continue;		/* A hole */

  			/* Go read the buffer for the next level down */
  			bh = sb_bread(inode->i_sb, nr);

  			/*
  			 * A read failure? Report the error and move on to
  			 * the next slot (should be rare).  The slot itself
  			 * is left untouched.
  			 */
  			if (!bh) {
  				ext3_error(inode->i_sb, "ext3_free_branches",
  					   "Read failure, inode=%lu, block="E3FSBLK,
  					   inode->i_ino, nr);
  				continue;
  			}

  			/* This zaps the entire block.  Bottom up. */
  			BUFFER_TRACE(bh, "free child branches");
  			ext3_free_branches(handle, inode, bh,
  					   (__le32*)bh->b_data,
  					   (__le32*)bh->b_data + addr_per_block,
  					   depth);

  			/*
  			 * Everything below this pointer has been
  			 * released.  Now let this top-of-subtree go.
  			 *
  			 * We want the freeing of this indirect block to be
  			 * atomic in the journal with the updating of the
  			 * bitmap block which owns it.  So make some room in
  			 * the journal.
  			 *
  			 * We zero the parent pointer *after* freeing its
  			 * pointee in the bitmaps, so if extend_transaction()
  			 * for some reason fails to put the bitmap changes and
  			 * the release into the same transaction, recovery
  			 * will merely complain about releasing a free block,
  			 * rather than leaking blocks.
  			 */
  			if (is_handle_aborted(handle)) {
  				/*
  				 * On the normal path the reference taken by
  				 * sb_bread() is handed to ext3_forget()
  				 * below.  This early exit never gets there,
  				 * so drop the reference here instead of
  				 * leaking the buffer.
  				 */
  				brelse(bh);
  				return;
  			}
  			if (try_to_extend_transaction(handle, inode)) {
  				ext3_mark_inode_dirty(handle, inode);
  				truncate_restart_transaction(handle, inode);
  			}

  			/*
  			 * We've probably journalled the indirect block several
  			 * times during the truncate.  But it's no longer
  			 * needed and we now drop it from the transaction via
  			 * journal_revoke().
  			 *
  			 * That's easy if it's exclusively part of this
  			 * transaction.  But if it's part of the committing
  			 * transaction then journal_forget() will simply
  			 * brelse() it.  That means that if the underlying
  			 * block is reallocated in ext3_get_block(),
  			 * unmap_underlying_metadata() will find this block
  			 * and will try to get rid of it.  damn, damn. Thus
  			 * we don't allow a block to be reallocated until
  			 * a transaction freeing it has fully committed.
  			 *
  			 * We also have to make sure journal replay after a
  			 * crash does not overwrite non-journaled data blocks
  			 * with old metadata when the block got reallocated for
  			 * data.  Thus we have to store a revoke record for a
  			 * block in the same transaction in which we free the
  			 * block.
  			 */
  			ext3_forget(handle, 1, inode, bh, bh->b_blocknr);

  			ext3_free_blocks(handle, inode, nr, 1);

  			if (parent_bh) {
  				/*
  				 * The block which we have just freed is
  				 * pointed to by an indirect block: journal it
  				 */
  				BUFFER_TRACE(parent_bh, "get_write_access");
  				if (!ext3_journal_get_write_access(handle,
  								   parent_bh)){
  					*p = 0;
  					BUFFER_TRACE(parent_bh,
  					"call ext3_journal_dirty_metadata");
  					ext3_journal_dirty_metadata(handle,
  								    parent_bh);
  				}
  			}
  		}
  	} else {
  		/* We have reached the bottom of the tree. */
  		BUFFER_TRACE(parent_bh, "free data blocks");
  		ext3_free_data(handle, inode, parent_bh, first, last);
  	}
  }
ae76dd9a6   Duane Griffin   ext3: handle corr...
2427
2428
  /*
   * Report whether @inode is of a kind that can be truncated: returns 1 for
   * regular files, directories and symlinks that are not fast symlinks, and
   * 0 for everything else.
   */
  int ext3_can_truncate(struct inode *inode)
  {
  	if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))
  		return 1;

  	/* Anything that is not a symlink at this point is refused. */
  	if (!S_ISLNK(inode->i_mode))
  		return 0;

  	return !ext3_inode_is_fast_symlink(inode);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2437
2438
2439
2440
2441
2442
2443
  /*
   * ext3_truncate()
   *
   * We block out ext3_get_block() block instantiations across the entire
   * transaction, and VFS/VM ensures that ext3_truncate() cannot run
   * simultaneously on behalf of the same inode.
   *
42b2aa86c   Justin P. Mattock   treewide: Fix typ...
2444
   * As we work through the truncate and commit bits of it to the journal there
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
   * is one core, guiding principle: the file's tree must always be consistent on
   * disk.  We must be able to restart the truncate after a crash.
   *
   * The file's tree may be transiently inconsistent in memory (although it
   * probably isn't), but whenever we close off and commit a journal transaction,
   * the contents of (the filesystem + the journal) must be consistent and
   * restartable.  It's pretty simple, really: bottom up, right to left (although
   * left-to-right works OK too).
   *
   * Note that at recovery time, journal replay occurs *before* the restart of
   * truncate against the orphan inode list.
   *
   * The committed inode has the new, desired i_size (which is the same as
   * i_disksize in this case).  After a crash, ext3_orphan_cleanup() will see
   * that this inode's truncate did not complete and it will again call
   * ext3_truncate() to have another go.  So there will be instantiated blocks
   * to the right of the truncation point in a crashed ext3 filesystem.  But
   * that's fine - as long as they are linked from the inode, the post-crash
   * ext3_truncate() run will find them and release them.
   */
d6859bfca   Andrew Morton   [PATCH] ext3: cle...
2465
  void ext3_truncate(struct inode *inode)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2466
2467
2468
2469
2470
  {
  	handle_t *handle;
  	struct ext3_inode_info *ei = EXT3_I(inode);
  	__le32 *i_data = ei->i_data;
  	int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2471
2472
2473
2474
2475
2476
2477
  	int offsets[4];
  	Indirect chain[4];
  	Indirect *partial;
  	__le32 nr = 0;
  	int n;
  	long last_block;
  	unsigned blocksize = inode->i_sb->s_blocksize;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2478

785c4bcc0   Lukas Czerner   ext3: Add fixed t...
2479
  	trace_ext3_truncate_enter(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2480

ae76dd9a6   Duane Griffin   ext3: handle corr...
2481
  	if (!ext3_can_truncate(inode))
ef43618a4   Jan Kara   ext3: make sure i...
2482
  		goto out_notrans;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2483

f7ab34ea7   Theodore Ts'o   ext3: Add replace...
2484
  	if (inode->i_size == 0 && ext3_should_writeback_data(inode))
9df93939b   Jan Kara   ext3: Use bitops ...
2485
  		ext3_set_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE);
f7ab34ea7   Theodore Ts'o   ext3: Add replace...
2486

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2487
  	handle = start_transaction(inode);
ee3e77f18   Jan Kara   ext3: Improve tru...
2488
  	if (IS_ERR(handle))
ef43618a4   Jan Kara   ext3: make sure i...
2489
  		goto out_notrans;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2490
2491
2492
  
  	last_block = (inode->i_size + blocksize-1)
  					>> EXT3_BLOCK_SIZE_BITS(inode->i_sb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
  	n = ext3_block_to_path(inode, last_block, offsets, NULL);
  	if (n == 0)
  		goto out_stop;	/* error */
  
  	/*
  	 * OK.  This truncate is going to happen.  We add the inode to the
  	 * orphan list, so that if this truncate spans multiple transactions,
  	 * and we crash, we will resume the truncate when the filesystem
  	 * recovers.  It also marks the inode dirty, to catch the new size.
  	 *
  	 * Implication: the file must always be in a sane, consistent
  	 * truncatable state while each transaction commits.
  	 */
  	if (ext3_orphan_add(handle, inode))
  		goto out_stop;
  
  	/*
  	 * The orphan list entry will now protect us from any crash which
  	 * occurs before the truncate completes, so it is now safe to propagate
  	 * the new, shorter inode size (held for now in i_size) into the
  	 * on-disk inode. We do this via i_disksize, which is the value which
  	 * ext3 *really* writes onto the disk inode.
  	 */
  	ei->i_disksize = inode->i_size;
  
  	/*
  	 * From here we block out all ext3_get_block() callers who want to
  	 * modify the block allocation tree.
  	 */
974615186   Arjan van de Ven   [PATCH] convert e...
2522
  	mutex_lock(&ei->truncate_mutex);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
  
  	if (n == 1) {		/* direct blocks */
  		ext3_free_data(handle, inode, NULL, i_data+offsets[0],
  			       i_data + EXT3_NDIR_BLOCKS);
  		goto do_indirects;
  	}
  
  	partial = ext3_find_shared(inode, n, offsets, chain, &nr);
  	/* Kill the top of shared branch (not detached) */
  	if (nr) {
  		if (partial == chain) {
  			/* Shared branch grows from the inode */
  			ext3_free_branches(handle, inode, NULL,
  					   &nr, &nr+1, (chain+n-1) - partial);
  			*partial->p = 0;
  			/*
  			 * We mark the inode dirty prior to restart,
  			 * and prior to stop.  No need for it here.
  			 */
  		} else {
  			/* Shared branch grows from an indirect block */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
  			ext3_free_branches(handle, inode, partial->bh,
  					partial->p,
  					partial->p+1, (chain+n-1) - partial);
  		}
  	}
  	/* Clear the ends of indirect blocks on the shared branch */
  	while (partial > chain) {
  		ext3_free_branches(handle, inode, partial->bh, partial->p + 1,
  				   (__le32*)partial->bh->b_data+addr_per_block,
  				   (chain+n-1) - partial);
  		BUFFER_TRACE(partial->bh, "call brelse");
  		brelse (partial->bh);
  		partial--;
  	}
  do_indirects:
  	/* Kill the remaining (whole) subtrees */
  	switch (offsets[0]) {
d6859bfca   Andrew Morton   [PATCH] ext3: cle...
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
  	default:
  		nr = i_data[EXT3_IND_BLOCK];
  		if (nr) {
  			ext3_free_branches(handle, inode, NULL, &nr, &nr+1, 1);
  			i_data[EXT3_IND_BLOCK] = 0;
  		}
  	case EXT3_IND_BLOCK:
  		nr = i_data[EXT3_DIND_BLOCK];
  		if (nr) {
  			ext3_free_branches(handle, inode, NULL, &nr, &nr+1, 2);
  			i_data[EXT3_DIND_BLOCK] = 0;
  		}
  	case EXT3_DIND_BLOCK:
  		nr = i_data[EXT3_TIND_BLOCK];
  		if (nr) {
  			ext3_free_branches(handle, inode, NULL, &nr, &nr+1, 3);
  			i_data[EXT3_TIND_BLOCK] = 0;
  		}
  	case EXT3_TIND_BLOCK:
  		;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2581
2582
2583
  	}
  
  	ext3_discard_reservation(inode);
974615186   Arjan van de Ven   [PATCH] convert e...
2584
  	mutex_unlock(&ei->truncate_mutex);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2585
2586
  	inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
  	ext3_mark_inode_dirty(handle, inode);
d6859bfca   Andrew Morton   [PATCH] ext3: cle...
2587
2588
2589
2590
  	/*
  	 * In a multi-transaction truncate, we only make the final transaction
  	 * synchronous
  	 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2591
2592
2593
2594
2595
2596
2597
  	if (IS_SYNC(inode))
  		handle->h_sync = 1;
  out_stop:
  	/*
  	 * If this was a simple ftruncate(), and the file will remain alive
  	 * then we need to clear up the orphan record which we created above.
  	 * However, if this was a real unlink then we were called by
ac14a95b5   Al Viro   convert ext3 to -...
2598
  	 * ext3_evict_inode(), and we allow that function to clean up the
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2599
2600
2601
2602
2603
2604
  	 * orphan info for us.
  	 */
  	if (inode->i_nlink)
  		ext3_orphan_del(handle, inode);
  
  	ext3_journal_stop(handle);
785c4bcc0   Lukas Czerner   ext3: Add fixed t...
2605
  	trace_ext3_truncate_exit(inode);
ef43618a4   Jan Kara   ext3: make sure i...
2606
2607
2608
2609
2610
2611
2612
2613
  	return;
  out_notrans:
  	/*
  	 * Delete the inode from orphan list so that it doesn't stay there
  	 * forever and trigger assertion on umount.
  	 */
  	if (inode->i_nlink)
  		ext3_orphan_del(NULL, inode);
785c4bcc0   Lukas Czerner   ext3: Add fixed t...
2614
  	trace_ext3_truncate_exit(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2615
  }
43d23f903   Mingming Cao   [PATCH] ext3_fsbl...
2616
  static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2617
2618
  		unsigned long ino, struct ext3_iloc *iloc)
  {
e0e369a7d   Akinobu Mita   ext3: use ext3_ge...
2619
  	unsigned long block_group;
43d23f903   Mingming Cao   [PATCH] ext3_fsbl...
2620
2621
  	unsigned long offset;
  	ext3_fsblk_t block;
e0e369a7d   Akinobu Mita   ext3: use ext3_ge...
2622
  	struct ext3_group_desc *gdp;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2623

2ccb48ebb   Neil Brown   [PATCH] ext3: avo...
2624
2625
2626
2627
2628
2629
  	if (!ext3_valid_inum(sb, ino)) {
  		/*
  		 * This error is already checked for in namei.c unless we are
  		 * looking at an NFS filehandle, in which case no error
  		 * report is needed
  		 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2630
2631
  		return 0;
  	}
2ccb48ebb   Neil Brown   [PATCH] ext3: avo...
2632

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2633
  	block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb);
e0e369a7d   Akinobu Mita   ext3: use ext3_ge...
2634
2635
  	gdp = ext3_get_group_desc(sb, block_group, NULL);
  	if (!gdp)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2636
  		return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2637
2638
2639
2640
2641
  	/*
  	 * Figure out the offset within the block group inode table
  	 */
  	offset = ((ino - 1) % EXT3_INODES_PER_GROUP(sb)) *
  		EXT3_INODE_SIZE(sb);
e0e369a7d   Akinobu Mita   ext3: use ext3_ge...
2642
  	block = le32_to_cpu(gdp->bg_inode_table) +
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
  		(offset >> EXT3_BLOCK_SIZE_BITS(sb));
  
  	iloc->block_group = block_group;
  	iloc->offset = offset & (EXT3_BLOCK_SIZE(sb) - 1);
  	return block;
  }
  
  /*
   * ext3_get_inode_loc returns with an extra refcount against the inode's
   * underlying buffer_head on success. If 'in_mem' is true, we have all
   * data in memory that is needed to recreate the on-disk version of this
   * inode.
   */
  static int __ext3_get_inode_loc(struct inode *inode,
  				struct ext3_iloc *iloc, int in_mem)
  {
43d23f903   Mingming Cao   [PATCH] ext3_fsbl...
2659
  	ext3_fsblk_t block;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
  	struct buffer_head *bh;
  
  	block = ext3_get_inode_block(inode->i_sb, inode->i_ino, iloc);
  	if (!block)
  		return -EIO;
  
  	bh = sb_getblk(inode->i_sb, block);
  	if (!bh) {
  		ext3_error (inode->i_sb, "ext3_get_inode_loc",
  				"unable to read inode block - "
43d23f903   Mingming Cao   [PATCH] ext3_fsbl...
2670
2671
  				"inode=%lu, block="E3FSBLK,
  				 inode->i_ino, block);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2672
2673
2674
2675
  		return -EIO;
  	}
  	if (!buffer_uptodate(bh)) {
  		lock_buffer(bh);
95450f5a7   Hidehiro Kawai   ext3: don't read ...
2676
2677
2678
2679
2680
2681
2682
2683
2684
  
  		/*
  		 * If the buffer has the write error flag, we have failed
  		 * to write out another inode in the same block.  In this
  		 * case, we don't have to read the block because we may
  		 * read the old inode data successfully.
  		 */
  		if (buffer_write_io_error(bh) && !buffer_uptodate(bh))
  			set_buffer_uptodate(bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
  		if (buffer_uptodate(bh)) {
  			/* someone brought it uptodate while we waited */
  			unlock_buffer(bh);
  			goto has_buffer;
  		}
  
  		/*
  		 * If we have all information of the inode in memory and this
  		 * is the only valid inode in the block, we need not read the
  		 * block.
  		 */
  		if (in_mem) {
  			struct buffer_head *bitmap_bh;
  			struct ext3_group_desc *desc;
  			int inodes_per_buffer;
  			int inode_offset, i;
  			int block_group;
  			int start;
  
  			block_group = (inode->i_ino - 1) /
  					EXT3_INODES_PER_GROUP(inode->i_sb);
  			inodes_per_buffer = bh->b_size /
  				EXT3_INODE_SIZE(inode->i_sb);
  			inode_offset = ((inode->i_ino - 1) %
  					EXT3_INODES_PER_GROUP(inode->i_sb));
  			start = inode_offset & ~(inodes_per_buffer - 1);
  
  			/* Is the inode bitmap in cache? */
  			desc = ext3_get_group_desc(inode->i_sb,
  						block_group, NULL);
  			if (!desc)
  				goto make_io;
  
  			bitmap_bh = sb_getblk(inode->i_sb,
  					le32_to_cpu(desc->bg_inode_bitmap));
  			if (!bitmap_bh)
  				goto make_io;
  
  			/*
  			 * If the inode bitmap isn't in cache then the
  			 * optimisation may end up performing two reads instead
  			 * of one, so skip it.
  			 */
  			if (!buffer_uptodate(bitmap_bh)) {
  				brelse(bitmap_bh);
  				goto make_io;
  			}
  			for (i = start; i < start + inodes_per_buffer; i++) {
  				if (i == inode_offset)
  					continue;
  				if (ext3_test_bit(i, bitmap_bh->b_data))
  					break;
  			}
  			brelse(bitmap_bh);
  			if (i == start + inodes_per_buffer) {
  				/* all other inodes are free, so skip I/O */
  				memset(bh->b_data, 0, bh->b_size);
  				set_buffer_uptodate(bh);
  				unlock_buffer(bh);
  				goto has_buffer;
  			}
  		}
  
  make_io:
  		/*
  		 * There are other valid inodes in the buffer, this inode
  		 * has in-inode xattrs, or we don't have this inode in memory.
  		 * Read the block from disk.
  		 */
785c4bcc0   Lukas Czerner   ext3: Add fixed t...
2754
  		trace_ext3_load_inode(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2755
2756
  		get_bh(bh);
  		bh->b_end_io = end_buffer_read_sync;
65299a3b7   Christoph Hellwig   block: separate p...
2757
  		submit_bh(READ | REQ_META | REQ_PRIO, bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2758
2759
2760
2761
  		wait_on_buffer(bh);
  		if (!buffer_uptodate(bh)) {
  			ext3_error(inode->i_sb, "ext3_get_inode_loc",
  					"unable to read inode block - "
43d23f903   Mingming Cao   [PATCH] ext3_fsbl...
2762
  					"inode=%lu, block="E3FSBLK,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
  					inode->i_ino, block);
  			brelse(bh);
  			return -EIO;
  		}
  	}
  has_buffer:
  	iloc->bh = bh;
  	return 0;
  }
  
  int ext3_get_inode_loc(struct inode *inode, struct ext3_iloc *iloc)
  {
  	/* We have all inode data except xattrs in memory here. */
  	return __ext3_get_inode_loc(inode, iloc,
9df93939b   Jan Kara   ext3: Use bitops ...
2777
  		!ext3_test_inode_state(inode, EXT3_STATE_XATTR));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
  }
  
  /*
   * Propagate the ext3 attribute bits kept in EXT3_I(inode)->i_flags to the
   * matching S_* bits of inode->i_flags.  The five S_* bits handled here are
   * cleared first and then set again one by one.
   */
  void ext3_set_inode_flags(struct inode *inode)
  {
  	static const struct {
  		unsigned int ext3_bit;
  		unsigned int vfs_bit;
  	} map[] = {
  		{ EXT3_SYNC_FL,		S_SYNC },
  		{ EXT3_APPEND_FL,	S_APPEND },
  		{ EXT3_IMMUTABLE_FL,	S_IMMUTABLE },
  		{ EXT3_NOATIME_FL,	S_NOATIME },
  		{ EXT3_DIRSYNC_FL,	S_DIRSYNC },
  	};
  	unsigned int ext3_flags = EXT3_I(inode)->i_flags;
  	unsigned int k;

  	inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
  	for (k = 0; k < sizeof(map) / sizeof(map[0]); k++) {
  		if (ext3_flags & map[k].ext3_bit)
  			inode->i_flags |= map[k].vfs_bit;
  	}
  }
28be5abb4   Jan Kara   ext3: copy i_flag...
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
  /*
   * Reverse of ext3_set_inode_flags(): copy the S_* bits found in
   * ei->vfs_inode.i_flags into the matching EXT3_*_FL bits of ei->i_flags.
   * The five EXT3_*_FL bits handled here are cleared first and then set
   * again one by one.
   */
  void ext3_get_inode_flags(struct ext3_inode_info *ei)
  {
  	static const struct {
  		unsigned int vfs_bit;
  		unsigned int ext3_bit;
  	} map[] = {
  		{ S_SYNC,	EXT3_SYNC_FL },
  		{ S_APPEND,	EXT3_APPEND_FL },
  		{ S_IMMUTABLE,	EXT3_IMMUTABLE_FL },
  		{ S_NOATIME,	EXT3_NOATIME_FL },
  		{ S_DIRSYNC,	EXT3_DIRSYNC_FL },
  	};
  	unsigned int vfs_flags = ei->vfs_inode.i_flags;
  	unsigned int k;

  	ei->i_flags &= ~(EXT3_SYNC_FL|EXT3_APPEND_FL|
  			EXT3_IMMUTABLE_FL|EXT3_NOATIME_FL|EXT3_DIRSYNC_FL);
  	for (k = 0; k < sizeof(map) / sizeof(map[0]); k++) {
  		if (vfs_flags & map[k].vfs_bit)
  			ei->i_flags |= map[k].ext3_bit;
  	}
  }
473043dce   David Howells   iget: stop EXT3 f...
2814
  struct inode *ext3_iget(struct super_block *sb, unsigned long ino)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2815
2816
2817
  {
  	struct ext3_iloc iloc;
  	struct ext3_inode *raw_inode;
473043dce   David Howells   iget: stop EXT3 f...
2818
  	struct ext3_inode_info *ei;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2819
  	struct buffer_head *bh;
473043dce   David Howells   iget: stop EXT3 f...
2820
  	struct inode *inode;
fe8bc91c4   Jan Kara   ext3: Wait for pr...
2821
2822
  	journal_t *journal = EXT3_SB(sb)->s_journal;
  	transaction_t *transaction;
473043dce   David Howells   iget: stop EXT3 f...
2823
  	long ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2824
  	int block;
473043dce   David Howells   iget: stop EXT3 f...
2825
2826
2827
2828
2829
2830
2831
  	inode = iget_locked(sb, ino);
  	if (!inode)
  		return ERR_PTR(-ENOMEM);
  	if (!(inode->i_state & I_NEW))
  		return inode;
  
  	ei = EXT3_I(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2832
  	ei->i_block_alloc_info = NULL;
473043dce   David Howells   iget: stop EXT3 f...
2833
2834
  	ret = __ext3_get_inode_loc(inode, &iloc, 0);
  	if (ret < 0)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
  		goto bad_inode;
  	bh = iloc.bh;
  	raw_inode = ext3_raw_inode(&iloc);
  	inode->i_mode = le16_to_cpu(raw_inode->i_mode);
  	inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
  	inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
  	if(!(test_opt (inode->i_sb, NO_UID32))) {
  		inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
  		inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
  	}
bfe868486   Miklos Szeredi   filesystems: add ...
2845
  	set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2846
  	inode->i_size = le32_to_cpu(raw_inode->i_size);
4d7bf11d6   Markus Rechberger   ext2/3/4: fix fil...
2847
2848
2849
  	inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime);
  	inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime);
  	inode->i_mtime.tv_sec = (signed)le32_to_cpu(raw_inode->i_mtime);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2850
  	inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_mtime.tv_nsec = 0;
de329820e   Linus Torvalds   ext3: fix broken ...
2851
  	ei->i_state_flags = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2852
2853
2854
2855
2856
2857
2858
2859
2860
2861
2862
2863
  	ei->i_dir_start_lookup = 0;
  	ei->i_dtime = le32_to_cpu(raw_inode->i_dtime);
  	/* We now have enough fields to check if the inode was active or not.
  	 * This is needed because nfsd might try to access dead inodes
  	 * the test is that same one that e2fsck uses
  	 * NeilBrown 1999oct15
  	 */
  	if (inode->i_nlink == 0) {
  		if (inode->i_mode == 0 ||
  		    !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ORPHAN_FS)) {
  			/* this inode is deleted */
  			brelse (bh);
473043dce   David Howells   iget: stop EXT3 f...
2864
  			ret = -ESTALE;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2865
2866
2867
2868
2869
2870
2871
  			goto bad_inode;
  		}
  		/* The only unlinked inodes we let through here have
  		 * valid i_mode and are being read by the orphan
  		 * recovery code: that's fine, we're about to complete
  		 * the process of deleting those. */
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2872
2873
2874
2875
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
  	inode->i_blocks = le32_to_cpu(raw_inode->i_blocks);
  	ei->i_flags = le32_to_cpu(raw_inode->i_flags);
  #ifdef EXT3_FRAGMENTS
  	ei->i_faddr = le32_to_cpu(raw_inode->i_faddr);
  	ei->i_frag_no = raw_inode->i_frag;
  	ei->i_frag_size = raw_inode->i_fsize;
  #endif
  	ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl);
  	if (!S_ISREG(inode->i_mode)) {
  		ei->i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl);
  	} else {
  		inode->i_size |=
  			((__u64)le32_to_cpu(raw_inode->i_size_high)) << 32;
  	}
  	ei->i_disksize = inode->i_size;
  	inode->i_generation = le32_to_cpu(raw_inode->i_generation);
  	ei->i_block_group = iloc.block_group;
  	/*
  	 * NOTE! The in-memory inode i_data array is in little-endian order
  	 * even on big-endian machines: we do NOT byteswap the block numbers!
  	 */
  	for (block = 0; block < EXT3_N_BLOCKS; block++)
  		ei->i_data[block] = raw_inode->i_block[block];
  	INIT_LIST_HEAD(&ei->i_orphan);
fe8bc91c4   Jan Kara   ext3: Wait for pr...
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
  	/*
  	 * Set transaction id's of transactions that have to be committed
  	 * to finish f[data]sync. We set them to currently running transaction
  	 * as we cannot be sure that the inode or some of its metadata isn't
  	 * part of the transaction - the inode could have been reclaimed and
  	 * now it is reread from disk.
  	 */
  	if (journal) {
  		tid_t tid;
  
  		spin_lock(&journal->j_state_lock);
  		if (journal->j_running_transaction)
  			transaction = journal->j_running_transaction;
  		else
  			transaction = journal->j_committing_transaction;
  		if (transaction)
  			tid = transaction->t_tid;
  		else
  			tid = journal->j_commit_sequence;
  		spin_unlock(&journal->j_state_lock);
  		atomic_set(&ei->i_sync_tid, tid);
  		atomic_set(&ei->i_datasync_tid, tid);
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2919
2920
2921
2922
2923
2924
2925
2926
2927
  	if (inode->i_ino >= EXT3_FIRST_INO(inode->i_sb) + 1 &&
  	    EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) {
  		/*
  		 * When mke2fs creates big inodes it does not zero out
  		 * the unused bytes above EXT3_GOOD_OLD_INODE_SIZE,
  		 * so ignore those first few inodes.
  		 */
  		ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
  		if (EXT3_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
e4a10a362   Kirill Korotaev   ext3: lost brelse...
2928
2929
  		    EXT3_INODE_SIZE(inode->i_sb)) {
  			brelse (bh);
473043dce   David Howells   iget: stop EXT3 f...
2930
  			ret = -EIO;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2931
  			goto bad_inode;
e4a10a362   Kirill Korotaev   ext3: lost brelse...
2932
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2933
2934
2935
2936
2937
2938
2939
2940
2941
  		if (ei->i_extra_isize == 0) {
  			/* The extra space is currently unused. Use it. */
  			ei->i_extra_isize = sizeof(struct ext3_inode) -
  					    EXT3_GOOD_OLD_INODE_SIZE;
  		} else {
  			__le32 *magic = (void *)raw_inode +
  					EXT3_GOOD_OLD_INODE_SIZE +
  					ei->i_extra_isize;
  			if (*magic == cpu_to_le32(EXT3_XATTR_MAGIC))
9df93939b   Jan Kara   ext3: Use bitops ...
2942
  				 ext3_set_inode_state(inode, EXT3_STATE_XATTR);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2943
2944
2945
2946
2947
2948
2949
2950
2951
2952
2953
2954
  		}
  	} else
  		ei->i_extra_isize = 0;
  
  	if (S_ISREG(inode->i_mode)) {
  		inode->i_op = &ext3_file_inode_operations;
  		inode->i_fop = &ext3_file_operations;
  		ext3_set_aops(inode);
  	} else if (S_ISDIR(inode->i_mode)) {
  		inode->i_op = &ext3_dir_inode_operations;
  		inode->i_fop = &ext3_dir_operations;
  	} else if (S_ISLNK(inode->i_mode)) {
b5ed3112b   Duane Griffin   ext3: ensure fast...
2955
  		if (ext3_inode_is_fast_symlink(inode)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2956
  			inode->i_op = &ext3_fast_symlink_inode_operations;
b5ed3112b   Duane Griffin   ext3: ensure fast...
2957
2958
2959
  			nd_terminate_link(ei->i_data, inode->i_size,
  				sizeof(ei->i_data) - 1);
  		} else {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2960
2961
2962
2963
2964
2965
2966
2967
  			inode->i_op = &ext3_symlink_inode_operations;
  			ext3_set_aops(inode);
  		}
  	} else {
  		inode->i_op = &ext3_special_inode_operations;
  		if (raw_inode->i_block[0])
  			init_special_inode(inode, inode->i_mode,
  			   old_decode_dev(le32_to_cpu(raw_inode->i_block[0])));
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
2968
  		else
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2969
2970
2971
2972
2973
  			init_special_inode(inode, inode->i_mode,
  			   new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
  	}
  	brelse (iloc.bh);
  	ext3_set_inode_flags(inode);
473043dce   David Howells   iget: stop EXT3 f...
2974
2975
  	unlock_new_inode(inode);
  	return inode;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2976
2977
  
  bad_inode:
473043dce   David Howells   iget: stop EXT3 f...
2978
2979
  	iget_failed(inode);
  	return ERR_PTR(ret);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2980
2981
2982
2983
2984
2985
2986
2987
2988
  }
  
  /*
   * Post the struct inode info into an on-disk inode location in the
   * buffer-cache.  This gobbles the caller's reference to the
   * buffer_head in the inode location struct.
   *
   * The caller must have write access to iloc->bh.
   */
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
2989
2990
  static int ext3_do_update_inode(handle_t *handle,
  				struct inode *inode,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2991
2992
2993
2994
2995
2996
  				struct ext3_iloc *iloc)
  {
  	struct ext3_inode *raw_inode = ext3_raw_inode(iloc);
  	struct ext3_inode_info *ei = EXT3_I(inode);
  	struct buffer_head *bh = iloc->bh;
  	int err = 0, rc, block;
4f003fd32   Chris Mason   ext3: Add locking...
2997
2998
2999
  again:
  	/* we can't allow multiple procs in here at once, its a bit racey */
  	lock_buffer(bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3000
3001
  	/* For fields not not tracking in the in-memory inode,
  	 * initialise them to zero for new inodes. */
9df93939b   Jan Kara   ext3: Use bitops ...
3002
  	if (ext3_test_inode_state(inode, EXT3_STATE_NEW))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3003
  		memset(raw_inode, 0, EXT3_SB(inode->i_sb)->s_inode_size);
28be5abb4   Jan Kara   ext3: copy i_flag...
3004
  	ext3_get_inode_flags(ei);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
  	raw_inode->i_mode = cpu_to_le16(inode->i_mode);
  	if(!(test_opt(inode->i_sb, NO_UID32))) {
  		raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid));
  		raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid));
  /*
   * Fix up interoperability with old kernels. Otherwise, old inodes get
   * re-used with the upper 16 bits of the uid/gid intact
   */
  		if(!ei->i_dtime) {
  			raw_inode->i_uid_high =
  				cpu_to_le16(high_16_bits(inode->i_uid));
  			raw_inode->i_gid_high =
  				cpu_to_le16(high_16_bits(inode->i_gid));
  		} else {
  			raw_inode->i_uid_high = 0;
  			raw_inode->i_gid_high = 0;
  		}
  	} else {
  		raw_inode->i_uid_low =
  			cpu_to_le16(fs_high2lowuid(inode->i_uid));
  		raw_inode->i_gid_low =
  			cpu_to_le16(fs_high2lowgid(inode->i_gid));
  		raw_inode->i_uid_high = 0;
  		raw_inode->i_gid_high = 0;
  	}
  	raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
  	raw_inode->i_size = cpu_to_le32(ei->i_disksize);
  	raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
  	raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
  	raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
  	raw_inode->i_blocks = cpu_to_le32(inode->i_blocks);
  	raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
  	raw_inode->i_flags = cpu_to_le32(ei->i_flags);
  #ifdef EXT3_FRAGMENTS
  	raw_inode->i_faddr = cpu_to_le32(ei->i_faddr);
  	raw_inode->i_frag = ei->i_frag_no;
  	raw_inode->i_fsize = ei->i_frag_size;
  #endif
  	raw_inode->i_file_acl = cpu_to_le32(ei->i_file_acl);
  	if (!S_ISREG(inode->i_mode)) {
  		raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl);
  	} else {
  		raw_inode->i_size_high =
  			cpu_to_le32(ei->i_disksize >> 32);
  		if (ei->i_disksize > 0x7fffffffULL) {
  			struct super_block *sb = inode->i_sb;
  			if (!EXT3_HAS_RO_COMPAT_FEATURE(sb,
  					EXT3_FEATURE_RO_COMPAT_LARGE_FILE) ||
  			    EXT3_SB(sb)->s_es->s_rev_level ==
  					cpu_to_le32(EXT3_GOOD_OLD_REV)) {
  			       /* If this is the first large file
  				* created, add a flag to the superblock.
  				*/
4f003fd32   Chris Mason   ext3: Add locking...
3058
  				unlock_buffer(bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3059
3060
3061
3062
  				err = ext3_journal_get_write_access(handle,
  						EXT3_SB(sb)->s_sbh);
  				if (err)
  					goto out_brelse;
4f003fd32   Chris Mason   ext3: Add locking...
3063

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3064
3065
3066
  				ext3_update_dynamic_rev(sb);
  				EXT3_SET_RO_COMPAT_FEATURE(sb,
  					EXT3_FEATURE_RO_COMPAT_LARGE_FILE);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3067
3068
3069
  				handle->h_sync = 1;
  				err = ext3_journal_dirty_metadata(handle,
  						EXT3_SB(sb)->s_sbh);
4f003fd32   Chris Mason   ext3: Add locking...
3070
3071
  				/* get our lock and start over */
  				goto again;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3072
3073
3074
3075
3076
3077
3078
3079
3080
3081
3082
3083
3084
3085
3086
3087
3088
  			}
  		}
  	}
  	raw_inode->i_generation = cpu_to_le32(inode->i_generation);
  	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
  		if (old_valid_dev(inode->i_rdev)) {
  			raw_inode->i_block[0] =
  				cpu_to_le32(old_encode_dev(inode->i_rdev));
  			raw_inode->i_block[1] = 0;
  		} else {
  			raw_inode->i_block[0] = 0;
  			raw_inode->i_block[1] =
  				cpu_to_le32(new_encode_dev(inode->i_rdev));
  			raw_inode->i_block[2] = 0;
  		}
  	} else for (block = 0; block < EXT3_N_BLOCKS; block++)
  		raw_inode->i_block[block] = ei->i_data[block];
ff87b37da   Andreas Gruenbacher   [PATCH] ext3 xatt...
3089
  	if (ei->i_extra_isize)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3090
3091
3092
  		raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize);
  
  	BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
4f003fd32   Chris Mason   ext3: Add locking...
3093
  	unlock_buffer(bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3094
3095
3096
  	rc = ext3_journal_dirty_metadata(handle, bh);
  	if (!err)
  		err = rc;
9df93939b   Jan Kara   ext3: Use bitops ...
3097
  	ext3_clear_inode_state(inode, EXT3_STATE_NEW);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3098

fe8bc91c4   Jan Kara   ext3: Wait for pr...
3099
  	atomic_set(&ei->i_sync_tid, handle->h_transaction->t_tid);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3100
3101
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111
3112
3113
3114
3115
3116
3117
3118
3119
3120
3121
3122
3123
3124
3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
3138
3139
3140
  out_brelse:
  	brelse (bh);
  	ext3_std_error(inode->i_sb, err);
  	return err;
  }
  
  /*
   * ext3_write_inode()
   *
   * We are called from a few places:
   *
   * - Within generic_file_write() for O_SYNC files.
   *   Here, there will be no transaction running. We wait for any running
   *   transaction to commit.
   *
   * - Within sys_sync(), kupdate and such.
   *   We wait on commit, if told to.
   *
   * - Within prune_icache() (PF_MEMALLOC == true)
   *   Here we simply return.  We can't afford to block kswapd on the
   *   journal commit.
   *
   * In all cases it is actually safe for us to return without doing anything,
   * because the inode has been copied into a raw inode buffer in
   * ext3_mark_inode_dirty().  This is a correctness thing for O_SYNC and for
   * knfsd.
   *
   * Note that we are absolutely dependent upon all inode dirtiers doing the
   * right thing: they *must* call mark_inode_dirty() after dirtying info in
   * which we are interested.
   *
   * It would be a bug for them to not do this.  The code:
   *
   *	mark_inode_dirty(inode)
   *	stuff();
   *	inode->i_size = expr;
   *
   * is in error because a kswapd-driven write_inode() could occur while
   * `stuff()' is running, and the new i_size will be lost.  Plus the inode
   * will no longer be on the superblock's dirty inode list.
   */
a9185b41a   Christoph Hellwig   pass writeback_co...
3141
  int ext3_write_inode(struct inode *inode, struct writeback_control *wbc)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3142
3143
3144
3145
3146
  {
  	if (current->flags & PF_MEMALLOC)
  		return 0;
  
  	if (ext3_journal_current_handle()) {
9ad163ae0   Jose R. Santos   JBD: Fix JBD warn...
3147
3148
  		jbd_debug(1, "called recursively, non-PF_MEMALLOC!
  ");
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3149
3150
3151
  		dump_stack();
  		return -EIO;
  	}
a9185b41a   Christoph Hellwig   pass writeback_co...
3152
  	if (wbc->sync_mode != WB_SYNC_ALL)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3153
3154
3155
3156
3157
3158
3159
3160
3161
3162
3163
3164
3165
3166
3167
3168
3169
3170
  		return 0;
  
  	return ext3_force_commit(inode->i_sb);
  }
  
  /*
   * ext3_setattr()
   *
   * Called from notify_change.
   *
   * We want to trap VFS attempts to truncate the file as soon as
   * possible.  In particular, we want to make sure that when the VFS
   * shrinks i_size, we put the inode on the orphan list and modify
   * i_disksize immediately, so that during the subsequent flushing of
   * dirty pages and freeing of disk blocks, we can guarantee that any
   * commit will leave the blocks being flushed in an unused state on
   * disk.  (On recovery, the inode will get truncated and the blocks will
   * be freed, so we have a strong guarantee that no future commit will
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
3171
   * leave these blocks visible to the user.)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3172
3173
3174
3175
3176
3177
3178
3179
3180
3181
3182
3183
   *
   * Called with inode->sem down.
   */
  int ext3_setattr(struct dentry *dentry, struct iattr *attr)
  {
  	struct inode *inode = dentry->d_inode;
  	int error, rc = 0;
  	const unsigned int ia_valid = attr->ia_valid;
  
  	error = inode_change_ok(inode, attr);
  	if (error)
  		return error;
12755627b   Dmitry Monakhov   quota: unify quot...
3184
  	if (is_quota_modification(inode, attr))
871a29315   Christoph Hellwig   dquot: cleanup dq...
3185
  		dquot_initialize(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3186
3187
3188
3189
3190
3191
  	if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
  		(ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
  		handle_t *handle;
  
  		/* (user+group)*(old+new) structure, inode write (sb,
  		 * inode block, ? - but truncate inode update has it) */
c459001fa   Dmitry Monakhov   ext3: quota macro...
3192
3193
  		handle = ext3_journal_start(inode, EXT3_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+
  					EXT3_MAXQUOTAS_DEL_BLOCKS(inode->i_sb)+3);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3194
3195
3196
3197
  		if (IS_ERR(handle)) {
  			error = PTR_ERR(handle);
  			goto err_out;
  		}
b43fa8284   Christoph Hellwig   dquot: cleanup dq...
3198
  		error = dquot_transfer(inode, attr);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211
  		if (error) {
  			ext3_journal_stop(handle);
  			return error;
  		}
  		/* Update corresponding info in inode so that everything is in
  		 * one transaction */
  		if (attr->ia_valid & ATTR_UID)
  			inode->i_uid = attr->ia_uid;
  		if (attr->ia_valid & ATTR_GID)
  			inode->i_gid = attr->ia_gid;
  		error = ext3_mark_inode_dirty(handle, inode);
  		ext3_journal_stop(handle);
  	}
562c72aa5   Christoph Hellwig   fs: move inode_di...
3212
3213
  	if (attr->ia_valid & ATTR_SIZE)
  		inode_dio_wait(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3214
3215
3216
3217
3218
3219
3220
3221
3222
3223
3224
  	if (S_ISREG(inode->i_mode) &&
  	    attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) {
  		handle_t *handle;
  
  		handle = ext3_journal_start(inode, 3);
  		if (IS_ERR(handle)) {
  			error = PTR_ERR(handle);
  			goto err_out;
  		}
  
  		error = ext3_orphan_add(handle, inode);
ee3e77f18   Jan Kara   ext3: Improve tru...
3225
3226
3227
3228
  		if (error) {
  			ext3_journal_stop(handle);
  			goto err_out;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3229
  		EXT3_I(inode)->i_disksize = attr->ia_size;
ee3e77f18   Jan Kara   ext3: Improve tru...
3230
  		error = ext3_mark_inode_dirty(handle, inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3231
  		ext3_journal_stop(handle);
ee3e77f18   Jan Kara   ext3: Improve tru...
3232
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
  		if (error) {
  			/* Some hard fs error must have happened. Bail out. */
  			ext3_orphan_del(NULL, inode);
  			goto err_out;
  		}
  		rc = ext3_block_truncate_page(inode, attr->ia_size);
  		if (rc) {
  			/* Cleanup orphan list and exit */
  			handle = ext3_journal_start(inode, 3);
  			if (IS_ERR(handle)) {
  				ext3_orphan_del(NULL, inode);
  				goto err_out;
  			}
  			ext3_orphan_del(handle, inode);
  			ext3_journal_stop(handle);
  			goto err_out;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3249
  	}
1025774ce   Christoph Hellwig   remove inode_setattr
3250
3251
  	if ((attr->ia_valid & ATTR_SIZE) &&
  	    attr->ia_size != i_size_read(inode)) {
40680f2fa   Jan Kara   ext3: Convert ext...
3252
3253
  		truncate_setsize(inode, attr->ia_size);
  		ext3_truncate(inode);
1025774ce   Christoph Hellwig   remove inode_setattr
3254
3255
3256
3257
  	}
  
  	setattr_copy(inode, attr);
  	mark_inode_dirty(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3258

1025774ce   Christoph Hellwig   remove inode_setattr
3259
  	if (ia_valid & ATTR_MODE)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
  		rc = ext3_acl_chmod(inode);
  
  err_out:
  	ext3_std_error(inode->i_sb, error);
  	if (!error)
  		error = rc;
  	return error;
  }
  
  
  /*
   * How many blocks doth make a writepage()?
   *
   * With N blocks per page, a single page may need:
   *	N data blocks
   *	2 indirect blocks
   *	2 dindirect blocks
   *	1 tindirect block
   *	N+5 bitmap blocks (from the above)
   *	N+5 group descriptor summary blocks
   *	1 inode block
   *	1 superblock
   *	2 * EXT3_SINGLEDATA_TRANS_BLOCKS for the quota files
   *
   * which adds up to
   *
   *	3 * (N + 5) + 2 + 2 * EXT3_SINGLEDATA_TRANS_BLOCKS
   *
   * With ordered or writeback data it's the same, less the N data blocks.
   *
   * If the inode's direct blocks can hold an integral number of pages then a
   * page cannot straddle two indirect blocks, and we can only touch one
   * indirect and dindirect block, so the "5" above becomes "3".
   *
   * This still overestimates under most circumstances.  If the start and end
   * offsets were passed in here as well we could do block_to_path() on each
   * block and work out the exact number of indirects which are touched.  Pah.
   *
   * Returns the estimated number of journal blocks.  When CONFIG_QUOTA is
   * enabled the quota share is added as EXT3_MAXQUOTAS_TRANS_BLOCKS().
   */
  
  static int ext3_writepage_trans_blocks(struct inode *inode)
  {
  	int blocks_per_page = ext3_journal_blocks_per_page(inode);
  	int nr_indirect;
  	int per_copy;
  	int credits;
  
  	/* A page can only straddle indirect blocks if it does not divide
  	 * the direct block area evenly. */
  	if (EXT3_NDIR_BLOCKS % blocks_per_page)
  		nr_indirect = 5;
  	else
  		nr_indirect = 3;
  
  	per_copy = blocks_per_page + nr_indirect;
  
  	if (ext3_should_journal_data(inode)) {
  		/* data + bitmaps + group descriptors, inode, superblock */
  		credits = 3 * per_copy + 2;
  	} else {
  		/* as above, minus the data blocks themselves */
  		credits = 2 * per_copy + nr_indirect + 2;
  	}
  
  #ifdef CONFIG_QUOTA
  	/* We know that structure was already allocated during dquot_initialize
  	 * so we will be updating only the data blocks + inodes */
  	credits += EXT3_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
  #endif
  
  	return credits;
  }
  
  /*
   * Write the in-core inode into its on-disk buffer and dirty that buffer in
   * the journal.
   *
   * The caller must have previously called ext3_reserve_inode_write().
   * Given this, we know that the caller already has write access to iloc->bh.
   *
   * Returns whatever ext3_do_update_inode() returns.
   */
  int ext3_mark_iloc_dirty(handle_t *handle,
  		struct inode *inode, struct ext3_iloc *iloc)
  {
  	int ret;
  
  	/*
  	 * ext3_do_update_inode() consumes one bh->b_count, so grab an extra
  	 * reference before handing the buffer over.
  	 */
  	get_bh(iloc->bh);
  
  	/* ext3_do_update_inode() does journal_dirty_metadata */
  	ret = ext3_do_update_inode(handle, inode, iloc);
  
  	/* NOTE(review): presumably drops the reference that
  	 * ext3_reserve_inode_write() left outstanding -- confirm. */
  	put_bh(iloc->bh);
  
  	return ret;
  }
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
3334
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3335
   * On success, We end up with an outstanding reference count against
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
3336
   * iloc->bh.  This _must_ be cleaned up later.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3337
3338
3339
   */
  
  int
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
3340
  ext3_reserve_inode_write(handle_t *handle, struct inode *inode,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3341
3342
3343
3344
3345
3346
3347
3348
3349
3350
3351
3352
3353
3354
3355
3356
3357
3358
3359
  			 struct ext3_iloc *iloc)
  {
  	int err = 0;
  	if (handle) {
  		err = ext3_get_inode_loc(inode, iloc);
  		if (!err) {
  			BUFFER_TRACE(iloc->bh, "get_write_access");
  			err = ext3_journal_get_write_access(handle, iloc->bh);
  			if (err) {
  				brelse(iloc->bh);
  				iloc->bh = NULL;
  			}
  		}
  	}
  	ext3_std_error(inode->i_sb, err);
  	return err;
  }
  
  /*
   * What we do here is to mark the in-core inode as clean with respect to
   * inode dirtiness (it may still be data-dirty).
   *
   * This means that the in-core inode may be reaped by prune_icache
   * without having to perform any I/O.  This is a very good thing,
   * because *any* task may call prune_icache - even ones which
   * have a transaction open against a different journal.
   *
   * Is this cheating?  Not really.  Sure, we haven't written the
   * inode out, but prune_icache isn't a user-visible syncing function.
   * Whenever the user wants stuff synced (sys_sync, sys_msync, sys_fsync)
   * we start and wait on commits.
   *
   * Is this efficient/effective?  Well, we're being nice to the system
   * by cleaning up our inodes proactively so they can be reaped
   * without I/O.  But we are potentially leaving up to five seconds'
   * worth of inodes floating about which prune_icache wants us to
   * write out.  One way to fix that would be to get prune_icache()
   * to do a write_super() to free up some memory.  It has the desired
   * effect.
   *
   * Returns 0 on success, or the error from ext3_reserve_inode_write() or
   * ext3_mark_iloc_dirty().  May sleep.
   */
  int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode)
  {
  	struct ext3_iloc iloc;
  	int ret;
  
  	might_sleep();
  	trace_ext3_mark_inode_dirty(inode, _RET_IP_);
  
  	/* Step 1: get write access to the buffer backing the inode. */
  	ret = ext3_reserve_inode_write(handle, inode, &iloc);
  	if (ret)
  		return ret;
  
  	/* Step 2: copy the in-core inode out and dirty the buffer. */
  	return ext3_mark_iloc_dirty(handle, inode, &iloc);
  }
  
  /*
d6859bfca   Andrew Morton   [PATCH] ext3: cle...
3394
   * ext3_dirty_inode() is called from __mark_inode_dirty()
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3395
3396
3397
3398
3399
   *
   * We're really interested in the case where a file is being extended.
   * i_size has been changed by generic_commit_write() and we thus need
   * to include the updated inode in the current transaction.
   *
5dd4056db   Christoph Hellwig   dquot: cleanup sp...
3400
   * Also, dquot_alloc_space() will always dirty the inode when blocks
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3401
3402
3403
3404
3405
3406
   * are allocated to the file.
   *
   * If the inode is marked synchronous, we don't honour that here - doing
   * so would cause a commit on atime updates, which we don't bother doing.
   * We handle synchronous inodes at the highest possible level.
   */
aa3857295   Christoph Hellwig   fs: pass exact ty...
3407
  void ext3_dirty_inode(struct inode *inode, int flags)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3408
3409
3410
3411
3412
3413
3414
3415
3416
3417
3418
3419
  {
  	handle_t *current_handle = ext3_journal_current_handle();
  	handle_t *handle;
  
  	handle = ext3_journal_start(inode, 2);
  	if (IS_ERR(handle))
  		goto out;
  	if (current_handle &&
  		current_handle->h_transaction != handle->h_transaction) {
  		/* This task has a transaction open against a different fs */
  		printk(KERN_EMERG "%s: transactions do not match!
  ",
e05b6b524   Harvey Harrison   ext3: replace rem...
3420
  		       __func__);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3421
3422
3423
3424
3425
3426
3427
3428
3429
3430
  	} else {
  		jbd_debug(5, "marking dirty.  outer handle=%p
  ",
  				current_handle);
  		ext3_mark_inode_dirty(handle, inode);
  	}
  	ext3_journal_stop(handle);
  out:
  	return;
  }
d6859bfca   Andrew Morton   [PATCH] ext3: cle...
3431
  #if 0
  /*
   * Bind an inode's backing buffer_head into this transaction, to prevent
   * it from being flushed to disk early.  Unlike
   * ext3_reserve_inode_write, this leaves behind no bh reference and
   * returns no iloc structure, so the caller needs to repeat the iloc
   * lookup to mark the inode dirty later.
   *
   * Compiled out; kept for reference only.
   */
  static int ext3_pin_inode(handle_t *handle, struct inode *inode)
  {
  	struct ext3_iloc iloc;
  	int ret = 0;
  
  	if (!handle)
  		goto report;
  
  	ret = ext3_get_inode_loc(inode, &iloc);
  	if (ret)
  		goto report;
  
  	BUFFER_TRACE(iloc.bh, "get_write_access");
  	ret = journal_get_write_access(handle, iloc.bh);
  	if (!ret)
  		ret = ext3_journal_dirty_metadata(handle, iloc.bh);
  
  	/* The buffer is released whether or not it was dirtied. */
  	brelse(iloc.bh);
  
  report:
  	ext3_std_error(inode->i_sb, ret);
  	return ret;
  }
  #endif
  
  int ext3_change_inode_journal_flag(struct inode *inode, int val)
  {
  	journal_t *journal;
  	handle_t *handle;
  	int err;
  
  	/*
  	 * We have to be very careful here: changing a data block's
  	 * journaling status dynamically is dangerous.  If we write a
  	 * data block to the journal, change the status and then delete
  	 * that block, we risk forgetting to revoke the old log record
  	 * from the journal and so a subsequent replay can corrupt data.
  	 * So, first we make sure that the journal is empty and that
  	 * nobody is changing anything.
  	 */
  
  	journal = EXT3_JOURNAL(inode);
e3a68e30d   Dave Hansen   ext3: remove extr...
3477
  	if (is_journal_aborted(journal))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3478
3479
3480
3481
3482
3483
3484
3485
3486
3487
3488
3489
3490
3491
3492
3493
3494
3495
3496
3497
3498
3499
3500
3501
3502
3503
3504
3505
3506
3507
3508
3509
3510
3511
  		return -EROFS;
  
  	journal_lock_updates(journal);
  	journal_flush(journal);
  
  	/*
  	 * OK, there are no updates running now, and all cached data is
  	 * synced to disk.  We are now in a completely consistent state
  	 * which doesn't have anything in the journal, and we know that
  	 * no filesystem updates are running, so it is safe to modify
  	 * the inode's in-core data-journaling state flag now.
  	 */
  
  	if (val)
  		EXT3_I(inode)->i_flags |= EXT3_JOURNAL_DATA_FL;
  	else
  		EXT3_I(inode)->i_flags &= ~EXT3_JOURNAL_DATA_FL;
  	ext3_set_aops(inode);
  
  	journal_unlock_updates(journal);
  
  	/* Finally we can mark the inode as dirty. */
  
  	handle = ext3_journal_start(inode, 1);
  	if (IS_ERR(handle))
  		return PTR_ERR(handle);
  
  	err = ext3_mark_inode_dirty(handle, inode);
  	handle->h_sync = 1;
  	ext3_journal_stop(handle);
  	ext3_std_error(inode->i_sb, err);
  
  	return err;
  }