Blame view

fs/jbd/checkpoint.c 21.3 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
  /*
588626996   Uwe Kleine-König   fix file specific...
2
   * linux/fs/jbd/checkpoint.c
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
3
   *
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4
5
6
7
8
9
10
11
   * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
   *
   * Copyright 1999 Red Hat Software --- All Rights Reserved
   *
   * This file is part of the Linux kernel and is made available under
   * the terms of the GNU General Public License, version 2, or at your
   * option, any later version, incorporated herein by reference.
   *
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
12
13
   * Checkpoint routines for the generic filesystem journaling code.
   * Part of the ext2fs journaling system.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
14
15
16
17
18
19
20
21
22
23
24
   *
   * Checkpointing is the process of ensuring that a section of the log is
   * committed fully to disk, so that that portion of the log can be
   * reused.
   */
  
  #include <linux/time.h>
  #include <linux/fs.h>
  #include <linux/jbd.h>
  #include <linux/errno.h>
  #include <linux/slab.h>
a212d1a71   Tao Ma   jbd: Use WRITE_SY...
25
  #include <linux/blkdev.h>
99cb1a318   Lukas Czerner   jbd: Add fixed tr...
26
  #include <trace/events/jbd.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
27
28
  
  /*
78ce89c92   Jan Kara   [PATCH] JBD: spli...
29
   * Unlink a buffer from a transaction checkpoint list.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
30
31
32
   *
   * Called with j_list_lock held.
   */
78ce89c92   Jan Kara   [PATCH] JBD: spli...
33
  static inline void __buffer_unlink_first(struct journal_head *jh)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
34
  {
78ce89c92   Jan Kara   [PATCH] JBD: spli...
35
  	transaction_t *transaction = jh->b_cp_transaction;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
36
37
38
  
  	jh->b_cpnext->b_cpprev = jh->b_cpprev;
  	jh->b_cpprev->b_cpnext = jh->b_cpnext;
78ce89c92   Jan Kara   [PATCH] JBD: spli...
39
  	if (transaction->t_checkpoint_list == jh) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
40
  		transaction->t_checkpoint_list = jh->b_cpnext;
78ce89c92   Jan Kara   [PATCH] JBD: spli...
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
  		if (transaction->t_checkpoint_list == jh)
  			transaction->t_checkpoint_list = NULL;
  	}
  }
  
  /*
   * Unlink a buffer from whichever checkpoint list of its transaction it is
   * on: the plain checkpoint list or the checkpoint io list.
   *
   * The ring splice and the t_checkpoint_list head fix-up are done by
   * __buffer_unlink_first(); this wrapper additionally repairs the
   * t_checkpoint_io_list head when jh was at the front of that list.
   *
   * Called with j_list_lock held.
   */
  static inline void __buffer_unlink(struct journal_head *jh)
  {
  	transaction_t *cp_trans = jh->b_cp_transaction;

  	__buffer_unlink_first(jh);

  	/* Nothing more to do unless jh headed the io list. */
  	if (cp_trans->t_checkpoint_io_list != jh)
  		return;

  	/*
  	 * jh->b_cpnext is unchanged by the unlink above; if it still points
  	 * back at jh, jh was alone on the list and the list becomes empty.
  	 */
  	cp_trans->t_checkpoint_io_list =
  		(jh->b_cpnext == jh) ? NULL : jh->b_cpnext;
  }
  
  /*
   * Move a buffer from the checkpoint list to the checkpoint io list.
   *
   * jh is removed from its current ring with __buffer_unlink_first() and
   * then inserted immediately before the current head of the transaction's
   * t_checkpoint_io_list (or made a one-element ring if that list is
   * empty).  In both cases jh becomes the new head of the io list.
   *
   * Called with j_list_lock held
   */
  static inline void __buffer_relink_io(struct journal_head *jh)
  {
  	transaction_t *transaction = jh->b_cp_transaction;

  	__buffer_unlink_first(jh);

  	if (!transaction->t_checkpoint_io_list) {
  		/* Empty io list: jh forms a ring on its own. */
  		jh->b_cpnext = jh->b_cpprev = jh;
  	} else {
  		jh->b_cpnext = transaction->t_checkpoint_io_list;
  		jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev;
  		jh->b_cpprev->b_cpnext = jh;
  		jh->b_cpnext->b_cpprev = jh;
  	}
  	transaction->t_checkpoint_io_list = jh;
  }
  
  /*
   * Try to release a checkpointed buffer from its transaction.
78ce89c92   Jan Kara   [PATCH] JBD: spli...
87
88
89
   * Returns 1 if we released it and 2 if we also released the
   * whole transaction.
   *
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
90
91
92
93
94
95
96
   * Requires j_list_lock
   * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
   */
  static int __try_to_free_cp_buf(struct journal_head *jh)
  {
  	int ret = 0;
  	struct buffer_head *bh = jh2bh(jh);
4afe97853   Hidehiro Kawai   jbd: fix error ha...
97
  	if (jh->b_jlist == BJ_None && !buffer_locked(bh) &&
9f818b4ac   Hidehiro Kawai   jbd: test BH_Writ...
98
  	    !buffer_dirty(bh) && !buffer_write_io_error(bh)) {
bb189247f   Jan Kara   jbd: Fix oops in ...
99
100
101
102
103
  		/*
  		 * Get our reference so that bh cannot be freed before
  		 * we unlock it
  		 */
  		get_bh(bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
104
  		JBUFFER_TRACE(jh, "remove from checkpoint list");
78ce89c92   Jan Kara   [PATCH] JBD: spli...
105
  		ret = __journal_remove_checkpoint(jh) + 1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
106
  		jbd_unlock_bh_state(bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
107
108
  		BUFFER_TRACE(bh, "release");
  		__brelse(bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
109
110
111
112
113
114
115
116
117
118
119
120
121
122
  	} else {
  		jbd_unlock_bh_state(bh);
  	}
  	return ret;
  }
  
  /*
   * __log_wait_for_space: wait until there is space in the journal.
   *
   * Loops until __log_space_left() is at least jbd_space_needed(), or until
   * the journal is found to be aborted (JFS_ABORT set in j_flags).
   *
   * Called under j_state_lock *only*.  It will be unlocked if we have to wait
   * for a checkpoint to free up some space in the log, and is held again on
   * return.  j_checkpoint_mutex is taken and released internally.
   */
  void __log_wait_for_space(journal_t *journal)
  {
  	int nblocks, space_left;

  	assert_spin_locked(&journal->j_state_lock);

  	nblocks = jbd_space_needed(journal);
  	while (__log_space_left(journal) < nblocks) {
  		if (journal->j_flags & JFS_ABORT)
  			return;
  		spin_unlock(&journal->j_state_lock);
  		mutex_lock(&journal->j_checkpoint_mutex);

  		/*
  		 * Test again, another process may have checkpointed while we
  		 * were waiting for the checkpoint lock. If there are no
  		 * transactions ready to be checkpointed, try to recover
  		 * journal space by calling cleanup_journal_tail(), and if
  		 * that doesn't work, by waiting for the currently committing
  		 * transaction to complete.  If there is absolutely no way
  		 * to make progress, this is either a BUG or corrupted
  		 * filesystem, so abort the journal and leave a stack
  		 * trace for forensic evidence.
  		 */
  		spin_lock(&journal->j_state_lock);
  		spin_lock(&journal->j_list_lock);
  		nblocks = jbd_space_needed(journal);
  		space_left = __log_space_left(journal);
  		if (space_left < nblocks) {
  			/* Snapshot what we need while both locks are held. */
  			int chkpt = journal->j_checkpoint_transactions != NULL;
  			tid_t tid = 0;

  			if (journal->j_committing_transaction)
  				tid = journal->j_committing_transaction->t_tid;
  			spin_unlock(&journal->j_list_lock);
  			spin_unlock(&journal->j_state_lock);
  			if (chkpt) {
  				log_do_checkpoint(journal);
  			} else if (cleanup_journal_tail(journal) == 0) {
  				/* We were able to recover space; yay! */
  				;
  			} else if (tid) {
  				log_wait_commit(journal, tid);
  			} else {
  				printk(KERN_ERR "%s: needed %d blocks and "
  				       "only had %d space available\n",
  				       __func__, nblocks, space_left);
  				printk(KERN_ERR "%s: no way to get more "
  				       "journal space\n", __func__);
  				WARN_ON(1);
  				journal_abort(journal, 0);
  			}
  			/* Re-take the lock the loop condition and caller expect. */
  			spin_lock(&journal->j_state_lock);
  		} else {
  			spin_unlock(&journal->j_list_lock);
  		}
  		mutex_unlock(&journal->j_checkpoint_mutex);
  	}
  }
  
  /*
   * We were unable to perform jbd_trylock_bh_state() inside j_list_lock.
   * The caller must restart a list walk.  Wait for someone else to run
   * jbd_unlock_bh_state().
   *
   * Entered with j_list_lock held; returns with it released, so the caller
   * has to re-take it before touching the lists again.  A reference on bh
   * is held across the wait and dropped before returning.
   */
  static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh)
  	__releases(journal->j_list_lock)
  {
  	get_bh(bh);
  	spin_unlock(&journal->j_list_lock);
  	/* Lock then immediately unlock: this only waits out the holder. */
  	jbd_lock_bh_state(bh);
  	jbd_unlock_bh_state(bh);
  	put_bh(bh);
  }
  
  /*
78ce89c92   Jan Kara   [PATCH] JBD: spli...
198
199
200
201
   * Clean up transaction's list of buffers submitted for io.
   * We wait for any pending IO to complete and remove any clean
   * buffers. Note that we take the buffers in the opposite ordering
   * from the one in which they were submitted for IO.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
202
   *
4afe97853   Hidehiro Kawai   jbd: fix error ha...
203
204
205
   * Return 0 on success, and return <0 if some buffers have failed
   * to be written out.
   *
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
206
207
   * Called with j_list_lock held.
   */
4afe97853   Hidehiro Kawai   jbd: fix error ha...
208
  static int __wait_cp_io(journal_t *journal, transaction_t *transaction)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
209
  {
78ce89c92   Jan Kara   [PATCH] JBD: spli...
210
  	struct journal_head *jh;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
211
  	struct buffer_head *bh;
78ce89c92   Jan Kara   [PATCH] JBD: spli...
212
213
  	tid_t this_tid;
  	int released = 0;
4afe97853   Hidehiro Kawai   jbd: fix error ha...
214
  	int ret = 0;
78ce89c92   Jan Kara   [PATCH] JBD: spli...
215
216
217
218
219
220
  
  	this_tid = transaction->t_tid;
  restart:
  	/* Did somebody clean up the transaction in the meanwhile? */
  	if (journal->j_checkpoint_transactions != transaction ||
  			transaction->t_tid != this_tid)
4afe97853   Hidehiro Kawai   jbd: fix error ha...
221
  		return ret;
78ce89c92   Jan Kara   [PATCH] JBD: spli...
222
223
  	while (!released && transaction->t_checkpoint_io_list) {
  		jh = transaction->t_checkpoint_io_list;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
224
  		bh = jh2bh(jh);
78ce89c92   Jan Kara   [PATCH] JBD: spli...
225
226
227
228
229
  		if (!jbd_trylock_bh_state(bh)) {
  			jbd_sync_bh(journal, bh);
  			spin_lock(&journal->j_list_lock);
  			goto restart;
  		}
bb189247f   Jan Kara   jbd: Fix oops in ...
230
  		get_bh(bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
231
  		if (buffer_locked(bh)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
232
  			spin_unlock(&journal->j_list_lock);
78ce89c92   Jan Kara   [PATCH] JBD: spli...
233
  			jbd_unlock_bh_state(bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
234
235
236
237
  			wait_on_buffer(bh);
  			/* the journal_head may have gone by now */
  			BUFFER_TRACE(bh, "brelse");
  			__brelse(bh);
78ce89c92   Jan Kara   [PATCH] JBD: spli...
238
239
  			spin_lock(&journal->j_list_lock);
  			goto restart;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
240
  		}
9f818b4ac   Hidehiro Kawai   jbd: test BH_Writ...
241
  		if (unlikely(buffer_write_io_error(bh)))
4afe97853   Hidehiro Kawai   jbd: fix error ha...
242
  			ret = -EIO;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
243
  		/*
78ce89c92   Jan Kara   [PATCH] JBD: spli...
244
245
  		 * Now in whatever state the buffer currently is, we know that
  		 * it has been written out and so we can drop it from the list
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
246
  		 */
78ce89c92   Jan Kara   [PATCH] JBD: spli...
247
248
  		released = __journal_remove_checkpoint(jh);
  		jbd_unlock_bh_state(bh);
78ce89c92   Jan Kara   [PATCH] JBD: spli...
249
250
  		__brelse(bh);
  	}
4afe97853   Hidehiro Kawai   jbd: fix error ha...
251
252
  
  	return ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
253
254
255
256
257
258
259
260
  }
  
  /* Maximum number of buffers collected before a batch is submitted. */
  #define NR_BATCH	64

  /*
   * Submit a batch of queued checkpoint buffers for writing.
   *
   * The first *batch_count entries of bhs are each passed to
   * write_dirty_buffer() with WRITE_SYNC, all inside a single block plug.
   * Afterwards every buffer has its jwrite flag cleared and one reference
   * dropped (the one taken when __process_buffer() queued it), and
   * *batch_count is reset to 0.
   *
   * The journal argument is not used by this function.  Both callers in
   * this file release j_list_lock before calling.
   */
  static void
  __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
  {
  	int i;
  	struct blk_plug plug;

  	blk_start_plug(&plug);
  	for (i = 0; i < *batch_count; i++)
  		write_dirty_buffer(bhs[i], WRITE_SYNC);
  	blk_finish_plug(&plug);

  	for (i = 0; i < *batch_count; i++) {
  		struct buffer_head *bh = bhs[i];
  		clear_buffer_jwrite(bh);
  		BUFFER_TRACE(bh, "brelse");
  		__brelse(bh);
  	}
  	*batch_count = 0;
  }
  
  /*
   * Try to flush one buffer from the checkpoint list to disk.
   *
   * Return 1 if something happened which requires us to abort the current
4afe97853   Hidehiro Kawai   jbd: fix error ha...
281
282
   * scan of the checkpoint list.  Return <0 if the buffer has failed to
   * be written out.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
283
   *
78ce89c92   Jan Kara   [PATCH] JBD: spli...
284
   * Called with j_list_lock held and drops it if 1 is returned
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
285
286
   * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
   */
78ce89c92   Jan Kara   [PATCH] JBD: spli...
287
288
  static int __process_buffer(journal_t *journal, struct journal_head *jh,
  			struct buffer_head **bhs, int *batch_count)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
289
290
291
  {
  	struct buffer_head *bh = jh2bh(jh);
  	int ret = 0;
78ce89c92   Jan Kara   [PATCH] JBD: spli...
292
  	if (buffer_locked(bh)) {
e4d5e3a49   Namhyung Kim   jbd: Convert atom...
293
  		get_bh(bh);
78ce89c92   Jan Kara   [PATCH] JBD: spli...
294
295
296
297
298
299
300
301
302
303
  		spin_unlock(&journal->j_list_lock);
  		jbd_unlock_bh_state(bh);
  		wait_on_buffer(bh);
  		/* the journal_head may have gone by now */
  		BUFFER_TRACE(bh, "brelse");
  		__brelse(bh);
  		ret = 1;
  	} else if (jh->b_transaction != NULL) {
  		transaction_t *t = jh->b_transaction;
  		tid_t tid = t->t_tid;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
304

78ce89c92   Jan Kara   [PATCH] JBD: spli...
305
306
307
308
309
310
  		spin_unlock(&journal->j_list_lock);
  		jbd_unlock_bh_state(bh);
  		log_start_commit(journal, tid);
  		log_wait_commit(journal, tid);
  		ret = 1;
  	} else if (!buffer_dirty(bh)) {
4afe97853   Hidehiro Kawai   jbd: fix error ha...
311
  		ret = 1;
9f818b4ac   Hidehiro Kawai   jbd: test BH_Writ...
312
  		if (unlikely(buffer_write_io_error(bh)))
4afe97853   Hidehiro Kawai   jbd: fix error ha...
313
  			ret = -EIO;
bb189247f   Jan Kara   jbd: Fix oops in ...
314
  		get_bh(bh);
78ce89c92   Jan Kara   [PATCH] JBD: spli...
315
316
317
318
319
  		J_ASSERT_JH(jh, !buffer_jbddirty(bh));
  		BUFFER_TRACE(bh, "remove from checkpoint");
  		__journal_remove_checkpoint(jh);
  		spin_unlock(&journal->j_list_lock);
  		jbd_unlock_bh_state(bh);
78ce89c92   Jan Kara   [PATCH] JBD: spli...
320
  		__brelse(bh);
78ce89c92   Jan Kara   [PATCH] JBD: spli...
321
  	} else {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
322
323
324
325
326
  		/*
  		 * Important: we are about to write the buffer, and
  		 * possibly block, while still holding the journal lock.
  		 * We cannot afford to let the transaction logic start
  		 * messing around with this buffer before we write it to
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
327
  		 * disk, as that would break recoverability.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
328
329
330
331
332
333
  		 */
  		BUFFER_TRACE(bh, "queue");
  		get_bh(bh);
  		J_ASSERT_BH(bh, !buffer_jwrite(bh));
  		set_buffer_jwrite(bh);
  		bhs[*batch_count] = bh;
78ce89c92   Jan Kara   [PATCH] JBD: spli...
334
  		__buffer_relink_io(jh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
335
336
337
  		jbd_unlock_bh_state(bh);
  		(*batch_count)++;
  		if (*batch_count == NR_BATCH) {
78ce89c92   Jan Kara   [PATCH] JBD: spli...
338
  			spin_unlock(&journal->j_list_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
339
340
341
  			__flush_batch(journal, bhs, batch_count);
  			ret = 1;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
342
343
344
345
346
  	}
  	return ret;
  }
  
  /*
   * Perform an actual checkpoint. We take the first transaction on the
   * list of transactions to be checkpointed and send all its buffers
   * to disk. We submit larger chunks of data at once.
   *
   * Returns the result of the initial cleanup_journal_tail() call when that
   * is <= 0 (no buffer writing is attempted in that case).  Otherwise
   * returns 0 on success or a negative error; on a write error the journal
   * is aborted with that error.
   *
   * Called with j_checkpoint_mutex held.  j_list_lock is taken and released
   * internally.
   */
  int log_do_checkpoint(journal_t *journal)
  {
  	transaction_t *transaction;
  	tid_t this_tid;
  	int result;

  	jbd_debug(1, "Start checkpoint\n");

  	/*
  	 * First thing: if there are any transactions in the log which
  	 * don't need checkpointing, just eliminate them from the
  	 * journal straight away.
  	 */
  	result = cleanup_journal_tail(journal);
  	trace_jbd_checkpoint(journal, result);
  	jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
  	if (result <= 0)
  		return result;

  	/*
  	 * OK, we need to start writing disk blocks.  Take one transaction
  	 * and write it.
  	 */
  	result = 0;
  	spin_lock(&journal->j_list_lock);
  	if (!journal->j_checkpoint_transactions)
  		goto out;
  	transaction = journal->j_checkpoint_transactions;
  	this_tid = transaction->t_tid;
  restart:
  	/*
  	 * If someone cleaned up this transaction while we slept, we're
  	 * done (maybe it's a new transaction, but it fell at the same
  	 * address).
  	 */
  	if (journal->j_checkpoint_transactions == transaction &&
  			transaction->t_tid == this_tid) {
  		int batch_count = 0;
  		struct buffer_head *bhs[NR_BATCH];
  		struct journal_head *jh;
  		int retry = 0, err;

  		while (!retry && transaction->t_checkpoint_list) {
  			struct buffer_head *bh;

  			jh = transaction->t_checkpoint_list;
  			bh = jh2bh(jh);
  			if (!jbd_trylock_bh_state(bh)) {
  				/* jbd_sync_bh() returns with j_list_lock dropped. */
  				jbd_sync_bh(journal, bh);
  				retry = 1;
  				break;
  			}
  			/* Non-zero means j_list_lock was dropped for us. */
  			retry = __process_buffer(journal, jh, bhs,&batch_count);
  			if (retry < 0 && !result)
  				result = retry;
  			if (!retry && (need_resched() ||
  				spin_needbreak(&journal->j_list_lock))) {
  				spin_unlock(&journal->j_list_lock);
  				retry = 1;
  				break;
  			}
  		}

  		if (batch_count) {
  			/* Flush the partial batch without j_list_lock held. */
  			if (!retry) {
  				spin_unlock(&journal->j_list_lock);
  				retry = 1;
  			}
  			__flush_batch(journal, bhs, &batch_count);
  		}

  		if (retry) {
  			spin_lock(&journal->j_list_lock);
  			goto restart;
  		}
  		/*
  		 * Now we have cleaned up the first transaction's checkpoint
  		 * list. Let's clean up the second one (the checkpoint io
  		 * list, handled by __wait_cp_io()).
  		 */
  		err = __wait_cp_io(journal, transaction);
  		if (!result)
  			result = err;
  	}
  out:
  	spin_unlock(&journal->j_list_lock);
  	if (result < 0)
  		journal_abort(journal, result);
  	else
  		result = cleanup_journal_tail(journal);

  	return (result < 0) ? result : 0;
  }
  
  /*
   * Check the list of checkpoint transactions for the journal to see if
   * we have already got rid of any since the last update of the log tail
   * in the journal superblock.  If so, we can instantly roll the
   * superblock forward to remove those transactions from the log.
   *
   * Return 0 if the tail was advanced, 1 if there was nothing to clean up
   * or the journal is aborted.  Callers also test for <0, but no path in
   * this function as written returns a negative value.
   *
   * Takes j_state_lock and j_list_lock itself, so the caller must hold
   * neither of them.
   *
   * This is the only part of the journaling code which really needs to be
   * aware of transaction aborts.  Checkpointing involves writing to the
   * main filesystem area rather than to the journal, so it can proceed
   * even in abort state, but we must not update the super block if
   * checkpointing may have failed.  Otherwise, we would lose some metadata
   * buffers which should be written-back to the filesystem.
   */

  int cleanup_journal_tail(journal_t *journal)
  {
  	transaction_t * transaction;
  	tid_t		first_tid;
  	unsigned int	blocknr, freed;

  	if (is_journal_aborted(journal))
  		return 1;

  	/* OK, work out the oldest transaction remaining in the log, and
  	 * the log block it starts at.
  	 *
  	 * If the log is now empty, we need to work out which is the
  	 * next transaction ID we will write, and where it will
  	 * start. */

  	spin_lock(&journal->j_state_lock);
  	spin_lock(&journal->j_list_lock);
  	transaction = journal->j_checkpoint_transactions;
  	if (transaction) {
  		first_tid = transaction->t_tid;
  		blocknr = transaction->t_log_start;
  	} else if ((transaction = journal->j_committing_transaction) != NULL) {
  		first_tid = transaction->t_tid;
  		blocknr = transaction->t_log_start;
  	} else if ((transaction = journal->j_running_transaction) != NULL) {
  		first_tid = transaction->t_tid;
  		blocknr = journal->j_head;
  	} else {
  		first_tid = journal->j_transaction_sequence;
  		blocknr = journal->j_head;
  	}
  	spin_unlock(&journal->j_list_lock);
  	J_ASSERT(blocknr != 0);

  	/* If the oldest pinned transaction is at the tail of the log
  	 * already then there's not much we can do right now. */
  	if (journal->j_tail_sequence == first_tid) {
  		spin_unlock(&journal->j_state_lock);
  		return 1;
  	}

  	/* OK, update the superblock to recover the freed space.
  	 * Physical blocks come first: have we wrapped beyond the end of
  	 * the log?  */
  	freed = blocknr - journal->j_tail;
  	if (blocknr < journal->j_tail)
  		freed = freed + journal->j_last - journal->j_first;

  	trace_jbd_cleanup_journal_tail(journal, first_tid, blocknr, freed);
  	jbd_debug(1,
  		  "Cleaning journal tail from %d to %d (offset %u), "
  		  "freeing %u\n",
  		  journal->j_tail_sequence, first_tid, blocknr, freed);

  	journal->j_free += freed;
  	journal->j_tail_sequence = first_tid;
  	journal->j_tail = blocknr;
  	spin_unlock(&journal->j_state_lock);
  	/* The in-memory tail is updated regardless; skip the write if aborted. */
  	if (!(journal->j_flags & JFS_ABORT))
  		journal_update_superblock(journal, 1);
  	return 0;
  }
  
  
  /* Checkpoint list management */
  
  /*
78ce89c92   Jan Kara   [PATCH] JBD: spli...
532
533
   * journal_clean_one_cp_list
   *
bb189247f   Jan Kara   jbd: Fix oops in ...
534
535
   * Find all the written-back checkpoint buffers in the given list and release
   * them.
78ce89c92   Jan Kara   [PATCH] JBD: spli...
536
   *
78ce89c92   Jan Kara   [PATCH] JBD: spli...
537
   * Called with j_list_lock held.
90802ed9c   Paul Bolle   treewide: Fix com...
538
   * Returns number of buffers reaped (for debug)
78ce89c92   Jan Kara   [PATCH] JBD: spli...
539
540
541
542
543
544
545
546
547
548
549
   */
  
  static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
  {
  	struct journal_head *last_jh;
  	struct journal_head *next_jh = jh;
  	int ret, freed = 0;
  
  	*released = 0;
  	if (!jh)
  		return 0;
e9ad5620b   Dave Kleikamp   [PATCH] ext3: Mor...
550
  	last_jh = jh->b_cpprev;
78ce89c92   Jan Kara   [PATCH] JBD: spli...
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
  	do {
  		jh = next_jh;
  		next_jh = jh->b_cpnext;
  		/* Use trylock because of the ranking */
  		if (jbd_trylock_bh_state(jh2bh(jh))) {
  			ret = __try_to_free_cp_buf(jh);
  			if (ret) {
  				freed++;
  				if (ret == 2) {
  					*released = 1;
  					return freed;
  				}
  			}
  		}
  		/*
  		 * This function only frees up some memory
  		 * if possible so we dont have an obligation
  		 * to finish processing. Bail out if preemption
  		 * requested:
  		 */
  		if (need_resched())
  			return freed;
  	} while (jh != last_jh);
  
  	return freed;
  }
  
  /*
   * journal_clean_checkpoint_list
   *
   * Find all the written-back checkpoint buffers in the journal and release them.
   *
   * Walks the ring of checkpoint transactions starting at
   * j_checkpoint_transactions and, for each one, cleans first its
   * t_checkpoint_list and then its t_checkpoint_io_list.  The walk is
   * abandoned as soon as need_resched() reports true.
   *
   * Called with the journal locked.
   * Called with j_list_lock held.
   * Returns number of buffers reaped (for debug)
   */

  int __journal_clean_checkpoint_list(journal_t *journal)
  {
  	transaction_t *transaction, *last_transaction, *next_transaction;
  	int ret = 0;
  	int released;

  	transaction = journal->j_checkpoint_transactions;
  	if (!transaction)
  		goto out;

  	last_transaction = transaction->t_cpprev;
  	next_transaction = transaction;
  	do {
  		transaction = next_transaction;
  		/* Fetch the successor first: transaction may be released below. */
  		next_transaction = transaction->t_cpnext;
  		ret += journal_clean_one_cp_list(transaction->
  				t_checkpoint_list, &released);
  		/*
  		 * This function only frees up some memory if possible so we
  		 * dont have an obligation to finish processing. Bail out if
  		 * preemption requested:
  		 */
  		if (need_resched())
  			goto out;
  		/* Transaction is gone; do not look at its io list. */
  		if (released)
  			continue;
  		/*
  		 * It is essential that we are as careful as in the case of
  		 * t_checkpoint_list with removing the buffer from the list as
  		 * we can possibly see not yet submitted buffers on io_list
  		 */
  		ret += journal_clean_one_cp_list(transaction->
  				t_checkpoint_io_list, &released);
  		if (need_resched())
  			goto out;
  	} while (transaction != last_transaction);
  out:
  	return ret;
  }
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
627
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
628
629
630
631
632
633
634
635
   * journal_remove_checkpoint: called after a buffer has been committed
   * to disk (either by being write-back flushed to disk, or being
   * committed to the log).
   *
   * We cannot safely clean a transaction out of the log until all of the
   * buffer updates committed in that transaction have safely been stored
   * elsewhere on disk.  To achieve this, all of the buffers in a
   * transaction need to be maintained on the transaction's checkpoint
78ce89c92   Jan Kara   [PATCH] JBD: spli...
636
   * lists until they have been rewritten, at which point this function is
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
637
   * called to remove the buffer from the existing transaction's
78ce89c92   Jan Kara   [PATCH] JBD: spli...
638
639
640
   * checkpoint lists.
   *
   * The function returns 1 if it frees the transaction, 0 otherwise.
bb189247f   Jan Kara   jbd: Fix oops in ...
641
   * The function can free jh and bh.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
642
   *
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
643
   * This function is called with j_list_lock held.
78ce89c92   Jan Kara   [PATCH] JBD: spli...
644
   * This function is called with jbd_lock_bh_state(jh2bh(jh))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
645
   */
78ce89c92   Jan Kara   [PATCH] JBD: spli...
646
  int __journal_remove_checkpoint(struct journal_head *jh)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
647
648
649
  {
  	transaction_t *transaction;
  	journal_t *journal;
78ce89c92   Jan Kara   [PATCH] JBD: spli...
650
  	int ret = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
651
652
653
654
655
656
657
658
  
  	JBUFFER_TRACE(jh, "entry");
  
  	if ((transaction = jh->b_cp_transaction) == NULL) {
  		JBUFFER_TRACE(jh, "not on transaction");
  		goto out;
  	}
  	journal = transaction->t_journal;
bb189247f   Jan Kara   jbd: Fix oops in ...
659
  	JBUFFER_TRACE(jh, "removing from transaction");
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
660
  	__buffer_unlink(jh);
78ce89c92   Jan Kara   [PATCH] JBD: spli...
661
  	jh->b_cp_transaction = NULL;
bb189247f   Jan Kara   jbd: Fix oops in ...
662
  	journal_put_journal_head(jh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
663

78ce89c92   Jan Kara   [PATCH] JBD: spli...
664
665
  	if (transaction->t_checkpoint_list != NULL ||
  	    transaction->t_checkpoint_io_list != NULL)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
666
  		goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
667
668
669
  
  	/*
  	 * There is one special case to worry about: if we have just pulled the
d4beaf4ab   Jan Kara   jbd: Fix assertio...
670
671
672
  	 * buffer off a running or committing transaction's checkpoing list,
  	 * then even if the checkpoint list is empty, the transaction obviously
  	 * cannot be dropped!
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
673
  	 *
d4beaf4ab   Jan Kara   jbd: Fix assertio...
674
  	 * The locking here around t_state is a bit sleazy.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
675
676
  	 * See the comment at the end of journal_commit_transaction().
  	 */
bb189247f   Jan Kara   jbd: Fix oops in ...
677
  	if (transaction->t_state != T_FINISHED)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
678
  		goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
679
680
681
682
683
684
685
686
687
  
  	/* OK, that was the last buffer for the transaction: we can now
  	   safely remove this transaction from the log */
  
  	__journal_drop_transaction(journal, transaction);
  
  	/* Just in case anybody was waiting for more transactions to be
             checkpointed... */
  	wake_up(&journal->j_wait_logspace);
78ce89c92   Jan Kara   [PATCH] JBD: spli...
688
  	ret = 1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
689
  out:
78ce89c92   Jan Kara   [PATCH] JBD: spli...
690
  	return ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
691
692
693
694
695
696
697
698
699
700
  }
  
  /*
   * journal_insert_checkpoint: put a committed buffer onto a checkpoint
   * list so that we know when it is safe to clean the transaction out of
   * the log.
   *
   * Called with the journal locked.
   * Called with j_list_lock held.
   */
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
701
  void __journal_insert_checkpoint(struct journal_head *jh,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
702
703
704
705
706
  			       transaction_t *transaction)
  {
  	JBUFFER_TRACE(jh, "entry");
  	J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh)));
  	J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
bb189247f   Jan Kara   jbd: Fix oops in ...
707
708
  	/* Get reference for checkpointing transaction */
  	journal_grab_journal_head(jh2bh(jh));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
  	jh->b_cp_transaction = transaction;
  
  	if (!transaction->t_checkpoint_list) {
  		jh->b_cpnext = jh->b_cpprev = jh;
  	} else {
  		jh->b_cpnext = transaction->t_checkpoint_list;
  		jh->b_cpprev = transaction->t_checkpoint_list->b_cpprev;
  		jh->b_cpprev->b_cpnext = jh;
  		jh->b_cpnext->b_cpprev = jh;
  	}
  	transaction->t_checkpoint_list = jh;
  }
  
  /*
   * We've finished with this transaction structure: adios...
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
724
   *
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
   * The transaction must have no links except for the checkpoint by this
   * point.
   *
   * Called with the journal locked.
   * Called with j_list_lock held.
   */
  
  void __journal_drop_transaction(journal_t *journal, transaction_t *transaction)
  {
  	/*
  	 * Unlink the transaction from the journal's circular list of
  	 * checkpoint transactions (if it is on it), sanity-check that nothing
  	 * else still refers to it, and free it.
  	 */
  	assert_spin_locked(&journal->j_list_lock);
  	if (transaction->t_cpnext) {
  		transaction->t_cpnext->t_cpprev = transaction->t_cpprev;
  		transaction->t_cpprev->t_cpnext = transaction->t_cpnext;
  		/* If we were the list head, advance the head to our successor. */
  		if (journal->j_checkpoint_transactions == transaction)
  			journal->j_checkpoint_transactions =
  				transaction->t_cpnext;
  		/*
  		 * If the head still points at us, we were our own successor,
  		 * i.e. the only entry on the list: the list is now empty.
  		 */
  		if (journal->j_checkpoint_transactions == transaction)
  			journal->j_checkpoint_transactions = NULL;
  	}

  	/* The transaction must be finished and hold no buffers or updates. */
  	J_ASSERT(transaction->t_state == T_FINISHED);
  	J_ASSERT(transaction->t_buffers == NULL);
  	J_ASSERT(transaction->t_sync_datalist == NULL);
  	J_ASSERT(transaction->t_forget == NULL);
  	J_ASSERT(transaction->t_iobuf_list == NULL);
  	J_ASSERT(transaction->t_shadow_list == NULL);
  	J_ASSERT(transaction->t_log_list == NULL);
  	J_ASSERT(transaction->t_checkpoint_list == NULL);
  	J_ASSERT(transaction->t_checkpoint_io_list == NULL);
  	J_ASSERT(transaction->t_updates == 0);
  	/* It must not be the journal's committing or running transaction. */
  	J_ASSERT(journal->j_committing_transaction != transaction);
  	J_ASSERT(journal->j_running_transaction != transaction);

  	trace_jbd_drop_transaction(journal, transaction);
  	jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid);
  	kfree(transaction);
  }