Blame view

fs/jbd2/transaction.c 68.1 KB
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1
  /*
588626996   Uwe Kleine-König   fix file specific...
2
   * linux/fs/jbd2/transaction.c
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
   *
   * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
   *
   * Copyright 1998 Red Hat corp --- All Rights Reserved
   *
   * This file is part of the Linux kernel and is made available under
   * the terms of the GNU General Public License, version 2, or at your
   * option, any later version, incorporated herein by reference.
   *
   * Generic filesystem transaction handling code; part of the ext2fs
   * journaling system.
   *
   * This file manages transactions (compound commits managed by the
   * journaling code) and handles (individual atomic operations by the
   * filesystem).
   */
  
  #include <linux/time.h>
  #include <linux/fs.h>
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
22
  #include <linux/jbd2.h>
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
23
24
25
  #include <linux/errno.h>
  #include <linux/slab.h>
  #include <linux/timer.h>
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
26
27
  #include <linux/mm.h>
  #include <linux/highmem.h>
e07f7183a   Josef Bacik   jbd2: improve jbd...
28
  #include <linux/hrtimer.h>
47def8267   Theodore Ts'o   jbd2: Remove __GF...
29
  #include <linux/backing-dev.h>
447057546   Randy Dunlap   jbd2: fix build w...
30
  #include <linux/bug.h>
47def8267   Theodore Ts'o   jbd2: Remove __GF...
31
  #include <linux/module.h>
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
32

7ddae8609   Adrian Bunk   [PATCH] make fs/j...
33
  static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
de1b79413   Jan Kara   jbd2: Fix oops in...
34
  static void __jbd2_journal_unfile_buffer(struct journal_head *jh);
7ddae8609   Adrian Bunk   [PATCH] make fs/j...
35

470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
36
  /*
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
37
   * jbd2_get_transaction: obtain a new transaction_t object.
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
38
39
40
41
42
43
44
45
46
47
48
   *
   * Simply allocate and initialise a new transaction.  Create it in
   * RUNNING state and add it to the current journal (which should not
   * have an existing running transaction: we only make a new transaction
   * once we have started to commit the old one).
   *
   * Preconditions:
   *	The journal MUST be locked.  We don't perform atomic mallocs on the
   *	new transaction	and we can't block without protecting against other
   *	processes trying to touch the journal while it is in transition.
   *
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
49
50
51
   */
  
  static transaction_t *
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
52
  jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
53
54
55
  {
  	transaction->t_journal = journal;
  	transaction->t_state = T_RUNNING;
e07f7183a   Josef Bacik   jbd2: improve jbd...
56
  	transaction->t_start_time = ktime_get();
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
57
58
59
  	transaction->t_tid = journal->j_transaction_sequence++;
  	transaction->t_expires = jiffies + journal->j_commit_interval;
  	spin_lock_init(&transaction->t_handle_lock);
a51dca9cd   Theodore Ts'o   jbd2: Use atomic ...
60
61
  	atomic_set(&transaction->t_updates, 0);
  	atomic_set(&transaction->t_outstanding_credits, 0);
8dd420466   Theodore Ts'o   jbd2: Remove t_ha...
62
  	atomic_set(&transaction->t_handle_count, 0);
c851ed540   Jan Kara   jbd2: Implement d...
63
  	INIT_LIST_HEAD(&transaction->t_inode_list);
3e624fc72   Theodore Ts'o   ext4: Replace hac...
64
  	INIT_LIST_HEAD(&transaction->t_private_list);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
65
66
  
  	/* Set up the commit timer for the new transaction. */
b1f485f20   Andreas Dilger   jbd2: round commi...
67
  	journal->j_commit_timer.expires = round_jiffies_up(transaction->t_expires);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
68
69
70
71
  	add_timer(&journal->j_commit_timer);
  
  	J_ASSERT(journal->j_running_transaction == NULL);
  	journal->j_running_transaction = transaction;
8e85fb3f3   Johann Lombardi   jbd2: jbd2 stats ...
72
73
  	transaction->t_max_wait = 0;
  	transaction->t_start = jiffies;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
74
75
76
77
78
79
80
81
82
83
84
85
86
  
  	return transaction;
  }
  
  /*
   * Handle management.
   *
   * A handle_t is an object which represents a single atomic update to a
   * filesystem, and which tracks all of the modifications which form part
   * of that one update.
   */
  
  /*
28e35e42f   Tao Ma   jbd2: Fix the wro...
87
   * Update transaction's maximum wait time, if debugging is enabled.
6d0bf0051   Theodore Ts'o   ext4: clean up co...
88
89
90
91
92
93
94
95
   *
   * In order for t_max_wait to be reliable, it must be protected by a
   * lock.  But doing so will mean that start_this_handle() can not be
   * run in parallel on SMP systems, which limits our scalability.  So
   * unless debugging is enabled, we no longer update t_max_wait, which
   * means that maximum wait time reported by the jbd2_run_stats
   * tracepoint will always be zero.
   */
28e35e42f   Tao Ma   jbd2: Fix the wro...
96
97
  static inline void update_t_max_wait(transaction_t *transaction,
  				     unsigned long ts)
6d0bf0051   Theodore Ts'o   ext4: clean up co...
98
99
  {
  #ifdef CONFIG_JBD2_DEBUG
6d0bf0051   Theodore Ts'o   ext4: clean up co...
100
101
102
103
104
105
106
107
108
109
110
111
  	if (jbd2_journal_enable_debug &&
  	    time_after(transaction->t_start, ts)) {
  		ts = jbd2_time_diff(ts, transaction->t_start);
  		spin_lock(&transaction->t_handle_lock);
  		if (ts > transaction->t_max_wait)
  			transaction->t_max_wait = ts;
  		spin_unlock(&transaction->t_handle_lock);
  	}
  #endif
  }
  
  /*
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
112
113
114
115
116
   * start_this_handle: Given a handle, deal with any locking or stalling
   * needed to make sure that there is enough journal space for the handle
   * to begin.  Attach the handle to a transaction and set up the
   * transaction's buffer credits.
   */
47def8267   Theodore Ts'o   jbd2: Remove __GF...
117
  static int start_this_handle(journal_t *journal, handle_t *handle,
d2159fb7b   Dan Carpenter   jbd2: use gfp_t i...
118
  			     gfp_t gfp_mask)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
119
  {
e44718318   Theodore Ts'o   jbd2: call __jbd2...
120
121
122
123
  	transaction_t	*transaction, *new_transaction = NULL;
  	tid_t		tid;
  	int		needed, need_to_start;
  	int		nblocks = handle->h_buffer_credits;
28e35e42f   Tao Ma   jbd2: Fix the wro...
124
  	unsigned long ts = jiffies;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
125
126
  
  	if (nblocks > journal->j_max_transaction_buffers) {
f2a44523b   Eryu Guan   jbd2: Unify log m...
127
128
  		printk(KERN_ERR "JBD2: %s wants too many credits (%d > %d)
  ",
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
129
130
  		       current->comm, nblocks,
  		       journal->j_max_transaction_buffers);
47def8267   Theodore Ts'o   jbd2: Remove __GF...
131
  		return -ENOSPC;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
132
133
134
135
  	}
  
  alloc_transaction:
  	if (!journal->j_running_transaction) {
47def8267   Theodore Ts'o   jbd2: Remove __GF...
136
  		new_transaction = kzalloc(sizeof(*new_transaction), gfp_mask);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
137
  		if (!new_transaction) {
47def8267   Theodore Ts'o   jbd2: Remove __GF...
138
139
140
141
142
143
144
145
146
147
148
149
  			/*
  			 * If __GFP_FS is not present, then we may be
  			 * being called from inside the fs writeback
  			 * layer, so we MUST NOT fail.  Since
  			 * __GFP_NOFAIL is going away, we will arrange
  			 * to retry the allocation ourselves.
  			 */
  			if ((gfp_mask & __GFP_FS) == 0) {
  				congestion_wait(BLK_RW_ASYNC, HZ/50);
  				goto alloc_transaction;
  			}
  			return -ENOMEM;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
150
  		}
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
151
152
153
154
  	}
  
  	jbd_debug(3, "New handle %p going live.
  ", handle);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
155
156
157
158
  	/*
  	 * We need to hold j_state_lock until t_updates has been incremented,
  	 * for proper journal barrier handling
  	 */
a931da6ac   Theodore Ts'o   jbd2: Change j_st...
159
160
  repeat:
  	read_lock(&journal->j_state_lock);
5c2178e78   Theodore Ts'o   jbd2: Add sanity ...
161
  	BUG_ON(journal->j_flags & JBD2_UNMOUNT);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
162
  	if (is_journal_aborted(journal) ||
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
163
  	    (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) {
a931da6ac   Theodore Ts'o   jbd2: Change j_st...
164
  		read_unlock(&journal->j_state_lock);
47def8267   Theodore Ts'o   jbd2: Remove __GF...
165
166
  		kfree(new_transaction);
  		return -EROFS;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
167
168
169
170
  	}
  
  	/* Wait on the journal's transaction barrier if necessary */
  	if (journal->j_barrier_count) {
a931da6ac   Theodore Ts'o   jbd2: Change j_st...
171
  		read_unlock(&journal->j_state_lock);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
172
173
174
175
176
177
  		wait_event(journal->j_wait_transaction_locked,
  				journal->j_barrier_count == 0);
  		goto repeat;
  	}
  
  	if (!journal->j_running_transaction) {
a931da6ac   Theodore Ts'o   jbd2: Change j_st...
178
179
  		read_unlock(&journal->j_state_lock);
  		if (!new_transaction)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
180
  			goto alloc_transaction;
a931da6ac   Theodore Ts'o   jbd2: Change j_st...
181
182
183
184
  		write_lock(&journal->j_state_lock);
  		if (!journal->j_running_transaction) {
  			jbd2_get_transaction(journal, new_transaction);
  			new_transaction = NULL;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
185
  		}
a931da6ac   Theodore Ts'o   jbd2: Change j_st...
186
187
  		write_unlock(&journal->j_state_lock);
  		goto repeat;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
188
189
190
191
192
193
194
195
196
197
198
199
200
  	}
  
  	transaction = journal->j_running_transaction;
  
  	/*
  	 * If the current transaction is locked down for commit, wait for the
  	 * lock to be released.
  	 */
  	if (transaction->t_state == T_LOCKED) {
  		DEFINE_WAIT(wait);
  
  		prepare_to_wait(&journal->j_wait_transaction_locked,
  					&wait, TASK_UNINTERRUPTIBLE);
a931da6ac   Theodore Ts'o   jbd2: Change j_st...
201
  		read_unlock(&journal->j_state_lock);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
202
203
204
205
206
207
208
209
210
211
  		schedule();
  		finish_wait(&journal->j_wait_transaction_locked, &wait);
  		goto repeat;
  	}
  
  	/*
  	 * If there is not enough space left in the log to write all potential
  	 * buffers requested by this operation, we need to stall pending a log
  	 * checkpoint to free some more log space.
  	 */
8dd420466   Theodore Ts'o   jbd2: Remove t_ha...
212
213
  	needed = atomic_add_return(nblocks,
  				   &transaction->t_outstanding_credits);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
214
215
216
217
218
219
220
221
222
223
224
  
  	if (needed > journal->j_max_transaction_buffers) {
  		/*
  		 * If the current transaction is already too large, then start
  		 * to commit it: we can then go back and attach this handle to
  		 * a new transaction.
  		 */
  		DEFINE_WAIT(wait);
  
  		jbd_debug(2, "Handle %p starting new commit...
  ", handle);
8dd420466   Theodore Ts'o   jbd2: Remove t_ha...
225
  		atomic_sub(nblocks, &transaction->t_outstanding_credits);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
226
227
  		prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
  				TASK_UNINTERRUPTIBLE);
e44718318   Theodore Ts'o   jbd2: call __jbd2...
228
229
  		tid = transaction->t_tid;
  		need_to_start = !tid_geq(journal->j_commit_request, tid);
a931da6ac   Theodore Ts'o   jbd2: Change j_st...
230
  		read_unlock(&journal->j_state_lock);
e44718318   Theodore Ts'o   jbd2: call __jbd2...
231
232
  		if (need_to_start)
  			jbd2_log_start_commit(journal, tid);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
  		schedule();
  		finish_wait(&journal->j_wait_transaction_locked, &wait);
  		goto repeat;
  	}
  
  	/*
  	 * The commit code assumes that it can get enough log space
  	 * without forcing a checkpoint.  This is *critical* for
  	 * correctness: a checkpoint of a buffer which is also
  	 * associated with a committing transaction creates a deadlock,
  	 * so commit simply cannot force through checkpoints.
  	 *
  	 * We must therefore ensure the necessary space in the journal
  	 * *before* starting to dirty potentially checkpointed buffers
  	 * in the new transaction.
  	 *
  	 * The worst part is, any transaction currently committing can
  	 * reduce the free space arbitrarily.  Be careful to account for
  	 * those buffers when checkpointing.
  	 */
  
  	/*
  	 * @@@ AKPM: This seems rather over-defensive.  We're giving commit
  	 * a _lot_ of headroom: 1/4 of the journal plus the size of
  	 * the committing transaction.  Really, we only need to give it
  	 * committing_transaction->t_outstanding_credits plus "enough" for
  	 * the log control blocks.
a34f0b313   Uwe Kleine-König   fix comment typos...
260
  	 * Also, this test is inconsistent with the matching one in
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
261
  	 * jbd2_journal_extend().
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
262
  	 */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
263
  	if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) {
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
264
265
  		jbd_debug(2, "Handle %p waiting for checkpoint...
  ", handle);
8dd420466   Theodore Ts'o   jbd2: Remove t_ha...
266
  		atomic_sub(nblocks, &transaction->t_outstanding_credits);
a931da6ac   Theodore Ts'o   jbd2: Change j_st...
267
268
269
270
271
272
  		read_unlock(&journal->j_state_lock);
  		write_lock(&journal->j_state_lock);
  		if (__jbd2_log_space_left(journal) < jbd_space_needed(journal))
  			__jbd2_log_wait_for_space(journal);
  		write_unlock(&journal->j_state_lock);
  		goto repeat;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
273
274
275
  	}
  
  	/* OK, account for the buffers that this operation expects to
8dd420466   Theodore Ts'o   jbd2: Remove t_ha...
276
  	 * use and add the handle to the running transaction. 
8dd420466   Theodore Ts'o   jbd2: Remove t_ha...
277
  	 */
28e35e42f   Tao Ma   jbd2: Fix the wro...
278
  	update_t_max_wait(transaction, ts);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
279
  	handle->h_transaction = transaction;
a51dca9cd   Theodore Ts'o   jbd2: Use atomic ...
280
  	atomic_inc(&transaction->t_updates);
8dd420466   Theodore Ts'o   jbd2: Remove t_ha...
281
  	atomic_inc(&transaction->t_handle_count);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
282
283
  	jbd_debug(4, "Handle %p given %d credits (total %d, free %d)
  ",
a51dca9cd   Theodore Ts'o   jbd2: Use atomic ...
284
285
  		  handle, nblocks,
  		  atomic_read(&transaction->t_outstanding_credits),
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
286
  		  __jbd2_log_space_left(journal));
a931da6ac   Theodore Ts'o   jbd2: Change j_st...
287
  	read_unlock(&journal->j_state_lock);
9599b0e59   Jan Kara   jbd2: Annotate tr...
288
289
  
  	lock_map_acquire(&handle->h_lockdep_map);
47def8267   Theodore Ts'o   jbd2: Remove __GF...
290
291
  	kfree(new_transaction);
  	return 0;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
292
  }
7b7510662   Mingming Cao   jbd2: add lockdep...
293
  static struct lock_class_key jbd2_handle_key;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
294
295
296
  /* Allocate a new handle.  This should probably be in a slab... */
  static handle_t *new_handle(int nblocks)
  {
af1e76d6b   Mingming Cao   JBD2: jbd2 slab a...
297
  	handle_t *handle = jbd2_alloc_handle(GFP_NOFS);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
298
299
300
301
302
  	if (!handle)
  		return NULL;
  	memset(handle, 0, sizeof(*handle));
  	handle->h_buffer_credits = nblocks;
  	handle->h_ref = 1;
7b7510662   Mingming Cao   jbd2: add lockdep...
303
304
  	lockdep_init_map(&handle->h_lockdep_map, "jbd2_handle",
  						&jbd2_handle_key, 0);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
305
306
307
308
  	return handle;
  }
  
  /**
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
309
   * handle_t *jbd2_journal_start() - Obtain a new handle.
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
310
311
312
313
314
315
316
317
318
319
   * @journal: Journal to start transaction on.
   * @nblocks: number of block buffer we might modify
   *
   * We make sure that the transaction can guarantee at least nblocks of
   * modified buffers in the log.  We block until the log can guarantee
   * that much space.
   *
   * This function is visible to journal users (like ext3fs), so is not
   * called with the journal already locked.
   *
c867516de   Eryu Guan   jbd2: Fix comment...
320
321
   * Return a pointer to a newly allocated handle, or an ERR_PTR() value
   * on failure.
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
322
   */
d2159fb7b   Dan Carpenter   jbd2: use gfp_t i...
323
  handle_t *jbd2__journal_start(journal_t *journal, int nblocks, gfp_t gfp_mask)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
  {
  	handle_t *handle = journal_current_handle();
  	int err;
  
  	if (!journal)
  		return ERR_PTR(-EROFS);
  
  	if (handle) {
  		J_ASSERT(handle->h_transaction->t_journal == journal);
  		handle->h_ref++;
  		return handle;
  	}
  
  	handle = new_handle(nblocks);
  	if (!handle)
  		return ERR_PTR(-ENOMEM);
  
  	current->journal_info = handle;
47def8267   Theodore Ts'o   jbd2: Remove __GF...
342
  	err = start_this_handle(journal, handle, gfp_mask);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
343
  	if (err < 0) {
af1e76d6b   Mingming Cao   JBD2: jbd2 slab a...
344
  		jbd2_free_handle(handle);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
345
346
347
348
349
  		current->journal_info = NULL;
  		handle = ERR_PTR(err);
  	}
  	return handle;
  }
47def8267   Theodore Ts'o   jbd2: Remove __GF...
350
351
352
353
354
355
356
357
  EXPORT_SYMBOL(jbd2__journal_start);
  
  
  /*
   * Obtain a handle on @journal with @nblocks buffer credits.  This is
   * jbd2__journal_start() with the allocation mask fixed to GFP_NOFS; the
   * return value is passed through unchanged.
   */
  handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
  {
  	handle_t *handle;
  
  	handle = jbd2__journal_start(journal, nblocks, GFP_NOFS);
  	return handle;
  }
  EXPORT_SYMBOL(jbd2_journal_start);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
358
359
  
  /**
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
360
   * int jbd2_journal_extend() - extend buffer credits.
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
361
362
363
364
365
366
367
368
   * @handle:  handle to 'extend'
   * @nblocks: nr blocks to try to extend by.
   *
   * Some transactions, such as large extends and truncates, can be done
   * atomically all at once or in several stages.  The operation requests
   * a credit for a number of buffer modications in advance, but can
   * extend its credit if it needs more.
   *
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
369
   * jbd2_journal_extend tries to give the running handle more buffer credits.
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
370
371
372
373
374
375
376
377
378
   * It does not guarantee that allocation - this is a best-effort only.
   * The calling process MUST be able to deal cleanly with a failure to
   * extend here.
   *
   * Return 0 on success, non-zero on failure.
   *
   * return code < 0 implies an error
   * return code > 0 implies normal transaction-full status.
   */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
379
  int jbd2_journal_extend(handle_t *handle, int nblocks)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
380
381
382
383
384
385
386
387
388
389
390
  {
  	transaction_t *transaction = handle->h_transaction;
  	journal_t *journal = transaction->t_journal;
  	int result;
  	int wanted;
  
  	result = -EIO;
  	if (is_handle_aborted(handle))
  		goto out;
  
  	result = 1;
a931da6ac   Theodore Ts'o   jbd2: Change j_st...
391
  	read_lock(&journal->j_state_lock);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
392
393
394
395
396
397
398
399
400
401
  
  	/* Don't extend a locked-down transaction! */
  	if (handle->h_transaction->t_state != T_RUNNING) {
  		jbd_debug(3, "denied handle %p %d blocks: "
  			  "transaction not running
  ", handle, nblocks);
  		goto error_out;
  	}
  
  	spin_lock(&transaction->t_handle_lock);
a51dca9cd   Theodore Ts'o   jbd2: Use atomic ...
402
  	wanted = atomic_read(&transaction->t_outstanding_credits) + nblocks;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
403
404
405
406
407
408
409
  
  	if (wanted > journal->j_max_transaction_buffers) {
  		jbd_debug(3, "denied handle %p %d blocks: "
  			  "transaction too large
  ", handle, nblocks);
  		goto unlock;
  	}
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
410
  	if (wanted > __jbd2_log_space_left(journal)) {
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
411
412
413
414
415
416
417
  		jbd_debug(3, "denied handle %p %d blocks: "
  			  "insufficient log space
  ", handle, nblocks);
  		goto unlock;
  	}
  
  	handle->h_buffer_credits += nblocks;
a51dca9cd   Theodore Ts'o   jbd2: Use atomic ...
418
  	atomic_add(nblocks, &transaction->t_outstanding_credits);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
419
420
421
422
423
424
425
  	result = 0;
  
  	jbd_debug(3, "extended handle %p by %d
  ", handle, nblocks);
  unlock:
  	spin_unlock(&transaction->t_handle_lock);
  error_out:
a931da6ac   Theodore Ts'o   jbd2: Change j_st...
426
  	read_unlock(&journal->j_state_lock);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
427
428
429
430
431
432
  out:
  	return result;
  }
  
  
  /**
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
433
   * int jbd2_journal_restart() - restart a handle .
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
434
435
436
437
438
439
   * @handle:  handle to restart
   * @nblocks: nr credits requested
   *
   * Restart a handle for a multi-transaction filesystem
   * operation.
   *
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
440
441
   * If the jbd2_journal_extend() call above fails to grant new buffer credits
   * to a running handle, a call to jbd2_journal_restart will commit the
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
442
443
444
445
   * handle's transaction so far and reattach the handle to a new
   * transaction capabable of guaranteeing the requested number of
   * credits.
   */
d2159fb7b   Dan Carpenter   jbd2: use gfp_t i...
446
  int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
447
448
449
  {
  	transaction_t *transaction = handle->h_transaction;
  	journal_t *journal = transaction->t_journal;
e44718318   Theodore Ts'o   jbd2: call __jbd2...
450
451
  	tid_t		tid;
  	int		need_to_start, ret;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
452
453
454
455
456
457
458
459
460
461
  
  	/* If we've had an abort of any type, don't even think about
  	 * actually doing the restart! */
  	if (is_handle_aborted(handle))
  		return 0;
  
  	/*
  	 * First unlink the handle from its current transaction, and start the
  	 * commit on that.
  	 */
a51dca9cd   Theodore Ts'o   jbd2: Use atomic ...
462
  	J_ASSERT(atomic_read(&transaction->t_updates) > 0);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
463
  	J_ASSERT(journal_current_handle() == handle);
a931da6ac   Theodore Ts'o   jbd2: Change j_st...
464
  	read_lock(&journal->j_state_lock);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
465
  	spin_lock(&transaction->t_handle_lock);
a51dca9cd   Theodore Ts'o   jbd2: Use atomic ...
466
467
468
  	atomic_sub(handle->h_buffer_credits,
  		   &transaction->t_outstanding_credits);
  	if (atomic_dec_and_test(&transaction->t_updates))
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
469
470
471
472
473
  		wake_up(&journal->j_wait_updates);
  	spin_unlock(&transaction->t_handle_lock);
  
  	jbd_debug(2, "restarting handle %p
  ", handle);
e44718318   Theodore Ts'o   jbd2: call __jbd2...
474
475
  	tid = transaction->t_tid;
  	need_to_start = !tid_geq(journal->j_commit_request, tid);
a931da6ac   Theodore Ts'o   jbd2: Change j_st...
476
  	read_unlock(&journal->j_state_lock);
e44718318   Theodore Ts'o   jbd2: call __jbd2...
477
478
  	if (need_to_start)
  		jbd2_log_start_commit(journal, tid);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
479

9599b0e59   Jan Kara   jbd2: Annotate tr...
480
  	lock_map_release(&handle->h_lockdep_map);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
481
  	handle->h_buffer_credits = nblocks;
47def8267   Theodore Ts'o   jbd2: Remove __GF...
482
  	ret = start_this_handle(journal, handle, gfp_mask);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
483
484
  	return ret;
  }
47def8267   Theodore Ts'o   jbd2: Remove __GF...
485
  EXPORT_SYMBOL(jbd2__journal_restart);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
486

47def8267   Theodore Ts'o   jbd2: Remove __GF...
487
488
489
490
491
  /*
   * Restart @handle with @nblocks credits.  This is jbd2__journal_restart()
   * with the allocation mask fixed to GFP_NOFS; the return value is passed
   * through unchanged.
   */
  int jbd2_journal_restart(handle_t *handle, int nblocks)
  {
  	int ret;
  
  	ret = jbd2__journal_restart(handle, nblocks, GFP_NOFS);
  	return ret;
  }
  EXPORT_SYMBOL(jbd2_journal_restart);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
492
  /**
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
493
   * void jbd2_journal_lock_updates () - establish a transaction barrier.
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
494
495
496
497
498
499
500
501
   * @journal:  Journal to establish a barrier on.
   *
   * This locks out any further updates from being started, and blocks
   * until all existing updates have completed, returning only once the
   * journal is in a quiescent state with no updates running.
   *
   * The journal lock should not be held on entry.
   */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
502
  void jbd2_journal_lock_updates(journal_t *journal)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
503
504
  {
  	DEFINE_WAIT(wait);
a931da6ac   Theodore Ts'o   jbd2: Change j_st...
505
  	write_lock(&journal->j_state_lock);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
506
507
508
509
510
511
512
513
514
515
  	++journal->j_barrier_count;
  
  	/* Wait until there are no running updates */
  	while (1) {
  		transaction_t *transaction = journal->j_running_transaction;
  
  		if (!transaction)
  			break;
  
  		spin_lock(&transaction->t_handle_lock);
9837d8e98   Jan Kara   jbd2: fix hung pr...
516
517
  		prepare_to_wait(&journal->j_wait_updates, &wait,
  				TASK_UNINTERRUPTIBLE);
a51dca9cd   Theodore Ts'o   jbd2: Use atomic ...
518
  		if (!atomic_read(&transaction->t_updates)) {
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
519
  			spin_unlock(&transaction->t_handle_lock);
9837d8e98   Jan Kara   jbd2: fix hung pr...
520
  			finish_wait(&journal->j_wait_updates, &wait);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
521
522
  			break;
  		}
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
523
  		spin_unlock(&transaction->t_handle_lock);
a931da6ac   Theodore Ts'o   jbd2: Change j_st...
524
  		write_unlock(&journal->j_state_lock);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
525
526
  		schedule();
  		finish_wait(&journal->j_wait_updates, &wait);
a931da6ac   Theodore Ts'o   jbd2: Change j_st...
527
  		write_lock(&journal->j_state_lock);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
528
  	}
a931da6ac   Theodore Ts'o   jbd2: Change j_st...
529
  	write_unlock(&journal->j_state_lock);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
530
531
532
  
  	/*
  	 * We have now established a barrier against other normal updates, but
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
533
  	 * we also need to barrier against other jbd2_journal_lock_updates() calls
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
534
535
536
537
538
539
540
  	 * to make sure that we serialise special journal-locked operations
  	 * too.
  	 */
  	mutex_lock(&journal->j_barrier);
  }
  
  /**
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
541
   * void jbd2_journal_unlock_updates (journal_t* journal) - release barrier
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
542
543
   * @journal:  Journal to release the barrier on.
   *
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
544
   * Release a transaction barrier obtained with jbd2_journal_lock_updates().
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
545
546
547
   *
   * Should be called without the journal lock held.
   */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
548
  void jbd2_journal_unlock_updates (journal_t *journal)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
549
550
551
552
  {
  	J_ASSERT(journal->j_barrier_count != 0);
  
  	mutex_unlock(&journal->j_barrier);
a931da6ac   Theodore Ts'o   jbd2: Change j_st...
553
  	write_lock(&journal->j_state_lock);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
554
  	--journal->j_barrier_count;
a931da6ac   Theodore Ts'o   jbd2: Change j_st...
555
  	write_unlock(&journal->j_state_lock);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
556
557
  	wake_up(&journal->j_wait_transaction_locked);
  }
f91d1d041   Jan Kara   jbd2: Fix a race ...
558
  static void warn_dirty_buffer(struct buffer_head *bh)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
559
  {
f91d1d041   Jan Kara   jbd2: Fix a race ...
560
  	char b[BDEVNAME_SIZE];
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
561

f91d1d041   Jan Kara   jbd2: Fix a race ...
562
  	printk(KERN_WARNING
f2a44523b   Eryu Guan   jbd2: Unify log m...
563
  	       "JBD2: Spotted dirty metadata buffer (dev = %s, blocknr = %llu). "
f91d1d041   Jan Kara   jbd2: Fix a race ...
564
565
566
567
  	       "There's a risk of filesystem corruption in case of system "
  	       "crash.
  ",
  	       bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
  }
  
  /*
   * do_get_write_access() - common worker behind the write/undo access calls.
   * @handle:     handle the modification is made under
   * @jh:         journal head of the buffer to be modified
   * @force_copy: non-zero to force a frozen copy even for a buffer sitting on
   *              the committing transaction's BJ_Forget list (undo access)
   *
   * Nothing needs doing when the buffer is already part of the handle's
   * transaction.  When it belongs to a prior transaction that is still being
   * committed, the copy going to disk must not be overwritten, so the current
   * contents are copied out into b_frozen_data first.  A buffer that is not
   * journaled at all is filed on the transaction's BJ_Reserved list.
   *
   * Returns 0 on success, -EROFS if the handle is aborted and -ENOMEM if the
   * frozen copy cannot be allocated.
   */
  static int
  do_get_write_access(handle_t *handle, struct journal_head *jh,
  			int force_copy)
  {
  	transaction_t *transaction;
  	journal_t *journal;
  	struct buffer_head *bh;
  	char *frozen = NULL;	/* spare copy-out buffer, if we allocated one */
  	int do_copy = 0;	/* set once b_frozen_data has been installed */
  	int ret;

  	if (is_handle_aborted(handle))
  		return -EROFS;

  	transaction = handle->h_transaction;
  	journal = transaction->t_journal;

  	jbd_debug(5, "journal_head %p, force_copy %d\n", jh, force_copy);

  	JBUFFER_TRACE(jh, "entry");
  repeat:
  	bh = jh2bh(jh);

  	/* @@@ Need to check for errors here at some point. */

  	lock_buffer(bh);
  	jbd_lock_bh_state(bh);

  	/*
  	 * With the buffer lock held the buffer state can be queried safely.
  	 *
  	 * A dirty buffer is either non-journaled and undergoing a perfectly
  	 * legitimate writeback, or it is journaled, in which case we expect
  	 * it to be marked JBD_Dirty rather than dirty.  In the latter case
  	 * either IO under our own control went wrong (a bug) or a third
  	 * party such as dump(8) or tune2fs touched the buffer.
  	 */
  	if (buffer_dirty(bh)) {
  		/* Already owned by the running or committing transaction? */
  		if (jh->b_transaction) {
  			J_ASSERT_JH(jh,
  				jh->b_transaction == transaction ||
  				jh->b_transaction ==
  					journal->j_committing_transaction);
  			if (jh->b_next_transaction) {
  				J_ASSERT_JH(jh, jh->b_next_transaction ==
  							transaction);
  			}
  			warn_dirty_buffer(bh);
  		}

  		/*
  		 * Either way the dirty flag has to go, and it has to go
  		 * while the buffer lock is held so that we do not race
  		 * with running write-out.
  		 */
  		JBUFFER_TRACE(jh, "Journalling dirty buffer");
  		clear_buffer_dirty(bh);
  		set_buffer_jbddirty(bh);
  	}

  	unlock_buffer(bh);

  	if (is_handle_aborted(handle)) {
  		jbd_unlock_bh_state(bh);
  		ret = -EROFS;
  		goto out;
  	}
  	ret = 0;

  	/*
  	 * b_transaction or b_next_transaction pointing at our transaction
  	 * means the buffer is already part of it.
  	 */
  	if (jh->b_transaction == transaction ||
  	    jh->b_next_transaction == transaction)
  		goto done;

  	/* First touch of this buffer by this transaction. */
  	jh->b_modified = 0;

  	/* An existing copy-out version is good enough; do not make another. */
  	if (jh->b_frozen_data) {
  		JBUFFER_TRACE(jh, "has frozen data");
  		J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
  		jh->b_next_transaction = transaction;
  		goto done;
  	}

  	/* Does an older transaction own data we have to preserve? */
  	if (jh->b_transaction && jh->b_transaction != transaction) {
  		JBUFFER_TRACE(jh, "owned by older transaction");
  		J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
  		J_ASSERT_JH(jh, jh->b_transaction ==
  					journal->j_committing_transaction);

  		/*
  		 * Careful: if the committing transaction is writing this
  		 * buffer out right now and has NOT made a copy-out, the
  		 * contents must not be touched at all.  Copy-out means the
  		 * extra copy is journaled, not the primary one; once the
  		 * primary copy is on its way to disk it is too late for
  		 * that, so wait for the shadow state to end and start over.
  		 */
  		if (jh->b_jlist == BJ_Shadow) {
  			DEFINE_WAIT_BIT(shadow_wait, &bh->b_state, BH_Unshadow);
  			wait_queue_head_t *wq;

  			wq = bit_waitqueue(&bh->b_state, BH_Unshadow);

  			JBUFFER_TRACE(jh, "on shadow: sleep");
  			jbd_unlock_bh_state(bh);
  			/* commit wakes up all shadow buffers after IO */
  			prepare_to_wait(wq, &shadow_wait.wait,
  					TASK_UNINTERRUPTIBLE);
  			while (jh->b_jlist == BJ_Shadow) {
  				schedule();
  				prepare_to_wait(wq, &shadow_wait.wait,
  						TASK_UNINTERRUPTIBLE);
  			}
  			finish_wait(wq, &shadow_wait.wait);
  			goto repeat;
  		}

  		/*
  		 * Copy only if the owning transaction still needs the data;
  		 * a buffer on its Forget list is past that stage.
  		 *
  		 * Subtle point: get_undo_access relies on the frozen data
  		 * holding the new value of the committed_data record after
  		 * the transaction, so the copy is forced in that case.
  		 */
  		if (jh->b_jlist != BJ_Forget || force_copy) {
  			JBUFFER_TRACE(jh, "generate frozen data");
  			if (!frozen) {
  				/*
  				 * Allocate with the state lock dropped, then
  				 * re-run every check from the top.
  				 */
  				JBUFFER_TRACE(jh, "allocate memory for buffer");
  				jbd_unlock_bh_state(bh);
  				frozen = jbd2_alloc(jh2bh(jh)->b_size,
  						    GFP_NOFS);
  				if (frozen)
  					goto repeat;

  				printk(KERN_EMERG
  				       "%s: OOM for frozen_buffer\n",
  				       __func__);
  				JBUFFER_TRACE(jh, "oom!");
  				ret = -ENOMEM;
  				/* "done" expects the state lock to be held */
  				jbd_lock_bh_state(bh);
  				goto done;
  			}
  			jh->b_frozen_data = frozen;
  			frozen = NULL;
  			do_copy = 1;
  		}
  		jh->b_next_transaction = transaction;
  	}

  	/*
  	 * A buffer that is not journaled right now must be kept from
  	 * reaching disk before the caller commits the new data.
  	 */
  	if (!jh->b_transaction) {
  		JBUFFER_TRACE(jh, "no transaction");
  		J_ASSERT_JH(jh, !jh->b_next_transaction);
  		JBUFFER_TRACE(jh, "file as BJ_Reserved");
  		spin_lock(&journal->j_list_lock);
  		__jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
  		spin_unlock(&journal->j_list_lock);
  	}

  done:
  	if (do_copy) {
  		char *kaddr;
  		int off;

  		J_EXPECT_JH(jh, buffer_uptodate(jh2bh(jh)),
  			    "Possible IO failure.\n");
  		off = offset_in_page(bh->b_data);
  		kaddr = kmap_atomic(bh->b_page, KM_USER0);
  		/* Fire data frozen trigger just before we copy the data */
  		jbd2_buffer_frozen_trigger(jh, kaddr + off, jh->b_triggers);
  		memcpy(jh->b_frozen_data, kaddr + off, bh->b_size);
  		kunmap_atomic(kaddr, KM_USER0);

  		/*
  		 * The frozen data is saved off; remember the triggers that
  		 * go with it.
  		 */
  		jh->b_frozen_triggers = jh->b_triggers;
  	}
  	jbd_unlock_bh_state(bh);

  	/*
  	 * We are about to journal this buffer, so any revoke pending on it
  	 * is no longer valid.
  	 */
  	jbd2_journal_cancel_revoke(handle, jh);

  out:
  	if (unlikely(frozen))	/* It's usually NULL */
  		jbd2_free(frozen, bh->b_size);

  	JBUFFER_TRACE(jh, "exit");
  	return ret;
  }
  
  /**
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
814
   * int jbd2_journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update.
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
815
816
   * @handle: transaction to add buffer modifications to
   * @bh:     bh to be used for metadata writes
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
817
818
819
820
821
822
   *
   * Returns an error code or 0 on success.
   *
   * In full data journalling mode the buffer may be of type BJ_AsyncData,
   * because we're write()ing a buffer which is also part of a shared mapping.
   */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
823
  int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
824
  {
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
825
  	struct journal_head *jh = jbd2_journal_add_journal_head(bh);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
826
827
828
829
830
831
  	int rc;
  
  	/* We do not want to get caught playing with fields which the
  	 * log thread also manipulates.  Make sure that the buffer
  	 * completes any outstanding IO before proceeding. */
  	rc = do_get_write_access(handle, jh, 0);
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
832
  	jbd2_journal_put_journal_head(jh);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
  	return rc;
  }
  
  
  /*
   * When the user wants to journal a newly created buffer_head
   * (ie. getblk() returned a new buffer and we are going to populate it
   * manually rather than reading off disk), then we need to keep the
   * buffer_head locked until it has been completely filled with new
   * data.  In this case, we should be able to make the assertion that
   * the bh is not already part of an existing transaction.
   *
   * The buffer should already be locked by the caller by this point.
   * There is no lock ranking violation: it was a newly created,
   * unlocked buffer beforehand. */
  
  /**
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
850
   * int jbd2_journal_get_create_access () - notify intent to use newly created bh
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
851
852
853
854
855
   * @handle: transaction to new buffer to
   * @bh: new buffer.
   *
   * Call this if you create a new bh.
   */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
856
  int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
857
858
859
  {
  	transaction_t *transaction = handle->h_transaction;
  	journal_t *journal = transaction->t_journal;
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
860
  	struct journal_head *jh = jbd2_journal_add_journal_head(bh);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
  	int err;
  
  	jbd_debug(5, "journal_head %p
  ", jh);
  	err = -EROFS;
  	if (is_handle_aborted(handle))
  		goto out;
  	err = 0;
  
  	JBUFFER_TRACE(jh, "entry");
  	/*
  	 * The buffer may already belong to this transaction due to pre-zeroing
  	 * in the filesystem's new_block code.  It may also be on the previous,
  	 * committing transaction's lists, but it HAS to be in Forget state in
  	 * that case: the transaction must have deleted the buffer for it to be
  	 * reused here.
  	 */
  	jbd_lock_bh_state(bh);
  	spin_lock(&journal->j_list_lock);
  	J_ASSERT_JH(jh, (jh->b_transaction == transaction ||
  		jh->b_transaction == NULL ||
  		(jh->b_transaction == journal->j_committing_transaction &&
  			  jh->b_jlist == BJ_Forget)));
  
  	J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
  	J_ASSERT_JH(jh, buffer_locked(jh2bh(jh)));
  
  	if (jh->b_transaction == NULL) {
f91d1d041   Jan Kara   jbd2: Fix a race ...
889
890
891
892
893
894
895
896
897
  		/*
  		 * Previous jbd2_journal_forget() could have left the buffer
  		 * with jbddirty bit set because it was being committed. When
  		 * the commit finished, we've filed the buffer for
  		 * checkpointing and marked it dirty. Now we are reallocating
  		 * the buffer so the transaction freeing it must have
  		 * committed and so it's safe to clear the dirty bit.
  		 */
  		clear_buffer_dirty(jh2bh(jh));
9fc7c63a1   Josef Bacik   jbd2: fix the way...
898
899
  		/* first access by this transaction */
  		jh->b_modified = 0;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
900
  		JBUFFER_TRACE(jh, "file as BJ_Reserved");
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
901
  		__jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
902
  	} else if (jh->b_transaction == journal->j_committing_transaction) {
9fc7c63a1   Josef Bacik   jbd2: fix the way...
903
904
  		/* first access by this transaction */
  		jh->b_modified = 0;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
905
906
907
908
909
910
911
912
913
914
915
916
917
918
  		JBUFFER_TRACE(jh, "set next transaction");
  		jh->b_next_transaction = transaction;
  	}
  	spin_unlock(&journal->j_list_lock);
  	jbd_unlock_bh_state(bh);
  
  	/*
  	 * akpm: I added this.  ext3_alloc_branch can pick up new indirect
  	 * blocks which contain freed but then revoked metadata.  We need
  	 * to cancel the revoke in case we end up freeing it yet again
  	 * and the reallocating as data - this would cause a second revoke,
  	 * which hits an assertion error.
  	 */
  	JBUFFER_TRACE(jh, "cancelling revoke");
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
919
  	jbd2_journal_cancel_revoke(handle, jh);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
920
  out:
3991b4008   Ding Dinghua   jbd2: fix a poten...
921
  	jbd2_journal_put_journal_head(jh);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
922
923
924
925
  	return err;
  }
  
  /**
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
926
   * int jbd2_journal_get_undo_access() -  Notify intent to modify metadata with
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
927
928
929
   *     non-rewindable consequences
   * @handle: transaction
   * @bh: buffer to undo
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
930
931
932
933
934
935
936
937
   *
   * Sometimes there is a need to distinguish between metadata which has
   * been committed to disk and that which has not.  The ext3fs code uses
   * this for freeing and allocating space, we have to make sure that we
   * do not reuse freed space until the deallocation has been committed,
   * since if we overwrote that space we would make the delete
   * un-rewindable in case of a crash.
   *
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
938
   * To deal with that, jbd2_journal_get_undo_access requests write access to a
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
939
940
941
942
943
944
945
946
947
948
949
950
   * buffer for parts of non-rewindable operations such as delete
   * operations on the bitmaps.  The journaling code must keep a copy of
   * the buffer's contents prior to the undo_access call until such time
   * as we know that the buffer has definitely been committed to disk.
   *
   * We never need to know which transaction the committed data is part
   * of, buffers touched here are guaranteed to be dirtied later and so
   * will be committed to a new transaction in due course, at which point
   * we can discard the old committed data pointer.
   *
   * Returns error number or 0 on success.
   */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
951
  int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
952
953
  {
  	int err;
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
954
  	struct journal_head *jh = jbd2_journal_add_journal_head(bh);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
  	char *committed_data = NULL;
  
  	JBUFFER_TRACE(jh, "entry");
  
  	/*
  	 * Do this first --- it can drop the journal lock, so we want to
  	 * make sure that obtaining the committed_data is done
  	 * atomically wrt. completion of any outstanding commits.
  	 */
  	err = do_get_write_access(handle, jh, 1);
  	if (err)
  		goto out;
  
  repeat:
  	if (!jh->b_committed_data) {
af1e76d6b   Mingming Cao   JBD2: jbd2 slab a...
970
  		committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
971
972
973
  		if (!committed_data) {
  			printk(KERN_EMERG "%s: No memory for committed data
  ",
329d291f5   Harvey Harrison   jdb2: replace rem...
974
  				__func__);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
  			err = -ENOMEM;
  			goto out;
  		}
  	}
  
  	jbd_lock_bh_state(bh);
  	if (!jh->b_committed_data) {
  		/* Copy out the current buffer contents into the
  		 * preserved, committed copy. */
  		JBUFFER_TRACE(jh, "generate b_committed data");
  		if (!committed_data) {
  			jbd_unlock_bh_state(bh);
  			goto repeat;
  		}
  
  		jh->b_committed_data = committed_data;
  		committed_data = NULL;
  		memcpy(jh->b_committed_data, bh->b_data, bh->b_size);
  	}
  	jbd_unlock_bh_state(bh);
  out:
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
996
  	jbd2_journal_put_journal_head(jh);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
997
  	if (unlikely(committed_data))
af1e76d6b   Mingming Cao   JBD2: jbd2 slab a...
998
  		jbd2_free(committed_data, bh->b_size);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
999
1000
1001
1002
  	return err;
  }
  
  /**
e06c8227f   Joel Becker   jbd2: Add buffer ...
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
   * void jbd2_journal_set_triggers() - Add triggers for commit writeout
   * @bh: buffer to trigger on
   * @type: struct jbd2_buffer_trigger_type containing the trigger(s).
   *
   * Set any triggers on this journal_head.  This is always safe, because
   * triggers for a committing buffer will be saved off, and triggers for
   * a running transaction will match the buffer in that transaction.
   *
   * Call with NULL to clear the triggers.
   */
  void jbd2_journal_set_triggers(struct buffer_head *bh,
  			       struct jbd2_buffer_trigger_type *type)
  {
  	struct journal_head *jh = bh2jh(bh);
  
  	jh->b_triggers = type;
  }
13ceef099   Jan Kara   jbd2/ocfs2: Fix b...
1020
  void jbd2_buffer_frozen_trigger(struct journal_head *jh, void *mapped_data,
e06c8227f   Joel Becker   jbd2: Add buffer ...
1021
1022
1023
  				struct jbd2_buffer_trigger_type *triggers)
  {
  	struct buffer_head *bh = jh2bh(jh);
13ceef099   Jan Kara   jbd2/ocfs2: Fix b...
1024
  	if (!triggers || !triggers->t_frozen)
e06c8227f   Joel Becker   jbd2: Add buffer ...
1025
  		return;
13ceef099   Jan Kara   jbd2/ocfs2: Fix b...
1026
  	triggers->t_frozen(triggers, bh, mapped_data, bh->b_size);
e06c8227f   Joel Becker   jbd2: Add buffer ...
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
  }
  
  /*
   * Invoke the t_abort callback of @triggers, if there is one, on the buffer
   * behind @jh.  Does nothing when @triggers is NULL or has no t_abort
   * callback.
   */
  void jbd2_buffer_abort_trigger(struct journal_head *jh,
  			       struct jbd2_buffer_trigger_type *triggers)
  {
  	if (triggers && triggers->t_abort)
  		triggers->t_abort(triggers, jh2bh(jh));
  }
  
  
  
  /**
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
1041
   * int jbd2_journal_dirty_metadata() -  mark a buffer as containing dirty metadata
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1042
1043
1044
1045
1046
1047
   * @handle: transaction to add buffer to.
   * @bh: buffer to mark
   *
   * mark dirty metadata which needs to be journaled as part of the current
   * transaction.
   *
9ea7a0df6   Theodore Ts'o   jbd2: add debuggi...
1048
1049
1050
1051
   * The buffer must have previously had jbd2_journal_get_write_access()
   * called so that it has a valid journal_head attached to the buffer
   * head.
   *
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
   * The buffer is placed on the transaction's metadata list and is marked
   * as belonging to the transaction.
   *
   * Returns error number or 0 on success.
   *
   * Special care needs to be taken if the buffer already belongs to the
   * current committing transaction (in which case we should have frozen
   * data present for that commit).  In that case, we don't relink the
   * buffer: that only gets done when the old transaction finally
   * completes its commit.
   */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
1063
  int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1064
1065
1066
1067
  {
  	transaction_t *transaction = handle->h_transaction;
  	journal_t *journal = transaction->t_journal;
  	struct journal_head *jh = bh2jh(bh);
9ea7a0df6   Theodore Ts'o   jbd2: add debuggi...
1068
  	int ret = 0;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1069
1070
1071
1072
1073
1074
  
  	jbd_debug(5, "journal_head %p
  ", jh);
  	JBUFFER_TRACE(jh, "entry");
  	if (is_handle_aborted(handle))
  		goto out;
9ea7a0df6   Theodore Ts'o   jbd2: add debuggi...
1075
1076
1077
1078
  	if (!buffer_jbd(bh)) {
  		ret = -EUCLEAN;
  		goto out;
  	}
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
  
  	jbd_lock_bh_state(bh);
  
  	if (jh->b_modified == 0) {
  		/*
  		 * This buffer's got modified and becoming part
  		 * of the transaction. This needs to be done
  		 * once a transaction -bzzz
  		 */
  		jh->b_modified = 1;
  		J_ASSERT_JH(jh, handle->h_buffer_credits > 0);
  		handle->h_buffer_credits--;
  	}
  
  	/*
  	 * fastpath, to avoid expensive locking.  If this buffer is already
  	 * on the running transaction's metadata list there is nothing to do.
  	 * Nobody can take it off again because there is a handle open.
  	 * I _think_ we're OK here with SMP barriers - a mistaken decision will
  	 * result in this test being false, so we go in and take the locks.
  	 */
  	if (jh->b_transaction == transaction && jh->b_jlist == BJ_Metadata) {
  		JBUFFER_TRACE(jh, "fastpath");
9ea7a0df6   Theodore Ts'o   jbd2: add debuggi...
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
  		if (unlikely(jh->b_transaction !=
  			     journal->j_running_transaction)) {
  			printk(KERN_EMERG "JBD: %s: "
  			       "jh->b_transaction (%llu, %p, %u) != "
  			       "journal->j_running_transaction (%p, %u)",
  			       journal->j_devname,
  			       (unsigned long long) bh->b_blocknr,
  			       jh->b_transaction,
  			       jh->b_transaction ? jh->b_transaction->t_tid : 0,
  			       journal->j_running_transaction,
  			       journal->j_running_transaction ?
  			       journal->j_running_transaction->t_tid : 0);
  			ret = -EINVAL;
  		}
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
  		goto out_unlock_bh;
  	}
  
  	set_buffer_jbddirty(bh);
  
  	/*
  	 * Metadata already on the current transaction list doesn't
  	 * need to be filed.  Metadata on another transaction's list must
  	 * be committing, and will be refiled once the commit completes:
  	 * leave it alone for now.
  	 */
  	if (jh->b_transaction != transaction) {
  		JBUFFER_TRACE(jh, "already on other transaction");
9ea7a0df6   Theodore Ts'o   jbd2: add debuggi...
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
  		if (unlikely(jh->b_transaction !=
  			     journal->j_committing_transaction)) {
  			printk(KERN_EMERG "JBD: %s: "
  			       "jh->b_transaction (%llu, %p, %u) != "
  			       "journal->j_committing_transaction (%p, %u)",
  			       journal->j_devname,
  			       (unsigned long long) bh->b_blocknr,
  			       jh->b_transaction,
  			       jh->b_transaction ? jh->b_transaction->t_tid : 0,
  			       journal->j_committing_transaction,
  			       journal->j_committing_transaction ?
  			       journal->j_committing_transaction->t_tid : 0);
  			ret = -EINVAL;
  		}
  		if (unlikely(jh->b_next_transaction != transaction)) {
  			printk(KERN_EMERG "JBD: %s: "
  			       "jh->b_next_transaction (%llu, %p, %u) != "
  			       "transaction (%p, %u)",
  			       journal->j_devname,
  			       (unsigned long long) bh->b_blocknr,
  			       jh->b_next_transaction,
  			       jh->b_next_transaction ?
  			       jh->b_next_transaction->t_tid : 0,
  			       transaction, transaction->t_tid);
  			ret = -EINVAL;
  		}
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1155
1156
1157
1158
1159
1160
  		/* And this case is illegal: we can't reuse another
  		 * transaction's data buffer, ever. */
  		goto out_unlock_bh;
  	}
  
  	/* That test should have eliminated the following case: */
4019191be   Mingming Cao   jbd2: sparse poin...
1161
  	J_ASSERT_JH(jh, jh->b_frozen_data == NULL);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1162
1163
1164
  
  	JBUFFER_TRACE(jh, "file as BJ_Metadata");
  	spin_lock(&journal->j_list_lock);
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
1165
  	__jbd2_journal_file_buffer(jh, handle->h_transaction, BJ_Metadata);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1166
1167
1168
1169
1170
  	spin_unlock(&journal->j_list_lock);
  out_unlock_bh:
  	jbd_unlock_bh_state(bh);
  out:
  	JBUFFER_TRACE(jh, "exit");
447057546   Randy Dunlap   jbd2: fix build w...
1171
  	WARN_ON(ret);	/* All errors are bugs, so dump the stack */
9ea7a0df6   Theodore Ts'o   jbd2: add debuggi...
1172
  	return ret;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1173
1174
1175
  }
  
  /*
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
1176
   * jbd2_journal_release_buffer: undo a get_write_access without any buffer
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1177
1178
1179
1180
   * updates, if the update decided in the end that it didn't need access.
   *
   */
  void
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
1181
  jbd2_journal_release_buffer(handle_t *handle, struct buffer_head *bh)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1182
1183
1184
1185
1186
  {
  	BUFFER_TRACE(bh, "entry");
  }
  
  /**
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
1187
   * void jbd2_journal_forget() - bforget() for potentially-journaled buffers.
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
   * @handle: transaction handle
   * @bh:     bh to 'forget'
   *
   * We can only do the bforget if there are no commits pending against the
   * buffer.  If the buffer is dirty in the current running transaction we
   * can safely unlink it.
   *
   * bh may not be a journalled buffer at all - it may be a non-JBD
   * buffer which came off the hashtable.  Check for this.
   *
   * Decrements bh->b_count by one.
   *
   * Allow this call even if the handle has aborted --- it may be part of
   * the caller's cleanup after an abort.
   */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
1203
  int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1204
1205
1206
1207
1208
1209
  {
  	transaction_t *transaction = handle->h_transaction;
  	journal_t *journal = transaction->t_journal;
  	struct journal_head *jh;
  	int drop_reserve = 0;
  	int err = 0;
1dfc3220d   Josef Bacik   jbd2: fix possibl...
1210
  	int was_modified = 0;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
  
  	BUFFER_TRACE(bh, "entry");
  
  	jbd_lock_bh_state(bh);
  	spin_lock(&journal->j_list_lock);
  
  	if (!buffer_jbd(bh))
  		goto not_jbd;
  	jh = bh2jh(bh);
  
  	/* Critical error: attempting to delete a bitmap buffer, maybe?
  	 * Don't do any jbd operations, and return an error. */
  	if (!J_EXPECT_JH(jh, !jh->b_committed_data,
  			 "inconsistent data on disk")) {
  		err = -EIO;
  		goto not_jbd;
  	}
1dfc3220d   Josef Bacik   jbd2: fix possibl...
1228
1229
  	/* keep track of wether or not this transaction modified us */
  	was_modified = jh->b_modified;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
  	/*
  	 * The buffer's going from the transaction, we must drop
  	 * all references -bzzz
  	 */
  	jh->b_modified = 0;
  
  	if (jh->b_transaction == handle->h_transaction) {
  		J_ASSERT_JH(jh, !jh->b_frozen_data);
  
  		/* If we are forgetting a buffer which is already part
  		 * of this transaction, then we can just drop it from
  		 * the transaction immediately. */
  		clear_buffer_dirty(bh);
  		clear_buffer_jbddirty(bh);
  
  		JBUFFER_TRACE(jh, "belongs to current transaction: unfile");
1dfc3220d   Josef Bacik   jbd2: fix possibl...
1246
1247
1248
1249
1250
1251
  		/*
  		 * we only want to drop a reference if this transaction
  		 * modified the buffer
  		 */
  		if (was_modified)
  			drop_reserve = 1;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
  
  		/*
  		 * We are no longer going to journal this buffer.
  		 * However, the commit of this transaction is still
  		 * important to the buffer: the delete that we are now
  		 * processing might obsolete an old log entry, so by
  		 * committing, we can satisfy the buffer's checkpoint.
  		 *
  		 * So, if we have a checkpoint on the buffer, we should
  		 * now refile the buffer on our BJ_Forget list so that
  		 * we know to remove the checkpoint after we commit.
  		 */
  
  		if (jh->b_cp_transaction) {
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
1266
1267
  			__jbd2_journal_temp_unlink_buffer(jh);
  			__jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1268
  		} else {
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
1269
  			__jbd2_journal_unfile_buffer(jh);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
  			if (!buffer_jbd(bh)) {
  				spin_unlock(&journal->j_list_lock);
  				jbd_unlock_bh_state(bh);
  				__bforget(bh);
  				goto drop;
  			}
  		}
  	} else if (jh->b_transaction) {
  		J_ASSERT_JH(jh, (jh->b_transaction ==
  				 journal->j_committing_transaction));
  		/* However, if the buffer is still owned by a prior
  		 * (committing) transaction, we can't drop it yet... */
  		JBUFFER_TRACE(jh, "belongs to older transaction");
  		/* ... but we CAN drop it from the new transaction if we
  		 * have also modified it since the original commit. */
  
  		if (jh->b_next_transaction) {
  			J_ASSERT(jh->b_next_transaction == transaction);
  			jh->b_next_transaction = NULL;
1dfc3220d   Josef Bacik   jbd2: fix possibl...
1289
1290
1291
1292
1293
1294
1295
  
  			/*
  			 * only drop a reference if this transaction modified
  			 * the buffer
  			 */
  			if (was_modified)
  				drop_reserve = 1;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
  		}
  	}
  
  not_jbd:
  	spin_unlock(&journal->j_list_lock);
  	jbd_unlock_bh_state(bh);
  	__brelse(bh);
  drop:
  	if (drop_reserve) {
  		/* no need to reserve log space for this block -bzzz */
  		handle->h_buffer_credits++;
  	}
  	return err;
  }
  
  /**
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
1312
   * int jbd2_journal_stop() - complete a transaction
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1313
1314
1315
1316
1317
1318
1319
1320
1321
   * @handle: transaction to complete.
   *
   * All done for a particular handle.
   *
   * There is not much action needed here.  We just return any remaining
   * buffer credits to the transaction and remove the handle.  The only
   * complication is that we need to start a commit operation if the
   * filesystem is marked for synchronous update.
   *
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
1322
   * jbd2_journal_stop itself will not usually return an error, but it may
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1323
   * do so in unusual circumstances.  In particular, expect it to
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
1324
   * return -EIO if a jbd2_journal_abort has been executed since the
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1325
1326
   * transaction began.
   */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
1327
  int jbd2_journal_stop(handle_t *handle)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1328
1329
1330
  {
  	transaction_t *transaction = handle->h_transaction;
  	journal_t *journal = transaction->t_journal;
a51dca9cd   Theodore Ts'o   jbd2: Use atomic ...
1331
1332
  	int err, wait_for_commit = 0;
  	tid_t tid;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1333
  	pid_t pid;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1334
1335
1336
1337
  	J_ASSERT(journal_current_handle() == handle);
  
  	if (is_handle_aborted(handle))
  		err = -EIO;
3e2a532b2   OGAWA Hirofumi   [PATCH] ext3/4: f...
1338
  	else {
a51dca9cd   Theodore Ts'o   jbd2: Use atomic ...
1339
  		J_ASSERT(atomic_read(&transaction->t_updates) > 0);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1340
  		err = 0;
3e2a532b2   OGAWA Hirofumi   [PATCH] ext3/4: f...
1341
  	}
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
  
  	if (--handle->h_ref > 0) {
  		jbd_debug(4, "h_ref %d -> %d
  ", handle->h_ref + 1,
  			  handle->h_ref);
  		return err;
  	}
  
  	jbd_debug(4, "Handle %p going down
  ", handle);
  
  	/*
  	 * Implement synchronous transaction batching.  If the handle
  	 * was synchronous, don't force a commit immediately.  Let's
e07f7183a   Josef Bacik   jbd2: improve jbd...
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
  	 * yield and let another thread piggyback onto this
  	 * transaction.  Keep doing that while new threads continue to
  	 * arrive.  It doesn't cost much - we're about to run a commit
  	 * and sleep on IO anyway.  Speeds up many-threaded, many-dir
  	 * operations by 30x or more...
  	 *
  	 * We try and optimize the sleep time against what the
  	 * underlying disk can do, instead of having a static sleep
  	 * time.  This is useful for the case where our storage is so
  	 * fast that it is more optimal to go ahead and force a flush
  	 * and wait for the transaction to be committed than it is to
  	 * wait for an arbitrary amount of time for new writers to
  	 * join the transaction.  We achieve this by measuring how
  	 * long it takes to commit a transaction, and compare it with
  	 * how long this transaction has been running, and if run time
  	 * < commit time then we sleep for the delta and commit.  This
  	 * greatly helps super fast disks that would see slowdowns as
  	 * more threads started doing fsyncs.
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1374
  	 *
e07f7183a   Josef Bacik   jbd2: improve jbd...
1375
1376
1377
1378
  	 * But don't do this if this process was the most recent one
  	 * to perform a synchronous write.  We do this to detect the
  	 * case where a single process is doing a stream of sync
  	 * writes.  No point in waiting for joiners in that case.
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1379
1380
1381
  	 */
  	pid = current->pid;
  	if (handle->h_sync && journal->j_last_sync_writer != pid) {
e07f7183a   Josef Bacik   jbd2: improve jbd...
1382
  		u64 commit_time, trans_time;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1383
  		journal->j_last_sync_writer = pid;
e07f7183a   Josef Bacik   jbd2: improve jbd...
1384

a931da6ac   Theodore Ts'o   jbd2: Change j_st...
1385
  		read_lock(&journal->j_state_lock);
e07f7183a   Josef Bacik   jbd2: improve jbd...
1386
  		commit_time = journal->j_average_commit_time;
a931da6ac   Theodore Ts'o   jbd2: Change j_st...
1387
  		read_unlock(&journal->j_state_lock);
e07f7183a   Josef Bacik   jbd2: improve jbd...
1388
1389
1390
  
  		trans_time = ktime_to_ns(ktime_sub(ktime_get(),
  						   transaction->t_start_time));
30773840c   Theodore Ts'o   ext4: add fsync b...
1391
1392
  		commit_time = max_t(u64, commit_time,
  				    1000*journal->j_min_batch_time);
e07f7183a   Josef Bacik   jbd2: improve jbd...
1393
  		commit_time = min_t(u64, commit_time,
30773840c   Theodore Ts'o   ext4: add fsync b...
1394
  				    1000*journal->j_max_batch_time);
e07f7183a   Josef Bacik   jbd2: improve jbd...
1395
1396
1397
1398
1399
1400
1401
  
  		if (trans_time < commit_time) {
  			ktime_t expires = ktime_add_ns(ktime_get(),
  						       commit_time);
  			set_current_state(TASK_UNINTERRUPTIBLE);
  			schedule_hrtimeout(&expires, HRTIMER_MODE_ABS);
  		}
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1402
  	}
7058548cd   Theodore Ts'o   ext4: Use WRITE_S...
1403
1404
  	if (handle->h_sync)
  		transaction->t_synchronous_commit = 1;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1405
  	current->journal_info = NULL;
a51dca9cd   Theodore Ts'o   jbd2: Use atomic ...
1406
1407
  	atomic_sub(handle->h_buffer_credits,
  		   &transaction->t_outstanding_credits);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1408
1409
1410
1411
1412
1413
1414
1415
  
  	/*
  	 * If the handle is marked SYNC, we need to set another commit
  	 * going!  We also want to force a commit if the current
  	 * transaction is occupying too much of the log, or if the
  	 * transaction is too old now.
  	 */
  	if (handle->h_sync ||
a51dca9cd   Theodore Ts'o   jbd2: Use atomic ...
1416
1417
1418
  	    (atomic_read(&transaction->t_outstanding_credits) >
  	     journal->j_max_transaction_buffers) ||
  	    time_after_eq(jiffies, transaction->t_expires)) {
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1419
1420
1421
  		/* Do this even for aborted journals: an abort still
  		 * completes the commit thread, it just doesn't write
  		 * anything to disk. */
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1422

470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1423
1424
1425
1426
  		jbd_debug(2, "transaction too old, requesting commit for "
  					"handle %p
  ", handle);
  		/* This is non-blocking */
c35a56a09   Theodore Ts'o   jbd2: Improve sca...
1427
  		jbd2_log_start_commit(journal, transaction->t_tid);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1428
1429
  
  		/*
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
1430
  		 * Special case: JBD2_SYNC synchronous updates require us
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1431
1432
1433
  		 * to wait for the commit to complete.
  		 */
  		if (handle->h_sync && !(current->flags & PF_MEMALLOC))
a51dca9cd   Theodore Ts'o   jbd2: Use atomic ...
1434
  			wait_for_commit = 1;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1435
  	}
a51dca9cd   Theodore Ts'o   jbd2: Use atomic ...
1436
1437
  	/*
  	 * Once we drop t_updates, if it goes to zero the transaction
25985edce   Lucas De Marchi   Fix common misspe...
1438
  	 * could start committing on us and eventually disappear.  So
a51dca9cd   Theodore Ts'o   jbd2: Use atomic ...
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
  	 * once we do this, we must not dereference transaction
  	 * pointer again.
  	 */
  	tid = transaction->t_tid;
  	if (atomic_dec_and_test(&transaction->t_updates)) {
  		wake_up(&journal->j_wait_updates);
  		if (journal->j_barrier_count)
  			wake_up(&journal->j_wait_transaction_locked);
  	}
  
  	if (wait_for_commit)
  		err = jbd2_log_wait_commit(journal, tid);
3295f0ef9   Ingo Molnar   lockdep: rename m...
1451
  	lock_map_release(&handle->h_lockdep_map);
7b7510662   Mingming Cao   jbd2: add lockdep...
1452

af1e76d6b   Mingming Cao   JBD2: jbd2 slab a...
1453
  	jbd2_free_handle(handle);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1454
1455
  	return err;
  }
5648ba5b2   Randy Dunlap   jbd2: fix kernel-...
1456
1457
  /**
   * int jbd2_journal_force_commit() - force any uncommitted transactions
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1458
1459
1460
1461
1462
1463
   * @journal: journal to force
   *
   * For synchronous operations: force any uncommitted transactions
   * to disk.  May seem kludgy, but it reuses all the handle batching
   * code in a very simple manner.
   */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
1464
  int jbd2_journal_force_commit(journal_t *journal)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1465
1466
1467
  {
  	handle_t *handle;
  	int ret;
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
1468
  	handle = jbd2_journal_start(journal, 1);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1469
1470
1471
1472
  	if (IS_ERR(handle)) {
  		ret = PTR_ERR(handle);
  	} else {
  		handle->h_sync = 1;
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
1473
  		ret = jbd2_journal_stop(handle);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
  	}
  	return ret;
  }
  
  /*
   *
   * List management code snippets: various functions for manipulating the
   * transaction buffer lists.
   *
   */
  
  /*
   * Append a buffer to a transaction list, given the transaction's list head
   * pointer.
   *
   * j_list_lock is held.
   *
   * jbd_lock_bh_state(jh2bh(jh)) is held.
   */
  
  static inline void
  __blist_add_buffer(struct journal_head **list, struct journal_head *jh)
  {
  	if (!*list) {
  		jh->b_tnext = jh->b_tprev = jh;
  		*list = jh;
  	} else {
  		/* Insert at the tail of the list to preserve order */
  		struct journal_head *first = *list, *last = first->b_tprev;
  		jh->b_tprev = last;
  		jh->b_tnext = first;
  		last->b_tnext = first->b_tprev = jh;
  	}
  }
  
  /*
   * Unlink @jh from the circular, doubly-linked buffer list whose head
   * pointer is @list.  If @jh is the current head, the head moves on to the
   * next entry, or becomes NULL when @jh was the only entry on the list.
   *
   * Called with j_list_lock held, and the journal may not be locked.
   *
   * jbd_lock_bh_state(jh2bh(jh)) is held.
   */
  
  static inline void
  __blist_del_buffer(struct journal_head **list, struct journal_head *jh)
  {
  	struct journal_head *before = jh->b_tprev;
  	struct journal_head *after = jh->b_tnext;
  
  	/* A ring entry whose successor is itself is the sole member */
  	if (*list == jh)
  		*list = (after == jh) ? NULL : after;
  
  	/* Close the ring around the removed entry */
  	before->b_tnext = after;
  	after->b_tprev = before;
  }
  
  /*
   * Remove a buffer from the appropriate transaction list.
   *
   * Note that this function can *change* the value of
87c89c232   Jan Kara   jbd2: Remove data...
1534
1535
1536
1537
   * bh->b_transaction->t_buffers, t_forget, t_iobuf_list, t_shadow_list,
   * t_log_list or t_reserved_list.  If the caller is holding onto a copy of one
   * of these pointers, it could go bad.  Generally the caller needs to re-read
   * the pointer from the transaction_t.
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1538
1539
1540
   *
   * Called under j_list_lock.  The journal may not be locked.
   */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
1541
  void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
  {
  	struct journal_head **list = NULL;
  	transaction_t *transaction;
  	struct buffer_head *bh = jh2bh(jh);
  
  	J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
  	transaction = jh->b_transaction;
  	if (transaction)
  		assert_spin_locked(&transaction->t_journal->j_list_lock);
  
  	J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
  	if (jh->b_jlist != BJ_None)
4019191be   Mingming Cao   jbd2: sparse poin...
1554
  		J_ASSERT_JH(jh, transaction != NULL);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1555
1556
1557
1558
  
  	switch (jh->b_jlist) {
  	case BJ_None:
  		return;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
  	case BJ_Metadata:
  		transaction->t_nr_buffers--;
  		J_ASSERT_JH(jh, transaction->t_nr_buffers >= 0);
  		list = &transaction->t_buffers;
  		break;
  	case BJ_Forget:
  		list = &transaction->t_forget;
  		break;
  	case BJ_IO:
  		list = &transaction->t_iobuf_list;
  		break;
  	case BJ_Shadow:
  		list = &transaction->t_shadow_list;
  		break;
  	case BJ_LogCtl:
  		list = &transaction->t_log_list;
  		break;
  	case BJ_Reserved:
  		list = &transaction->t_reserved_list;
  		break;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1579
1580
1581
1582
1583
1584
1585
  	}
  
  	__blist_del_buffer(list, jh);
  	jh->b_jlist = BJ_None;
  	if (test_clear_buffer_jbddirty(bh))
  		mark_buffer_dirty(bh);	/* Expose it to the VM */
  }
de1b79413   Jan Kara   jbd2: Fix oops in...
1586
1587
1588
1589
1590
1591
1592
1593
  /*
   * Remove buffer from all transactions.
   *
   * Called with bh_state lock and j_list_lock
   *
   * jh and bh may be already freed when this function returns.
   */
  static void __jbd2_journal_unfile_buffer(struct journal_head *jh)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1594
  {
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
1595
  	__jbd2_journal_temp_unlink_buffer(jh);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1596
  	jh->b_transaction = NULL;
de1b79413   Jan Kara   jbd2: Fix oops in...
1597
  	jbd2_journal_put_journal_head(jh);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1598
  }
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
1599
  void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1600
  {
de1b79413   Jan Kara   jbd2: Fix oops in...
1601
1602
1603
1604
1605
  	struct buffer_head *bh = jh2bh(jh);
  
  	/* Get reference so that buffer cannot be freed before we unlock it */
  	get_bh(bh);
  	jbd_lock_bh_state(bh);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1606
  	spin_lock(&journal->j_list_lock);
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
1607
  	__jbd2_journal_unfile_buffer(jh);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1608
  	spin_unlock(&journal->j_list_lock);
de1b79413   Jan Kara   jbd2: Fix oops in...
1609
1610
  	jbd_unlock_bh_state(bh);
  	__brelse(bh);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1611
1612
1613
  }
  
  /*
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
1614
   * Called from jbd2_journal_try_to_free_buffers().
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
   *
   * Called under jbd_lock_bh_state(bh)
   */
  static void
  __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
  {
  	struct journal_head *jh;
  
  	jh = bh2jh(bh);
  
  	if (buffer_locked(bh) || buffer_dirty(bh))
  		goto out;
4019191be   Mingming Cao   jbd2: sparse poin...
1627
  	if (jh->b_next_transaction != NULL)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1628
1629
1630
  		goto out;
  
  	spin_lock(&journal->j_list_lock);
87c89c232   Jan Kara   jbd2: Remove data...
1631
  	if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) {
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1632
1633
1634
  		/* written-back checkpointed metadata buffer */
  		if (jh->b_jlist == BJ_None) {
  			JBUFFER_TRACE(jh, "remove from checkpoint list");
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
1635
  			__jbd2_journal_remove_checkpoint(jh);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1636
1637
1638
1639
1640
1641
  		}
  	}
  	spin_unlock(&journal->j_list_lock);
  out:
  	return;
  }
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1642
  /**
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
1643
   * int jbd2_journal_try_to_free_buffers() - try to free page buffers.
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1644
1645
   * @journal: journal for operation
   * @page: to try and free
530576bbf   Mingming Cao   jbd2: fix race be...
1646
1647
1648
   * @gfp_mask: we use the mask to detect how hard should we try to release
   * buffers. If __GFP_WAIT and __GFP_FS is set, we wait for commit code to
   * release the buffers.
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
   *
   *
   * For all the buffers on this page,
   * if they are fully written out ordered data, move them onto BUF_CLEAN
   * so try_to_free_buffers() can reap them.
   *
   * This function returns non-zero if we wish try_to_free_buffers()
   * to be called. We do this if the page is releasable by try_to_free_buffers().
   * We also do it if the page has locked or dirty buffers and the caller wants
   * us to perform sync or async writeout.
   *
   * This complicates JBD locking somewhat.  We aren't protected by the
   * BKL here.  We wish to remove the buffer from its committing or
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
1662
   * running transaction's ->t_datalist via __jbd2_journal_unfile_buffer.
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1663
1664
1665
1666
   *
   * This may *change* the value of transaction_t->t_datalist, so anyone
   * who looks at t_datalist needs to lock against this function.
   *
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
1667
1668
   * Even worse, someone may be doing a jbd2_journal_dirty_data on this
   * buffer.  So we need to lock against that.  jbd2_journal_dirty_data()
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1669
1670
1671
1672
1673
1674
1675
1676
   * will come out of the lock with the buffer dirty, which makes it
   * ineligible for release here.
   *
   * Who else is affected by this?  hmm...  Really the only contender
   * is do_get_write_access() - it could be looking at the buffer while
   * journal_try_to_free_buffer() is changing its state.  But that
   * cannot happen because we never reallocate freed data as metadata
   * while the data is part of a transaction.  Yes?
530576bbf   Mingming Cao   jbd2: fix race be...
1677
1678
   *
   * Return 0 on failure, 1 on success
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1679
   */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
1680
  int jbd2_journal_try_to_free_buffers(journal_t *journal,
530576bbf   Mingming Cao   jbd2: fix race be...
1681
  				struct page *page, gfp_t gfp_mask)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
  {
  	struct buffer_head *head;
  	struct buffer_head *bh;
  	int ret = 0;
  
  	J_ASSERT(PageLocked(page));
  
  	head = page_buffers(page);
  	bh = head;
  	do {
  		struct journal_head *jh;
  
  		/*
  		 * We take our own ref against the journal_head here to avoid
  		 * having to add tons of locking around each instance of
530576bbf   Mingming Cao   jbd2: fix race be...
1697
  		 * jbd2_journal_put_journal_head().
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1698
  		 */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
1699
  		jh = jbd2_journal_grab_journal_head(bh);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1700
1701
1702
1703
1704
  		if (!jh)
  			continue;
  
  		jbd_lock_bh_state(bh);
  		__journal_try_to_free_buffer(journal, bh);
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
1705
  		jbd2_journal_put_journal_head(jh);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1706
1707
1708
1709
  		jbd_unlock_bh_state(bh);
  		if (buffer_jbd(bh))
  			goto busy;
  	} while ((bh = bh->b_this_page) != head);
530576bbf   Mingming Cao   jbd2: fix race be...
1710

470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1711
  	ret = try_to_free_buffers(page);
530576bbf   Mingming Cao   jbd2: fix race be...
1712

470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
  busy:
  	return ret;
  }
  
  /*
   * This buffer is no longer needed.  If it is on an older transaction's
   * checkpoint list we need to record it on this transaction's forget list
   * to pin this buffer (and hence its checkpointing transaction) down until
   * this transaction commits.  If the buffer isn't on a checkpoint list, we
   * release it.
   * Returns non-zero if JBD no longer has an interest in the buffer.
   *
   * Called under j_list_lock.
   *
   * Called under jbd_lock_bh_state(bh).
   */
  static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
  {
  	int may_free = 1;
  	struct buffer_head *bh = jh2bh(jh);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1733
1734
  	if (jh->b_cp_transaction) {
  		JBUFFER_TRACE(jh, "on running+cp transaction");
de1b79413   Jan Kara   jbd2: Fix oops in...
1735
  		__jbd2_journal_temp_unlink_buffer(jh);
f91d1d041   Jan Kara   jbd2: Fix a race ...
1736
1737
1738
1739
1740
1741
  		/*
  		 * We don't want to write the buffer anymore, clear the
  		 * bit so that we don't confuse checks in
  		 * __journal_file_buffer
  		 */
  		clear_buffer_dirty(bh);
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
1742
  		__jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1743
1744
1745
  		may_free = 0;
  	} else {
  		JBUFFER_TRACE(jh, "on running transaction");
de1b79413   Jan Kara   jbd2: Fix oops in...
1746
  		__jbd2_journal_unfile_buffer(jh);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1747
1748
1749
1750
1751
  	}
  	return may_free;
  }
  
  /*
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
1752
   * jbd2_journal_invalidatepage
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
   *
   * This code is tricky.  It has a number of cases to deal with.
   *
   * There are two invariants which this code relies on:
   *
   * i_size must be updated on disk before we start calling invalidatepage on the
   * data.
   *
   *  This is done in ext3 by defining an ext3_setattr method which
   *  updates i_size before truncate gets going.  By maintaining this
   *  invariant, we can be sure that it is safe to throw away any buffers
   *  attached to the current transaction: once the transaction commits,
   *  we know that the data will not be needed.
   *
   *  Note however that we can *not* throw away data belonging to the
   *  previous, committing transaction!
   *
   * Any disk blocks which *are* part of the previous, committing
   * transaction (and which therefore cannot be discarded immediately) are
   * not going to be reused in the new running transaction
   *
   *  The bitmap committed_data images guarantee this: any block which is
   *  allocated in one transaction and removed in the next will be marked
   *  as in-use in the committed_data bitmap, so cannot be reused until
   *  the next transaction to delete the block commits.  This means that
   *  leaving committing buffers dirty is quite safe: the disk blocks
   *  cannot be reallocated to a different file and so buffer aliasing is
   *  not possible.
   *
   *
   * The above applies mainly to ordered data mode.  In writeback mode we
   * don't make guarantees about the order in which data hits disk --- in
   * particular we don't guarantee that new dirty data is flushed before
   * transaction commit --- so it is always safe just to discard data
   * immediately in that mode.  --sct
   */
  
  /*
   * The journal_unmap_buffer helper function returns zero if the buffer
   * concerned remains pinned as an anonymous buffer belonging to an older
   * transaction.
   *
   * We're outside-transaction here.  Either or both of j_running_transaction
   * and j_committing_transaction may be NULL.
   */
  static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
  {
  	transaction_t *transaction;
  	struct journal_head *jh;
  	int may_free = 1;
  	int ret;
  
  	BUFFER_TRACE(bh, "entry");
  
  	/*
  	 * It is safe to proceed here without the j_list_lock because the
  	 * buffers cannot be stolen by try_to_free_buffers as long as we are
  	 * holding the page lock. --sct
  	 */
  
  	if (!buffer_jbd(bh))
  		goto zap_buffer_unlocked;
87c89c232   Jan Kara   jbd2: Remove data...
1815
  	/* OK, we have data buffer in journaled mode */
a931da6ac   Theodore Ts'o   jbd2: Change j_st...
1816
  	write_lock(&journal->j_state_lock);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1817
1818
  	jbd_lock_bh_state(bh);
  	spin_lock(&journal->j_list_lock);
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
1819
  	jh = jbd2_journal_grab_journal_head(bh);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1820
1821
  	if (!jh)
  		goto zap_buffer_no_jh;
ba869023e   dingdinghua   jbd2: delay disca...
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
  	/*
  	 * We cannot remove the buffer from checkpoint lists until the
  	 * transaction adding inode to orphan list (let's call it T)
  	 * is committed.  Otherwise if the transaction changing the
  	 * buffer would be cleaned from the journal before T is
  	 * committed, a crash would cause the correct contents of
  	 * the buffer to be lost.  On the other hand we have to
  	 * clear the buffer dirty bit at latest at the moment when the
  	 * transaction marking the buffer as freed in the filesystem
  	 * structures is committed because from that moment on the
  	 * buffer can be reallocated and used by a different page.
  	 * Since the block hasn't been freed yet but the inode has
  	 * already been added to orphan list, it is safe for us to add
  	 * the buffer to BJ_Forget list of the newest transaction.
  	 */
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
  	transaction = jh->b_transaction;
  	if (transaction == NULL) {
  		/* First case: not on any transaction.  If it
  		 * has no checkpoint link, then we can zap it:
  		 * it's a writeback-mode buffer so we don't care
  		 * if it hits disk safely. */
  		if (!jh->b_cp_transaction) {
  			JBUFFER_TRACE(jh, "not on any transaction: zap");
  			goto zap_buffer;
  		}
  
  		if (!buffer_dirty(bh)) {
  			/* bdflush has written it.  We can drop it now */
  			goto zap_buffer;
  		}
  
  		/* OK, it must be in the journal but still not
  		 * written fully to disk: it's metadata or
  		 * journaled data... */
  
  		if (journal->j_running_transaction) {
  			/* ... and once the current transaction has
  			 * committed, the buffer won't be needed any
  			 * longer. */
  			JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget");
  			ret = __dispose_buffer(jh,
  					journal->j_running_transaction);
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
1864
  			jbd2_journal_put_journal_head(jh);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1865
1866
  			spin_unlock(&journal->j_list_lock);
  			jbd_unlock_bh_state(bh);
a931da6ac   Theodore Ts'o   jbd2: Change j_st...
1867
  			write_unlock(&journal->j_state_lock);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
  			return ret;
  		} else {
  			/* There is no currently-running transaction. So the
  			 * orphan record which we wrote for this file must have
  			 * passed into commit.  We must attach this buffer to
  			 * the committing transaction, if it exists. */
  			if (journal->j_committing_transaction) {
  				JBUFFER_TRACE(jh, "give to committing trans");
  				ret = __dispose_buffer(jh,
  					journal->j_committing_transaction);
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
1878
  				jbd2_journal_put_journal_head(jh);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1879
1880
  				spin_unlock(&journal->j_list_lock);
  				jbd_unlock_bh_state(bh);
a931da6ac   Theodore Ts'o   jbd2: Change j_st...
1881
  				write_unlock(&journal->j_state_lock);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1882
1883
1884
1885
1886
1887
1888
1889
1890
  				return ret;
  			} else {
  				/* The orphan record's transaction has
  				 * committed.  We can cleanse this buffer */
  				clear_buffer_jbddirty(bh);
  				goto zap_buffer;
  			}
  		}
  	} else if (transaction == journal->j_committing_transaction) {
9b57988db   Eric Sandeen   [PATCH] jbd2: jou...
1891
  		JBUFFER_TRACE(jh, "on committing transaction");
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1892
  		/*
ba869023e   dingdinghua   jbd2: delay disca...
1893
1894
1895
1896
1897
1898
  		 * The buffer is committing, we simply cannot touch
  		 * it. So we just set b_next_transaction to the
  		 * running transaction (if there is one) and mark
  		 * buffer as freed so that commit code knows it should
  		 * clear dirty bits when it is done with the buffer.
  		 */
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1899
  		set_buffer_freed(bh);
ba869023e   dingdinghua   jbd2: delay disca...
1900
1901
  		if (journal->j_running_transaction && buffer_jbddirty(bh))
  			jh->b_next_transaction = journal->j_running_transaction;
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
1902
  		jbd2_journal_put_journal_head(jh);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1903
1904
  		spin_unlock(&journal->j_list_lock);
  		jbd_unlock_bh_state(bh);
a931da6ac   Theodore Ts'o   jbd2: Change j_st...
1905
  		write_unlock(&journal->j_state_lock);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1906
1907
1908
1909
1910
1911
1912
1913
1914
  		return 0;
  	} else {
  		/* Good, the buffer belongs to the running transaction.
  		 * We are writing our own transaction's data, not any
  		 * previous one's, so it is safe to throw it away
  		 * (remember that we expect the filesystem to have set
  		 * i_size already for this truncate so recovery will not
  		 * expose the disk blocks we are discarding here.) */
  		J_ASSERT_JH(jh, transaction == journal->j_running_transaction);
9b57988db   Eric Sandeen   [PATCH] jbd2: jou...
1915
  		JBUFFER_TRACE(jh, "on running transaction");
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1916
1917
1918
1919
  		may_free = __dispose_buffer(jh, transaction);
  	}
  
  zap_buffer:
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
1920
  	jbd2_journal_put_journal_head(jh);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1921
1922
1923
  zap_buffer_no_jh:
  	spin_unlock(&journal->j_list_lock);
  	jbd_unlock_bh_state(bh);
a931da6ac   Theodore Ts'o   jbd2: Change j_st...
1924
  	write_unlock(&journal->j_state_lock);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
  zap_buffer_unlocked:
  	clear_buffer_dirty(bh);
  	J_ASSERT_BH(bh, !buffer_jbddirty(bh));
  	clear_buffer_mapped(bh);
  	clear_buffer_req(bh);
  	clear_buffer_new(bh);
  	bh->b_bdev = NULL;
  	return may_free;
  }
  
  /**
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
1936
   * void jbd2_journal_invalidatepage()
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1937
1938
1939
1940
1941
1942
1943
   * @journal: journal to use for flush...
   * @page:    page to flush
   * @offset:  offset in the page from which to start invalidating.
   *
   * Reap page buffers containing data after offset in page.
   *
   */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
1944
  void jbd2_journal_invalidatepage(journal_t *journal,
  				 struct page *page,
  				 unsigned long offset)
  {
  	struct buffer_head *first, *bh;
  	unsigned int curr_off = 0;
  	int may_free = 1;
  
  	/* The caller is required to hold the page lock. */
  	if (!PageLocked(page))
  		BUG();
  	/* A page with no buffers attached needs no work. */
  	if (!page_has_buffers(page))
  		return;
  
  	/*
  	 * Buffers may live on lists other than the plain data lists
  	 * (journaled data mode in particular), so each one is handed to
  	 * journal_unmap_buffer() under the buffer lock.
  	 */
  	first = page_buffers(page);
  	bh = first;
  	do {
  		/* Sample the successor and size before touching the buffer. */
  		struct buffer_head *following = bh->b_this_page;
  		unsigned int blk_size = bh->b_size;
  
  		/* Only buffers starting at or beyond @offset are unmapped. */
  		if (curr_off >= offset) {
  			lock_buffer(bh);
  			may_free &= journal_unmap_buffer(journal, bh);
  			unlock_buffer(bh);
  		}
  
  		curr_off += blk_size;
  		bh = following;
  	} while (bh != first);
  
  	/*
  	 * Whole-page invalidation: if every buffer reported it may be
  	 * freed, try to drop the buffers from the page altogether.
  	 */
  	if (offset == 0 && may_free && try_to_free_buffers(page))
  		J_ASSERT(!page_has_buffers(page));
  }
  
  /*
   * File a buffer on the given transaction list.
   */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
1986
  void __jbd2_journal_file_buffer(struct journal_head *jh,
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
  			transaction_t *transaction, int jlist)
  {
  	/*
  	 * Attach @jh to @transaction and link it onto the transaction list
  	 * selected by @jlist (one of the BJ_* values handled in the switch
  	 * below).
  	 *
  	 * The caller must hold the bh state lock of the underlying buffer
  	 * and the journal's j_list_lock; both are asserted below.
  	 */
  	struct journal_head **list = NULL;
  	int was_dirty = 0;
  	struct buffer_head *bh = jh2bh(jh);
  
  	J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
  	assert_spin_locked(&transaction->t_journal->j_list_lock);
  
  	/* jh must be unattached or already belong to this transaction. */
  	J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
  	J_ASSERT_JH(jh, jh->b_transaction == transaction ||
4019191be   Mingming Cao   jbd2: sparse poin...
1998
  				jh->b_transaction == NULL);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1999
2000
2001
  
  	/* Already filed on the requested list: nothing to do. */
  	if (jh->b_transaction && jh->b_jlist == jlist)
  		return;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
2002
2003
  	if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
  	    jlist == BJ_Shadow || jlist == BJ_Forget) {
f91d1d041   Jan Kara   jbd2: Fix a race ...
2004
2005
2006
2007
2008
2009
2010
2011
2012
  		/*
  		 * For metadata buffers, we track dirty bit in buffer_jbddirty
  		 * instead of buffer_dirty. We should not see a dirty bit set
  		 * here because we clear it in do_get_write_access but e.g.
  		 * tune2fs can modify the sb and set the dirty bit at any time
  		 * so we try to gracefully handle that.
  		 */
  		if (buffer_dirty(bh))
  			warn_dirty_buffer(bh);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
2013
2014
2015
2016
2017
2018
  		/*
  		 * Remember whether either dirty flag was set so it can be
  		 * re-established as jbddirty once the buffer is filed. Note
  		 * the short-circuit: when buffer_dirty was set, the jbddirty
  		 * flag is not tested or cleared here.
  		 */
  		if (test_clear_buffer_dirty(bh) ||
  		    test_clear_buffer_jbddirty(bh))
  			was_dirty = 1;
  	}
  
  	/*
  	 * A jh that is already attached is taken off its current list;
  	 * an unattached one gets a journal head reference via
  	 * jbd2_journal_grab_journal_head() on behalf of the transaction.
  	 */
  	if (jh->b_transaction)
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
2019
  		__jbd2_journal_temp_unlink_buffer(jh);
de1b79413   Jan Kara   jbd2: Fix oops in...
2020
2021
  	else
  		jbd2_journal_grab_journal_head(bh);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
2022
2023
2024
2025
2026
2027
2028
  	jh->b_transaction = transaction;
  
  	/*
  	 * Pick the list head for @jlist.
  	 * NOTE(review): there is no default case, so any value not listed
  	 * here leaves 'list' NULL when it reaches __blist_add_buffer() --
  	 * confirm that callers only pass the BJ_* values handled below.
  	 */
  	switch (jlist) {
  	case BJ_None:
  		/* Attached to the transaction but kept off every list. */
  		J_ASSERT_JH(jh, !jh->b_committed_data);
  		J_ASSERT_JH(jh, !jh->b_frozen_data);
  		return;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
  	case BJ_Metadata:
  		/* Only the metadata list keeps a buffer count. */
  		transaction->t_nr_buffers++;
  		list = &transaction->t_buffers;
  		break;
  	case BJ_Forget:
  		list = &transaction->t_forget;
  		break;
  	case BJ_IO:
  		list = &transaction->t_iobuf_list;
  		break;
  	case BJ_Shadow:
  		list = &transaction->t_shadow_list;
  		break;
  	case BJ_LogCtl:
  		list = &transaction->t_log_list;
  		break;
  	case BJ_Reserved:
  		list = &transaction->t_reserved_list;
  		break;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
2048
2049
2050
2051
2052
2053
2054
2055
  	}
  
  	__blist_add_buffer(list, jh);
  	jh->b_jlist = jlist;
  
  	/* Carry the dirty state saved above over as jbddirty. */
  	if (was_dirty)
  		set_buffer_jbddirty(bh);
  }
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
2056
  void jbd2_journal_file_buffer(struct journal_head *jh,
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
2057
2058
2059
2060
  				transaction_t *transaction, int jlist)
  {
  	jbd_lock_bh_state(jh2bh(jh));
  	spin_lock(&transaction->t_journal->j_list_lock);
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
2061
  	__jbd2_journal_file_buffer(jh, transaction, jlist);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
  	spin_unlock(&transaction->t_journal->j_list_lock);
  	jbd_unlock_bh_state(jh2bh(jh));
  }
  
  /*
   * Remove a buffer from its current buffer list in preparation for
   * dropping it from its current transaction entirely.  If the buffer has
   * already started to be used by a subsequent transaction, refile the
   * buffer on that transaction's metadata list.
   *
de1b79413   Jan Kara   jbd2: Fix oops in...
2072
   * Called under j_list_lock
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
2073
   * Called under jbd_lock_bh_state(jh2bh(jh))
de1b79413   Jan Kara   jbd2: Fix oops in...
2074
2075
   *
   * jh and bh may already be freed when this function returns
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
2076
   */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
2077
  void __jbd2_journal_refile_buffer(struct journal_head *jh)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
2078
  {
ba869023e   dingdinghua   jbd2: delay disca...
2079
  	int was_dirty, jlist;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
2080
2081
2082
2083
2084
2085
2086
2087
  	struct buffer_head *bh = jh2bh(jh);
  
  	J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
  	if (jh->b_transaction)
  		assert_spin_locked(&jh->b_transaction->t_journal->j_list_lock);
  
  	/* If the buffer is now unused, just drop it. */
  	if (jh->b_next_transaction == NULL) {
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
2088
  		__jbd2_journal_unfile_buffer(jh);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
2089
2090
2091
2092
2093
2094
2095
2096
2097
  		return;
  	}
  
  	/*
  	 * It has been modified by a later transaction: add it to the new
  	 * transaction's metadata list.
  	 */
  
  	was_dirty = test_clear_buffer_jbddirty(bh);
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
2098
  	__jbd2_journal_temp_unlink_buffer(jh);
de1b79413   Jan Kara   jbd2: Fix oops in...
2099
2100
2101
2102
2103
  	/*
  	 * We set b_transaction here because b_next_transaction will inherit
  	 * our jh reference and thus __jbd2_journal_file_buffer() must not
  	 * take a new one.
  	 */
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
2104
2105
  	jh->b_transaction = jh->b_next_transaction;
  	jh->b_next_transaction = NULL;
ba869023e   dingdinghua   jbd2: delay disca...
2106
2107
2108
2109
2110
2111
2112
  	if (buffer_freed(bh))
  		jlist = BJ_Forget;
  	else if (jh->b_modified)
  		jlist = BJ_Metadata;
  	else
  		jlist = BJ_Reserved;
  	__jbd2_journal_file_buffer(jh, jh->b_transaction, jlist);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
2113
2114
2115
2116
2117
2118
2119
  	J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);
  
  	if (was_dirty)
  		set_buffer_jbddirty(bh);
  }
  
  /*
de1b79413   Jan Kara   jbd2: Fix oops in...
2120
2121
2122
2123
   * __jbd2_journal_refile_buffer() with necessary locking added. We take our
   * bh reference so that we can safely unlock bh.
   *
   * The jh and bh may be freed by this call.
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
2124
   */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
2125
  void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh)
  {
  	struct buffer_head *pinned_bh = jh2bh(jh);
  
  	/*
  	 * Pin the buffer head: the refile below may release the jh and bh,
  	 * yet the bh state lock still has to be dropped afterwards.
  	 */
  	get_bh(pinned_bh);
  
  	jbd_lock_bh_state(pinned_bh);
  	spin_lock(&journal->j_list_lock);
  
  	__jbd2_journal_refile_buffer(jh);
  
  	jbd_unlock_bh_state(pinned_bh);
  	spin_unlock(&journal->j_list_lock);
  
  	/* Drop the reference taken above. */
  	__brelse(pinned_bh);
  }
c851ed540   Jan Kara   jbd2: Implement d...
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
  
  /*
   * File inode in the inode list of the handle's transaction.
   *
   * @handle: handle whose transaction the inode is attached to
   * @jinode: journal inode to file
   *
   * Returns -EIO if the handle is aborted, 0 otherwise (including the case
   * where the inode is already associated with the transaction).
   */
  int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode)
  {
  	transaction_t *transaction = handle->h_transaction;
  	journal_t *journal = transaction->t_journal;
  
  	if (is_handle_aborted(handle))
  		return -EIO;
  
  	/* The message must be one string literal ending in a "\n" escape. */
  	jbd_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino,
  			transaction->t_tid);
  
  	/*
  	 * First check whether inode isn't already on the transaction's
  	 * lists without taking the lock. Note that this check is safe
  	 * without the lock as we cannot race with somebody removing inode
  	 * from the transaction. The reason is that we remove inode from the
  	 * transaction only in journal_release_jbd_inode() and when we commit
  	 * the transaction. We are guarded from the first case by holding
  	 * a reference to the inode. We are safe against the second case
  	 * because if jinode->i_transaction == transaction, commit code
  	 * cannot touch the transaction because we hold reference to it,
  	 * and if jinode->i_next_transaction == transaction, commit code
  	 * will only file the inode where we want it.
  	 */
  	if (jinode->i_transaction == transaction ||
  	    jinode->i_next_transaction == transaction)
  		return 0;
  
  	spin_lock(&journal->j_list_lock);
  
  	/* Repeat the check now that j_list_lock is held. */
  	if (jinode->i_transaction == transaction ||
  	    jinode->i_next_transaction == transaction)
  		goto done;
  
  	/*
  	 * We only ever set this variable to 1 so the test is safe. Since
  	 * t_need_data_flush is likely to be set, we do the test to save some
  	 * cacheline bouncing
  	 */
  	if (!transaction->t_need_data_flush)
  		transaction->t_need_data_flush = 1;
  
  	/*
  	 * On some different transaction's list - should be the committing
  	 * one. Queue the inode for this transaction via i_next_transaction
  	 * instead of moving it between lists.
  	 */
  	if (jinode->i_transaction) {
  		J_ASSERT(jinode->i_next_transaction == NULL);
  		J_ASSERT(jinode->i_transaction ==
  					journal->j_committing_transaction);
  		jinode->i_next_transaction = transaction;
  		goto done;
  	}
  
  	/* Not on any transaction list: attach it to this transaction. */
  	J_ASSERT(!jinode->i_next_transaction);
  	jinode->i_transaction = transaction;
  	list_add(&jinode->i_list, &transaction->t_inode_list);
  done:
  	spin_unlock(&journal->j_list_lock);
  
  	return 0;
  }
  
  /*
7f5aa2150   Jan Kara   jbd2: Avoid possi...
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
   * File truncate and transaction commit interact with each other in a
   * non-trivial way.  If a transaction writing data block A is
   * committing, we cannot discard the data by truncate until we have
   * written them.  Otherwise if we crashed after the transaction with
   * write has committed but before the transaction with truncate has
   * committed, we could see stale data in block A.  This function is a
   * helper to solve this problem.  It starts writeout of the truncated
   * part in case it is in the committing transaction.
   *
   * Filesystem code must call this function when inode is journaled in
   * ordered mode before truncation happens and after the inode has been
   * placed on orphan list with the new inode size. The second condition
   * avoids the race that someone writes new data and we start
   * committing the transaction after this function has been called but
   * before a transaction for truncate is started (and furthermore it
   * allows us to optimize the case where the addition to orphan list
   * happens in the same transaction as write --- we don't have to write
   * any data in such case).
c851ed540   Jan Kara   jbd2: Implement d...
2220
   */
7f5aa2150   Jan Kara   jbd2: Avoid possi...
2221
2222
  int jbd2_journal_begin_ordered_truncate(journal_t *journal,
  					struct jbd2_inode *jinode,
c851ed540   Jan Kara   jbd2: Implement d...
2223
2224
  					loff_t new_size)
  {
7f5aa2150   Jan Kara   jbd2: Avoid possi...
2225
  	transaction_t *inode_trans, *commit_trans;
c851ed540   Jan Kara   jbd2: Implement d...
2226
  	int ret = 0;
7f5aa2150   Jan Kara   jbd2: Avoid possi...
2227
2228
  	/* This is a quick check to avoid locking if not necessary */
  	if (!jinode->i_transaction)
c851ed540   Jan Kara   jbd2: Implement d...
2229
  		goto out;
7f5aa2150   Jan Kara   jbd2: Avoid possi...
2230
2231
2232
  	/* Locks are here just to force reading of recent values, it is
  	 * enough that the transaction was not committing before we started
  	 * a transaction adding the inode to orphan list */
a931da6ac   Theodore Ts'o   jbd2: Change j_st...
2233
  	read_lock(&journal->j_state_lock);
c851ed540   Jan Kara   jbd2: Implement d...
2234
  	commit_trans = journal->j_committing_transaction;
a931da6ac   Theodore Ts'o   jbd2: Change j_st...
2235
  	read_unlock(&journal->j_state_lock);
7f5aa2150   Jan Kara   jbd2: Avoid possi...
2236
2237
2238
2239
2240
  	spin_lock(&journal->j_list_lock);
  	inode_trans = jinode->i_transaction;
  	spin_unlock(&journal->j_list_lock);
  	if (inode_trans == commit_trans) {
  		ret = filemap_fdatawrite_range(jinode->i_vfs_inode->i_mapping,
c851ed540   Jan Kara   jbd2: Implement d...
2241
2242
2243
2244
2245
2246
2247
  			new_size, LLONG_MAX);
  		if (ret)
  			jbd2_journal_abort(journal, ret);
  	}
  out:
  	return ret;
  }