Blame view

fs/jbd2/commit.c 31.4 KB
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1
  /*
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
2
   * linux/fs/jbd2/commit.c
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
   *
   * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
   *
   * Copyright 1998 Red Hat corp --- All Rights Reserved
   *
   * This file is part of the Linux kernel and is made available under
   * the terms of the GNU General Public License, version 2, or at your
   * option, any later version, incorporated herein by reference.
   *
   * Journal commit routines for the generic filesystem journaling code;
   * part of the ext2fs journaling system.
   */
  
  #include <linux/time.h>
  #include <linux/fs.h>
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
18
  #include <linux/jbd2.h>
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
19
20
21
22
  #include <linux/errno.h>
  #include <linux/slab.h>
  #include <linux/mm.h>
  #include <linux/pagemap.h>
8e85fb3f3   Johann Lombardi   jbd2: jbd2 stats ...
23
  #include <linux/jiffies.h>
818d276ce   Girish Shilamkar   ext4: Add the jou...
24
  #include <linux/crc32.h>
cd1aac329   Aneesh Kumar K.V   ext4: Add ordered...
25
26
  #include <linux/writeback.h>
  #include <linux/backing-dev.h>
fd98496f4   Theodore Ts'o   jbd2: Add barrier...
27
  #include <linux/bio.h>
0e3d2a631   Theodore Ts'o   ext4: Fix async c...
28
  #include <linux/blkdev.h>
39e3ac259   Brian King   jbd2: Fix I/O han...
29
  #include <linux/bitops.h>
879c5e6b7   Theodore Ts'o   jbd2: convert ins...
30
  #include <trace/events/jbd2.h>
39e3ac259   Brian King   jbd2: Fix I/O han...
31
  #include <asm/system.h>
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
  
  /*
   * I/O completion callback for the temporary BJ_IO buffer_heads (it is
   * installed as bh->b_end_io before the buffer is submitted).
   *
   * @bh:       buffer whose I/O has finished
   * @uptodate: non-zero if the I/O succeeded
   *
   * Records the outcome in the buffer's uptodate flag and unlocks it.
   */
  static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
  {
  	BUFFER_TRACE(bh, "");
  	if (!uptodate)
  		clear_buffer_uptodate(bh);
  	else
  		set_buffer_uptodate(bh);
  	/* Unlock only once the uptodate state has been settled. */
  	unlock_buffer(bh);
  }
  
  /*
87c89c232   Jan Kara   jbd2: Remove data...
47
48
   * When an ext4 file is truncated, it is possible that some pages are not
   * successfully freed, because they are attached to a committing transaction.
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
   * After the transaction commits, these pages are left on the LRU, with no
   * ->mapping, and with attached buffers.  These pages are trivially reclaimable
   * by the VM, but their apparent absence upsets the VM accounting, and it makes
   * the numbers in /proc/meminfo look odd.
   *
   * So here, we have a buffer which has just come off the forget list.  Look to
   * see if we can strip all buffers from the backing page.
   *
   * Called under lock_journal(), and possibly under journal_datalist_lock.  The
   * caller provided us with a ref against the buffer, and we drop that here.
   */
  static void release_buffer_page(struct buffer_head *bh)
  {
  	struct page *page;
  
  	/* Only a clean buffer on which we hold the sole reference qualifies. */
  	if (buffer_dirty(bh) || atomic_read(&bh->b_count) != 1)
  		goto drop_ref;
  
  	/* There must be a backing page, and it must have no ->mapping left. */
  	page = bh->b_page;
  	if (!page || page->mapping)
  		goto drop_ref;
  
  	/* OK, it's a truncated page: never sleep on its lock, just give up. */
  	if (!trylock_page(page))
  		goto drop_ref;
  
  	/*
  	 * Take our own page reference before dropping the caller's buffer
  	 * reference, then try to strip the buffers from the page.
  	 */
  	page_cache_get(page);
  	__brelse(bh);
  	try_to_free_buffers(page);
  	unlock_page(page);
  	page_cache_release(page);
  	return;
  
  drop_ref:
  	/* Not releasable: just drop the reference the caller handed us. */
  	__brelse(bh);
  }
  
  /*
818d276ce   Girish Shilamkar   ext4: Add the jou...
90
   * Done it all: now submit the commit record.  We should have
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
91
92
93
94
95
96
   * cleaned up our previous buffers by now, so if we are in abort
   * mode we can now just skip the rest of the journal write
   * entirely.
   *
   * Returns 1 if the journal needs to be aborted or 0 on success
   */
818d276ce   Girish Shilamkar   ext4: Add the jou...
97
98
99
100
  static int journal_submit_commit_record(journal_t *journal,
  					transaction_t *commit_transaction,
  					struct buffer_head **cbh,
  					__u32 crc32_sum)
  {
  	struct timespec now = current_kernel_time();
  	struct journal_head *descriptor;
  	struct commit_header *hdr;
  	struct buffer_head *bh;
  	int write_op = WRITE_SYNC;
  	int ret;
  
  	/* Make sure the caller sees NULL on every early-return path. */
  	*cbh = NULL;
  
  	/* Nothing to write once the journal has been aborted. */
  	if (is_journal_aborted(journal))
  		return 0;
  
  	descriptor = jbd2_journal_get_descriptor_buffer(journal);
  	if (!descriptor)
  		return 1;
  
  	bh = jh2bh(descriptor);
  
  	/* Build the commit header in the buffer; fields are big-endian. */
  	hdr = (struct commit_header *)bh->b_data;
  	hdr->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
  	hdr->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK);
  	hdr->h_sequence = cpu_to_be32(commit_transaction->t_tid);
  	hdr->h_commit_sec = cpu_to_be64(now.tv_sec);
  	hdr->h_commit_nsec = cpu_to_be32(now.tv_nsec);
  
  	/* The checksum fields are only filled in when the feature is on. */
  	if (JBD2_HAS_COMPAT_FEATURE(journal,
  				    JBD2_FEATURE_COMPAT_CHECKSUM)) {
  		hdr->h_chksum_type 	= JBD2_CRC32_CHKSUM;
  		hdr->h_chksum_size 	= JBD2_CRC32_CHKSUM_SIZE;
  		hdr->h_chksum[0] 	= cpu_to_be32(crc32_sum);
  	}
  
  	JBUFFER_TRACE(descriptor, "submit commit block");
  	lock_buffer(bh);
  	clear_buffer_dirty(bh);
  	set_buffer_uptodate(bh);
  	bh->b_end_io = journal_end_buffer_io_sync;
  
  	/*
  	 * With barriers enabled and async commit not in use, the commit
  	 * block is additionally written with the FLUSH/FUA flags.
  	 */
  	if ((journal->j_flags & JBD2_BARRIER) &&
  	    !JBD2_HAS_INCOMPAT_FEATURE(journal,
  				       JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT))
  		write_op |= WRITE_FLUSH_FUA;
  	ret = submit_bh(write_op, bh);
  
  	/* Hand the submitted buffer back so the caller can wait on it. */
  	*cbh = bh;
  	return ret;
  }
  
  /*
   * This function, together with journal_submit_commit_record(), allows the
   * commit record to be written asynchronously.
   */
fd98496f4   Theodore Ts'o   jbd2: Add barrier...
150
151
  static int journal_wait_on_commit_record(journal_t *journal,
  					 struct buffer_head *bh)
  {
  	int status;
  
  	clear_buffer_dirty(bh);
  	wait_on_buffer(bh);
  
  	/* A buffer that is not uptodate after the wait is reported as -EIO. */
  	status = unlikely(!buffer_uptodate(bh)) ? -EIO : 0;
  
  	/* Drop the getblk() reference, then the journal head reference. */
  	put_bh(bh);
  	jbd2_journal_put_journal_head(bh2jh(bh));
  
  	return status;
  }
818d276ce   Girish Shilamkar   ext4: Add the jou...
165
  /*
cd1aac329   Aneesh Kumar K.V   ext4: Add ordered...
166
167
168
169
170
171
172
173
174
175
176
177
178
   * write the filemap data using writepage() address_space_operations.
   * We don't do block allocation here even for delalloc. We don't
   * use writepages() because with delayed allocation we may be doing
   * block allocation in writepages().
   */
  static int journal_submit_inode_data_buffers(struct address_space *mapping)
  {
  	/*
  	 * Synchronous writeback over [0, i_size] of the owning inode, with a
  	 * page budget of twice the number of pages currently in the mapping.
  	 */
  	struct writeback_control wbc = {
  		.sync_mode =  WB_SYNC_ALL,
  		.nr_to_write = mapping->nrpages * 2,
  		.range_start = 0,
  		.range_end = i_size_read(mapping->host),
  	};
  
  	/* Returns whatever generic_writepages() reports. */
  	return generic_writepages(mapping, &wbc);
  }
  
  /*
c851ed540   Jan Kara   jbd2: Implement d...
186
187
188
189
190
191
192
   * Submit all the data buffers of inode associated with the transaction to
   * disk.
   *
   * We are in a committing transaction. Therefore no new inode can be added to
   * our inode list. We use JI_COMMIT_RUNNING flag to protect inode we currently
   * operate on from being released while we write out pages.
   */
cd1aac329   Aneesh Kumar K.V   ext4: Add ordered...
193
  static int journal_submit_data_buffers(journal_t *journal,
c851ed540   Jan Kara   jbd2: Implement d...
194
195
196
197
198
199
200
201
202
  		transaction_t *commit_transaction)
  {
  	struct jbd2_inode *jinode;
  	int err, ret = 0;
  	struct address_space *mapping;
  
  	spin_lock(&journal->j_list_lock);
  	list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
  		mapping = jinode->i_vfs_inode->i_mapping;
39e3ac259   Brian King   jbd2: Fix I/O han...
203
  		set_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
c851ed540   Jan Kara   jbd2: Implement d...
204
  		spin_unlock(&journal->j_list_lock);
cd1aac329   Aneesh Kumar K.V   ext4: Add ordered...
205
206
207
208
209
210
  		/*
  		 * submit the inode data buffers. We use writepage
  		 * instead of writepages. Because writepages can do
  		 * block allocation  with delalloc. We need to write
  		 * only allocated blocks here.
  		 */
879c5e6b7   Theodore Ts'o   jbd2: convert ins...
211
  		trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
cd1aac329   Aneesh Kumar K.V   ext4: Add ordered...
212
  		err = journal_submit_inode_data_buffers(mapping);
c851ed540   Jan Kara   jbd2: Implement d...
213
214
215
216
  		if (!ret)
  			ret = err;
  		spin_lock(&journal->j_list_lock);
  		J_ASSERT(jinode->i_transaction == commit_transaction);
39e3ac259   Brian King   jbd2: Fix I/O han...
217
218
  		clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
  		smp_mb__after_clear_bit();
c851ed540   Jan Kara   jbd2: Implement d...
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
  		wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
  	}
  	spin_unlock(&journal->j_list_lock);
  	return ret;
  }
  
  /*
   * Wait for data submitted for writeout, refile inodes to proper
   * transaction if needed.
   *
   */
  static int journal_finish_inode_data_buffers(journal_t *journal,
  		transaction_t *commit_transaction)
  {
  	struct jbd2_inode *jinode, *next;
  	int first_err = 0;
  	int err;
  
  	/* For locking, see the comment in journal_submit_data_buffers() */
  	spin_lock(&journal->j_list_lock);
  	list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
  		set_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
  		spin_unlock(&journal->j_list_lock);
  
  		/* Wait, without the list lock held, for the submitted pages. */
  		err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping);
  		if (err) {
  			/*
  			 * filemap_fdatawait_range() clears AS_EIO, so put it
  			 * back: a user process calling fsync() must still be
  			 * able to get -EIO.
  			 */
  			set_bit(AS_EIO,
  				&jinode->i_vfs_inode->i_mapping->flags);
  
  			/* Only the first failure is reported to the caller. */
  			if (!first_err)
  				first_err = err;
  		}
  
  		spin_lock(&journal->j_list_lock);
  		clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
  		smp_mb__after_clear_bit();
  		wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
  	}
  
  	/*
  	 * Now refile inodes to proper lists: each one leaves the committing
  	 * transaction and either moves to its next transaction or ends up
  	 * attached to none.
  	 */
  	list_for_each_entry_safe(jinode, next,
  				 &commit_transaction->t_inode_list, i_list) {
  		list_del(&jinode->i_list);
  		if (!jinode->i_next_transaction) {
  			jinode->i_transaction = NULL;
  		} else {
  			jinode->i_transaction = jinode->i_next_transaction;
  			jinode->i_next_transaction = NULL;
  			list_add(&jinode->i_list,
  				&jinode->i_transaction->t_inode_list);
  		}
  	}
  	spin_unlock(&journal->j_list_lock);
  
  	return first_err;
  }
818d276ce   Girish Shilamkar   ext4: Add the jou...
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
  /*
   * Fold the contents of one buffer into a running big-endian CRC32.
   *
   * @crc32_sum: checksum accumulated so far (used as the CRC seed)
   * @bh:        buffer whose b_size bytes of data are added
   *
   * Returns the updated checksum.
   */
  static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh)
  {
  	char *kaddr;
  	__u32 sum;
  
  	/* Map the backing page and locate the buffer's data within it. */
  	kaddr = kmap_atomic(bh->b_page, KM_USER0);
  	sum = crc32_be(crc32_sum,
  		(void *)(kaddr + offset_in_page(bh->b_data)), bh->b_size);
  	kunmap_atomic(kaddr, KM_USER0);
  
  	return sum;
  }
  
  /*
   * Store a block number into a journal block tag.
   *
   * @tag_bytes: size of a tag for this journal
   * @tag:       tag to fill in
   * @block:     block number to record
   *
   * The low 32 bits always go into t_blocknr; the high 32 bits are stored
   * in t_blocknr_high only when the tag is larger than JBD2_TAG_SIZE32.
   */
  static void write_tag_block(int tag_bytes, journal_block_tag_t *tag,
  				   unsigned long long block)
  {
  	tag->t_blocknr = cpu_to_be32(block & (u32)~0);
  
  	/* A 32-bit-sized tag has no room for the upper half. */
  	if (tag_bytes <= JBD2_TAG_SIZE32)
  		return;
  
  	tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1);
  }
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
297
  /*
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
298
   * jbd2_journal_commit_transaction
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
299
300
301
302
   *
   * The primary function for committing a transaction to the log.  This
   * function is called by the journal thread to begin a complete commit.
   */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
303
  void jbd2_journal_commit_transaction(journal_t *journal)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
304
  {
8e85fb3f3   Johann Lombardi   jbd2: jbd2 stats ...
305
  	struct transaction_stats_s stats;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
306
307
308
309
310
311
  	transaction_t *commit_transaction;
  	struct journal_head *jh, *new_jh, *descriptor;
  	struct buffer_head **wbuf = journal->j_wbuf;
  	int bufs;
  	int flags;
  	int err;
18eba7aae   Mingming Cao   [PATCH] jbd2: swi...
312
  	unsigned long long blocknr;
e07f7183a   Josef Bacik   jbd2: improve jbd...
313
314
  	ktime_t start_time;
  	u64 commit_time;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
315
316
317
318
319
320
  	char *tagp = NULL;
  	journal_header_t *header;
  	journal_block_tag_t *tag = NULL;
  	int space_left = 0;
  	int first_tag = 0;
  	int tag_flag;
fb68407b0   Aneesh Kumar K.V   jbd2: Call journa...
321
  	int i, to_free = 0;
b517bea1c   Zach Brown   [PATCH] 64-bit jb...
322
  	int tag_bytes = journal_tag_bytes(journal);
818d276ce   Girish Shilamkar   ext4: Add the jou...
323
324
  	struct buffer_head *cbh = NULL; /* For transactional checksums */
  	__u32 crc32_sum = ~0;
82f04ab47   Jens Axboe   jbd2: finish conv...
325
  	struct blk_plug plug;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
326
327
328
329
330
  
  	/*
  	 * First job: lock down the current transaction and wait for
  	 * all outstanding updates to complete.
  	 */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
331
332
  	/* Do we need to erase the effects of a prior jbd2_journal_flush? */
  	if (journal->j_flags & JBD2_FLUSHED) {
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
333
334
  		jbd_debug(3, "super block updated
  ");
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
335
  		jbd2_journal_update_superblock(journal, 1);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
336
337
338
339
340
341
342
343
344
345
  	} else {
  		jbd_debug(3, "superblock not updated
  ");
  	}
  
  	J_ASSERT(journal->j_running_transaction != NULL);
  	J_ASSERT(journal->j_committing_transaction == NULL);
  
  	commit_transaction = journal->j_running_transaction;
  	J_ASSERT(commit_transaction->t_state == T_RUNNING);
879c5e6b7   Theodore Ts'o   jbd2: convert ins...
346
  	trace_jbd2_start_commit(journal, commit_transaction);
f2a44523b   Eryu Guan   jbd2: Unify log m...
347
348
  	jbd_debug(1, "JBD2: starting commit of transaction %d
  ",
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
349
  			commit_transaction->t_tid);
a931da6ac   Theodore Ts'o   jbd2: Change j_st...
350
  	write_lock(&journal->j_state_lock);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
351
  	commit_transaction->t_state = T_LOCKED;
879c5e6b7   Theodore Ts'o   jbd2: convert ins...
352
  	trace_jbd2_commit_locking(journal, commit_transaction);
bf6993276   Theodore Ts'o   jbd2: Use tracepo...
353
354
355
356
  	stats.run.rs_wait = commit_transaction->t_max_wait;
  	stats.run.rs_locked = jiffies;
  	stats.run.rs_running = jbd2_time_diff(commit_transaction->t_start,
  					      stats.run.rs_locked);
8e85fb3f3   Johann Lombardi   jbd2: jbd2 stats ...
357

470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
358
  	spin_lock(&commit_transaction->t_handle_lock);
a51dca9cd   Theodore Ts'o   jbd2: Use atomic ...
359
  	while (atomic_read(&commit_transaction->t_updates)) {
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
360
361
362
363
  		DEFINE_WAIT(wait);
  
  		prepare_to_wait(&journal->j_wait_updates, &wait,
  					TASK_UNINTERRUPTIBLE);
a51dca9cd   Theodore Ts'o   jbd2: Use atomic ...
364
  		if (atomic_read(&commit_transaction->t_updates)) {
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
365
  			spin_unlock(&commit_transaction->t_handle_lock);
a931da6ac   Theodore Ts'o   jbd2: Change j_st...
366
  			write_unlock(&journal->j_state_lock);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
367
  			schedule();
a931da6ac   Theodore Ts'o   jbd2: Change j_st...
368
  			write_lock(&journal->j_state_lock);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
369
370
371
372
373
  			spin_lock(&commit_transaction->t_handle_lock);
  		}
  		finish_wait(&journal->j_wait_updates, &wait);
  	}
  	spin_unlock(&commit_transaction->t_handle_lock);
a51dca9cd   Theodore Ts'o   jbd2: Use atomic ...
374
  	J_ASSERT (atomic_read(&commit_transaction->t_outstanding_credits) <=
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
375
376
377
378
379
380
381
  			journal->j_max_transaction_buffers);
  
  	/*
  	 * First thing we are allowed to do is to discard any remaining
  	 * BJ_Reserved buffers.  Note, it is _not_ permissible to assume
  	 * that there are no such buffers: if a large filesystem
  	 * operation like a truncate needs to split itself over multiple
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
382
  	 * transactions, then it may try to do a jbd2_journal_restart() while
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
383
384
385
386
387
388
389
  	 * there are still BJ_Reserved buffers outstanding.  These must
  	 * be released cleanly from the current transaction.
  	 *
  	 * In this case, the filesystem must still reserve write access
  	 * again before modifying the buffer in the new transaction, but
  	 * we do not require it to remember exactly which old buffers it
  	 * has reserved.  This is consistent with the existing behaviour
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
390
  	 * that multiple jbd2_journal_get_write_access() calls to the same
25985edce   Lucas De Marchi   Fix common misspe...
391
  	 * buffer are perfectly permissible.
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
392
393
394
395
396
  	 */
  	while (commit_transaction->t_reserved_list) {
  		jh = commit_transaction->t_reserved_list;
  		JBUFFER_TRACE(jh, "reserved, unused: refile");
  		/*
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
397
  		 * A jbd2_journal_get_undo_access()+jbd2_journal_release_buffer() may
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
398
399
400
401
402
403
  		 * leave undo-committed data.
  		 */
  		if (jh->b_committed_data) {
  			struct buffer_head *bh = jh2bh(jh);
  
  			jbd_lock_bh_state(bh);
af1e76d6b   Mingming Cao   JBD2: jbd2 slab a...
404
  			jbd2_free(jh->b_committed_data, bh->b_size);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
405
406
407
  			jh->b_committed_data = NULL;
  			jbd_unlock_bh_state(bh);
  		}
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
408
  		jbd2_journal_refile_buffer(journal, jh);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
409
410
411
412
413
414
415
416
  	}
  
  	/*
  	 * Now try to drop any written-back buffers from the journal's
  	 * checkpoint lists.  We do this *before* commit because it potentially
  	 * frees some memory
  	 */
  	spin_lock(&journal->j_list_lock);
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
417
  	__jbd2_journal_clean_checkpoint_list(journal);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
418
  	spin_unlock(&journal->j_list_lock);
f2a44523b   Eryu Guan   jbd2: Unify log m...
419
420
  	jbd_debug(3, "JBD2: commit phase 1
  ");
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
421
422
423
424
  
  	/*
  	 * Switch to a new revoke table.
  	 */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
425
  	jbd2_journal_switch_revoke_table(journal);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
426

879c5e6b7   Theodore Ts'o   jbd2: convert ins...
427
  	trace_jbd2_commit_flushing(journal, commit_transaction);
bf6993276   Theodore Ts'o   jbd2: Use tracepo...
428
429
430
  	stats.run.rs_flushing = jiffies;
  	stats.run.rs_locked = jbd2_time_diff(stats.run.rs_locked,
  					     stats.run.rs_flushing);
8e85fb3f3   Johann Lombardi   jbd2: jbd2 stats ...
431

470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
432
433
434
  	commit_transaction->t_state = T_FLUSH;
  	journal->j_committing_transaction = commit_transaction;
  	journal->j_running_transaction = NULL;
e07f7183a   Josef Bacik   jbd2: improve jbd...
435
  	start_time = ktime_get();
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
436
437
  	commit_transaction->t_log_start = journal->j_head;
  	wake_up(&journal->j_wait_transaction_locked);
a931da6ac   Theodore Ts'o   jbd2: Change j_st...
438
  	write_unlock(&journal->j_state_lock);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
439

f2a44523b   Eryu Guan   jbd2: Unify log m...
440
441
  	jbd_debug(3, "JBD2: commit phase 2
  ");
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
442
443
  
  	/*
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
444
445
446
  	 * Now start flushing things to disk, in the order they appear
  	 * on the transaction lists.  Data blocks go first.
  	 */
cd1aac329   Aneesh Kumar K.V   ext4: Add ordered...
447
  	err = journal_submit_data_buffers(journal, commit_transaction);
c851ed540   Jan Kara   jbd2: Implement d...
448
449
  	if (err)
  		jbd2_journal_abort(journal, err);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
450

82f04ab47   Jens Axboe   jbd2: finish conv...
451
  	blk_start_plug(&plug);
67c457a8c   Theodore Ts'o   jbd2: use SWRITE_...
452
  	jbd2_journal_write_revoke_records(journal, commit_transaction,
82f04ab47   Jens Axboe   jbd2: finish conv...
453
454
  					  WRITE_SYNC);
  	blk_finish_plug(&plug);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
455

f2a44523b   Eryu Guan   jbd2: Unify log m...
456
457
  	jbd_debug(3, "JBD2: commit phase 2
  ");
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
458
459
  
  	/*
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
460
461
462
463
  	 * Way to go: we have now written out all of the data for a
  	 * transaction!  Now comes the tricky part: we need to write out
  	 * metadata.  Loop over the transaction's entire buffer list:
  	 */
a931da6ac   Theodore Ts'o   jbd2: Change j_st...
464
  	write_lock(&journal->j_state_lock);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
465
  	commit_transaction->t_state = T_COMMIT;
a931da6ac   Theodore Ts'o   jbd2: Change j_st...
466
  	write_unlock(&journal->j_state_lock);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
467

879c5e6b7   Theodore Ts'o   jbd2: convert ins...
468
  	trace_jbd2_commit_logging(journal, commit_transaction);
bf6993276   Theodore Ts'o   jbd2: Use tracepo...
469
470
471
  	stats.run.rs_logging = jiffies;
  	stats.run.rs_flushing = jbd2_time_diff(stats.run.rs_flushing,
  					       stats.run.rs_logging);
a51dca9cd   Theodore Ts'o   jbd2: Use atomic ...
472
473
  	stats.run.rs_blocks =
  		atomic_read(&commit_transaction->t_outstanding_credits);
bf6993276   Theodore Ts'o   jbd2: Use tracepo...
474
  	stats.run.rs_blocks_logged = 0;
8e85fb3f3   Johann Lombardi   jbd2: jbd2 stats ...
475

1dfc3220d   Josef Bacik   jbd2: fix possibl...
476
  	J_ASSERT(commit_transaction->t_nr_buffers <=
a51dca9cd   Theodore Ts'o   jbd2: Use atomic ...
477
  		 atomic_read(&commit_transaction->t_outstanding_credits));
1dfc3220d   Josef Bacik   jbd2: fix possibl...
478

87c89c232   Jan Kara   jbd2: Remove data...
479
  	err = 0;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
480
481
  	descriptor = NULL;
  	bufs = 0;
82f04ab47   Jens Axboe   jbd2: finish conv...
482
  	blk_start_plug(&plug);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
483
484
485
486
487
488
489
  	while (commit_transaction->t_buffers) {
  
  		/* Find the next buffer to be journaled... */
  
  		jh = commit_transaction->t_buffers;
  
  		/* If we're in abort mode, we just un-journal the buffer and
7ad7445f6   Hidehiro Kawai   jbd2: don't dirty...
490
  		   release it. */
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
491
492
  
  		if (is_journal_aborted(journal)) {
7ad7445f6   Hidehiro Kawai   jbd2: don't dirty...
493
  			clear_buffer_jbddirty(jh2bh(jh));
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
494
  			JBUFFER_TRACE(jh, "journal is aborting: refile");
e06c8227f   Joel Becker   jbd2: Add buffer ...
495
496
497
498
  			jbd2_buffer_abort_trigger(jh,
  						  jh->b_frozen_data ?
  						  jh->b_frozen_triggers :
  						  jh->b_triggers);
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
499
  			jbd2_journal_refile_buffer(journal, jh);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
  			/* If that was the last one, we need to clean up
  			 * any descriptor buffers which may have been
  			 * already allocated, even if we are now
  			 * aborting. */
  			if (!commit_transaction->t_buffers)
  				goto start_journal_io;
  			continue;
  		}
  
  		/* Make sure we have a descriptor block in which to
  		   record the metadata buffer. */
  
  		if (!descriptor) {
  			struct buffer_head *bh;
  
  			J_ASSERT (bufs == 0);
f2a44523b   Eryu Guan   jbd2: Unify log m...
516
517
  			jbd_debug(4, "JBD2: get descriptor
  ");
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
518

f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
519
  			descriptor = jbd2_journal_get_descriptor_buffer(journal);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
520
  			if (!descriptor) {
a7fa2baf8   Jan Kara   jbd2: fix commit ...
521
  				jbd2_journal_abort(journal, -EIO);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
522
523
524
525
  				continue;
  			}
  
  			bh = jh2bh(descriptor);
f2a44523b   Eryu Guan   jbd2: Unify log m...
526
527
  			jbd_debug(4, "JBD2: got buffer %llu (%p)
  ",
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
528
529
  				(unsigned long long)bh->b_blocknr, bh->b_data);
  			header = (journal_header_t *)&bh->b_data[0];
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
530
531
  			header->h_magic     = cpu_to_be32(JBD2_MAGIC_NUMBER);
  			header->h_blocktype = cpu_to_be32(JBD2_DESCRIPTOR_BLOCK);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
532
533
534
535
536
537
538
539
540
541
542
543
  			header->h_sequence  = cpu_to_be32(commit_transaction->t_tid);
  
  			tagp = &bh->b_data[sizeof(journal_header_t)];
  			space_left = bh->b_size - sizeof(journal_header_t);
  			first_tag = 1;
  			set_buffer_jwrite(bh);
  			set_buffer_dirty(bh);
  			wbuf[bufs++] = bh;
  
  			/* Record it so that we can wait for IO
                             completion later */
  			BUFFER_TRACE(bh, "ph3: file as descriptor");
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
544
  			jbd2_journal_file_buffer(descriptor, commit_transaction,
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
545
546
547
548
  					BJ_LogCtl);
  		}
  
  		/* Where is the buffer to be written? */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
549
  		err = jbd2_journal_next_log_block(journal, &blocknr);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
550
551
552
553
  		/* If the block mapping failed, just abandon the buffer
  		   and repeat this loop: we'll fall into the
  		   refile-on-abort condition above. */
  		if (err) {
a7fa2baf8   Jan Kara   jbd2: fix commit ...
554
  			jbd2_journal_abort(journal, err);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
555
556
557
558
559
560
  			continue;
  		}
  
  		/*
  		 * start_this_handle() uses t_outstanding_credits to determine
  		 * the free space in the log, but this counter is changed
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
561
  		 * by jbd2_journal_next_log_block() also.
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
562
  		 */
a51dca9cd   Theodore Ts'o   jbd2: Use atomic ...
563
  		atomic_dec(&commit_transaction->t_outstanding_credits);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
564
565
566
567
568
569
570
571
572
573
574
575
  
  		/* Bump b_count to prevent truncate from stumbling over
                     the shadowed buffer!  @@@ This can go if we ever get
                     rid of the BJ_IO/BJ_Shadow pairing of buffers. */
  		atomic_inc(&jh2bh(jh)->b_count);
  
  		/* Make a temporary IO buffer with which to write it out
                     (this will requeue both the metadata buffer and the
                     temporary IO buffer). new_bh goes on BJ_IO*/
  
  		set_bit(BH_JWrite, &jh2bh(jh)->b_state);
  		/*
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
576
  		 * akpm: jbd2_journal_write_metadata_buffer() sets
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
577
578
579
580
581
  		 * new_bh->b_transaction to commit_transaction.
  		 * We need to clean this up before we release new_bh
  		 * (which is of type BJ_IO)
  		 */
  		JBUFFER_TRACE(jh, "ph3: write metadata");
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
582
  		flags = jbd2_journal_write_metadata_buffer(commit_transaction,
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
583
  						      jh, &new_jh, blocknr);
e6ec116b6   Theodore Ts'o   jbd2: Add ENOMEM ...
584
585
586
587
  		if (flags < 0) {
  			jbd2_journal_abort(journal, flags);
  			continue;
  		}
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
588
589
590
591
592
593
594
595
  		set_bit(BH_JWrite, &jh2bh(new_jh)->b_state);
  		wbuf[bufs++] = jh2bh(new_jh);
  
  		/* Record the new block's tag in the current descriptor
                     buffer */
  
  		tag_flag = 0;
  		if (flags & 1)
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
596
  			tag_flag |= JBD2_FLAG_ESCAPE;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
597
  		if (!first_tag)
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
598
  			tag_flag |= JBD2_FLAG_SAME_UUID;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
599
600
  
  		tag = (journal_block_tag_t *) tagp;
b517bea1c   Zach Brown   [PATCH] 64-bit jb...
601
  		write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
602
  		tag->t_flags = cpu_to_be32(tag_flag);
b517bea1c   Zach Brown   [PATCH] 64-bit jb...
603
604
  		tagp += tag_bytes;
  		space_left -= tag_bytes;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
605
606
607
608
609
610
611
612
613
614
615
616
617
  
  		if (first_tag) {
  			memcpy (tagp, journal->j_uuid, 16);
  			tagp += 16;
  			space_left -= 16;
  			first_tag = 0;
  		}
  
  		/* If there's no more to do, or if the descriptor is full,
  		   let the IO rip! */
  
  		if (bufs == journal->j_wbufsize ||
  		    commit_transaction->t_buffers == NULL ||
b517bea1c   Zach Brown   [PATCH] 64-bit jb...
618
  		    space_left < tag_bytes + 16) {
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
619

f2a44523b   Eryu Guan   jbd2: Unify log m...
620
621
  			jbd_debug(4, "JBD2: Submit %d IOs
  ", bufs);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
622
623
624
625
  
  			/* Write an end-of-descriptor marker before
                             submitting the IOs.  "tag" still points to
                             the last tag we set up. */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
626
  			tag->t_flags |= cpu_to_be32(JBD2_FLAG_LAST_TAG);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
627
628
629
630
  
  start_journal_io:
  			for (i = 0; i < bufs; i++) {
  				struct buffer_head *bh = wbuf[i];
818d276ce   Girish Shilamkar   ext4: Add the jou...
631
632
633
634
635
636
637
638
  				/*
  				 * Compute checksum.
  				 */
  				if (JBD2_HAS_COMPAT_FEATURE(journal,
  					JBD2_FEATURE_COMPAT_CHECKSUM)) {
  					crc32_sum =
  					    jbd2_checksum_data(crc32_sum, bh);
  				}
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
639
640
641
642
  				lock_buffer(bh);
  				clear_buffer_dirty(bh);
  				set_buffer_uptodate(bh);
  				bh->b_end_io = journal_end_buffer_io_sync;
82f04ab47   Jens Axboe   jbd2: finish conv...
643
  				submit_bh(WRITE_SYNC, bh);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
644
645
  			}
  			cond_resched();
bf6993276   Theodore Ts'o   jbd2: Use tracepo...
646
  			stats.run.rs_blocks_logged += bufs;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
647
648
649
650
651
652
653
  
  			/* Force a new descriptor to be generated next
                             time round the loop. */
  			descriptor = NULL;
  			bufs = 0;
  		}
  	}
f73bee498   Jan Kara   jbd2: Modify ASYN...
654
655
656
657
658
659
660
661
662
663
  	err = journal_finish_inode_data_buffers(journal, commit_transaction);
  	if (err) {
  		printk(KERN_WARNING
  			"JBD2: Detected IO errors while flushing file data "
  		       "on %s
  ", journal->j_devname);
  		if (journal->j_flags & JBD2_ABORT_ON_SYNCDATA_ERR)
  			jbd2_journal_abort(journal, err);
  		err = 0;
  	}
bbd2be369   Jan Kara   jbd2: Add functio...
664
665
666
667
  	write_lock(&journal->j_state_lock);
  	J_ASSERT(commit_transaction->t_state == T_COMMIT);
  	commit_transaction->t_state = T_COMMIT_DFLUSH;
  	write_unlock(&journal->j_state_lock);
cc3e1bea5   Theodore Ts'o   ext4, jbd2: Add b...
668
669
670
671
672
  	/* 
  	 * If the journal is not located on the file system device,
  	 * then we must flush the file system device before we issue
  	 * the commit record
  	 */
81be12c81   Jan Kara   jbd2: fix sending...
673
  	if (commit_transaction->t_need_data_flush &&
cc3e1bea5   Theodore Ts'o   ext4, jbd2: Add b...
674
675
  	    (journal->j_fs_dev != journal->j_dev) &&
  	    (journal->j_flags & JBD2_BARRIER))
dd3932edd   Christoph Hellwig   block: remove BLK...
676
  		blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
818d276ce   Girish Shilamkar   ext4: Add the jou...
677

cc3e1bea5   Theodore Ts'o   ext4, jbd2: Add b...
678
  	/* Done it all: now write the commit record asynchronously. */
818d276ce   Girish Shilamkar   ext4: Add the jou...
679
  	if (JBD2_HAS_INCOMPAT_FEATURE(journal,
0e3d2a631   Theodore Ts'o   ext4: Fix async c...
680
  				      JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
818d276ce   Girish Shilamkar   ext4: Add the jou...
681
682
683
684
  		err = journal_submit_commit_record(journal, commit_transaction,
  						 &cbh, crc32_sum);
  		if (err)
  			__jbd2_journal_abort_hard(journal);
e9e34f4e8   Hidehiro Kawai   jbd2: don't abort...
685
  	}
c851ed540   Jan Kara   jbd2: Implement d...
686

82f04ab47   Jens Axboe   jbd2: finish conv...
687
  	blk_finish_plug(&plug);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
688
689
690
691
692
693
694
695
696
697
  	/* Lo and behold: we have just managed to send a transaction to
             the log.  Before we can commit it, wait for the IO so far to
             complete.  Control buffers being written are on the
             transaction's t_log_list queue, and metadata buffers are on
             the t_iobuf_list queue.
  
  	   Wait for the buffers in reverse order.  That way we are
  	   less likely to be woken up until all IOs have completed, and
  	   so we incur less scheduling load.
  	*/
f2a44523b   Eryu Guan   jbd2: Unify log m...
698
699
  	jbd_debug(3, "JBD2: commit phase 3
  ");
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
  
  	/*
  	 * akpm: these are BJ_IO, and j_list_lock is not needed.
  	 * See __journal_try_to_free_buffer.
  	 */
  wait_for_iobuf:
  	while (commit_transaction->t_iobuf_list != NULL) {
  		struct buffer_head *bh;
  
  		jh = commit_transaction->t_iobuf_list->b_tprev;
  		bh = jh2bh(jh);
  		if (buffer_locked(bh)) {
  			wait_on_buffer(bh);
  			goto wait_for_iobuf;
  		}
  		if (cond_resched())
  			goto wait_for_iobuf;
  
  		if (unlikely(!buffer_uptodate(bh)))
  			err = -EIO;
  
  		clear_buffer_jwrite(bh);
  
  		JBUFFER_TRACE(jh, "ph4: unfile after journal write");
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
724
  		jbd2_journal_unfile_buffer(journal, jh);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
725
726
727
  
  		/*
  		 * ->t_iobuf_list should contain only dummy buffer_heads
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
728
  		 * which were created by jbd2_journal_write_metadata_buffer().
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
729
730
  		 */
  		BUFFER_TRACE(bh, "dumping temporary bh");
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
731
  		jbd2_journal_put_journal_head(jh);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
  		__brelse(bh);
  		J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0);
  		free_buffer_head(bh);
  
  		/* We also have to unlock and free the corresponding
                     shadowed buffer */
  		jh = commit_transaction->t_shadow_list->b_tprev;
  		bh = jh2bh(jh);
  		clear_bit(BH_JWrite, &bh->b_state);
  		J_ASSERT_BH(bh, buffer_jbddirty(bh));
  
  		/* The metadata is now released for reuse, but we need
                     to remember it against this transaction so that when
                     we finally commit, we can do any checkpointing
                     required. */
  		JBUFFER_TRACE(jh, "file as BJ_Forget");
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
748
  		jbd2_journal_file_buffer(jh, commit_transaction, BJ_Forget);
229309cae   Jan Kara   jbd2: Fix forever...
749
750
751
752
753
754
755
  		/*
  		 * Wake up any transactions which were waiting for this IO to
  		 * complete. The barrier must be here so that changes by
  		 * jbd2_journal_file_buffer() take effect before wake_up_bit()
  		 * does the waitqueue check.
  		 */
  		smp_mb();
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
756
757
758
759
760
761
  		wake_up_bit(&bh->b_state, BH_Unshadow);
  		JBUFFER_TRACE(jh, "brelse shadowed buffer");
  		__brelse(bh);
  	}
  
  	J_ASSERT (commit_transaction->t_shadow_list == NULL);
f2a44523b   Eryu Guan   jbd2: Unify log m...
762
763
  	jbd_debug(3, "JBD2: commit phase 4
  ");
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
  
  	/* Here we wait for the revoke record and descriptor record buffers */
   wait_for_ctlbuf:
  	while (commit_transaction->t_log_list != NULL) {
  		struct buffer_head *bh;
  
  		jh = commit_transaction->t_log_list->b_tprev;
  		bh = jh2bh(jh);
  		if (buffer_locked(bh)) {
  			wait_on_buffer(bh);
  			goto wait_for_ctlbuf;
  		}
  		if (cond_resched())
  			goto wait_for_ctlbuf;
  
  		if (unlikely(!buffer_uptodate(bh)))
  			err = -EIO;
  
  		BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile");
  		clear_buffer_jwrite(bh);
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
784
785
  		jbd2_journal_unfile_buffer(journal, jh);
  		jbd2_journal_put_journal_head(jh);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
786
787
788
  		__brelse(bh);		/* One for getblk */
  		/* AKPM: bforget here */
  	}
77e841de8   Hidehiro Kawai   jbd2: abort when ...
789
790
  	if (err)
  		jbd2_journal_abort(journal, err);
f2a44523b   Eryu Guan   jbd2: Unify log m...
791
792
  	jbd_debug(3, "JBD2: commit phase 5
  ");
bbd2be369   Jan Kara   jbd2: Add functio...
793
794
795
796
  	write_lock(&journal->j_state_lock);
  	J_ASSERT(commit_transaction->t_state == T_COMMIT_DFLUSH);
  	commit_transaction->t_state = T_COMMIT_JFLUSH;
  	write_unlock(&journal->j_state_lock);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
797

818d276ce   Girish Shilamkar   ext4: Add the jou...
798
  	if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
0e3d2a631   Theodore Ts'o   ext4: Fix async c...
799
  				       JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
818d276ce   Girish Shilamkar   ext4: Add the jou...
800
801
802
803
804
  		err = journal_submit_commit_record(journal, commit_transaction,
  						&cbh, crc32_sum);
  		if (err)
  			__jbd2_journal_abort_hard(journal);
  	}
6cba611e6   Zhang Huan   jbd2: fix potenti...
805
  	if (cbh)
fd98496f4   Theodore Ts'o   jbd2: Add barrier...
806
  		err = journal_wait_on_commit_record(journal, cbh);
f73bee498   Jan Kara   jbd2: Modify ASYN...
807
808
809
  	if (JBD2_HAS_INCOMPAT_FEATURE(journal,
  				      JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT) &&
  	    journal->j_flags & JBD2_BARRIER) {
dd3932edd   Christoph Hellwig   block: remove BLK...
810
  		blkdev_issue_flush(journal->j_dev, GFP_KERNEL, NULL);
f73bee498   Jan Kara   jbd2: Modify ASYN...
811
  	}
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
812
813
  
  	if (err)
a7fa2baf8   Jan Kara   jbd2: fix commit ...
814
  		jbd2_journal_abort(journal, err);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
815
816
817
818
819
  
  	/* End of a transaction!  Finally, we can do checkpoint
             processing: any buffers committed as a result of this
             transaction can be removed from any checkpoint list it was on
             before. */
f2a44523b   Eryu Guan   jbd2: Unify log m...
820
821
  	jbd_debug(3, "JBD2: commit phase 6
  ");
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
822

c851ed540   Jan Kara   jbd2: Implement d...
823
  	J_ASSERT(list_empty(&commit_transaction->t_inode_list));
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
  	J_ASSERT(commit_transaction->t_buffers == NULL);
  	J_ASSERT(commit_transaction->t_checkpoint_list == NULL);
  	J_ASSERT(commit_transaction->t_iobuf_list == NULL);
  	J_ASSERT(commit_transaction->t_shadow_list == NULL);
  	J_ASSERT(commit_transaction->t_log_list == NULL);
  
  restart_loop:
  	/*
  	 * As there are other places (journal_unmap_buffer()) adding buffers
  	 * to this list we have to be careful and hold the j_list_lock.
  	 */
  	spin_lock(&journal->j_list_lock);
  	while (commit_transaction->t_forget) {
  		transaction_t *cp_transaction;
  		struct buffer_head *bh;
de1b79413   Jan Kara   jbd2: Fix oops in...
839
  		int try_to_free = 0;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
840
841
842
843
  
  		jh = commit_transaction->t_forget;
  		spin_unlock(&journal->j_list_lock);
  		bh = jh2bh(jh);
de1b79413   Jan Kara   jbd2: Fix oops in...
844
845
846
847
848
  		/*
  		 * Get a reference so that bh cannot be freed before we are
  		 * done with it.
  		 */
  		get_bh(bh);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
849
  		jbd_lock_bh_state(bh);
23e2af351   dingdinghua   jbd2: clean up an...
850
  		J_ASSERT_JH(jh,	jh->b_transaction == commit_transaction);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
851
852
853
854
855
856
857
858
859
860
  
  		/*
  		 * If there is undo-protected committed data against
  		 * this buffer, then we can remove it now.  If it is a
  		 * buffer needing such protection, the old frozen_data
  		 * field now points to a committed version of the
  		 * buffer, so rotate that field to the new committed
  		 * data.
  		 *
  		 * Otherwise, we can just throw away the frozen data now.
e06c8227f   Joel Becker   jbd2: Add buffer ...
861
862
863
  		 *
  		 * We also know that the frozen data has already fired
  		 * its triggers if they exist, so we can clear that too.
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
864
865
  		 */
  		if (jh->b_committed_data) {
af1e76d6b   Mingming Cao   JBD2: jbd2 slab a...
866
  			jbd2_free(jh->b_committed_data, bh->b_size);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
867
868
869
870
  			jh->b_committed_data = NULL;
  			if (jh->b_frozen_data) {
  				jh->b_committed_data = jh->b_frozen_data;
  				jh->b_frozen_data = NULL;
e06c8227f   Joel Becker   jbd2: Add buffer ...
871
  				jh->b_frozen_triggers = NULL;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
872
873
  			}
  		} else if (jh->b_frozen_data) {
af1e76d6b   Mingming Cao   JBD2: jbd2 slab a...
874
  			jbd2_free(jh->b_frozen_data, bh->b_size);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
875
  			jh->b_frozen_data = NULL;
e06c8227f   Joel Becker   jbd2: Add buffer ...
876
  			jh->b_frozen_triggers = NULL;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
877
878
879
880
881
882
  		}
  
  		spin_lock(&journal->j_list_lock);
  		cp_transaction = jh->b_cp_transaction;
  		if (cp_transaction) {
  			JBUFFER_TRACE(jh, "remove from old cp transaction");
8e85fb3f3   Johann Lombardi   jbd2: jbd2 stats ...
883
  			cp_transaction->t_chp_stats.cs_dropped++;
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
884
  			__jbd2_journal_remove_checkpoint(jh);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
885
886
887
888
  		}
  
  		/* Only re-checkpoint the buffer_head if it is marked
  		 * dirty.  If the buffer was added to the BJ_Forget list
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
889
  		 * by jbd2_journal_forget, it may no longer be dirty and
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
890
891
892
893
894
895
  		 * there's no point in keeping a checkpoint record for
  		 * it. */
  
  		/* A buffer which has been freed while still being
  		 * journaled by a previous transaction may end up still
  		 * being dirty here, but we want to avoid writing back
ba869023e   dingdinghua   jbd2: delay disca...
896
897
898
899
  		 * that buffer in the future after the "add to orphan"
  		 * operation has been committed.  That's not only a performance
  		 * gain, it also stops aliasing problems if the buffer is
  		 * left behind for writeback and gets reallocated for another
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
900
  		 * use in a different page. */
ba869023e   dingdinghua   jbd2: delay disca...
901
  		if (buffer_freed(bh) && !jh->b_next_transaction) {
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
902
903
904
905
906
907
  			clear_buffer_freed(bh);
  			clear_buffer_jbddirty(bh);
  		}
  
  		if (buffer_jbddirty(bh)) {
  			JBUFFER_TRACE(jh, "add to new checkpointing trans");
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
908
  			__jbd2_journal_insert_checkpoint(jh, commit_transaction);
7ad7445f6   Hidehiro Kawai   jbd2: don't dirty...
909
910
  			if (is_journal_aborted(journal))
  				clear_buffer_jbddirty(bh);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
911
912
  		} else {
  			J_ASSERT_BH(bh, !buffer_dirty(bh));
de1b79413   Jan Kara   jbd2: Fix oops in...
913
914
  			/*
  			 * The buffer on BJ_Forget list and not jbddirty means
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
915
916
917
918
919
  			 * it has been freed by this transaction and hence it
  			 * could not have been reallocated until this
  			 * transaction has committed. *BUT* it could be
  			 * reallocated once we have written all the data to
  			 * disk and before we process the buffer on BJ_Forget
de1b79413   Jan Kara   jbd2: Fix oops in...
920
921
922
923
  			 * list.
  			 */
  			if (!jh->b_next_transaction)
  				try_to_free = 1;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
924
  		}
de1b79413   Jan Kara   jbd2: Fix oops in...
925
926
927
928
929
930
931
  		JBUFFER_TRACE(jh, "refile or unfile buffer");
  		__jbd2_journal_refile_buffer(jh);
  		jbd_unlock_bh_state(bh);
  		if (try_to_free)
  			release_buffer_page(bh);	/* Drops bh reference */
  		else
  			__brelse(bh);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
932
933
934
935
  		cond_resched_lock(&journal->j_list_lock);
  	}
  	spin_unlock(&journal->j_list_lock);
  	/*
f5a7a6b0d   Jan Kara   jbd2: Fix asserti...
936
937
938
939
  	 * This is a bit sleazy.  We use j_list_lock to protect transition
  	 * of a transaction into T_FINISHED state and calling
  	 * __jbd2_journal_drop_transaction(). Otherwise we could race with
  	 * other checkpointing code processing the transaction...
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
940
  	 */
a931da6ac   Theodore Ts'o   jbd2: Change j_st...
941
  	write_lock(&journal->j_state_lock);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
942
943
944
945
946
947
948
  	spin_lock(&journal->j_list_lock);
  	/*
  	 * Now recheck if some buffers did not get attached to the transaction
  	 * while the lock was dropped...
  	 */
  	if (commit_transaction->t_forget) {
  		spin_unlock(&journal->j_list_lock);
a931da6ac   Theodore Ts'o   jbd2: Change j_st...
949
  		write_unlock(&journal->j_state_lock);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
950
951
952
953
  		goto restart_loop;
  	}
  
  	/* Done with this transaction! */
f2a44523b   Eryu Guan   jbd2: Unify log m...
954
955
  	jbd_debug(3, "JBD2: commit phase 7
  ");
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
956

bbd2be369   Jan Kara   jbd2: Add functio...
957
  	J_ASSERT(commit_transaction->t_state == T_COMMIT_JFLUSH);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
958

8e85fb3f3   Johann Lombardi   jbd2: jbd2 stats ...
959
  	commit_transaction->t_start = jiffies;
bf6993276   Theodore Ts'o   jbd2: Use tracepo...
960
961
  	stats.run.rs_logging = jbd2_time_diff(stats.run.rs_logging,
  					      commit_transaction->t_start);
8e85fb3f3   Johann Lombardi   jbd2: jbd2 stats ...
962
963
  
  	/*
bf6993276   Theodore Ts'o   jbd2: Use tracepo...
964
  	 * File the transaction statistics
8e85fb3f3   Johann Lombardi   jbd2: jbd2 stats ...
965
  	 */
8e85fb3f3   Johann Lombardi   jbd2: jbd2 stats ...
966
  	stats.ts_tid = commit_transaction->t_tid;
8dd420466   Theodore Ts'o   jbd2: Remove t_ha...
967
968
  	stats.run.rs_handle_count =
  		atomic_read(&commit_transaction->t_handle_count);
bf6993276   Theodore Ts'o   jbd2: Use tracepo...
969
970
  	trace_jbd2_run_stats(journal->j_fs_dev->bd_dev,
  			     commit_transaction->t_tid, &stats.run);
8e85fb3f3   Johann Lombardi   jbd2: jbd2 stats ...
971
972
973
974
  
  	/*
  	 * Calculate overall stats
  	 */
bf6993276   Theodore Ts'o   jbd2: Use tracepo...
975
  	spin_lock(&journal->j_history_lock);
8e85fb3f3   Johann Lombardi   jbd2: jbd2 stats ...
976
  	journal->j_stats.ts_tid++;
bf6993276   Theodore Ts'o   jbd2: Use tracepo...
977
978
979
980
981
982
983
984
  	journal->j_stats.run.rs_wait += stats.run.rs_wait;
  	journal->j_stats.run.rs_running += stats.run.rs_running;
  	journal->j_stats.run.rs_locked += stats.run.rs_locked;
  	journal->j_stats.run.rs_flushing += stats.run.rs_flushing;
  	journal->j_stats.run.rs_logging += stats.run.rs_logging;
  	journal->j_stats.run.rs_handle_count += stats.run.rs_handle_count;
  	journal->j_stats.run.rs_blocks += stats.run.rs_blocks;
  	journal->j_stats.run.rs_blocks_logged += stats.run.rs_blocks_logged;
8e85fb3f3   Johann Lombardi   jbd2: jbd2 stats ...
985
  	spin_unlock(&journal->j_history_lock);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
986
987
988
989
  	commit_transaction->t_state = T_FINISHED;
  	J_ASSERT(commit_transaction == journal->j_committing_transaction);
  	journal->j_commit_sequence = commit_transaction->t_tid;
  	journal->j_committing_transaction = NULL;
e07f7183a   Josef Bacik   jbd2: improve jbd...
990
  	commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
991

e07f7183a   Josef Bacik   jbd2: improve jbd...
992
993
994
995
996
997
998
999
1000
  	/*
  	 * weight the running average higher than the latest commit time (3:1)
  	 * so we don't react too strongly to vast changes in the commit time
  	 */
  	if (likely(journal->j_average_commit_time))
  		journal->j_average_commit_time = (commit_time +
  				journal->j_average_commit_time*3) / 4;
  	else
  		journal->j_average_commit_time = commit_time;
a931da6ac   Theodore Ts'o   jbd2: Change j_st...
1001
  	write_unlock(&journal->j_state_lock);
6c20ec850   Theodore Ts'o   jbd2: Call the co...
1002

f89b77950   Jan Kara   jbd2 commit: fix ...
1003
1004
  	if (commit_transaction->t_checkpoint_list == NULL &&
  	    commit_transaction->t_checkpoint_io_list == NULL) {
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
1005
  		__jbd2_journal_drop_transaction(journal, commit_transaction);
fb68407b0   Aneesh Kumar K.V   jbd2: Call journa...
1006
  		to_free = 1;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
  	} else {
  		if (journal->j_checkpoint_transactions == NULL) {
  			journal->j_checkpoint_transactions = commit_transaction;
  			commit_transaction->t_cpnext = commit_transaction;
  			commit_transaction->t_cpprev = commit_transaction;
  		} else {
  			commit_transaction->t_cpnext =
  				journal->j_checkpoint_transactions;
  			commit_transaction->t_cpprev =
  				commit_transaction->t_cpnext->t_cpprev;
  			commit_transaction->t_cpnext->t_cpprev =
  				commit_transaction;
  			commit_transaction->t_cpprev->t_cpnext =
  				commit_transaction;
  		}
  	}
  	spin_unlock(&journal->j_list_lock);
fb68407b0   Aneesh Kumar K.V   jbd2: Call journa...
1024
1025
  	if (journal->j_commit_callback)
  		journal->j_commit_callback(journal, commit_transaction);
879c5e6b7   Theodore Ts'o   jbd2: convert ins...
1026
  	trace_jbd2_end_commit(journal, commit_transaction);
f2a44523b   Eryu Guan   jbd2: Unify log m...
1027
1028
  	jbd_debug(1, "JBD2: commit %d complete, head %d
  ",
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1029
  		  journal->j_commit_sequence, journal->j_tail_sequence);
fb68407b0   Aneesh Kumar K.V   jbd2: Call journa...
1030
1031
  	if (to_free)
  		kfree(commit_transaction);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1032
1033
1034
  
  	wake_up(&journal->j_wait_done_commit);
  }