// SPDX-License-Identifier: GPL-2.0+
/*
 * linux/fs/jbd2/commit.c
 *
 * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
 *
 * Copyright 1998 Red Hat corp --- All Rights Reserved
 *
 * Journal commit routines for the generic filesystem journaling code;
 * part of the ext2fs journaling system.
 */

#include <linux/time.h>
#include <linux/fs.h>
#include <linux/jbd2.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/jiffies.h>
#include <linux/crc32.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/bitops.h>
#include <trace/events/jbd2.h>

/*
 * IO end handler for temporary buffer_heads handling writes to the journal.
 */
static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
{
	struct buffer_head *orig_bh = bh->b_private;

	BUFFER_TRACE(bh, "");
	if (uptodate)
		set_buffer_uptodate(bh);
	else
		clear_buffer_uptodate(bh);
	if (orig_bh) {
		clear_bit_unlock(BH_Shadow, &orig_bh->b_state);
		smp_mb__after_atomic();
		wake_up_bit(&orig_bh->b_state, BH_Shadow);
	}
	unlock_buffer(bh);
}

/*
 * When an ext4 file is truncated, it is possible that some pages are not
 * successfully freed, because they are attached to a committing transaction.
 * After the transaction commits, these pages are left on the LRU, with no
 * ->mapping, and with attached buffers.  These pages are trivially reclaimable
 * by the VM, but their apparent absence upsets the VM accounting, and it makes
 * the numbers in /proc/meminfo look odd.
 *
 * So here, we have a buffer which has just come off the forget list.  Look to
 * see if we can strip all buffers from the backing page.
 *
 * Called under lock_journal(), and possibly under journal_datalist_lock.  The
 * caller provided us with a ref against the buffer, and we drop that here.
 */
static void release_buffer_page(struct buffer_head *bh)
{
	struct page *page;

	if (buffer_dirty(bh))
		goto nope;
	if (atomic_read(&bh->b_count) != 1)
		goto nope;
	page = bh->b_page;
	if (!page)
		goto nope;
	if (page->mapping)
		goto nope;

	/* OK, it's a truncated page */
	if (!trylock_page(page))
		goto nope;

	get_page(page);
	__brelse(bh);
	try_to_free_buffers(page);
	unlock_page(page);
	put_page(page);
	return;

nope:
	__brelse(bh);
}

static void jbd2_commit_block_csum_set(journal_t *j, struct buffer_head *bh)
{
	struct commit_header *h;
	__u32 csum;

	if (!jbd2_journal_has_csum_v2or3(j))
		return;

	h = (struct commit_header *)(bh->b_data);
	h->h_chksum_type = 0;
	h->h_chksum_size = 0;
	h->h_chksum[0] = 0;
	csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize);
	h->h_chksum[0] = cpu_to_be32(csum);
}

/*
 * Done it all: now submit the commit record.  We should have
 * cleaned up our previous buffers by now, so if we are in abort
 * mode we can now just skip the rest of the journal write
 * entirely.
 *
 * Returns 1 if the journal needs to be aborted or 0 on success
 */
static int journal_submit_commit_record(journal_t *journal,
					transaction_t *commit_transaction,
					struct buffer_head **cbh,
					__u32 crc32_sum)
{
	struct commit_header *tmp;
	struct buffer_head *bh;
	int ret;
	struct timespec64 now;

	*cbh = NULL;

	if (is_journal_aborted(journal))
		return 0;

	bh = jbd2_journal_get_descriptor_buffer(commit_transaction,
						JBD2_COMMIT_BLOCK);
	if (!bh)
		return 1;

	tmp = (struct commit_header *)bh->b_data;
	ktime_get_coarse_real_ts64(&now);
	tmp->h_commit_sec = cpu_to_be64(now.tv_sec);
	tmp->h_commit_nsec = cpu_to_be32(now.tv_nsec);

	if (jbd2_has_feature_checksum(journal)) {
		tmp->h_chksum_type = JBD2_CRC32_CHKSUM;
		tmp->h_chksum_size = JBD2_CRC32_CHKSUM_SIZE;
		tmp->h_chksum[0] = cpu_to_be32(crc32_sum);
	}
	jbd2_commit_block_csum_set(journal, bh);

	BUFFER_TRACE(bh, "submit commit block");
	lock_buffer(bh);
	clear_buffer_dirty(bh);
	set_buffer_uptodate(bh);
	bh->b_end_io = journal_end_buffer_io_sync;

	if (journal->j_flags & JBD2_BARRIER &&
	    !jbd2_has_feature_async_commit(journal))
		ret = submit_bh(REQ_OP_WRITE,
				REQ_SYNC | REQ_PREFLUSH | REQ_FUA, bh);
	else
		ret = submit_bh(REQ_OP_WRITE, REQ_SYNC, bh);

	*cbh = bh;
	return ret;
}

/*
 * This function along with journal_submit_commit_record
 * allows to write the commit record asynchronously.
 */
static int journal_wait_on_commit_record(journal_t *journal,
					 struct buffer_head *bh)
{
	int ret = 0;

	clear_buffer_dirty(bh);
	wait_on_buffer(bh);

	if (unlikely(!buffer_uptodate(bh)))
		ret = -EIO;
	put_bh(bh);	/* One for getblk() */

	return ret;
}

/*
 * write the filemap data using writepage() address_space_operations.
 * We don't do block allocation here even for delalloc. We don't
 * use writepages() because with delayed allocation we may be doing
 * block allocation in writepages().
 */
int jbd2_journal_submit_inode_data_buffers(struct jbd2_inode *jinode)
{
	struct address_space *mapping = jinode->i_vfs_inode->i_mapping;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = mapping->nrpages * 2,
		.range_start = jinode->i_dirty_start,
		.range_end = jinode->i_dirty_end,
	};

	/*
	 * submit the inode data buffers. We use writepage
	 * instead of writepages. Because writepages can do
	 * block allocation with delalloc. We need to write
	 * only allocated blocks here.
	 */
	return generic_writepages(mapping, &wbc);
}

/* Send all the data buffers related to an inode */
int jbd2_submit_inode_data(struct jbd2_inode *jinode)
{
	if (!jinode || !(jinode->i_flags & JI_WRITE_DATA))
		return 0;

	trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
	return jbd2_journal_submit_inode_data_buffers(jinode);
}
EXPORT_SYMBOL(jbd2_submit_inode_data);

int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode)
{
	if (!jinode || !(jinode->i_flags & JI_WAIT_DATA) ||
		!jinode->i_vfs_inode || !jinode->i_vfs_inode->i_mapping)
		return 0;
	return filemap_fdatawait_range_keep_errors(
		jinode->i_vfs_inode->i_mapping, jinode->i_dirty_start,
		jinode->i_dirty_end);
}
EXPORT_SYMBOL(jbd2_wait_inode_data);

/*
 * Submit all the data buffers of inode associated with the transaction to
 * disk.
 *
 * We are in a committing transaction. Therefore no new inode can be added to
 * our inode list. We use JI_COMMIT_RUNNING flag to protect inode we currently
 * operate on from being released while we write out pages.
 */
static int journal_submit_data_buffers(journal_t *journal,
		transaction_t *commit_transaction)
{
	struct jbd2_inode *jinode;
	int err, ret = 0;

	spin_lock(&journal->j_list_lock);
	list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
		if (!(jinode->i_flags & JI_WRITE_DATA))
			continue;
		jinode->i_flags |= JI_COMMIT_RUNNING;
		spin_unlock(&journal->j_list_lock);
		/* submit the inode data buffers. */
		trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
		if (journal->j_submit_inode_data_buffers) {
			err = journal->j_submit_inode_data_buffers(jinode);
			if (!ret)
				ret = err;
		}
		spin_lock(&journal->j_list_lock);
		J_ASSERT(jinode->i_transaction == commit_transaction);
		jinode->i_flags &= ~JI_COMMIT_RUNNING;
		smp_mb();
		wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
	}
	spin_unlock(&journal->j_list_lock);
	return ret;
}

int jbd2_journal_finish_inode_data_buffers(struct jbd2_inode *jinode)
{
	struct address_space *mapping = jinode->i_vfs_inode->i_mapping;

	return filemap_fdatawait_range_keep_errors(mapping,
						   jinode->i_dirty_start,
						   jinode->i_dirty_end);
}

/*
 * Wait for data submitted for writeout, refile inodes to proper
 * transaction if needed.
 *
 */
static int journal_finish_inode_data_buffers(journal_t *journal,
		transaction_t *commit_transaction)
{
	struct jbd2_inode *jinode, *next_i;
	int err, ret = 0;

	/* For locking, see the comment in journal_submit_data_buffers() */
	spin_lock(&journal->j_list_lock);
	list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
		if (!(jinode->i_flags & JI_WAIT_DATA))
			continue;
		jinode->i_flags |= JI_COMMIT_RUNNING;
		spin_unlock(&journal->j_list_lock);
		/* wait for the inode data buffers writeout. */
		if (journal->j_finish_inode_data_buffers) {
			err = journal->j_finish_inode_data_buffers(jinode);
			if (!ret)
				ret = err;
		}
		spin_lock(&journal->j_list_lock);
		jinode->i_flags &= ~JI_COMMIT_RUNNING;
		smp_mb();
		wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
	}

	/* Now refile inode to proper lists */
	list_for_each_entry_safe(jinode, next_i,
				 &commit_transaction->t_inode_list, i_list) {
		list_del(&jinode->i_list);
		if (jinode->i_next_transaction) {
			jinode->i_transaction = jinode->i_next_transaction;
			jinode->i_next_transaction = NULL;
			list_add(&jinode->i_list,
				&jinode->i_transaction->t_inode_list);
		} else {
			jinode->i_transaction = NULL;
			jinode->i_dirty_start = 0;
			jinode->i_dirty_end = 0;
		}
	}
	spin_unlock(&journal->j_list_lock);

	return ret;
}

static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh)
{
	struct page *page = bh->b_page;
	char *addr;
	__u32 checksum;

	addr = kmap_atomic(page);
	checksum = crc32_be(crc32_sum,
		(void *)(addr + offset_in_page(bh->b_data)), bh->b_size);
	kunmap_atomic(addr);

	return checksum;
}

static void write_tag_block(journal_t *j, journal_block_tag_t *tag,
				   unsigned long long block)
{
	tag->t_blocknr = cpu_to_be32(block & (u32)~0);
	if (jbd2_has_feature_64bit(j))
		tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1);
}

static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag,
				    struct buffer_head *bh, __u32 sequence)
{
	journal_block_tag3_t *tag3 = (journal_block_tag3_t *)tag;
	struct page *page = bh->b_page;
	__u8 *addr;
	__u32 csum32;
	__be32 seq;

	if (!jbd2_journal_has_csum_v2or3(j))
		return;

	seq = cpu_to_be32(sequence);
	addr = kmap_atomic(page);
	csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq));
	csum32 = jbd2_chksum(j, csum32, addr + offset_in_page(bh->b_data),
			     bh->b_size);
	kunmap_atomic(addr);

	if (jbd2_has_feature_csum3(j))
		tag3->t_checksum = cpu_to_be32(csum32);
	else
		tag->t_checksum = cpu_to_be16(csum32);
}

/*
 * jbd2_journal_commit_transaction
 *
 * The primary function for committing a transaction to the log.  This
 * function is called by the journal thread to begin a complete commit.
 */
void jbd2_journal_commit_transaction(journal_t *journal)
{
	struct transaction_stats_s stats;
	transaction_t *commit_transaction;
	struct journal_head *jh;
	struct buffer_head *descriptor;
	struct buffer_head **wbuf = journal->j_wbuf;
	int bufs;
	int flags;
	int err;
	unsigned long long blocknr;
	ktime_t start_time;
	u64 commit_time;
	char *tagp = NULL;
	journal_block_tag_t *tag = NULL;
	int space_left = 0;
	int first_tag = 0;
	int tag_flag;
	int i;
	int tag_bytes = journal_tag_bytes(journal);
	struct buffer_head *cbh = NULL; /* For transactional checksums */
	__u32 crc32_sum = ~0;
	struct blk_plug plug;
	/* Tail of the journal */
	unsigned long first_block;
	tid_t first_tid;
	int update_tail;
	int csum_size = 0;
	LIST_HEAD(io_bufs);
	LIST_HEAD(log_bufs);

	if (jbd2_journal_has_csum_v2or3(journal))
		csum_size = sizeof(struct jbd2_journal_block_tail);

	/*
	 * First job: lock down the current transaction and wait for
	 * all outstanding updates to complete.
	 */

	/* Do we need to erase the effects of a prior jbd2_journal_flush? */
	if (journal->j_flags & JBD2_FLUSHED) {
		jbd_debug(3, "super block updated\n");
		mutex_lock_io(&journal->j_checkpoint_mutex);
		/*
		 * We hold j_checkpoint_mutex so tail cannot change under us.
		 * We don't need any special data guarantees for writing sb
		 * since journal is empty and it is ok for write to be
		 * flushed only with transaction commit.
		 */
		jbd2_journal_update_sb_log_tail(journal,
						journal->j_tail_sequence,
						journal->j_tail,
						REQ_SYNC);
		mutex_unlock(&journal->j_checkpoint_mutex);
	} else {
		jbd_debug(3, "superblock not updated\n");
	}

	J_ASSERT(journal->j_running_transaction != NULL);
	J_ASSERT(journal->j_committing_transaction == NULL);

	write_lock(&journal->j_state_lock);
	journal->j_flags |= JBD2_FULL_COMMIT_ONGOING;
	while (journal->j_flags & JBD2_FAST_COMMIT_ONGOING) {
		DEFINE_WAIT(wait);

		prepare_to_wait(&journal->j_fc_wait, &wait,
				TASK_UNINTERRUPTIBLE);
		write_unlock(&journal->j_state_lock);
		schedule();
		write_lock(&journal->j_state_lock);
		finish_wait(&journal->j_fc_wait, &wait);
		/*
		 * TODO: by blocking fast commits here, we are increasing
		 * fsync() latency slightly. Strictly speaking, we don't need
		 * to block fast commits until the transaction enters T_FLUSH
		 * state. So an optimization is possible where we block new fast
		 * commits here and wait for existing ones to complete
		 * just before we enter T_FLUSH. That way, the existing fast
		 * commits and this full commit can proceed parallely.
		 */
	}
	write_unlock(&journal->j_state_lock);

	commit_transaction = journal->j_running_transaction;

	trace_jbd2_start_commit(journal, commit_transaction);
	jbd_debug(1, "JBD2: starting commit of transaction %d\n",
			commit_transaction->t_tid);

	write_lock(&journal->j_state_lock);
	journal->j_fc_off = 0;
	J_ASSERT(commit_transaction->t_state == T_RUNNING);
	commit_transaction->t_state = T_LOCKED;

	trace_jbd2_commit_locking(journal, commit_transaction);
	stats.run.rs_wait = commit_transaction->t_max_wait;
	stats.run.rs_request_delay = 0;
	stats.run.rs_locked = jiffies;
	if (commit_transaction->t_requested)
		stats.run.rs_request_delay =
			jbd2_time_diff(commit_transaction->t_requested,
				       stats.run.rs_locked);
	stats.run.rs_running = jbd2_time_diff(commit_transaction->t_start,
					      stats.run.rs_locked);

	spin_lock(&commit_transaction->t_handle_lock);
	while (atomic_read(&commit_transaction->t_updates)) {
		DEFINE_WAIT(wait);

		prepare_to_wait(&journal->j_wait_updates, &wait,
					TASK_UNINTERRUPTIBLE);
		if (atomic_read(&commit_transaction->t_updates)) {
			spin_unlock(&commit_transaction->t_handle_lock);
			write_unlock(&journal->j_state_lock);
			schedule();
			write_lock(&journal->j_state_lock);
			spin_lock(&commit_transaction->t_handle_lock);
		}
		finish_wait(&journal->j_wait_updates, &wait);
	}
	spin_unlock(&commit_transaction->t_handle_lock);
	commit_transaction->t_state = T_SWITCH;
	write_unlock(&journal->j_state_lock);

	J_ASSERT (atomic_read(&commit_transaction->t_outstanding_credits) <=
		  journal->j_max_transaction_buffers);

	/*
	 * First thing we are allowed to do is to discard any remaining
	 * BJ_Reserved buffers.  Note, it is _not_ permissible to assume
	 * that there are no such buffers: if a large filesystem
	 * operation like a truncate needs to split itself over multiple
	 * transactions, then it may try to do a jbd2_journal_restart() while
	 * there are still BJ_Reserved buffers outstanding.  These must
	 * be released cleanly from the current transaction.
	 *
	 * In this case, the filesystem must still reserve write access
	 * again before modifying the buffer in the new transaction, but
	 * we do not require it to remember exactly which old buffers it
	 * has reserved.  This is consistent with the existing behaviour
	 * that multiple jbd2_journal_get_write_access() calls to the same
	 * buffer are perfectly permissible.
	 */
	while (commit_transaction->t_reserved_list) {
		jh = commit_transaction->t_reserved_list;
		JBUFFER_TRACE(jh, "reserved, unused: refile");
		/*
		 * A jbd2_journal_get_undo_access()+jbd2_journal_release_buffer() may
		 * leave undo-committed data.
		 */
		if (jh->b_committed_data) {
			struct buffer_head *bh = jh2bh(jh);

			spin_lock(&jh->b_state_lock);
			jbd2_free(jh->b_committed_data, bh->b_size);
			jh->b_committed_data = NULL;
			spin_unlock(&jh->b_state_lock);
		}
		jbd2_journal_refile_buffer(journal, jh);
	}

	/*
	 * Now try to drop any written-back buffers from the journal's
	 * checkpoint lists.  We do this *before* commit because it potentially
	 * frees some memory
	 */
	spin_lock(&journal->j_list_lock);
	__jbd2_journal_clean_checkpoint_list(journal, false);
	spin_unlock(&journal->j_list_lock);

	jbd_debug(3, "JBD2: commit phase 1\n");

	/*
	 * Clear revoked flag to reflect there is no revoked buffers
	 * in the next transaction which is going to be started.
	 */
	jbd2_clear_buffer_revoked_flags(journal);

	/*
	 * Switch to a new revoke table.
	 */
	jbd2_journal_switch_revoke_table(journal);

	/*
	 * Reserved credits cannot be claimed anymore, free them
	 */
	atomic_sub(atomic_read(&journal->j_reserved_credits),
		   &commit_transaction->t_outstanding_credits);

	write_lock(&journal->j_state_lock);
	trace_jbd2_commit_flushing(journal, commit_transaction);
	stats.run.rs_flushing = jiffies;
	stats.run.rs_locked = jbd2_time_diff(stats.run.rs_locked,
					     stats.run.rs_flushing);

	commit_transaction->t_state = T_FLUSH;
	journal->j_committing_transaction = commit_transaction;
	journal->j_running_transaction = NULL;
	start_time = ktime_get();
	commit_transaction->t_log_start = journal->j_head;
	wake_up(&journal->j_wait_transaction_locked);
	write_unlock(&journal->j_state_lock);

	jbd_debug(3, "JBD2: commit phase 2a\n");

	/*
	 * Now start flushing things to disk, in the order they appear
	 * on the transaction lists.  Data blocks go first.
	 */
	err = journal_submit_data_buffers(journal, commit_transaction);
	if (err)
		jbd2_journal_abort(journal, err);

	blk_start_plug(&plug);
	jbd2_journal_write_revoke_records(commit_transaction, &log_bufs);

	jbd_debug(3, "JBD2: commit phase 2b\n");

	/*
	 * Way to go: we have now written out all of the data for a
	 * transaction!  Now comes the tricky part: we need to write out
	 * metadata.  Loop over the transaction's entire buffer list:
	 */
	write_lock(&journal->j_state_lock);
	commit_transaction->t_state = T_COMMIT;
	write_unlock(&journal->j_state_lock);

	trace_jbd2_commit_logging(journal, commit_transaction);
	stats.run.rs_logging = jiffies;
	stats.run.rs_flushing = jbd2_time_diff(stats.run.rs_flushing,
					       stats.run.rs_logging);
	stats.run.rs_blocks = commit_transaction->t_nr_buffers;
	stats.run.rs_blocks_logged = 0;

	J_ASSERT(commit_transaction->t_nr_buffers <=
		 atomic_read(&commit_transaction->t_outstanding_credits));

	err = 0;
	bufs = 0;
	descriptor = NULL;
	while (commit_transaction->t_buffers) {

		/* Find the next buffer to be journaled... */

		jh = commit_transaction->t_buffers;

		/* If we're in abort mode, we just un-journal the buffer and
		   release it. */

		if (is_journal_aborted(journal)) {
			clear_buffer_jbddirty(jh2bh(jh));
			JBUFFER_TRACE(jh, "journal is aborting: refile");
			jbd2_buffer_abort_trigger(jh,
						  jh->b_frozen_data ?
						  jh->b_frozen_triggers :
						  jh->b_triggers);
			jbd2_journal_refile_buffer(journal, jh);
			/* If that was the last one, we need to clean up
			 * any descriptor buffers which may have been
			 * already allocated, even if we are now
			 * aborting. */
			if (!commit_transaction->t_buffers)
				goto start_journal_io;
			continue;
		}

		/* Make sure we have a descriptor block in which to
		   record the metadata buffer. */

		if (!descriptor) {
			J_ASSERT (bufs == 0);

			jbd_debug(4, "JBD2: get descriptor\n");

			descriptor = jbd2_journal_get_descriptor_buffer(
							commit_transaction,
							JBD2_DESCRIPTOR_BLOCK);
			if (!descriptor) {
				jbd2_journal_abort(journal, -EIO);
				continue;
			}

			jbd_debug(4, "JBD2: got buffer %llu (%p)\n",
				(unsigned long long)descriptor->b_blocknr,
				descriptor->b_data);
			tagp = &descriptor->b_data[sizeof(journal_header_t)];
			space_left = descriptor->b_size -
						sizeof(journal_header_t);
			first_tag = 1;
			set_buffer_jwrite(descriptor);
			set_buffer_dirty(descriptor);
			wbuf[bufs++] = descriptor;

			/* Record it so that we can wait for IO
			   completion later */
			BUFFER_TRACE(descriptor, "ph3: file as descriptor");
			jbd2_file_log_bh(&log_bufs, descriptor);
		}

		/* Where is the buffer to be written? */
470decc61
|
656 657 658 |
} /* Where is the buffer to be written? */ |
f7f4bccb7
|
659 |
err = jbd2_journal_next_log_block(journal, &blocknr); |
470decc61
|
660 661 662 663 |
/* If the block mapping failed, just abandon the buffer and repeat this loop: we'll fall into the refile-on-abort condition above. */ if (err) { |
a7fa2baf8
|
664 |
jbd2_journal_abort(journal, err); |
470decc61
|
665 666 667 668 669 |
continue; } /* * start_this_handle() uses t_outstanding_credits to determine |
0db458894
|
670 |
* the free space in the log. |
470decc61
|
671 |
*/ |
a51dca9cd
|
672 |
atomic_dec(&commit_transaction->t_outstanding_credits); |
470decc61
|
673 674 675 |
/* Bump b_count to prevent truncate from stumbling over the shadowed buffer! @@@ This can go if we ever get |
f5113effc
|
676 |
rid of the shadow pairing of buffers. */ |
470decc61
|
677 |
atomic_inc(&jh2bh(jh)->b_count); |
470decc61
|
678 |
/* |
f5113effc
|
679 680 |
* Make a temporary IO buffer with which to write it out * (this will requeue the metadata buffer to BJ_Shadow). |
470decc61
|
681 |
*/ |
f5113effc
|
682 |
set_bit(BH_JWrite, &jh2bh(jh)->b_state); |
470decc61
|
683 |
JBUFFER_TRACE(jh, "ph3: write metadata"); |
f7f4bccb7
|
684 |
flags = jbd2_journal_write_metadata_buffer(commit_transaction, |
f5113effc
|
685 |
jh, &wbuf[bufs], blocknr); |
e6ec116b6
|
686 687 688 689 |
if (flags < 0) { jbd2_journal_abort(journal, flags); continue; } |
f5113effc
|
690 |
jbd2_file_log_bh(&io_bufs, wbuf[bufs]); |
470decc61
|
691 692 693 694 695 696 |
/* Record the new block's tag in the current descriptor buffer */ tag_flag = 0; if (flags & 1) |
f7f4bccb7
|
697 |
tag_flag |= JBD2_FLAG_ESCAPE; |
470decc61
|
698 |
if (!first_tag) |
f7f4bccb7
|
699 |
tag_flag |= JBD2_FLAG_SAME_UUID; |
470decc61
|
700 701 |
tag = (journal_block_tag_t *) tagp; |
db9ee2203
|
702 |
write_tag_block(journal, tag, jh2bh(jh)->b_blocknr); |
8f888ef84
|
703 |
tag->t_flags = cpu_to_be16(tag_flag); |
f5113effc
|
704 |
jbd2_block_tag_csum_set(journal, tag, wbuf[bufs], |
c39008759
|
705 |
commit_transaction->t_tid); |
b517bea1c
|
706 707 |
tagp += tag_bytes; space_left -= tag_bytes; |
f5113effc
|
708 |
bufs++; |
470decc61
|
709 710 711 712 713 714 715 716 717 718 719 720 721 |
if (first_tag) { memcpy (tagp, journal->j_uuid, 16); tagp += 16; space_left -= 16; first_tag = 0; } /* If there's no more to do, or if the descriptor is full, let the IO rip! */ if (bufs == journal->j_wbufsize || commit_transaction->t_buffers == NULL || |
3caa487f5
|
722 |
space_left < tag_bytes + 16 + csum_size) { |
470decc61
|
723 |
|

			jbd_debug(4, "JBD2: Submit %d IOs\n", bufs);

			/* Write an end-of-descriptor marker before
			   submitting the IOs.  "tag" still points to
			   the last tag we set up. */

			tag->t_flags |= cpu_to_be16(JBD2_FLAG_LAST_TAG);
start_journal_io:
			if (descriptor)
				jbd2_descriptor_block_csum_set(journal,
							       descriptor);

			for (i = 0; i < bufs; i++) {
				struct buffer_head *bh = wbuf[i];
				/*
				 * Compute checksum.
				 */
				if (jbd2_has_feature_checksum(journal)) {
					crc32_sum =
					    jbd2_checksum_data(crc32_sum, bh);
				}

				lock_buffer(bh);
				clear_buffer_dirty(bh);
				set_buffer_uptodate(bh);
				bh->b_end_io = journal_end_buffer_io_sync;
				submit_bh(REQ_OP_WRITE, REQ_SYNC, bh);
			}
			cond_resched();

			/* Force a new descriptor to be generated next
			   time round the loop. */
			descriptor = NULL;
			bufs = 0;
		}
	}

	err = journal_finish_inode_data_buffers(journal, commit_transaction);
	if (err) {
		printk(KERN_WARNING
			"JBD2: Detected IO errors while flushing file data "
		       "on %s\n", journal->j_devname);
		if (journal->j_flags & JBD2_ABORT_ON_SYNCDATA_ERR)
			jbd2_journal_abort(journal, err);
		err = 0;
	}

	/*
	 * Get current oldest transaction in the log before we issue flush
	 * to the filesystem device. After the flush we can be sure that
	 * blocks of all older transactions are checkpointed to persistent
	 * storage and we will be safe to update journal start in the
	 * superblock with the numbers we get here.
	 */
	update_tail =
		jbd2_journal_get_log_tail(journal, &first_tid, &first_block);

	write_lock(&journal->j_state_lock);
	if (update_tail) {
		long freed = first_block - journal->j_tail;

		if (first_block < journal->j_tail)
			freed += journal->j_last - journal->j_first;
		/* Update tail only if we free significant amount of space */
		if (freed < jbd2_journal_get_max_txn_bufs(journal))
			update_tail = 0;
	}
	J_ASSERT(commit_transaction->t_state == T_COMMIT);
	commit_transaction->t_state = T_COMMIT_DFLUSH;
	write_unlock(&journal->j_state_lock);

	/*
	 * If the journal is not located on the file system device,
	 * then we must flush the file system device before we issue
	 * the commit record
	 */
	if (commit_transaction->t_need_data_flush &&
	    (journal->j_fs_dev != journal->j_dev) &&
	    (journal->j_flags & JBD2_BARRIER))
		blkdev_issue_flush(journal->j_fs_dev, GFP_NOFS);

	/* Done it all: now write the commit record asynchronously. */
	if (jbd2_has_feature_async_commit(journal)) {
		err = journal_submit_commit_record(journal, commit_transaction,
						 &cbh, crc32_sum);
		if (err)
			jbd2_journal_abort(journal, err);
	}

	blk_finish_plug(&plug);

	/* Lo and behold: we have just managed to send a transaction to
	   the log.  Before we can commit it, wait for the IO so far to
	   complete.  Control buffers being written are on the
	   transaction's t_log_list queue, and metadata buffers are on
	   the io_bufs list.

	   Wait for the buffers in reverse order.  That way we are less
	   likely to be woken up until all IOs have completed, and so we
	   incur less scheduling load.
	*/

	jbd_debug(3, "JBD2: commit phase 3\n");

	while (!list_empty(&io_bufs)) {
		struct buffer_head *bh = list_entry(io_bufs.prev,
						    struct buffer_head,
						    b_assoc_buffers);

		wait_on_buffer(bh);
		cond_resched();

		if (unlikely(!buffer_uptodate(bh)))
			err = -EIO;
		jbd2_unfile_log_bh(bh);
		stats.run.rs_blocks_logged++;

		/*
		 * The list contains temporary buffer heads created by
		 * jbd2_journal_write_metadata_buffer().
		 */
		BUFFER_TRACE(bh, "dumping temporary bh");
		__brelse(bh);
		J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0);
		free_buffer_head(bh);

		/* We also have to refile the corresponding shadowed buffer */
		jh = commit_transaction->t_shadow_list->b_tprev;
		bh = jh2bh(jh);
		clear_buffer_jwrite(bh);
		J_ASSERT_BH(bh, buffer_jbddirty(bh));
		J_ASSERT_BH(bh, !buffer_shadow(bh));

		/* The metadata is now released for reuse, but we need
		   to remember it against this transaction so that when
		   we finally commit, we can do any checkpointing
		   required. */
		JBUFFER_TRACE(jh, "file as BJ_Forget");
		jbd2_journal_file_buffer(jh, commit_transaction, BJ_Forget);
		JBUFFER_TRACE(jh, "brelse shadowed buffer");
		__brelse(bh);
	}

	J_ASSERT (commit_transaction->t_shadow_list == NULL);

	jbd_debug(3, "JBD2: commit phase 4\n");

	/* Here we wait for the revoke record and descriptor record buffers */
	while (!list_empty(&log_bufs)) {
		struct buffer_head *bh;

		bh = list_entry(log_bufs.prev, struct buffer_head, b_assoc_buffers);
		wait_on_buffer(bh);
		cond_resched();

		if (unlikely(!buffer_uptodate(bh)))
			err = -EIO;

		BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile");
		clear_buffer_jwrite(bh);
		jbd2_unfile_log_bh(bh);
		stats.run.rs_blocks_logged++;
		__brelse(bh);		/* One for getblk */
		/* AKPM: bforget here */
	}

	if (err)
		jbd2_journal_abort(journal, err);

	jbd_debug(3, "JBD2: commit phase 5\n");
	write_lock(&journal->j_state_lock);
	J_ASSERT(commit_transaction->t_state == T_COMMIT_DFLUSH);
	commit_transaction->t_state = T_COMMIT_JFLUSH;
	write_unlock(&journal->j_state_lock);

	if (!jbd2_has_feature_async_commit(journal)) {
		err = journal_submit_commit_record(journal, commit_transaction,
						&cbh, crc32_sum);
		if (err)
			jbd2_journal_abort(journal, err);
	}
	if (cbh)
		err = journal_wait_on_commit_record(journal, cbh);
	stats.run.rs_blocks_logged++;
	if (jbd2_has_feature_async_commit(journal) &&
	    journal->j_flags & JBD2_BARRIER) {
		blkdev_issue_flush(journal->j_dev, GFP_NOFS);
	}

	if (err)
		jbd2_journal_abort(journal, err);

	WARN_ON_ONCE(
		atomic_read(&commit_transaction->t_outstanding_credits) < 0);

	/*
	 * Now disk caches for filesystem device are flushed so we are safe to
	 * erase checkpointed transactions from the log by updating journal
	 * superblock.
	 */
	if (update_tail)
		jbd2_update_log_tail(journal, first_tid, first_block);

	/* End of a transaction!  Finally, we can do checkpoint
	   processing: any buffers committed as a result of this
	   transaction can be removed from any checkpoint list it was on
	   before. */

	jbd_debug(3, "JBD2: commit phase 6\n");

	J_ASSERT(list_empty(&commit_transaction->t_inode_list));
	J_ASSERT(commit_transaction->t_buffers == NULL);
	J_ASSERT(commit_transaction->t_checkpoint_list == NULL);
	J_ASSERT(commit_transaction->t_shadow_list == NULL);

restart_loop:
	/*
	 * As there are other places (journal_unmap_buffer()) adding buffers
	 * to this list we have to be careful and hold the j_list_lock.
	 */
	spin_lock(&journal->j_list_lock);
	while (commit_transaction->t_forget) {
		transaction_t *cp_transaction;
		struct buffer_head *bh;
		int try_to_free = 0;
		bool drop_ref;

		jh = commit_transaction->t_forget;
		spin_unlock(&journal->j_list_lock);
		bh = jh2bh(jh);
		/*
		 * Get a reference so that bh cannot be freed before we are
		 * done with it.
		 */
		get_bh(bh);
		spin_lock(&jh->b_state_lock);
		J_ASSERT_JH(jh,	jh->b_transaction == commit_transaction);

		/*
		 * If there is undo-protected committed data against
		 * this buffer, then we can remove it now.  If it is a
		 * buffer needing such protection, the old frozen_data
		 * field now points to a committed version of the
		 * buffer, so rotate that field to the new committed
		 * data.
		 *
		 * Otherwise, we can just throw away the frozen data now.
		 *
		 * We also know that the frozen data has already fired
		 * its triggers if they exist, so we can clear that too.
		 */
		if (jh->b_committed_data) {
			jbd2_free(jh->b_committed_data, bh->b_size);
			jh->b_committed_data = NULL;
			if (jh->b_frozen_data) {
				jh->b_committed_data = jh->b_frozen_data;
				jh->b_frozen_data = NULL;
				jh->b_frozen_triggers = NULL;
			}
		} else if (jh->b_frozen_data) {
			jbd2_free(jh->b_frozen_data, bh->b_size);
			jh->b_frozen_data = NULL;
			jh->b_frozen_triggers = NULL;
		}

		spin_lock(&journal->j_list_lock);
		cp_transaction = jh->b_cp_transaction;
		if (cp_transaction) {
			JBUFFER_TRACE(jh, "remove from old cp transaction");
			cp_transaction->t_chp_stats.cs_dropped++;
			__jbd2_journal_remove_checkpoint(jh);
		}

		/* Only re-checkpoint the buffer_head if it is marked
		 * dirty.  If the buffer was added to the BJ_Forget list
		 * by jbd2_journal_forget, it may no longer be dirty and
		 * there's no point in keeping a checkpoint record for
		 * it. */

		/*
		 * A buffer which has been freed while still being journaled
		 * by a previous transaction, refile the buffer to BJ_Forget of
		 * the running transaction. If the just committed transaction
		 * contains "add to orphan" operation, we can completely
		 * invalidate the buffer now. We are rather through in that
		 * since the buffer may be still accessible when blocksize <
		 * pagesize and it is attached to the last partial page.
		 */
		if (buffer_freed(bh) && !jh->b_next_transaction) {
			struct address_space *mapping;

			clear_buffer_freed(bh);
			clear_buffer_jbddirty(bh);

			/*
			 * Block device buffers need to stay mapped all the
			 * time, so it is enough to clear buffer_jbddirty and
			 * buffer_freed bits. For the file mapping buffers (i.e.
			 * journalled data) we need to unmap buffer and clear
			 * more bits. We also need to be careful about the check
			 * because the data page mapping can get cleared under
			 * our hands. Note that if mapping == NULL, we don't
			 * need to make buffer unmapped because the page is
			 * already detached from the mapping and buffers cannot
			 * get reused.
			 */
			mapping = READ_ONCE(bh->b_page->mapping);
			if (mapping && !sb_is_blkdev_sb(mapping->host->i_sb)) {
				clear_buffer_mapped(bh);
				clear_buffer_new(bh);
				clear_buffer_req(bh);
				bh->b_bdev = NULL;
			}
		}

		if (buffer_jbddirty(bh)) {
			JBUFFER_TRACE(jh, "add to new checkpointing trans");
			__jbd2_journal_insert_checkpoint(jh, commit_transaction);
			if (is_journal_aborted(journal))
				clear_buffer_jbddirty(bh);
		} else {
			J_ASSERT_BH(bh, !buffer_dirty(bh));
			/*
			 * The buffer on BJ_Forget list and not jbddirty means
			 * it has been freed by this transaction and hence it
			 * could not have been reallocated until this
			 * transaction has committed. *BUT* it could be
			 * reallocated once we have written all the data to
			 * disk and before we process the buffer on BJ_Forget
			 * list.
			 */
			if (!jh->b_next_transaction)
				try_to_free = 1;
		}
		JBUFFER_TRACE(jh, "refile or unfile buffer");
		drop_ref = __jbd2_journal_refile_buffer(jh);
		spin_unlock(&jh->b_state_lock);
		if (drop_ref)
			jbd2_journal_put_journal_head(jh);
		if (try_to_free)
			release_buffer_page(bh);	/* Drops bh reference */
		else
			__brelse(bh);
		cond_resched_lock(&journal->j_list_lock);
	}
	spin_unlock(&journal->j_list_lock);
	/*
	 * This is a bit sleazy.  We use j_list_lock to protect transition
	 * of a transaction into T_FINISHED state and calling
	 * __jbd2_journal_drop_transaction(). Otherwise we could race with
	 * other checkpointing code processing the transaction...
	 */
	write_lock(&journal->j_state_lock);
	spin_lock(&journal->j_list_lock);
	/*
	 * Now recheck if some buffers did not get attached to the transaction
	 * while the lock was dropped...
	 */
	if (commit_transaction->t_forget) {
		spin_unlock(&journal->j_list_lock);
		write_unlock(&journal->j_state_lock);
		goto restart_loop;
	}

	/* Add the transaction to the checkpoint list
	 * __journal_remove_checkpoint() can not destroy transaction
	 * under us because it is not marked as T_FINISHED yet */
	if (journal->j_checkpoint_transactions == NULL) {
		journal->j_checkpoint_transactions = commit_transaction;
		commit_transaction->t_cpnext = commit_transaction;
		commit_transaction->t_cpprev = commit_transaction;
	} else {
		commit_transaction->t_cpnext =
			journal->j_checkpoint_transactions;
		commit_transaction->t_cpprev =
			commit_transaction->t_cpnext->t_cpprev;
		commit_transaction->t_cpnext->t_cpprev =
			commit_transaction;
		commit_transaction->t_cpprev->t_cpnext =
				commit_transaction;
	}
	spin_unlock(&journal->j_list_lock);
	/* Done with this transaction! */

	jbd_debug(3, "JBD2: commit phase 7\n");

	J_ASSERT(commit_transaction->t_state == T_COMMIT_JFLUSH);

	commit_transaction->t_start = jiffies;
	stats.run.rs_logging = jbd2_time_diff(stats.run.rs_logging,
					      commit_transaction->t_start);

	/*
	 * File the transaction statistics
	 */
	stats.ts_tid = commit_transaction->t_tid;
	stats.run.rs_handle_count =
		atomic_read(&commit_transaction->t_handle_count);
	trace_jbd2_run_stats(journal->j_fs_dev->bd_dev,
			     commit_transaction->t_tid, &stats.run);
	stats.ts_requested = (commit_transaction->t_requested) ? 1 : 0;

	commit_transaction->t_state = T_COMMIT_CALLBACK;
	J_ASSERT(commit_transaction == journal->j_committing_transaction);
	journal->j_commit_sequence = commit_transaction->t_tid;
	journal->j_committing_transaction = NULL;
	commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));

	/*
	 * weight the commit time higher than the average time so we don't
	 * react too strongly to vast changes in the commit time
	 */
	if (likely(journal->j_average_commit_time))
		journal->j_average_commit_time = (commit_time +
				journal->j_average_commit_time*3) / 4;
	else
		journal->j_average_commit_time = commit_time;

	write_unlock(&journal->j_state_lock);

	if (journal->j_commit_callback)
		journal->j_commit_callback(journal, commit_transaction);
	if (journal->j_fc_cleanup_callback)
		journal->j_fc_cleanup_callback(journal, 1);

	trace_jbd2_end_commit(journal, commit_transaction);
	jbd_debug(1, "JBD2: commit %d complete, head %d\n",
		  journal->j_commit_sequence, journal->j_tail_sequence);

	write_lock(&journal->j_state_lock);
	journal->j_flags &= ~JBD2_FULL_COMMIT_ONGOING;
	journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING;
	spin_lock(&journal->j_list_lock);
	commit_transaction->t_state = T_FINISHED;
	/* Check if the transaction can be dropped now that we are finished */
	if (commit_transaction->t_checkpoint_list == NULL &&
	    commit_transaction->t_checkpoint_io_list == NULL) {
		__jbd2_journal_drop_transaction(journal, commit_transaction);
		jbd2_journal_free_transaction(commit_transaction);
	}
	spin_unlock(&journal->j_list_lock);
	write_unlock(&journal->j_state_lock);
	wake_up(&journal->j_wait_done_commit);
	wake_up(&journal->j_fc_wait);

	/*
	 * Calculate overall stats
	 */
	spin_lock(&journal->j_history_lock);
	journal->j_stats.ts_tid++;
	journal->j_stats.ts_requested += stats.ts_requested;
	journal->j_stats.run.rs_wait += stats.run.rs_wait;
	journal->j_stats.run.rs_request_delay += stats.run.rs_request_delay;
	journal->j_stats.run.rs_running += stats.run.rs_running;
	journal->j_stats.run.rs_locked += stats.run.rs_locked;
	journal->j_stats.run.rs_flushing += stats.run.rs_flushing;
	journal->j_stats.run.rs_logging += stats.run.rs_logging;
	journal->j_stats.run.rs_handle_count += stats.run.rs_handle_count;
	journal->j_stats.run.rs_blocks += stats.run.rs_blocks;
	journal->j_stats.run.rs_blocks_logged += stats.run.rs_blocks_logged;
	spin_unlock(&journal->j_history_lock);
}