Blame view

fs/jbd/revoke.c 21.3 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
  /*
588626996   Uwe Kleine-König   fix file specific...
2
   * linux/fs/jbd/revoke.c
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
3
   *
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4
5
6
7
8
9
10
11
12
13
14
15
16
17
   * Written by Stephen C. Tweedie <sct@redhat.com>, 2000
   *
   * Copyright 2000 Red Hat corp --- All Rights Reserved
   *
   * This file is part of the Linux kernel and is made available under
   * the terms of the GNU General Public License, version 2, or at your
   * option, any later version, incorporated herein by reference.
   *
   * Journal revoke routines for the generic filesystem journaling code;
   * part of the ext2fs journaling system.
   *
   * Revoke is the mechanism used to prevent old log records for deleted
   * metadata from being replayed on top of newer data using the same
   * blocks.  The revoke mechanism is used in two separate places:
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
18
   *
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
19
20
   * + Commit: during commit we write the entire list of the current
   *   transaction's revoked blocks to the journal
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
21
   *
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
22
23
24
25
26
27
28
29
30
31
   * + Recovery: during recovery we record the transaction ID of all
   *   revoked blocks.  If there are multiple revoke records in the log
   *   for a single block, only the last one counts, and if there is a log
   *   entry for a block beyond the last revoke, then that log entry still
   *   gets replayed.
   *
   * We can get interactions between revokes and new log data within a
   * single transaction:
   *
   * Block is revoked and then journaled:
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
32
   *   The desired end result is the journaling of the new block, so we
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
33
34
35
36
37
38
39
40
41
42
43
   *   cancel the revoke before the transaction commits.
   *
   * Block is journaled and then revoked:
   *   The revoke must take precedence over the write of the block, so we
   *   need either to cancel the journal entry or to write the revoke
   *   later in the log than the log block.  In this case, we choose the
   *   latter: journaling a block cancels any revoke record for that block
   *   in the current transaction, so any revoke for that block in the
   *   transaction must have happened after the block was journaled and so
   *   the revoke must take precedence.
   *
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
44
   * Block is revoked and then written as data:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
45
46
47
48
49
   *   The data write is allowed to succeed, but the revoke is _not_
   *   cancelled.  We still need to prevent old log records from
   *   overwriting the new data.  We don't even need to clear the revoke
   *   bit here.
   *
8c111b3f5   Yongqiang Yang   jbd: clear revoke...
50
51
52
53
   * We cache revoke status of a buffer in the current transaction in b_state
   * bits.  As the name says, revokevalid flag indicates that the cached revoke
   * status of a buffer is valid and we can rely on the cached status.
   *
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
54
55
56
57
58
59
60
   * Revoke information on buffers is a tri-state value:
   *
   * RevokeValid clear:	no cached revoke status, need to look it up
   * RevokeValid set, Revoked clear:
   *			buffer has not been revoked, and cancel_revoke
   *			need do nothing.
   * RevokeValid set, Revoked set:
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
61
   *			buffer has been revoked.
324338794   Jan Kara   jbd: update locki...
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
   *
   * Locking rules:
   * We keep two hash tables of revoke records. One hashtable belongs to the
   * running transaction (is pointed to by journal->j_revoke), the other one
   * belongs to the committing transaction. Accesses to the second hash table
   * happen only from the kjournald and no other thread touches this table.  Also
   * journal_switch_revoke_table() which switches which hashtable belongs to the
   * running and which to the committing transaction is called only from
   * kjournald. Therefore we need no locks when accessing the hashtable belonging
   * to the committing transaction.
   *
   * All users operating on the hash table belonging to the running transaction
   * have a handle to the transaction. Therefore they are safe from kjournald
   * switching hash tables under them. For operations on the lists of entries in
   * the hash table j_revoke_lock is used.
   *
25985edce   Lucas De Marchi   Fix common misspe...
78
   * Finally, also replay code uses the hash tables but at this moment no one else
324338794   Jan Kara   jbd: update locki...
79
80
   * can touch them (filesystem isn't mounted yet) and hence no locking is
   * needed.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
81
82
83
84
85
86
87
88
89
90
91
   */
  
  #ifndef __KERNEL__
  #include "jfs_user.h"
  #else
  #include <linux/time.h>
  #include <linux/fs.h>
  #include <linux/jbd.h>
  #include <linux/errno.h>
  #include <linux/slab.h>
  #include <linux/list.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
92
  #include <linux/init.h>
38d726d15   Theodore Ts'o   jbd: use SWRITE_S...
93
  #include <linux/bio.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
94
  #endif
f482394cc   vignesh babu   is_power_of_2(): jbd
95
  #include <linux/log2.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
96

e18b890bb   Christoph Lameter   [PATCH] slab: rem...
97
98
  /* Slab caches for revoke records and revoke tables.  Both are created by
   * journal_init_revoke_caches() and released (and reset to NULL) by
   * journal_destroy_revoke_caches(). */
  static struct kmem_cache *revoke_record_cache;
  static struct kmem_cache *revoke_table_cache;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
99
100
101
102
  
  /* Each revoke record represents one single revoked block.  During
     journal replay, this involves recording the transaction ID of the
     last transaction to revoke this block. */
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
103
  struct jbd_revoke_record_s
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
104
105
106
  {
  	struct list_head  hash;
  	tid_t		  sequence;	/* Used for recovery only */
9c28cbcce   Jan Kara   jbd: Journal bloc...
107
  	unsigned int	  blocknr;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
108
109
110
111
112
113
114
115
  };
  
  
  /* The revoke table is just a simple hash table of revoke records. */
  struct jbd_revoke_table_s {
  	/* Number of buckets.  It is conceivable that we might want a
  	 * larger hash table for recovery.  Must be a power of two. */
  	int hash_size;
  	/* log2 of hash_size, computed once when the table is allocated. */
  	int hash_shift;
  	/* Array of hash_size list heads, one per bucket. */
  	struct list_head *hash_table;
  };
  
  
  #ifdef __KERNEL__
  /* Forward declarations of the commit-time helpers defined later in
   * this file. */
  static void write_one_revoke_record(journal_t *journal,
  				    transaction_t *transaction,
  				    struct journal_head **descriptorp,
  				    int *offsetp,
  				    struct jbd_revoke_record_s *record,
  				    int write_op);
  static void flush_descriptor(journal_t *journal,
  			     struct journal_head *descriptor,
  			     int offset, int write_op);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
127
128
129
130
131
  #endif
  
  /* Utility functions to maintain the revoke table */
  
  /* Borrowed from buffer.c: this is a tried and tested block hash function */
9c28cbcce   Jan Kara   jbd: Journal bloc...
132
  /*
   * Shift @value left by @shift bits, or right by -@shift bits when @shift
   * is negative.  C leaves a shift by a negative count undefined, so the
   * direction has to be selected explicitly.
   */
  static inline unsigned int revoke_hash_shift(unsigned int value, int shift)
  {
  	return shift >= 0 ? value << shift : value >> -shift;
  }
  
  /*
   * Map a block number to a bucket index of the journal's current revoke
   * table (journal->j_revoke).
   *
   * The mixing terms use "hash_shift - 6" and "hash_shift - 12" as shift
   * counts.  For any table with fewer than 4096 buckets (hash_shift < 12)
   * at least one of them is negative, which used to be an undefined shift.
   * revoke_hash_shift() turns those cases into a right shift; for larger
   * tables the result is exactly what the plain left shifts produced.
   *
   * Returns an index in the range [0, hash_size - 1]; this relies on
   * hash_size being a power of two.
   */
  static inline int hash(journal_t *journal, unsigned int block)
  {
  	struct jbd_revoke_table_s *table = journal->j_revoke;
  	int hash_shift = table->hash_shift;
  	unsigned int mixed;
  
  	mixed = revoke_hash_shift(block, hash_shift - 6) ^
  		(block >> 13) ^
  		revoke_hash_shift(block, hash_shift - 12);
  
  	return mixed & (table->hash_size - 1);
  }
9c28cbcce   Jan Kara   jbd: Journal bloc...
141
  static int insert_revoke_hash(journal_t *journal, unsigned int blocknr,
022a4a7bb   Adrian Bunk   [PATCH] fs/jbd/: ...
142
  			      tid_t seq)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
  {
  	struct list_head *hash_list;
  	struct jbd_revoke_record_s *record;
  
  repeat:
  	record = kmem_cache_alloc(revoke_record_cache, GFP_NOFS);
  	if (!record)
  		goto oom;
  
  	record->sequence = seq;
  	record->blocknr = blocknr;
  	hash_list = &journal->j_revoke->hash_table[hash(journal, blocknr)];
  	spin_lock(&journal->j_revoke_lock);
  	list_add(&record->hash, hash_list);
  	spin_unlock(&journal->j_revoke_lock);
  	return 0;
  
  oom:
  	if (!journal_oom_retry)
  		return -ENOMEM;
08fc99bfc   Harvey Harrison   jbd: replace rema...
163
164
  	jbd_debug(1, "ENOMEM in %s, retrying
  ", __func__);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
165
166
167
168
169
170
171
  	yield();
  	goto repeat;
  }
  
  /* Find a revoke record in the journal's hash table. */
  
  /*
   * Walk the bucket that @blocknr hashes to in the journal's current revoke
   * table and return the first record for that block, or NULL if there is
   * none.  The bucket is traversed under j_revoke_lock; the lock is dropped
   * again before returning.
   */
  static struct jbd_revoke_record_s *find_revoke_record(journal_t *journal,
  						      unsigned int blocknr)
  {
  	struct list_head *bucket;
  	struct list_head *pos;
  	struct jbd_revoke_record_s *found = NULL;
  
  	bucket = &journal->j_revoke->hash_table[hash(journal, blocknr)];
  
  	spin_lock(&journal->j_revoke_lock);
  	for (pos = bucket->next; pos != bucket; pos = pos->next) {
  		/* The list_head is the first member of the record. */
  		struct jbd_revoke_record_s *candidate =
  			(struct jbd_revoke_record_s *) pos;
  
  		if (candidate->blocknr == blocknr) {
  			found = candidate;
  			break;
  		}
  	}
  	spin_unlock(&journal->j_revoke_lock);
  	return found;
  }
1984bb763   Duane Griffin   jbd: tidy up revo...
191
192
193
194
195
196
197
198
199
200
201
  /* Destroy the cache *cachep refers to, if any, and reset it to NULL. */
  static void destroy_revoke_cache(struct kmem_cache **cachep)
  {
  	if (!*cachep)
  		return;
  	kmem_cache_destroy(*cachep);
  	*cachep = NULL;
  }
  
  /*
   * Release both revoke slab caches.  Either cache may already be NULL, so
   * this can be used to unwind a partially completed initialisation.
   */
  void journal_destroy_revoke_caches(void)
  {
  	destroy_revoke_cache(&revoke_record_cache);
  	destroy_revoke_cache(&revoke_table_cache);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
202
203
  /*
   * Create the slab caches used for revoke records and revoke tables.
   * Both cache pointers must be NULL on entry.
   *
   * Returns 0 on success or -ENOMEM if either cache could not be created;
   * on failure no cache is left allocated.
   */
  int __init journal_init_revoke_caches(void)
  {
  	J_ASSERT(!revoke_record_cache);
  	J_ASSERT(!revoke_table_cache);
  
  	revoke_record_cache = kmem_cache_create("revoke_record",
  			sizeof(struct jbd_revoke_record_s), 0,
  			SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY, NULL);
  	if (!revoke_record_cache)
  		return -ENOMEM;
  
  	revoke_table_cache = kmem_cache_create("revoke_table",
  			sizeof(struct jbd_revoke_table_s), 0,
  			SLAB_TEMPORARY, NULL);
  	if (!revoke_table_cache) {
  		/* Undo the record cache created above. */
  		journal_destroy_revoke_caches();
  		return -ENOMEM;
  	}
  
  	return 0;
  }
f4d79ca2f   Duane Griffin   jbd: eliminate du...
226
  static struct jbd_revoke_table_s *journal_init_revoke_table(int hash_size)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
227
  {
f4d79ca2f   Duane Griffin   jbd: eliminate du...
228
229
230
  	int shift = 0;
  	int tmp = hash_size;
  	struct jbd_revoke_table_s *table;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
231

f4d79ca2f   Duane Griffin   jbd: eliminate du...
232
233
234
  	table = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
  	if (!table)
  		goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
235

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
236
237
  	while((tmp >>= 1UL) != 0UL)
  		shift++;
f4d79ca2f   Duane Griffin   jbd: eliminate du...
238
239
240
  	table->hash_size = hash_size;
  	table->hash_shift = shift;
  	table->hash_table =
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
241
  		kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
f4d79ca2f   Duane Griffin   jbd: eliminate du...
242
243
244
245
  	if (!table->hash_table) {
  		kmem_cache_free(revoke_table_cache, table);
  		table = NULL;
  		goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
246
247
248
  	}
  
  	for (tmp = 0; tmp < hash_size; tmp++)
f4d79ca2f   Duane Griffin   jbd: eliminate du...
249
  		INIT_LIST_HEAD(&table->hash_table[tmp]);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
250

f4d79ca2f   Duane Griffin   jbd: eliminate du...
251
252
253
254
255
256
257
258
259
260
261
262
  out:
  	return table;
  }
  
  /*
   * Free a revoke table and its bucket array.  Every bucket is asserted to
   * be empty first; the records themselves are not freed here.
   */
  static void journal_destroy_revoke_table(struct jbd_revoke_table_s *table)
  {
  	struct list_head *hash_list;
  	int bucket = 0;
  
  	while (bucket < table->hash_size) {
  		hash_list = &table->hash_table[bucket];
  		J_ASSERT(list_empty(hash_list));
  		bucket++;
  	}
  
  	kfree(table->hash_table);
  	kmem_cache_free(revoke_table_cache, table);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
267

f4d79ca2f   Duane Griffin   jbd: eliminate du...
268
269
270
271
  /*
   * Initialise the revoke tables for a given journal to a given size.
   *
   * Two tables of @hash_size buckets are allocated (hash_size must be a
   * power of two); the second one becomes the current table, journal->j_revoke.
   * The journal's j_revoke_lock is initialised as well.
   *
   * Returns 0 on success or -ENOMEM if a table could not be allocated.  On
   * failure both j_revoke_table[] slots are NULL, so a later
   * journal_destroy_revoke() will not touch freed memory.
   */
  int journal_init_revoke(journal_t *journal, int hash_size)
  {
  	J_ASSERT(journal->j_revoke_table[0] == NULL);
  	J_ASSERT(is_power_of_2(hash_size));
  
  	journal->j_revoke_table[0] = journal_init_revoke_table(hash_size);
  	if (!journal->j_revoke_table[0])
  		goto fail0;
  
  	journal->j_revoke_table[1] = journal_init_revoke_table(hash_size);
  	if (!journal->j_revoke_table[1])
  		goto fail1;
  
  	journal->j_revoke = journal->j_revoke_table[1];
  
  	spin_lock_init(&journal->j_revoke_lock);
  
  	return 0;
  
  fail1:
  	journal_destroy_revoke_table(journal->j_revoke_table[0]);
  	/* Do not leave a dangling pointer to the table just freed. */
  	journal->j_revoke_table[0] = NULL;
  fail0:
  	return -ENOMEM;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
293

f4d79ca2f   Duane Griffin   jbd: eliminate du...
294
  /* Destroy a journal's revoke table.  The table must already be empty! */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
295
296
  /*
   * Detach the current revoke table from the journal and free whichever of
   * the two revoke tables exist.
   */
  void journal_destroy_revoke(journal_t *journal)
  {
  	int i;
  
  	journal->j_revoke = NULL;
  	for (i = 0; i < 2; i++) {
  		struct jbd_revoke_table_s *table = journal->j_revoke_table[i];
  
  		if (table)
  			journal_destroy_revoke_table(table);
  	}
  }
  
  
  #ifdef __KERNEL__
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
306
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
307
308
309
310
   * journal_revoke: revoke a given buffer_head from the journal.  This
   * prevents the block from being replayed during recovery if we take a
   * crash after this current transaction commits.  Any subsequent
   * metadata writes of the buffer in this transaction cancel the
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
311
   * revoke.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
312
313
314
315
316
   *
   * Note that this call may block --- it is up to the caller to make
   * sure that there are no further calls to journal_write_metadata
   * before the revoke is complete.  In ext3, this implies calling the
   * revoke before clearing the block bitmap when we are deleting
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
317
   * metadata.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
318
319
320
   *
   * Revoke performs a journal_forget on any buffer_head passed in as a
   * parameter, but does _not_ forget the buffer_head if the bh was only
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
321
   * found implicitly.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
322
323
324
325
326
327
328
   *
   * bh_in may not be a journalled buffer - it may have come off
   * the hash tables without an attached journal_head.
   *
   * If bh_in is non-zero, journal_revoke() will decrement its b_count
   * by one.
   */
9c28cbcce   Jan Kara   jbd: Journal bloc...
329
  /*
   * @handle:  handle whose transaction records the revoke
   * @blocknr: number of the block being revoked
   * @bh_in:   buffer_head for the block, or NULL to look one up with
   *           __find_get_block()
   *
   * Returns -EINVAL if the revoke feature cannot be set on the journal,
   * -EIO if the buffer is already marked revoked, and otherwise the result
   * of insert_revoke_hash() (0 or -ENOMEM).
   */
  int journal_revoke(handle_t *handle, unsigned int blocknr,
  		   struct buffer_head *bh_in)
  {
  	struct buffer_head *bh = NULL;
  	journal_t *journal;
  	struct block_device *bdev;
  	int err;
  
  	might_sleep();
  	if (bh_in)
  		BUFFER_TRACE(bh_in, "enter");
  
  	journal = handle->h_transaction->t_journal;
  	if (!journal_set_features(journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)){
  		J_ASSERT (!"Cannot set revoke feature!");
  		return -EINVAL;
  	}
  
  	bdev = journal->j_fs_dev;
  	bh = bh_in;
  
  	if (!bh) {
  		bh = __find_get_block(bdev, blocknr, journal->j_blocksize);
  		if (bh)
  			BUFFER_TRACE(bh, "found on hash");
  	}
  #ifdef JBD_EXPENSIVE_CHECKING
  	else {
  		struct buffer_head *bh2;
  
  		/* If there is a different buffer_head lying around in
  		 * memory anywhere... */
  		bh2 = __find_get_block(bdev, blocknr, journal->j_blocksize);
  		if (bh2) {
  			/* ... and it has RevokeValid status... */
  			if (bh2 != bh && buffer_revokevalid(bh2))
  				/* ...then it better be revoked too,
  				 * since it's illegal to create a revoke
  				 * record against a buffer_head which is
  				 * not marked revoked --- that would
  				 * risk missing a subsequent revoke
  				 * cancel. */
  				J_ASSERT_BH(bh2, buffer_revoked(bh2));
  			put_bh(bh2);
  		}
  	}
  #endif
  
  	/* We really ought not ever to revoke twice in a row without
  	 * first having the revoke cancelled: it's illegal to free a
  	 * block twice without allocating it in between! */
  	if (bh) {
  		if (!J_EXPECT_BH(bh, !buffer_revoked(bh),
  				 "inconsistent data on disk")) {
  			/* Only drop the reference we took ourselves. */
  			if (!bh_in)
  				brelse(bh);
  			return -EIO;
  		}
  		set_buffer_revoked(bh);
  		set_buffer_revokevalid(bh);
  		if (bh_in) {
  			BUFFER_TRACE(bh_in, "call journal_forget");
  			journal_forget(handle, bh_in);
  		} else {
  			BUFFER_TRACE(bh, "call brelse");
  			__brelse(bh);
  		}
  	}
  
  	jbd_debug(2, "insert revoke for block %u, bh_in=%p\n", blocknr, bh_in);
  	err = insert_revoke_hash(journal, blocknr,
  				handle->h_transaction->t_tid);
  	/* bh_in is optional; trace it only when present, as on entry. */
  	if (bh_in)
  		BUFFER_TRACE(bh_in, "exit");
  	return err;
  }
  
  /*
   * Cancel an outstanding revoke.  For use only internally by the
   * journaling code (called from journal_get_write_access).
   *
   * We trust buffer_revoked() on the buffer if the buffer is already
   * being journaled: if there is no revoke pending on the buffer, then we
   * don't do anything here.
   *
   * This would break if it were possible for a buffer to be revoked and
   * discarded, and then reallocated within the same transaction.  In such
   * a case we would have lost the revoked bit, but when we arrived here
   * the second time we would still have a pending revoke to cancel.  So,
   * do not trust the Revoked bit on buffers unless RevokeValid is also
   * set.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
   */
  /*
   * @handle: handle whose transaction's journal holds the revoke table
   * @jh:     journal_head of the buffer whose revoke is to be cancelled
   *
   * Returns 1 if a revoke record for the buffer's block was found and
   * removed, 0 otherwise.
   */
  int journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
  {
  	struct jbd_revoke_record_s *record;
  	journal_t *journal = handle->h_transaction->t_journal;
  	int need_cancel;
  	int did_revoke = 0;	/* akpm: debug */
  	struct buffer_head *bh = jh2bh(jh);
  
  	jbd_debug(4, "journal_head %p, cancelling revoke\n", jh);
  
  	/* Is the existing Revoke bit valid?  If so, we trust it, and
  	 * only perform the full cancel if the revoke bit is set.  If
  	 * not, we can't trust the revoke bit, and we need to do the
  	 * full search for a revoke record. */
  	if (test_set_buffer_revokevalid(bh)) {
  		need_cancel = test_clear_buffer_revoked(bh);
  	} else {
  		need_cancel = 1;
  		clear_buffer_revoked(bh);
  	}
  
  	if (need_cancel) {
  		record = find_revoke_record(journal, bh->b_blocknr);
  		if (record) {
  			jbd_debug(4, "cancelled existing revoke on "
  				  "blocknr %llu\n",
  				  (unsigned long long)bh->b_blocknr);
  			/* Unlink under the lock, free outside it. */
  			spin_lock(&journal->j_revoke_lock);
  			list_del(&record->hash);
  			spin_unlock(&journal->j_revoke_lock);
  			kmem_cache_free(revoke_record_cache, record);
  			did_revoke = 1;
  		}
  	}
  
  #ifdef JBD_EXPENSIVE_CHECKING
  	/* There better not be one left behind by now! */
  	record = find_revoke_record(journal, bh->b_blocknr);
  	J_ASSERT_JH(jh, record == NULL);
  #endif
  
  	/* Finally, have we just cleared revoke on an unhashed
  	 * buffer_head?  If so, we'd better make sure we clear the
  	 * revoked status on any hashed alias too, otherwise the revoke
  	 * state machine will get very upset later on. */
  	if (need_cancel) {
  		struct buffer_head *bh2;
  		bh2 = __find_get_block(bh->b_bdev, bh->b_blocknr, bh->b_size);
  		if (bh2) {
  			if (bh2 != bh)
  				clear_buffer_revoked(bh2);
  			__brelse(bh2);
  		}
  	}
  	return did_revoke;
  }
8c111b3f5   Yongqiang Yang   jbd: clear revoke...
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
  /*
   * journal_clear_buffer_revoked_flags: for every record in the journal's
   * current revoke table, look up the buffer_head of the revoked block and,
   * if one is found, clear its Revoked flag.  This reflects that there is
   * no revoked buffer in the next transaction which is going to be started.
   */
  void journal_clear_buffer_revoked_flags(journal_t *journal)
  {
  	struct jbd_revoke_table_s *table = journal->j_revoke;
  	int bucket;
  
  	for (bucket = 0; bucket < table->hash_size; bucket++) {
  		struct list_head *head = &table->hash_table[bucket];
  		struct list_head *pos;
  
  		list_for_each(pos, head) {
  			/* The list_head is the first member of the record. */
  			struct jbd_revoke_record_s *record =
  				(struct jbd_revoke_record_s *)pos;
  			struct buffer_head *bh;
  
  			bh = __find_get_block(journal->j_fs_dev,
  					      record->blocknr,
  					      journal->j_blocksize);
  			if (!bh)
  				continue;
  			clear_buffer_revoked(bh);
  			__brelse(bh);
  		}
  	}
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
506
507
508
509
510
511
512
513
514
515
516
517
  /*
   * journal_switch_revoke_table: make the other of the two revoke tables
   * the current one (journal->j_revoke) for the next transaction and
   * reinitialise all of its buckets.  We do not want to suspend any
   * processing until all revokes are written -bzzz
   */
  void journal_switch_revoke_table(journal_t *journal)
  {
  	struct jbd_revoke_table_s *next;
  	int i;
  
  	next = (journal->j_revoke == journal->j_revoke_table[0]) ?
  		journal->j_revoke_table[1] : journal->j_revoke_table[0];
  	journal->j_revoke = next;
  
  	for (i = 0; i < next->hash_size; i++)
  		INIT_LIST_HEAD(&next->hash_table[i]);
  }
  
  /*
   * Write revoke records to the journal for all entries in the current
   * revoke hash, deleting the entries as we go.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
525
   */
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
526
  void journal_write_revoke_records(journal_t *journal,
38d726d15   Theodore Ts'o   jbd: use SWRITE_S...
527
  				  transaction_t *transaction, int write_op)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
528
529
530
531
532
533
  {
  	struct journal_head *descriptor;
  	struct jbd_revoke_record_s *record;
  	struct jbd_revoke_table_s *revoke;
  	struct list_head *hash_list;
  	int i, offset, count;
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
534
  	descriptor = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
535
536
537
538
539
540
541
542
543
544
545
  	offset = 0;
  	count = 0;
  
  	/* select revoke table for committing transaction */
  	revoke = journal->j_revoke == journal->j_revoke_table[0] ?
  		journal->j_revoke_table[1] : journal->j_revoke_table[0];
  
  	for (i = 0; i < revoke->hash_size; i++) {
  		hash_list = &revoke->hash_table[i];
  
  		while (!list_empty(hash_list)) {
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
546
  			record = (struct jbd_revoke_record_s *)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
547
548
  				hash_list->next;
  			write_one_revoke_record(journal, transaction,
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
549
  						&descriptor, &offset,
38d726d15   Theodore Ts'o   jbd: use SWRITE_S...
550
  						record, write_op);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
551
552
553
554
555
556
  			count++;
  			list_del(&record->hash);
  			kmem_cache_free(revoke_record_cache, record);
  		}
  	}
  	if (descriptor)
38d726d15   Theodore Ts'o   jbd: use SWRITE_S...
557
  		flush_descriptor(journal, descriptor, offset, write_op);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
558
559
560
  	jbd_debug(1, "Wrote %d revoke records
  ", count);
  }
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
561
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
562
   * Write out one revoke record.  We need to create a new descriptor
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
563
   * block if the old one is full or if we have not already created one.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
564
   */
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
565
  static void write_one_revoke_record(journal_t *journal,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
566
  				    transaction_t *transaction,
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
567
  				    struct journal_head **descriptorp,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
568
  				    int *offsetp,
38d726d15   Theodore Ts'o   jbd: use SWRITE_S...
569
570
  				    struct jbd_revoke_record_s *record,
  				    int write_op)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
  {
  	struct journal_head *descriptor;
  	int offset;
  	journal_header_t *header;
  
  	/* If we are already aborting, this all becomes a noop.  We
             still need to go round the loop in
             journal_write_revoke_records in order to free all of the
             revoke records: only the IO to the journal is omitted. */
  	if (is_journal_aborted(journal))
  		return;
  
  	descriptor = *descriptorp;
  	offset = *offsetp;
  
  	/* Make sure we have a descriptor with space left for the record */
  	if (descriptor) {
  		if (offset == journal->j_blocksize) {
38d726d15   Theodore Ts'o   jbd: use SWRITE_S...
589
  			flush_descriptor(journal, descriptor, offset, write_op);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
  			descriptor = NULL;
  		}
  	}
  
  	if (!descriptor) {
  		descriptor = journal_get_descriptor_buffer(journal);
  		if (!descriptor)
  			return;
  		header = (journal_header_t *) &jh2bh(descriptor)->b_data[0];
  		header->h_magic     = cpu_to_be32(JFS_MAGIC_NUMBER);
  		header->h_blocktype = cpu_to_be32(JFS_REVOKE_BLOCK);
  		header->h_sequence  = cpu_to_be32(transaction->t_tid);
  
  		/* Record it so that we can wait for IO completion later */
  		JBUFFER_TRACE(descriptor, "file as BJ_LogCtl");
  		journal_file_buffer(descriptor, transaction, BJ_LogCtl);
  
  		offset = sizeof(journal_revoke_header_t);
  		*descriptorp = descriptor;
  	}
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
610
  	* ((__be32 *)(&jh2bh(descriptor)->b_data[offset])) =
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
611
612
613
614
  		cpu_to_be32(record->blocknr);
  	offset += 4;
  	*offsetp = offset;
  }
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
615
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
616
617
618
619
620
   * Flush a revoke descriptor out to the journal.  If we are aborting,
   * this is a noop; otherwise we are generating a buffer which needs to
   * be waited for during commit, so it has to go onto the appropriate
   * journal buffer list.
   */
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
621
622
  /*
   * Finish a revoke descriptor block and submit it: store @offset in the
   * header's r_count field, mark the buffer jwrite and dirty, and write it
   * with @write_op.  If the journal is aborted the buffer reference is
   * dropped instead and nothing is written.
   */
  static void flush_descriptor(journal_t *journal,
  			     struct journal_head *descriptor,
  			     int offset, int write_op)
  {
  	struct buffer_head *bh = jh2bh(descriptor);
  	journal_revoke_header_t *header;
  
  	if (is_journal_aborted(journal)) {
  		put_bh(bh);
  		return;
  	}
  
  	header = (journal_revoke_header_t *) bh->b_data;
  	header->r_count = cpu_to_be32(offset);
  
  	set_buffer_jwrite(bh);
  	BUFFER_TRACE(bh, "write");
  	set_buffer_dirty(bh);
  	write_dirty_buffer(bh, write_op);
  }
  #endif
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
641
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
642
643
644
645
646
647
648
649
650
651
   * Revoke support for recovery.
   *
   * Recovery needs to be able to:
   *
   *  record all revoke records, including the tid of the latest instance
   *  of each revoke in the journal
   *
   *  check whether a given block in a given transaction should be replayed
   *  (ie. has not been revoked by a revoke record in that or a subsequent
   *  transaction)
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
652
   *
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
653
654
655
656
657
658
659
   *  empty the revoke table after recovery.
   */
  
  /*
   * First, setting revoke records.  We create a new revoke record for
   * every block ever revoked in the log as we scan it for recovery, and
   * we update the existing records if we find multiple revokes for a
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
660
   * single block.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
661
   */
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
662
  int journal_set_revoke(journal_t *journal,
9c28cbcce   Jan Kara   jbd: Journal bloc...
663
  		       unsigned int blocknr,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
664
665
666
667
668
669
670
671
672
673
674
  		       tid_t sequence)
  {
  	struct jbd_revoke_record_s *record;
  
  	record = find_revoke_record(journal, blocknr);
  	if (record) {
  		/* If we have multiple occurrences, only record the
  		 * latest sequence number in the hashed record */
  		if (tid_gt(sequence, record->sequence))
  			record->sequence = sequence;
  		return 0;
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
675
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
676
677
  	return insert_revoke_hash(journal, blocknr, sequence);
  }
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
678
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
679
680
681
682
683
   * Test revoke records.  For a given block referenced in the log, has
   * that block been revoked?  A revoke record with a given transaction
   * sequence number revokes all blocks in that transaction and earlier
   * ones, but later transactions still need to be replayed.
   */
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
684
  int journal_test_revoke(journal_t *journal,
9c28cbcce   Jan Kara   jbd: Journal bloc...
685
  			unsigned int blocknr,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
  			tid_t sequence)
  {
  	struct jbd_revoke_record_s *record;
  
  	record = find_revoke_record(journal, blocknr);
  	if (!record)
  		return 0;
  	if (tid_gt(sequence, record->sequence))
  		return 0;
  	return 1;
  }
  
  /*
   * Finally, once recovery is over, we need to clear the revoke table so
   * that it can be reused by the running filesystem.
   */
  
  void journal_clear_revoke(journal_t *journal)
  {
  	struct jbd_revoke_table_s *table = journal->j_revoke;
  	int bucket;
  
  	/* Drain each bucket of the current table, freeing every record. */
  	for (bucket = 0; bucket < table->hash_size; bucket++) {
  		struct list_head *head = &table->hash_table[bucket];
  
  		while (!list_empty(head)) {
  			struct jbd_revoke_record_s *victim =
  				(struct jbd_revoke_record_s*) head->next;
  
  			list_del(&victim->hash);
  			kmem_cache_free(revoke_record_cache, victim);
  		}
  	}
  }