Blame view

fs/jbd2/revoke.c 21.4 KB
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1
  /*
588626996   Uwe Kleine-König   fix file specific...
2
   * linux/fs/jbd2/revoke.c
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
   *
   * Written by Stephen C. Tweedie <sct@redhat.com>, 2000
   *
   * Copyright 2000 Red Hat corp --- All Rights Reserved
   *
   * This file is part of the Linux kernel and is made available under
   * the terms of the GNU General Public License, version 2, or at your
   * option, any later version, incorporated herein by reference.
   *
   * Journal revoke routines for the generic filesystem journaling code;
   * part of the ext2fs journaling system.
   *
   * Revoke is the mechanism used to prevent old log records for deleted
   * metadata from being replayed on top of newer data using the same
   * blocks.  The revoke mechanism is used in two separate places:
   *
   * + Commit: during commit we write the entire list of the current
   *   transaction's revoked blocks to the journal
   *
   * + Recovery: during recovery we record the transaction ID of all
   *   revoked blocks.  If there are multiple revoke records in the log
   *   for a single block, only the last one counts, and if there is a log
   *   entry for a block beyond the last revoke, then that log entry still
   *   gets replayed.
   *
   * We can get interactions between revokes and new log data within a
   * single transaction:
   *
   * Block is revoked and then journaled:
   *   The desired end result is the journaling of the new block, so we
   *   cancel the revoke before the transaction commits.
   *
   * Block is journaled and then revoked:
   *   The revoke must take precedence over the write of the block, so we
   *   need either to cancel the journal entry or to write the revoke
   *   later in the log than the log block.  In this case, we choose the
   *   latter: journaling a block cancels any revoke record for that block
   *   in the current transaction, so any revoke for that block in the
   *   transaction must have happened after the block was journaled and so
   *   the revoke must take precedence.
   *
   * Block is revoked and then written as data:
   *   The data write is allowed to succeed, but the revoke is _not_
   *   cancelled.  We still need to prevent old log records from
   *   overwriting the new data.  We don't even need to clear the revoke
   *   bit here.
   *
1ba37268c   Yongqiang Yang   jbd2: clear revok...
50
51
52
53
   * We cache the revoke status of a buffer in the current transaction in its
   * b_state bits.  As the name says, the RevokeValid flag indicates that the
   * cached revoke status of a buffer is valid and can be relied upon.
   *
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
54
55
56
57
58
59
60
61
   * Revoke information on buffers is a tri-state value:
   *
   * RevokeValid clear:	no cached revoke status, need to look it up
   * RevokeValid set, Revoked clear:
   *			buffer has not been revoked, and cancel_revoke
   *			need do nothing.
   * RevokeValid set, Revoked set:
   *			buffer has been revoked.
86db97c87   Jan Kara   jbd2: Update lock...
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
   *
   * Locking rules:
   * We keep two hash tables of revoke records. One hashtable belongs to the
   * running transaction (is pointed to by journal->j_revoke), the other one
   * belongs to the committing transaction. Accesses to the second hash table
   * happen only from the kjournald and no other thread touches this table.  Also
   * journal_switch_revoke_table() which switches which hashtable belongs to the
   * running and which to the committing transaction is called only from
   * kjournald. Therefore we need no locks when accessing the hashtable belonging
   * to the committing transaction.
   *
   * All users operating on the hash table belonging to the running transaction
   * have a handle to the transaction. Therefore they are safe from kjournald
   * switching hash tables under them. For operations on the lists of entries in
   * the hash table j_revoke_lock is used.
   *
25985edce   Lucas De Marchi   Fix common misspe...
78
   * Finally, also replay code uses the hash tables but at this moment no one else
86db97c87   Jan Kara   jbd2: Update lock...
79
80
   * can touch them (filesystem isn't mounted yet) and hence no locking is
   * needed.
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
81
82
83
84
85
86
87
   */
  
  #ifndef __KERNEL__
  #include "jfs_user.h"
  #else
  #include <linux/time.h>
  #include <linux/fs.h>
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
88
  #include <linux/jbd2.h>
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
89
90
91
  #include <linux/errno.h>
  #include <linux/slab.h>
  #include <linux/list.h>
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
92
  #include <linux/init.h>
67c457a8c   Theodore Ts'o   jbd2: use SWRITE_...
93
  #include <linux/bio.h>
f482394cc   vignesh babu   is_power_of_2(): jbd
94
  #include <linux/log2.h>
d48458d4a   Theodore Ts'o   jbd2: use a bette...
95
  #include <linux/hash.h>
db9ee2203   Darrick J. Wong   jbd2: fix descrip...
96
  #endif
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
97

e18b890bb   Christoph Lameter   [PATCH] slab: rem...
98
99
  static struct kmem_cache *jbd2_revoke_record_cache;
  static struct kmem_cache *jbd2_revoke_table_cache;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
100
101
102
103
  
  /* Each revoke record represents one single revoked block.  During
     journal replay, this involves recording the transaction ID of the
     last transaction to revoke this block. */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
104
  struct jbd2_revoke_record_s
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
105
106
107
  {
  	struct list_head  hash;
  	tid_t		  sequence;	/* Used for recovery only */
18eba7aae   Mingming Cao   [PATCH] jbd2: swi...
108
  	unsigned long long	  blocknr;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
109
110
111
112
  };
  
  
  /*
   * The revoke table: a simple hash table whose buckets are lists of revoke
   * records.
   */
  struct jbd2_revoke_table_s {
  	/*
  	 * Number of buckets.  Must be a power of two.  It is conceivable
  	 * that we might want a larger hash table for recovery.
  	 */
  	int			hash_size;
  	int			hash_shift;	/* number of hash bits passed to hash_64() */
  	struct list_head	*hash_table;	/* array of hash_size bucket heads */
  };
  
  
  #ifdef __KERNEL__
9bcf976cb   Jan Kara   jbd2: remove unne...
124
  static void write_one_revoke_record(transaction_t *transaction,
  				    struct list_head *log_bufs,
  				    struct buffer_head **descriptorp,
  				    int *offsetp,
  				    struct jbd2_revoke_record_s *record);
  static void flush_descriptor(journal_t *journal,
  			     struct buffer_head *descriptor, int offset);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
129
130
131
  #endif
  
  /* Utility functions to maintain the revoke table */
18eba7aae   Mingming Cao   [PATCH] jbd2: swi...
132
  /*
   * Map a block number to a bucket index in the journal's current revoke
   * table (journal->j_revoke).
   */
  static inline int hash(journal_t *journal, unsigned long long block)
  {
  	struct jbd2_revoke_table_s *table = journal->j_revoke;

  	return hash_64(block, table->hash_shift);
  }
18eba7aae   Mingming Cao   [PATCH] jbd2: swi...
136
  static int insert_revoke_hash(journal_t *journal, unsigned long long blocknr,
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
137
138
139
  			      tid_t seq)
  {
  	struct list_head *hash_list;
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
140
  	struct jbd2_revoke_record_s *record;
7b506b103   Michal Hocko   jbd2: get rid of ...
141
  	gfp_t gfp_mask = GFP_NOFS;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
142

7b506b103   Michal Hocko   jbd2: get rid of ...
143
144
145
  	if (journal_oom_retry)
  		gfp_mask |= __GFP_NOFAIL;
  	record = kmem_cache_alloc(jbd2_revoke_record_cache, gfp_mask);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
146
  	if (!record)
7b506b103   Michal Hocko   jbd2: get rid of ...
147
  		return -ENOMEM;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
148
149
150
151
152
153
154
155
  
  	record->sequence = seq;
  	record->blocknr = blocknr;
  	hash_list = &journal->j_revoke->hash_table[hash(journal, blocknr)];
  	spin_lock(&journal->j_revoke_lock);
  	list_add(&record->hash, hash_list);
  	spin_unlock(&journal->j_revoke_lock);
  	return 0;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
156
157
158
  }
  
  /* Find a revoke record in the journal's hash table. */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
159
  static struct jbd2_revoke_record_s *find_revoke_record(journal_t *journal,
18eba7aae   Mingming Cao   [PATCH] jbd2: swi...
160
  						      unsigned long long blocknr)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
161
162
  {
  	struct list_head *hash_list;
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
163
  	struct jbd2_revoke_record_s *record;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
164
165
166
167
  
  	hash_list = &journal->j_revoke->hash_table[hash(journal, blocknr)];
  
  	spin_lock(&journal->j_revoke_lock);
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
168
  	record = (struct jbd2_revoke_record_s *) hash_list->next;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
169
170
171
172
173
  	while (&(record->hash) != hash_list) {
  		if (record->blocknr == blocknr) {
  			spin_unlock(&journal->j_revoke_lock);
  			return record;
  		}
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
174
  		record = (struct jbd2_revoke_record_s *) record->hash.next;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
175
176
177
178
  	}
  	spin_unlock(&journal->j_revoke_lock);
  	return NULL;
  }
9fa27c85d   Duane Griffin   jbd2: tidy up rev...
179
180
181
182
183
184
185
186
187
188
189
  /*
   * Destroy whichever of the two revoke slab caches currently exist and reset
   * their pointers to NULL.  A cache whose pointer is already NULL is skipped.
   */
  void jbd2_journal_destroy_revoke_caches(void)
  {
  	struct kmem_cache *cache;

  	cache = jbd2_revoke_record_cache;
  	if (cache != NULL) {
  		kmem_cache_destroy(cache);
  		jbd2_revoke_record_cache = NULL;
  	}

  	cache = jbd2_revoke_table_cache;
  	if (cache != NULL) {
  		kmem_cache_destroy(cache);
  		jbd2_revoke_table_cache = NULL;
  	}
  }
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
190
  /*
   * Create the slab caches for revoke records and revoke tables.  Both cache
   * pointers must be NULL on entry.
   *
   * Returns 0 on success.  If either cache cannot be created, any cache that
   * was created is destroyed again and -ENOMEM is returned.
   */
  int __init jbd2_journal_init_revoke_caches(void)
  {
  	J_ASSERT(!jbd2_revoke_record_cache);
  	J_ASSERT(!jbd2_revoke_table_cache);

  	jbd2_revoke_record_cache = KMEM_CACHE(jbd2_revoke_record_s,
  					SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY);
  	if (!jbd2_revoke_record_cache)
  		return -ENOMEM;

  	jbd2_revoke_table_cache = KMEM_CACHE(jbd2_revoke_table_s,
  					     SLAB_TEMPORARY);
  	if (!jbd2_revoke_table_cache) {
  		/* Undo the record cache created above. */
  		jbd2_journal_destroy_revoke_caches();
  		return -ENOMEM;
  	}

  	return 0;
  }
83c49523c   Duane Griffin   jbd2: eliminate d...
209
  static struct jbd2_revoke_table_s *jbd2_journal_init_revoke_table(int hash_size)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
210
  {
83c49523c   Duane Griffin   jbd2: eliminate d...
211
212
213
  	int shift = 0;
  	int tmp = hash_size;
  	struct jbd2_revoke_table_s *table;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
214

83c49523c   Duane Griffin   jbd2: eliminate d...
215
216
217
  	table = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL);
  	if (!table)
  		goto out;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
218

470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
219
220
  	while((tmp >>= 1UL) != 0UL)
  		shift++;
83c49523c   Duane Griffin   jbd2: eliminate d...
221
222
223
  	table->hash_size = hash_size;
  	table->hash_shift = shift;
  	table->hash_table =
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
224
  		kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
83c49523c   Duane Griffin   jbd2: eliminate d...
225
226
227
228
  	if (!table->hash_table) {
  		kmem_cache_free(jbd2_revoke_table_cache, table);
  		table = NULL;
  		goto out;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
229
230
231
  	}
  
  	for (tmp = 0; tmp < hash_size; tmp++)
83c49523c   Duane Griffin   jbd2: eliminate d...
232
  		INIT_LIST_HEAD(&table->hash_table[tmp]);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
233

83c49523c   Duane Griffin   jbd2: eliminate d...
234
235
236
237
238
239
240
241
242
243
244
245
  out:
  	return table;
  }
  
  /*
   * Free a revoke table and its bucket array.  Every bucket is asserted to be
   * empty first; the records themselves are not freed here.
   */
  static void jbd2_journal_destroy_revoke_table(struct jbd2_revoke_table_s *table)
  {
  	int bucket;

  	for (bucket = 0; bucket < table->hash_size; bucket++) {
  		J_ASSERT(list_empty(&table->hash_table[bucket]));
  	}

  	kfree(table->hash_table);
  	kmem_cache_free(jbd2_revoke_table_cache, table);
  }
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
250

83c49523c   Duane Griffin   jbd2: eliminate d...
251
252
253
254
  /*
   * Initialise the revoke table for a given journal to a given size.
   *
   * Allocates both revoke tables (each with @hash_size buckets, which must be
   * a power of two), makes table 1 the current table (journal->j_revoke) and
   * initialises j_revoke_lock.
   *
   * Returns 0 on success or -ENOMEM if a table could not be allocated; in
   * that case no table is left attached to the journal.
   */
  int jbd2_journal_init_revoke(journal_t *journal, int hash_size)
  {
  	J_ASSERT(journal->j_revoke_table[0] == NULL);
  	J_ASSERT(is_power_of_2(hash_size));

  	journal->j_revoke_table[0] = jbd2_journal_init_revoke_table(hash_size);
  	if (!journal->j_revoke_table[0])
  		goto fail0;

  	journal->j_revoke_table[1] = jbd2_journal_init_revoke_table(hash_size);
  	if (!journal->j_revoke_table[1])
  		goto fail1;

  	journal->j_revoke = journal->j_revoke_table[1];

  	spin_lock_init(&journal->j_revoke_lock);

  	return 0;

  fail1:
  	jbd2_journal_destroy_revoke_table(journal->j_revoke_table[0]);
  	/*
  	 * Clear the stale pointer: jbd2_journal_destroy_revoke() frees any
  	 * non-NULL table, so leaving it set would lead to a double free.
  	 */
  	journal->j_revoke_table[0] = NULL;
  fail0:
  	return -ENOMEM;
  }
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
276

83c49523c   Duane Griffin   jbd2: eliminate d...
277
  /* Destroy a journal's revoke table.  The table must already be empty! */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
278
  void jbd2_journal_destroy_revoke(journal_t *journal)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
279
  {
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
280
  	journal->j_revoke = NULL;
83c49523c   Duane Griffin   jbd2: eliminate d...
281
282
283
284
  	if (journal->j_revoke_table[0])
  		jbd2_journal_destroy_revoke_table(journal->j_revoke_table[0]);
  	if (journal->j_revoke_table[1])
  		jbd2_journal_destroy_revoke_table(journal->j_revoke_table[1]);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
285
286
287
288
289
290
  }
  
  
  #ifdef __KERNEL__
  
  /*
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
291
   * jbd2_journal_revoke: revoke a given buffer_head from the journal.  This
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
292
293
294
295
296
297
298
299
300
301
302
   * prevents the block from being replayed during recovery if we take a
   * crash after this current transaction commits.  Any subsequent
   * metadata writes of the buffer in this transaction cancel the
   * revoke.
   *
   * Note that this call may block --- it is up to the caller to make
   * sure that there are no further calls to journal_write_metadata
   * before the revoke is complete.  In ext3, this implies calling the
   * revoke before clearing the block bitmap when we are deleting
   * metadata.
   *
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
303
   * Revoke performs a jbd2_journal_forget on any buffer_head passed in as a
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
304
305
306
307
308
309
   * parameter, but does _not_ forget the buffer_head if the bh was only
   * found implicitly.
   *
   * bh_in may not be a journalled buffer - it may have come off
   * the hash tables without an attached journal_head.
   *
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
310
   * If bh_in is non-zero, jbd2_journal_revoke() will decrement its b_count
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
311
312
   * by one.
   */
18eba7aae   Mingming Cao   [PATCH] jbd2: swi...
313
  int jbd2_journal_revoke(handle_t *handle, unsigned long long blocknr,
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
314
315
316
317
318
319
320
321
322
323
324
325
  		   struct buffer_head *bh_in)
  {
  	struct buffer_head *bh = NULL;
  	journal_t *journal;
  	struct block_device *bdev;
  	int err;
  
  	might_sleep();
  	if (bh_in)
  		BUFFER_TRACE(bh_in, "enter");
  
  	journal = handle->h_transaction->t_journal;
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
326
  	if (!jbd2_journal_set_features(journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)){
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
327
328
329
330
331
332
333
334
335
336
337
338
  		J_ASSERT (!"Cannot set revoke feature!");
  		return -EINVAL;
  	}
  
  	bdev = journal->j_fs_dev;
  	bh = bh_in;
  
  	if (!bh) {
  		bh = __find_get_block(bdev, blocknr, journal->j_blocksize);
  		if (bh)
  			BUFFER_TRACE(bh, "found on hash");
  	}
cd02ff0b1   Mingming Cao   jbd2: JBD_XXX to ...
339
  #ifdef JBD2_EXPENSIVE_CHECKING
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
  	else {
  		struct buffer_head *bh2;
  
  		/* If there is a different buffer_head lying around in
  		 * memory anywhere... */
  		bh2 = __find_get_block(bdev, blocknr, journal->j_blocksize);
  		if (bh2) {
  			/* ... and it has RevokeValid status... */
  			if (bh2 != bh && buffer_revokevalid(bh2))
  				/* ...then it better be revoked too,
  				 * since it's illegal to create a revoke
  				 * record against a buffer_head which is
  				 * not marked revoked --- that would
  				 * risk missing a subsequent revoke
  				 * cancel. */
  				J_ASSERT_BH(bh2, buffer_revoked(bh2));
  			put_bh(bh2);
  		}
  	}
  #endif
  
  	/* We really ought not ever to revoke twice in a row without
             first having the revoke cancelled: it's illegal to free a
             block twice without allocating it in between! */
  	if (bh) {
  		if (!J_EXPECT_BH(bh, !buffer_revoked(bh),
  				 "inconsistent data on disk")) {
  			if (!bh_in)
  				brelse(bh);
  			return -EIO;
  		}
  		set_buffer_revoked(bh);
  		set_buffer_revokevalid(bh);
  		if (bh_in) {
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
374
375
  			BUFFER_TRACE(bh_in, "call jbd2_journal_forget");
  			jbd2_journal_forget(handle, bh_in);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
376
377
378
379
380
  		} else {
  			BUFFER_TRACE(bh, "call brelse");
  			__brelse(bh);
  		}
  	}
299717696   Mingming Cao   [PATCH] jbd2: sec...
381
382
  	jbd_debug(2, "insert revoke for block %llu, bh_in=%p
  ",blocknr, bh_in);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
383
384
385
386
387
388
389
390
  	err = insert_revoke_hash(journal, blocknr,
  				handle->h_transaction->t_tid);
  	BUFFER_TRACE(bh_in, "exit");
  	return err;
  }
  
  /*
   * Cancel an outstanding revoke.  For use only internally by the
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
391
   * journaling code (called from jbd2_journal_get_write_access).
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
392
393
394
395
396
397
398
399
400
401
402
   *
   * We trust buffer_revoked() on the buffer if the buffer is already
   * being journaled: if there is no revoke pending on the buffer, then we
   * don't do anything here.
   *
   * This would break if it were possible for a buffer to be revoked and
   * discarded, and then reallocated within the same transaction.  In such
   * a case we would have lost the revoked bit, but when we arrived here
   * the second time we would still have a pending revoke to cancel.  So,
   * do not trust the Revoked bit on buffers unless RevokeValid is also
   * set.
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
403
   */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
404
  int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
405
  {
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
406
  	struct jbd2_revoke_record_s *record;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
  	journal_t *journal = handle->h_transaction->t_journal;
  	int need_cancel;
  	int did_revoke = 0;	/* akpm: debug */
  	struct buffer_head *bh = jh2bh(jh);
  
  	jbd_debug(4, "journal_head %p, cancelling revoke
  ", jh);
  
  	/* Is the existing Revoke bit valid?  If so, we trust it, and
  	 * only perform the full cancel if the revoke bit is set.  If
  	 * not, we can't trust the revoke bit, and we need to do the
  	 * full search for a revoke record. */
  	if (test_set_buffer_revokevalid(bh)) {
  		need_cancel = test_clear_buffer_revoked(bh);
  	} else {
  		need_cancel = 1;
  		clear_buffer_revoked(bh);
  	}
  
  	if (need_cancel) {
  		record = find_revoke_record(journal, bh->b_blocknr);
  		if (record) {
  			jbd_debug(4, "cancelled existing revoke on "
  				  "blocknr %llu
  ", (unsigned long long)bh->b_blocknr);
  			spin_lock(&journal->j_revoke_lock);
  			list_del(&record->hash);
  			spin_unlock(&journal->j_revoke_lock);
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
435
  			kmem_cache_free(jbd2_revoke_record_cache, record);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
436
437
438
  			did_revoke = 1;
  		}
  	}
cd02ff0b1   Mingming Cao   jbd2: JBD_XXX to ...
439
  #ifdef JBD2_EXPENSIVE_CHECKING
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
  	/* There better not be one left behind by now! */
  	record = find_revoke_record(journal, bh->b_blocknr);
  	J_ASSERT_JH(jh, record == NULL);
  #endif
  
  	/* Finally, have we just cleared revoke on an unhashed
  	 * buffer_head?  If so, we'd better make sure we clear the
  	 * revoked status on any hashed alias too, otherwise the revoke
  	 * state machine will get very upset later on. */
  	if (need_cancel) {
  		struct buffer_head *bh2;
  		bh2 = __find_get_block(bh->b_bdev, bh->b_blocknr, bh->b_size);
  		if (bh2) {
  			if (bh2 != bh)
  				clear_buffer_revoked(bh2);
  			__brelse(bh2);
  		}
  	}
  	return did_revoke;
  }
1ba37268c   Yongqiang Yang   jbd2: clear revok...
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
  /*
   * jbd2_clear_buffer_revoked_flags clears the revoked flag on the buffers
   * of all blocks recorded in the journal's current revoke table
   * (journal->j_revoke), to reflect that there are no revoked buffers in
   * the next transaction which is going to be started.
   *
   * Only blocks for which __find_get_block() returns a buffer are touched.
   * The revoke records themselves are left in place.
   */
  void jbd2_clear_buffer_revoked_flags(journal_t *journal)
  {
  	struct jbd2_revoke_table_s *revoke = journal->j_revoke;
  	int i;

  	for (i = 0; i < revoke->hash_size; i++) {
  		struct list_head *hash_list = &revoke->hash_table[i];
  		struct jbd2_revoke_record_s *record;

  		list_for_each_entry(record, hash_list, hash) {
  			struct buffer_head *bh;

  			bh = __find_get_block(journal->j_fs_dev,
  					      record->blocknr,
  					      journal->j_blocksize);
  			if (bh) {
  				clear_buffer_revoked(bh);
  				/* Drop the reference taken by the lookup. */
  				__brelse(bh);
  			}
  		}
  	}
  }
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
489
490
491
492
  /* journal_switch_revoke table select j_revoke for next transaction
   * we do not want to suspend any processing until all revokes are
   * written -bzzz
   */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
493
  void jbd2_journal_switch_revoke_table(journal_t *journal)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
  {
  	int i;
  
  	if (journal->j_revoke == journal->j_revoke_table[0])
  		journal->j_revoke = journal->j_revoke_table[1];
  	else
  		journal->j_revoke = journal->j_revoke_table[0];
  
  	for (i = 0; i < journal->j_revoke->hash_size; i++)
  		INIT_LIST_HEAD(&journal->j_revoke->hash_table[i]);
  }
  
  /*
   * Write revoke records to the journal for all entries in the current
   * revoke hash, deleting the entries as we go.
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
509
   */
9bcf976cb   Jan Kara   jbd2: remove unne...
510
511
  void jbd2_journal_write_revoke_records(transaction_t *transaction,
  				       struct list_head *log_bufs)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
512
  {
9bcf976cb   Jan Kara   jbd2: remove unne...
513
  	journal_t *journal = transaction->t_journal;
e5a120aeb   Jan Kara   jbd2: remove jour...
514
  	struct buffer_head *descriptor;
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
515
516
  	struct jbd2_revoke_record_s *record;
  	struct jbd2_revoke_table_s *revoke;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
  	struct list_head *hash_list;
  	int i, offset, count;
  
  	descriptor = NULL;
  	offset = 0;
  	count = 0;
  
  	/* select revoke table for committing transaction */
  	revoke = journal->j_revoke == journal->j_revoke_table[0] ?
  		journal->j_revoke_table[1] : journal->j_revoke_table[0];
  
  	for (i = 0; i < revoke->hash_size; i++) {
  		hash_list = &revoke->hash_table[i];
  
  		while (!list_empty(hash_list)) {
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
532
  			record = (struct jbd2_revoke_record_s *)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
533
  				hash_list->next;
9bcf976cb   Jan Kara   jbd2: remove unne...
534
535
  			write_one_revoke_record(transaction, log_bufs,
  						&descriptor, &offset, record);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
536
537
  			count++;
  			list_del(&record->hash);
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
538
  			kmem_cache_free(jbd2_revoke_record_cache, record);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
539
540
541
  		}
  	}
  	if (descriptor)
9bcf976cb   Jan Kara   jbd2: remove unne...
542
  		flush_descriptor(journal, descriptor, offset);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
543
544
545
546
547
548
549
550
  	jbd_debug(1, "Wrote %d revoke records
  ", count);
  }
  
  /*
   * Write out one revoke record.  We need to create a new descriptor
   * block if the old one is full or if we have not already created one.
   */
9bcf976cb   Jan Kara   jbd2: remove unne...
551
  static void write_one_revoke_record(transaction_t *transaction,
e5a120aeb   Jan Kara   jbd2: remove jour...
552
553
  				    struct list_head *log_bufs,
  				    struct buffer_head **descriptorp,
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
554
  				    int *offsetp,
9bcf976cb   Jan Kara   jbd2: remove unne...
555
  				    struct jbd2_revoke_record_s *record)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
556
  {
9bcf976cb   Jan Kara   jbd2: remove unne...
557
  	journal_t *journal = transaction->t_journal;
42a7106de   Darrick J. Wong   jbd2: checksum re...
558
  	int csum_size = 0;
e5a120aeb   Jan Kara   jbd2: remove jour...
559
  	struct buffer_head *descriptor;
e531d0bce   Darrick J. Wong   jbd2: fix r_count...
560
  	int sz, offset;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
561
562
563
  
  	/* If we are already aborting, this all becomes a noop.  We
             still need to go round the loop in
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
564
             jbd2_journal_write_revoke_records in order to free all of the
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
565
566
567
568
569
570
             revoke records: only the IO to the journal is omitted. */
  	if (is_journal_aborted(journal))
  		return;
  
  	descriptor = *descriptorp;
  	offset = *offsetp;
42a7106de   Darrick J. Wong   jbd2: checksum re...
571
  	/* Do we need to leave space at the end for a checksum? */
db9ee2203   Darrick J. Wong   jbd2: fix descrip...
572
  	if (jbd2_journal_has_csum_v2or3(journal))
1101cd4d1   Jan Kara   jbd2: unify revok...
573
  		csum_size = sizeof(struct jbd2_journal_block_tail);
42a7106de   Darrick J. Wong   jbd2: checksum re...
574

56316a0d2   Darrick J. Wong   jbd2: clean up fe...
575
  	if (jbd2_has_feature_64bit(journal))
e531d0bce   Darrick J. Wong   jbd2: fix r_count...
576
577
578
  		sz = 8;
  	else
  		sz = 4;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
579
580
  	/* Make sure we have a descriptor with space left for the record */
  	if (descriptor) {
e531d0bce   Darrick J. Wong   jbd2: fix r_count...
581
  		if (offset + sz > journal->j_blocksize - csum_size) {
9bcf976cb   Jan Kara   jbd2: remove unne...
582
  			flush_descriptor(journal, descriptor, offset);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
583
584
585
586
587
  			descriptor = NULL;
  		}
  	}
  
  	if (!descriptor) {
32ab67159   Jan Kara   jbd2: factor out ...
588
589
  		descriptor = jbd2_journal_get_descriptor_buffer(transaction,
  							JBD2_REVOKE_BLOCK);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
590
591
  		if (!descriptor)
  			return;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
592
593
  
  		/* Record it so that we can wait for IO completion later */
e5a120aeb   Jan Kara   jbd2: remove jour...
594
595
  		BUFFER_TRACE(descriptor, "file in log_bufs");
  		jbd2_file_log_bh(log_bufs, descriptor);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
596

f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
597
  		offset = sizeof(jbd2_journal_revoke_header_t);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
598
599
  		*descriptorp = descriptor;
  	}
56316a0d2   Darrick J. Wong   jbd2: clean up fe...
600
  	if (jbd2_has_feature_64bit(journal))
e5a120aeb   Jan Kara   jbd2: remove jour...
601
  		* ((__be64 *)(&descriptor->b_data[offset])) =
b517bea1c   Zach Brown   [PATCH] 64-bit jb...
602
  			cpu_to_be64(record->blocknr);
e531d0bce   Darrick J. Wong   jbd2: fix r_count...
603
  	else
e5a120aeb   Jan Kara   jbd2: remove jour...
604
  		* ((__be32 *)(&descriptor->b_data[offset])) =
b517bea1c   Zach Brown   [PATCH] 64-bit jb...
605
  			cpu_to_be32(record->blocknr);
e531d0bce   Darrick J. Wong   jbd2: fix r_count...
606
  	offset += sz;
b517bea1c   Zach Brown   [PATCH] 64-bit jb...
607

470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
608
609
610
611
612
613
614
615
616
617
618
  	*offsetp = offset;
  }
  
  /*
   * Flush a revoke descriptor out to the journal.  If we are aborting,
   * no IO is issued; otherwise we are generating a buffer which needs to
   * be waited for during commit, so it has to go onto the appropriate
   * journal buffer list.
   *
   * @offset is the number of bytes used in the descriptor block; it is
   * stored big-endian in the header's r_count field.
   */
  static void flush_descriptor(journal_t *journal,
  			     struct buffer_head *descriptor,
  			     int offset)
  {
  	jbd2_journal_revoke_header_t *hdr;

  	if (is_journal_aborted(journal)) {
  		/*
  		 * NOTE(review): the descriptor was filed on log_bufs by
  		 * write_one_revoke_record(); confirm the commit path does
  		 * not drop this same reference a second time.
  		 */
  		put_bh(descriptor);
  		return;
  	}

  	hdr = (jbd2_journal_revoke_header_t *)descriptor->b_data;
  	hdr->r_count = cpu_to_be32(offset);
  	jbd2_descriptor_block_csum_set(journal, descriptor);

  	set_buffer_jwrite(descriptor);
  	BUFFER_TRACE(descriptor, "write");
  	set_buffer_dirty(descriptor);
  	write_dirty_buffer(descriptor, WRITE_SYNC);
  }
  #endif
  
  /*
   * Revoke support for recovery.
   *
   * Recovery needs to be able to:
   *
   *  record all revoke records, including the tid of the latest instance
   *  of each revoke in the journal
   *
   *  check whether a given block in a given transaction should be replayed
   *  (ie. has not been revoked by a revoke record in that or a subsequent
   *  transaction)
   *
   *  empty the revoke table after recovery.
   */
  
  /*
   * First, setting revoke records.  We create a new revoke record for
   * every block ever revoked in the log as we scan it for recovery, and
   * we update the existing records if we find multiple revokes for a
   * single block.
   */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
660
  int jbd2_journal_set_revoke(journal_t *journal,
18eba7aae   Mingming Cao   [PATCH] jbd2: swi...
661
  		       unsigned long long blocknr,
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
662
663
  		       tid_t sequence)
  {
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
664
  	struct jbd2_revoke_record_s *record;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
  
  	record = find_revoke_record(journal, blocknr);
  	if (record) {
  		/* If we have multiple occurrences, only record the
  		 * latest sequence number in the hashed record */
  		if (tid_gt(sequence, record->sequence))
  			record->sequence = sequence;
  		return 0;
  	}
  	return insert_revoke_hash(journal, blocknr, sequence);
  }
  
  /*
   * Test revoke records.  For a given block referenced in the log, has
   * that block been revoked?  A revoke record with a given transaction
   * sequence number revokes all blocks in that transaction and earlier
   * ones, but later transactions still need replayed.
   */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
683
  int jbd2_journal_test_revoke(journal_t *journal,
18eba7aae   Mingming Cao   [PATCH] jbd2: swi...
684
  			unsigned long long blocknr,
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
685
686
  			tid_t sequence)
  {
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
687
  	struct jbd2_revoke_record_s *record;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
688
689
690
691
692
693
694
695
696
697
698
699
700
  
  	record = find_revoke_record(journal, blocknr);
  	if (!record)
  		return 0;
  	if (tid_gt(sequence, record->sequence))
  		return 0;
  	return 1;
  }
  
  /*
   * Finally, once recovery is over, we need to clear the revoke table so
   * that it can be reused by the running filesystem.
   */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
701
  void jbd2_journal_clear_revoke(journal_t *journal)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
702
703
704
  {
  	int i;
  	struct list_head *hash_list;
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
705
706
  	struct jbd2_revoke_record_s *record;
  	struct jbd2_revoke_table_s *revoke;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
707
708
709
710
711
712
  
  	revoke = journal->j_revoke;
  
  	for (i = 0; i < revoke->hash_size; i++) {
  		hash_list = &revoke->hash_table[i];
  		while (!list_empty(hash_list)) {
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
713
  			record = (struct jbd2_revoke_record_s*) hash_list->next;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
714
  			list_del(&record->hash);
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
715
  			kmem_cache_free(jbd2_revoke_record_cache, record);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
716
717
718
  		}
  	}
  }