Blame view

fs/jbd2/revoke.c 21.9 KB
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1
  /*
588626996   Uwe Kleine-König   fix file specific...
2
   * linux/fs/jbd2/revoke.c
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
   *
   * Written by Stephen C. Tweedie <sct@redhat.com>, 2000
   *
   * Copyright 2000 Red Hat corp --- All Rights Reserved
   *
   * This file is part of the Linux kernel and is made available under
   * the terms of the GNU General Public License, version 2, or at your
   * option, any later version, incorporated herein by reference.
   *
   * Journal revoke routines for the generic filesystem journaling code;
   * part of the ext2fs journaling system.
   *
   * Revoke is the mechanism used to prevent old log records for deleted
   * metadata from being replayed on top of newer data using the same
   * blocks.  The revoke mechanism is used in two separate places:
   *
   * + Commit: during commit we write the entire list of the current
   *   transaction's revoked blocks to the journal
   *
   * + Recovery: during recovery we record the transaction ID of all
   *   revoked blocks.  If there are multiple revoke records in the log
   *   for a single block, only the last one counts, and if there is a log
   *   entry for a block beyond the last revoke, then that log entry still
   *   gets replayed.
   *
   * We can get interactions between revokes and new log data within a
   * single transaction:
   *
   * Block is revoked and then journaled:
   *   The desired end result is the journaling of the new block, so we
   *   cancel the revoke before the transaction commits.
   *
   * Block is journaled and then revoked:
   *   The revoke must take precedence over the write of the block, so we
   *   need either to cancel the journal entry or to write the revoke
   *   later in the log than the log block.  In this case, we choose the
   *   latter: journaling a block cancels any revoke record for that block
   *   in the current transaction, so any revoke for that block in the
   *   transaction must have happened after the block was journaled and so
   *   the revoke must take precedence.
   *
   * Block is revoked and then written as data:
   *   The data write is allowed to succeed, but the revoke is _not_
   *   cancelled.  We still need to prevent old log records from
   *   overwriting the new data.  We don't even need to clear the revoke
   *   bit here.
   *
1ba37268c   Yongqiang Yang   jbd2: clear revok...
50
51
52
53
   * We cache revoke status of a buffer in the current transaction in b_states
   * bits.  As the name says, revokevalid flag indicates that the cached revoke
   * status of a buffer is valid and we can rely on the cached status.
   *
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
54
55
56
57
58
59
60
61
   * Revoke information on buffers is a tri-state value:
   *
   * RevokeValid clear:	no cached revoke status, need to look it up
   * RevokeValid set, Revoked clear:
   *			buffer has not been revoked, and cancel_revoke
   *			need do nothing.
   * RevokeValid set, Revoked set:
   *			buffer has been revoked.
86db97c87   Jan Kara   jbd2: Update lock...
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
   *
   * Locking rules:
   * We keep two hash tables of revoke records. One hashtable belongs to the
   * running transaction (is pointed to by journal->j_revoke), the other one
   * belongs to the committing transaction. Accesses to the second hash table
   * happen only from the kjournald and no other thread touches this table.  Also
   * journal_switch_revoke_table() which switches which hashtable belongs to the
   * running and which to the committing transaction is called only from
   * kjournald. Therefore we need no locks when accessing the hashtable belonging
   * to the committing transaction.
   *
   * All users operating on the hash table belonging to the running transaction
   * have a handle to the transaction. Therefore they are safe from kjournald
   * switching hash tables under them. For operations on the lists of entries in
   * the hash table j_revoke_lock is used.
   *
25985edce   Lucas De Marchi   Fix common misspe...
78
   * Finally, also replay code uses the hash tables but at this moment no one else
86db97c87   Jan Kara   jbd2: Update lock...
79
80
   * can touch them (filesystem isn't mounted yet) and hence no locking is
   * needed.
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
81
82
83
84
85
86
87
   */
  
  #ifndef __KERNEL__
  #include "jfs_user.h"
  #else
  #include <linux/time.h>
  #include <linux/fs.h>
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
88
  #include <linux/jbd2.h>
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
89
90
91
  #include <linux/errno.h>
  #include <linux/slab.h>
  #include <linux/list.h>
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
92
  #include <linux/init.h>
67c457a8c   Theodore Ts'o   jbd2: use SWRITE_...
93
  #include <linux/bio.h>
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
94
  #endif
f482394cc   vignesh babu   is_power_of_2(): jbd
95
  #include <linux/log2.h>
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
96

e18b890bb   Christoph Lameter   [PATCH] slab: rem...
97
98
  static struct kmem_cache *jbd2_revoke_record_cache;
  static struct kmem_cache *jbd2_revoke_table_cache;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
99
100
101
102
  
  /* Each revoke record represents one single revoked block.  During
     journal replay, this involves recording the transaction ID of the
     last transaction to revoke this block. */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
103
  struct jbd2_revoke_record_s
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
104
105
106
  {
  	struct list_head  hash;
  	tid_t		  sequence;	/* Used for recovery only */
18eba7aae   Mingming Cao   [PATCH] jbd2: swi...
107
  	unsigned long long	  blocknr;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
108
109
110
111
  };
  
  
  /* The revoke table is just a simple hash table of revoke records. */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
112
  struct jbd2_revoke_table_s {
  	/*
  	 * Number of buckets; must be a power of two.  It is conceivable
  	 * that we might want a larger hash table for recovery.
  	 */
  	int hash_size;
  	int hash_shift;			/* log2 of hash_size */
  	struct list_head *hash_table;	/* Array of hash_size chain heads */
  };
  
  
  #ifdef __KERNEL__
  /* Forward declarations of the commit-time revoke writers defined below. */
  static void write_one_revoke_record(journal_t *journal,
  				    transaction_t *transaction,
  				    struct journal_head **descriptorp,
  				    int *offsetp,
  				    struct jbd2_revoke_record_s *record,
  				    int write_op);
  static void flush_descriptor(journal_t *journal,
  			     struct journal_head *descriptor,
  			     int offset, int write_op);
  #endif
  
  /* Utility functions to maintain the revoke table */
  
  /* Borrowed from buffer.c: this is a tried and tested block hash function */
18eba7aae   Mingming Cao   [PATCH] jbd2: swi...
132
  static inline int hash(journal_t *journal, unsigned long long block)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
133
  {
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
134
  	struct jbd2_revoke_table_s *table = journal->j_revoke;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
135
  	int hash_shift = table->hash_shift;
299717696   Mingming Cao   [PATCH] jbd2: sec...
136
  	int hash = (int)block ^ (int)((block >> 31) >> 1);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
137

299717696   Mingming Cao   [PATCH] jbd2: sec...
138
139
140
  	return ((hash << (hash_shift - 6)) ^
  		(hash >> 13) ^
  		(hash << (hash_shift - 12))) & (table->hash_size - 1);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
141
  }
18eba7aae   Mingming Cao   [PATCH] jbd2: swi...
142
  static int insert_revoke_hash(journal_t *journal, unsigned long long blocknr,
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
143
144
145
  			      tid_t seq)
  {
  	struct list_head *hash_list;
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
146
  	struct jbd2_revoke_record_s *record;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
147
148
  
  repeat:
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
149
  	record = kmem_cache_alloc(jbd2_revoke_record_cache, GFP_NOFS);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
150
151
152
153
154
155
156
157
158
159
160
161
162
163
  	if (!record)
  		goto oom;
  
  	record->sequence = seq;
  	record->blocknr = blocknr;
  	hash_list = &journal->j_revoke->hash_table[hash(journal, blocknr)];
  	spin_lock(&journal->j_revoke_lock);
  	list_add(&record->hash, hash_list);
  	spin_unlock(&journal->j_revoke_lock);
  	return 0;
  
  oom:
  	if (!journal_oom_retry)
  		return -ENOMEM;
329d291f5   Harvey Harrison   jdb2: replace rem...
164
165
  	jbd_debug(1, "ENOMEM in %s, retrying
  ", __func__);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
166
167
168
169
170
  	yield();
  	goto repeat;
  }
  
  /* Find a revoke record in the journal's hash table. */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
171
  static struct jbd2_revoke_record_s *find_revoke_record(journal_t *journal,
18eba7aae   Mingming Cao   [PATCH] jbd2: swi...
172
  						      unsigned long long blocknr)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
173
174
  {
  	struct list_head *hash_list;
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
175
  	struct jbd2_revoke_record_s *record;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
176
177
178
179
  
  	hash_list = &journal->j_revoke->hash_table[hash(journal, blocknr)];
  
  	spin_lock(&journal->j_revoke_lock);
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
180
  	record = (struct jbd2_revoke_record_s *) hash_list->next;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
181
182
183
184
185
  	while (&(record->hash) != hash_list) {
  		if (record->blocknr == blocknr) {
  			spin_unlock(&journal->j_revoke_lock);
  			return record;
  		}
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
186
  		record = (struct jbd2_revoke_record_s *) record->hash.next;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
187
188
189
190
  	}
  	spin_unlock(&journal->j_revoke_lock);
  	return NULL;
  }
9fa27c85d   Duane Griffin   jbd2: tidy up rev...
191
192
193
194
195
196
197
198
199
200
201
  void jbd2_journal_destroy_revoke_caches(void)
  {
  	if (jbd2_revoke_record_cache) {
  		kmem_cache_destroy(jbd2_revoke_record_cache);
  		jbd2_revoke_record_cache = NULL;
  	}
  	if (jbd2_revoke_table_cache) {
  		kmem_cache_destroy(jbd2_revoke_table_cache);
  		jbd2_revoke_table_cache = NULL;
  	}
  }
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
202
  /*
   * Create the slab caches for revoke records and revoke tables.  Both
   * caches must not exist yet.  Returns 0 on success or -ENOMEM; on failure
   * no cache is left behind.
   */
  int __init jbd2_journal_init_revoke_caches(void)
  {
  	J_ASSERT(!jbd2_revoke_record_cache);
  	J_ASSERT(!jbd2_revoke_table_cache);

  	jbd2_revoke_record_cache =
  		kmem_cache_create("jbd2_revoke_record",
  				  sizeof(struct jbd2_revoke_record_s), 0,
  				  SLAB_HWCACHE_ALIGN | SLAB_TEMPORARY, NULL);
  	if (!jbd2_revoke_record_cache)
  		return -ENOMEM;

  	jbd2_revoke_table_cache =
  		kmem_cache_create("jbd2_revoke_table",
  				  sizeof(struct jbd2_revoke_table_s), 0,
  				  SLAB_TEMPORARY, NULL);
  	if (!jbd2_revoke_table_cache) {
  		/* Undo the record cache created above. */
  		jbd2_journal_destroy_revoke_caches();
  		return -ENOMEM;
  	}

  	return 0;
  }
83c49523c   Duane Griffin   jbd2: eliminate d...
225
  static struct jbd2_revoke_table_s *jbd2_journal_init_revoke_table(int hash_size)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
226
  {
83c49523c   Duane Griffin   jbd2: eliminate d...
227
228
229
  	int shift = 0;
  	int tmp = hash_size;
  	struct jbd2_revoke_table_s *table;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
230

83c49523c   Duane Griffin   jbd2: eliminate d...
231
232
233
  	table = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL);
  	if (!table)
  		goto out;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
234

470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
235
236
  	while((tmp >>= 1UL) != 0UL)
  		shift++;
83c49523c   Duane Griffin   jbd2: eliminate d...
237
238
239
  	table->hash_size = hash_size;
  	table->hash_shift = shift;
  	table->hash_table =
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
240
  		kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
83c49523c   Duane Griffin   jbd2: eliminate d...
241
242
243
244
  	if (!table->hash_table) {
  		kmem_cache_free(jbd2_revoke_table_cache, table);
  		table = NULL;
  		goto out;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
245
246
247
  	}
  
  	for (tmp = 0; tmp < hash_size; tmp++)
83c49523c   Duane Griffin   jbd2: eliminate d...
248
  		INIT_LIST_HEAD(&table->hash_table[tmp]);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
249

83c49523c   Duane Griffin   jbd2: eliminate d...
250
251
252
253
254
255
256
257
258
259
260
261
  out:
  	return table;
  }
  
  /*
   * Free a revoke table and its bucket array.  Every chain must already be
   * empty; this is asserted for each bucket before anything is freed.
   */
  static void jbd2_journal_destroy_revoke_table(struct jbd2_revoke_table_s *table)
  {
  	int bucket;

  	for (bucket = 0; bucket < table->hash_size; bucket++)
  		J_ASSERT(list_empty(&table->hash_table[bucket]));

  	kfree(table->hash_table);
  	kmem_cache_free(jbd2_revoke_table_cache, table);
  }
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
266

83c49523c   Duane Griffin   jbd2: eliminate d...
267
268
269
270
  /*
   * Initialise the revoke tables for a given journal to a given size.
   *
   * Two tables of @hash_size buckets (a power of two) are allocated and
   * j_revoke is pointed at the second one.  Returns 0 on success or -ENOMEM.
   * On failure no table is left allocated and no stale pointer is left in
   * journal->j_revoke_table[], so a later jbd2_journal_destroy_revoke() is
   * safe.
   */
  int jbd2_journal_init_revoke(journal_t *journal, int hash_size)
  {
  	J_ASSERT(journal->j_revoke_table[0] == NULL);
  	J_ASSERT(is_power_of_2(hash_size));

  	journal->j_revoke_table[0] = jbd2_journal_init_revoke_table(hash_size);
  	if (!journal->j_revoke_table[0])
  		goto fail0;

  	journal->j_revoke_table[1] = jbd2_journal_init_revoke_table(hash_size);
  	if (!journal->j_revoke_table[1])
  		goto fail1;

  	journal->j_revoke = journal->j_revoke_table[1];

  	spin_lock_init(&journal->j_revoke_lock);

  	return 0;

  fail1:
  	jbd2_journal_destroy_revoke_table(journal->j_revoke_table[0]);
  	/* Do not leave a dangling pointer behind for the destroy path. */
  	journal->j_revoke_table[0] = NULL;
  fail0:
  	return -ENOMEM;
  }
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
292

83c49523c   Duane Griffin   jbd2: eliminate d...
293
  /* Destroy a journal's revoke table.  The table must already be empty! */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
294
  void jbd2_journal_destroy_revoke(journal_t *journal)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
295
  {
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
296
  	journal->j_revoke = NULL;
83c49523c   Duane Griffin   jbd2: eliminate d...
297
298
299
300
  	if (journal->j_revoke_table[0])
  		jbd2_journal_destroy_revoke_table(journal->j_revoke_table[0]);
  	if (journal->j_revoke_table[1])
  		jbd2_journal_destroy_revoke_table(journal->j_revoke_table[1]);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
301
302
303
304
305
306
  }
  
  
  #ifdef __KERNEL__
  
  /*
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
307
   * jbd2_journal_revoke: revoke a given buffer_head from the journal.  This
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
308
309
310
311
312
313
314
315
316
317
318
   * prevents the block from being replayed during recovery if we take a
   * crash after this current transaction commits.  Any subsequent
   * metadata writes of the buffer in this transaction cancel the
   * revoke.
   *
   * Note that this call may block --- it is up to the caller to make
   * sure that there are no further calls to journal_write_metadata
   * before the revoke is complete.  In ext3, this implies calling the
   * revoke before clearing the block bitmap when we are deleting
   * metadata.
   *
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
319
   * Revoke performs a jbd2_journal_forget on any buffer_head passed in as a
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
320
321
322
323
324
325
   * parameter, but does _not_ forget the buffer_head if the bh was only
   * found implicitly.
   *
   * bh_in may not be a journalled buffer - it may have come off
   * the hash tables without an attached journal_head.
   *
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
326
   * If bh_in is non-zero, jbd2_journal_revoke() will decrement its b_count
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
327
328
   * by one.
   */
18eba7aae   Mingming Cao   [PATCH] jbd2: swi...
329
  /*
   * Returns 0 on success, -EINVAL if the revoke feature cannot be enabled
   * on the journal, -EIO if the buffer is already marked revoked, or the
   * error returned by insert_revoke_hash().
   */
  int jbd2_journal_revoke(handle_t *handle, unsigned long long blocknr,
  		   struct buffer_head *bh_in)
  {
  	struct buffer_head *bh = NULL;
  	journal_t *journal;
  	struct block_device *bdev;
  	int err;

  	might_sleep();
  	if (bh_in)
  		BUFFER_TRACE(bh_in, "enter");

  	journal = handle->h_transaction->t_journal;
  	if (!jbd2_journal_set_features(journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)){
  		J_ASSERT (!"Cannot set revoke feature!");
  		return -EINVAL;
  	}

  	bdev = journal->j_fs_dev;
  	bh = bh_in;

  	/* No buffer supplied: see whether one for this block is cached. */
  	if (!bh) {
  		bh = __find_get_block(bdev, blocknr, journal->j_blocksize);
  		if (bh)
  			BUFFER_TRACE(bh, "found on hash");
  	}
  #ifdef JBD2_EXPENSIVE_CHECKING
  	else {
  		struct buffer_head *bh2;

  		/* If there is a different buffer_head lying around in
  		 * memory anywhere... */
  		bh2 = __find_get_block(bdev, blocknr, journal->j_blocksize);
  		if (bh2) {
  			/* ... and it has RevokeValid status... */
  			if (bh2 != bh && buffer_revokevalid(bh2))
  				/* ...then it better be revoked too,
  				 * since it's illegal to create a revoke
  				 * record against a buffer_head which is
  				 * not marked revoked --- that would
  				 * risk missing a subsequent revoke
  				 * cancel. */
  				J_ASSERT_BH(bh2, buffer_revoked(bh2));
  			put_bh(bh2);
  		}
  	}
  #endif

  	/* We really ought not ever to revoke twice in a row without
  	   first having the revoke cancelled: it's illegal to free a
  	   block twice without allocating it in between! */
  	if (bh) {
  		if (!J_EXPECT_BH(bh, !buffer_revoked(bh),
  				 "inconsistent data on disk")) {
  			/* Drop only the reference we took ourselves. */
  			if (!bh_in)
  				brelse(bh);
  			return -EIO;
  		}
  		set_buffer_revoked(bh);
  		set_buffer_revokevalid(bh);
  		if (bh_in) {
  			BUFFER_TRACE(bh_in, "call jbd2_journal_forget");
  			jbd2_journal_forget(handle, bh_in);
  		} else {
  			BUFFER_TRACE(bh, "call brelse");
  			__brelse(bh);
  		}
  	}

  	jbd_debug(2, "insert revoke for block %llu, bh_in=%p\n",blocknr, bh_in);
  	err = insert_revoke_hash(journal, blocknr,
  				handle->h_transaction->t_tid);
  	BUFFER_TRACE(bh_in, "exit");
  	return err;
  }
  
  /*
   * Cancel an outstanding revoke.  For use only internally by the
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
407
   * journaling code (called from jbd2_journal_get_write_access).
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
408
409
410
411
412
413
414
415
416
417
418
   *
   * We trust buffer_revoked() on the buffer if the buffer is already
   * being journaled: if there is no revoke pending on the buffer, then we
   * don't do anything here.
   *
   * This would break if it were possible for a buffer to be revoked and
   * discarded, and then reallocated within the same transaction.  In such
   * a case we would have lost the revoked bit, but when we arrived here
   * the second time we would still have a pending revoke to cancel.  So,
   * do not trust the Revoked bit on buffers unless RevokeValid is also
   * set.
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
419
   */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
420
  /*
   * Returns 1 if a revoke record for the buffer's block was found and
   * removed from the current revoke table, 0 otherwise.
   */
  int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
  {
  	struct jbd2_revoke_record_s *record;
  	journal_t *journal = handle->h_transaction->t_journal;
  	int need_cancel;
  	int did_revoke = 0;	/* akpm: debug */
  	struct buffer_head *bh = jh2bh(jh);

  	jbd_debug(4, "journal_head %p, cancelling revoke\n", jh);

  	/* Is the existing Revoke bit valid?  If so, we trust it, and
  	 * only perform the full cancel if the revoke bit is set.  If
  	 * not, we can't trust the revoke bit, and we need to do the
  	 * full search for a revoke record. */
  	if (test_set_buffer_revokevalid(bh)) {
  		need_cancel = test_clear_buffer_revoked(bh);
  	} else {
  		need_cancel = 1;
  		clear_buffer_revoked(bh);
  	}

  	if (need_cancel) {
  		record = find_revoke_record(journal, bh->b_blocknr);
  		if (record) {
  			jbd_debug(4, "cancelled existing revoke on "
  				  "blocknr %llu\n", (unsigned long long)bh->b_blocknr);
  			spin_lock(&journal->j_revoke_lock);
  			list_del(&record->hash);
  			spin_unlock(&journal->j_revoke_lock);
  			kmem_cache_free(jbd2_revoke_record_cache, record);
  			did_revoke = 1;
  		}
  	}

  #ifdef JBD2_EXPENSIVE_CHECKING
  	/* There better not be one left behind by now! */
  	record = find_revoke_record(journal, bh->b_blocknr);
  	J_ASSERT_JH(jh, record == NULL);
  #endif

  	/* Finally, have we just cleared revoke on an unhashed
  	 * buffer_head?  If so, we'd better make sure we clear the
  	 * revoked status on any hashed alias too, otherwise the revoke
  	 * state machine will get very upset later on. */
  	if (need_cancel) {
  		struct buffer_head *bh2;
  		bh2 = __find_get_block(bh->b_bdev, bh->b_blocknr, bh->b_size);
  		if (bh2) {
  			if (bh2 != bh)
  				clear_buffer_revoked(bh2);
  			__brelse(bh2);
  		}
  	}
  	return did_revoke;
  }
1ba37268c   Yongqiang Yang   jbd2: clear revok...
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
  /*
   * jbd2_clear_buffer_revoked_flags clears the revoked flag of every buffer
   * that has a record in the revoke table, to reflect that there are no
   * revoked buffers in the next transaction which is going to be started.
   */
  void jbd2_clear_buffer_revoked_flags(journal_t *journal)
  {
  	struct jbd2_revoke_table_s *table = journal->j_revoke;
  	struct jbd2_revoke_record_s *rec;
  	struct buffer_head *bh;
  	int bucket;

  	/* Visit every record hashed in the journal's current table. */
  	for (bucket = 0; bucket < table->hash_size; bucket++) {
  		list_for_each_entry(rec, &table->hash_table[bucket], hash) {
  			bh = __find_get_block(journal->j_fs_dev, rec->blocknr,
  					      journal->j_blocksize);
  			if (!bh)
  				continue;

  			/* Clear the flag, then drop the lookup reference. */
  			clear_buffer_revoked(bh);
  			__brelse(bh);
  		}
  	}
  }
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
505
506
507
508
  /* jbd2_journal_switch_revoke_table selects j_revoke for the next
   * transaction: we do not want to suspend any processing until all
   * revokes are written -bzzz
   */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
509
  void jbd2_journal_switch_revoke_table(journal_t *journal)
  {
  	struct jbd2_revoke_table_s *next;
  	int bucket;

  	/* Flip j_revoke to whichever of the two tables is not current. */
  	next = (journal->j_revoke == journal->j_revoke_table[0]) ?
  		journal->j_revoke_table[1] : journal->j_revoke_table[0];
  	journal->j_revoke = next;

  	/* Reset every chain head of the newly selected table. */
  	for (bucket = 0; bucket < next->hash_size; bucket++)
  		INIT_LIST_HEAD(&next->hash_table[bucket]);
  }
  
  /*
   * Write revoke records to the journal for all entries in the current
   * revoke hash, deleting the entries as we go.
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
525
   */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
526
  void jbd2_journal_write_revoke_records(journal_t *journal,
67c457a8c   Theodore Ts'o   jbd2: use SWRITE_...
527
528
  				       transaction_t *transaction,
  				       int write_op)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
529
530
  {
  	struct journal_head *descriptor;
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
531
532
  	struct jbd2_revoke_record_s *record;
  	struct jbd2_revoke_table_s *revoke;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
  	struct list_head *hash_list;
  	int i, offset, count;
  
  	descriptor = NULL;
  	offset = 0;
  	count = 0;
  
  	/* select revoke table for committing transaction */
  	revoke = journal->j_revoke == journal->j_revoke_table[0] ?
  		journal->j_revoke_table[1] : journal->j_revoke_table[0];
  
  	for (i = 0; i < revoke->hash_size; i++) {
  		hash_list = &revoke->hash_table[i];
  
  		while (!list_empty(hash_list)) {
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
548
  			record = (struct jbd2_revoke_record_s *)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
549
550
551
  				hash_list->next;
  			write_one_revoke_record(journal, transaction,
  						&descriptor, &offset,
67c457a8c   Theodore Ts'o   jbd2: use SWRITE_...
552
  						record, write_op);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
553
554
  			count++;
  			list_del(&record->hash);
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
555
  			kmem_cache_free(jbd2_revoke_record_cache, record);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
556
557
558
  		}
  	}
  	if (descriptor)
67c457a8c   Theodore Ts'o   jbd2: use SWRITE_...
559
  		flush_descriptor(journal, descriptor, offset, write_op);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
560
561
562
563
564
565
566
567
568
569
570
571
572
  	jbd_debug(1, "Wrote %d revoke records
  ", count);
  }
  
  /*
   * Write out one revoke record.  We need to create a new descriptor
   * block if the old one is full or if we have not already created one.
   */
  
  /*
   * @descriptorp and @offsetp carry the descriptor block currently being
   * filled, and the byte offset of the next free slot in it, from one call
   * to the next.  *descriptorp is NULL while no unflushed descriptor is
   * outstanding.
   */
  static void write_one_revoke_record(journal_t *journal,
  				    transaction_t *transaction,
  				    struct journal_head **descriptorp,
  				    int *offsetp,
  				    struct jbd2_revoke_record_s *record,
  				    int write_op)
  {
  	struct journal_head *descriptor;
  	int offset;
  	journal_header_t *header;

  	/* If we are already aborting, this all becomes a noop.  We
  	   still need to go round the loop in
  	   jbd2_journal_write_revoke_records in order to free all of the
  	   revoke records: only the IO to the journal is omitted. */
  	if (is_journal_aborted(journal))
  		return;

  	descriptor = *descriptorp;
  	offset = *offsetp;

  	/* Make sure we have a descriptor with space left for the record */
  	if (descriptor) {
  		if (offset == journal->j_blocksize) {
  			flush_descriptor(journal, descriptor, offset, write_op);
  			descriptor = NULL;
  			/*
  			 * The block has been handed off.  Forget it now so
  			 * that the caller cannot flush it a second time if
  			 * getting a fresh descriptor fails below.
  			 */
  			*descriptorp = NULL;
  		}
  	}

  	if (!descriptor) {
  		descriptor = jbd2_journal_get_descriptor_buffer(journal);
  		if (!descriptor)
  			return;
  		header = (journal_header_t *) &jh2bh(descriptor)->b_data[0];
  		header->h_magic     = cpu_to_be32(JBD2_MAGIC_NUMBER);
  		header->h_blocktype = cpu_to_be32(JBD2_REVOKE_BLOCK);
  		header->h_sequence  = cpu_to_be32(transaction->t_tid);

  		/* Record it so that we can wait for IO completion later */
  		JBUFFER_TRACE(descriptor, "file as BJ_LogCtl");
  		jbd2_journal_file_buffer(descriptor, transaction, BJ_LogCtl);

  		/* Records start right after the revoke header. */
  		offset = sizeof(jbd2_journal_revoke_header_t);
  		*descriptorp = descriptor;
  	}

  	/* Block numbers are stored big-endian: 8 bytes with 64BIT, else 4. */
  	if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) {
  		* ((__be64 *)(&jh2bh(descriptor)->b_data[offset])) =
  			cpu_to_be64(record->blocknr);
  		offset += 8;

  	} else {
  		* ((__be32 *)(&jh2bh(descriptor)->b_data[offset])) =
  			cpu_to_be32(record->blocknr);
  		offset += 4;
  	}

  	*offsetp = offset;
  }
  
  /*
   * Flush a revoke descriptor out to the journal.  If we are aborting,
   * this is a noop; otherwise we are generating a buffer which needs to
   * be waited for during commit, so it has to go onto the appropriate
   * journal buffer list.
   */
  
  static void flush_descriptor(journal_t *journal,
  			     struct journal_head *descriptor,
  			     int offset, int write_op)
  {
  	struct buffer_head *bh = jh2bh(descriptor);
  	jbd2_journal_revoke_header_t *header;

  	/* Aborting: drop the buffer reference and submit nothing. */
  	if (is_journal_aborted(journal)) {
  		put_bh(bh);
  		return;
  	}

  	/* r_count is the number of bytes used in the block, big-endian. */
  	header = (jbd2_journal_revoke_header_t *) bh->b_data;
  	header->r_count = cpu_to_be32(offset);

  	set_buffer_jwrite(bh);
  	BUFFER_TRACE(bh, "write");
  	set_buffer_dirty(bh);
  	write_dirty_buffer(bh, write_op);
  }
  #endif
  
  /*
   * Revoke support for recovery.
   *
   * Recovery needs to be able to:
   *
   *  record all revoke records, including the tid of the latest instance
   *  of each revoke in the journal
   *
   *  check whether a given block in a given transaction should be replayed
   *  (ie. has not been revoked by a revoke record in that or a subsequent
   *  transaction)
   *
   *  empty the revoke table after recovery.
   */
  
  /*
   * First, setting revoke records.  We create a new revoke record for
   * every block ever revoked in the log as we scan it for recovery, and
   * we update the existing records if we find multiple revokes for a
   * single block.
   */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
675
  int jbd2_journal_set_revoke(journal_t *journal,
18eba7aae   Mingming Cao   [PATCH] jbd2: swi...
676
  		       unsigned long long blocknr,
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
677
678
  		       tid_t sequence)
  {
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
679
  	struct jbd2_revoke_record_s *record;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
  
  	record = find_revoke_record(journal, blocknr);
  	if (record) {
  		/* If we have multiple occurrences, only record the
  		 * latest sequence number in the hashed record */
  		if (tid_gt(sequence, record->sequence))
  			record->sequence = sequence;
  		return 0;
  	}
  	return insert_revoke_hash(journal, blocknr, sequence);
  }
  
  /*
   * Test revoke records.  For a given block referenced in the log, has
   * that block been revoked?  A revoke record with a given transaction
   * sequence number revokes all blocks in that transaction and earlier
   * ones, but later transactions still need to be replayed.
   */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
698
  int jbd2_journal_test_revoke(journal_t *journal,
18eba7aae   Mingming Cao   [PATCH] jbd2: swi...
699
  			unsigned long long blocknr,
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
700
701
  			tid_t sequence)
  {
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
702
  	struct jbd2_revoke_record_s *record;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
703
704
705
706
707
708
709
710
711
712
713
714
715
  
  	record = find_revoke_record(journal, blocknr);
  	if (!record)
  		return 0;
  	if (tid_gt(sequence, record->sequence))
  		return 0;
  	return 1;
  }
  
  /*
   * Finally, once recovery is over, we need to clear the revoke table so
   * that it can be reused by the running filesystem.
   */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
716
  void jbd2_journal_clear_revoke(journal_t *journal)
  {
  	struct jbd2_revoke_table_s *table = journal->j_revoke;
  	struct jbd2_revoke_record_s *rec;
  	struct jbd2_revoke_record_s *next;
  	int bucket;

  	/* Unlink and free every record in every chain of the table. */
  	for (bucket = 0; bucket < table->hash_size; bucket++) {
  		list_for_each_entry_safe(rec, next,
  					 &table->hash_table[bucket], hash) {
  			list_del(&rec->hash);
  			kmem_cache_free(jbd2_revoke_record_cache, rec);
  		}
  	}
  }