Blame view

fs/jbd2/revoke.c 20.9 KB
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
1
  /*
588626996   Uwe Kleine-König   fix file specific...
2
   * linux/fs/jbd2/revoke.c
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
   *
   * Written by Stephen C. Tweedie <sct@redhat.com>, 2000
   *
   * Copyright 2000 Red Hat corp --- All Rights Reserved
   *
   * This file is part of the Linux kernel and is made available under
   * the terms of the GNU General Public License, version 2, or at your
   * option, any later version, incorporated herein by reference.
   *
   * Journal revoke routines for the generic filesystem journaling code;
   * part of the ext2fs journaling system.
   *
   * Revoke is the mechanism used to prevent old log records for deleted
   * metadata from being replayed on top of newer data using the same
   * blocks.  The revoke mechanism is used in two separate places:
   *
   * + Commit: during commit we write the entire list of the current
   *   transaction's revoked blocks to the journal
   *
   * + Recovery: during recovery we record the transaction ID of all
   *   revoked blocks.  If there are multiple revoke records in the log
   *   for a single block, only the last one counts, and if there is a log
   *   entry for a block beyond the last revoke, then that log entry still
   *   gets replayed.
   *
   * We can get interactions between revokes and new log data within a
   * single transaction:
   *
   * Block is revoked and then journaled:
   *   The desired end result is the journaling of the new block, so we
   *   cancel the revoke before the transaction commits.
   *
   * Block is journaled and then revoked:
   *   The revoke must take precedence over the write of the block, so we
   *   need either to cancel the journal entry or to write the revoke
   *   later in the log than the log block.  In this case, we choose the
   *   latter: journaling a block cancels any revoke record for that block
   *   in the current transaction, so any revoke for that block in the
   *   transaction must have happened after the block was journaled and so
   *   the revoke must take precedence.
   *
   * Block is revoked and then written as data:
   *   The data write is allowed to succeed, but the revoke is _not_
   *   cancelled.  We still need to prevent old log records from
   *   overwriting the new data.  We don't even need to clear the revoke
   *   bit here.
   *
   * Revoke information on buffers is a tri-state value:
   *
   * RevokeValid clear:	no cached revoke status, need to look it up
   * RevokeValid set, Revoked clear:
   *			buffer has not been revoked, and cancel_revoke
   *			need do nothing.
   * RevokeValid set, Revoked set:
   *			buffer has been revoked.
86db97c87   Jan Kara   jbd2: Update lock...
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
   *
   * Locking rules:
   * We keep two hash tables of revoke records. One hashtable belongs to the
   * running transaction (is pointed to by journal->j_revoke), the other one
   * belongs to the committing transaction. Accesses to the second hash table
   * happen only from the kjournald and no other thread touches this table.  Also
   * journal_switch_revoke_table() which switches which hashtable belongs to the
   * running and which to the committing transaction is called only from
   * kjournald. Therefore we need no locks when accessing the hashtable belonging
   * to the committing transaction.
   *
   * All users operating on the hash table belonging to the running transaction
   * have a handle to the transaction. Therefore they are safe from kjournald
   * switching hash tables under them. For operations on the lists of entries in
   * the hash table j_revoke_lock is used.
   *
   * Finally, also replay code uses the hash tables but at this moment no one else
   * can touch them (filesystem isn't mounted yet) and hence no locking is
   * needed.
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
77
78
79
80
81
82
83
   */
  
  #ifndef __KERNEL__
  #include "jfs_user.h"
  #else
  #include <linux/time.h>
  #include <linux/fs.h>
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
84
  #include <linux/jbd2.h>
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
85
86
87
  #include <linux/errno.h>
  #include <linux/slab.h>
  #include <linux/list.h>
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
88
  #include <linux/init.h>
67c457a8c   Theodore Ts'o   jbd2: use SWRITE_...
89
  #include <linux/bio.h>
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
90
  #endif
f482394cc   vignesh babu   is_power_of_2(): jbd
91
  #include <linux/log2.h>
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
92

e18b890bb   Christoph Lameter   [PATCH] slab: rem...
93
94
  static struct kmem_cache *jbd2_revoke_record_cache;
  static struct kmem_cache *jbd2_revoke_table_cache;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
95
96
97
98
  
  /* Each revoke record represents one single revoked block.  During
     journal replay, this involves recording the transaction ID of the
     last transaction to revoke this block. */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
99
  struct jbd2_revoke_record_s
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
100
101
102
  {
  	struct list_head  hash;
  	tid_t		  sequence;	/* Used for recovery only */
18eba7aae   Mingming Cao   [PATCH] jbd2: swi...
103
  	unsigned long long	  blocknr;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
104
105
106
107
  };
  
  
  /* The revoke table is just a simple hash table of revoke records. */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
108
  struct jbd2_revoke_table_s {
  	/* Number of buckets.  Must be a power of two.  It is conceivable
  	 * that we might want a larger hash table for recovery. */
  	int			hash_size;
  	/* Base-2 logarithm of hash_size. */
  	int			hash_shift;
  	/* Array of hash_size bucket list heads. */
  	struct list_head	*hash_table;
  };
  
  
  #ifdef __KERNEL__
  /* Forward declarations of the commit-time helpers defined later in this file. */
  static void write_one_revoke_record(journal_t *journal,
  				    transaction_t *transaction,
  				    struct journal_head **descriptorp,
  				    int *offsetp,
  				    struct jbd2_revoke_record_s *record,
  				    int write_op);
  static void flush_descriptor(journal_t *journal,
  			     struct journal_head *descriptor,
  			     int offset, int write_op);
  #endif
  
  /* Utility functions to maintain the revoke table */
  
  /* Borrowed from buffer.c: this is a tried and tested block hash function */
18eba7aae   Mingming Cao   [PATCH] jbd2: swi...
128
  static inline int hash(journal_t *journal, unsigned long long block)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
129
  {
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
130
  	struct jbd2_revoke_table_s *table = journal->j_revoke;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
131
  	int hash_shift = table->hash_shift;
299717696   Mingming Cao   [PATCH] jbd2: sec...
132
  	int hash = (int)block ^ (int)((block >> 31) >> 1);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
133

299717696   Mingming Cao   [PATCH] jbd2: sec...
134
135
136
  	return ((hash << (hash_shift - 6)) ^
  		(hash >> 13) ^
  		(hash << (hash_shift - 12))) & (table->hash_size - 1);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
137
  }
18eba7aae   Mingming Cao   [PATCH] jbd2: swi...
138
  static int insert_revoke_hash(journal_t *journal, unsigned long long blocknr,
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
139
140
141
  			      tid_t seq)
  {
  	struct list_head *hash_list;
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
142
  	struct jbd2_revoke_record_s *record;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
143
144
  
  repeat:
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
145
  	record = kmem_cache_alloc(jbd2_revoke_record_cache, GFP_NOFS);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
146
147
148
149
150
151
152
153
154
155
156
157
158
159
  	if (!record)
  		goto oom;
  
  	record->sequence = seq;
  	record->blocknr = blocknr;
  	hash_list = &journal->j_revoke->hash_table[hash(journal, blocknr)];
  	spin_lock(&journal->j_revoke_lock);
  	list_add(&record->hash, hash_list);
  	spin_unlock(&journal->j_revoke_lock);
  	return 0;
  
  oom:
  	if (!journal_oom_retry)
  		return -ENOMEM;
329d291f5   Harvey Harrison   jdb2: replace rem...
160
161
  	jbd_debug(1, "ENOMEM in %s, retrying
  ", __func__);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
162
163
164
165
166
  	yield();
  	goto repeat;
  }
  
  /* Find a revoke record in the journal's hash table. */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
167
  static struct jbd2_revoke_record_s *find_revoke_record(journal_t *journal,
18eba7aae   Mingming Cao   [PATCH] jbd2: swi...
168
  						      unsigned long long blocknr)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
169
170
  {
  	struct list_head *hash_list;
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
171
  	struct jbd2_revoke_record_s *record;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
172
173
174
175
  
  	hash_list = &journal->j_revoke->hash_table[hash(journal, blocknr)];
  
  	spin_lock(&journal->j_revoke_lock);
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
176
  	record = (struct jbd2_revoke_record_s *) hash_list->next;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
177
178
179
180
181
  	while (&(record->hash) != hash_list) {
  		if (record->blocknr == blocknr) {
  			spin_unlock(&journal->j_revoke_lock);
  			return record;
  		}
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
182
  		record = (struct jbd2_revoke_record_s *) record->hash.next;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
183
184
185
186
  	}
  	spin_unlock(&journal->j_revoke_lock);
  	return NULL;
  }
9fa27c85d   Duane Griffin   jbd2: tidy up rev...
187
188
189
190
191
192
193
194
195
196
197
  /*
   * Destroy whichever of the two revoke slab caches currently exist and
   * reset their pointers to NULL.
   */
  void jbd2_journal_destroy_revoke_caches(void)
  {
  	struct kmem_cache **slots[2] = {
  		&jbd2_revoke_record_cache,
  		&jbd2_revoke_table_cache,
  	};
  	int i;

  	for (i = 0; i < 2; i++) {
  		if (!*slots[i])
  			continue;
  		kmem_cache_destroy(*slots[i]);
  		*slots[i] = NULL;
  	}
  }
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
198
  /*
   * Create the slab caches for revoke records and revoke tables.  Both cache
   * pointers must be NULL on entry.  Returns 0 on success, or -ENOMEM if
   * either cache cannot be created; in that case any cache that was created
   * is destroyed again.
   */
  int __init jbd2_journal_init_revoke_caches(void)
  {
  	J_ASSERT(!jbd2_revoke_record_cache);
  	J_ASSERT(!jbd2_revoke_table_cache);

  	jbd2_revoke_record_cache =
  		kmem_cache_create("jbd2_revoke_record",
  				  sizeof(struct jbd2_revoke_record_s), 0,
  				  SLAB_HWCACHE_ALIGN | SLAB_TEMPORARY, NULL);
  	if (!jbd2_revoke_record_cache)
  		return -ENOMEM;

  	jbd2_revoke_table_cache =
  		kmem_cache_create("jbd2_revoke_table",
  				  sizeof(struct jbd2_revoke_table_s), 0,
  				  SLAB_TEMPORARY, NULL);
  	if (!jbd2_revoke_table_cache) {
  		jbd2_journal_destroy_revoke_caches();
  		return -ENOMEM;
  	}

  	return 0;
  }
83c49523c   Duane Griffin   jbd2: eliminate d...
221
  static struct jbd2_revoke_table_s *jbd2_journal_init_revoke_table(int hash_size)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
222
  {
83c49523c   Duane Griffin   jbd2: eliminate d...
223
224
225
  	int shift = 0;
  	int tmp = hash_size;
  	struct jbd2_revoke_table_s *table;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
226

83c49523c   Duane Griffin   jbd2: eliminate d...
227
228
229
  	table = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL);
  	if (!table)
  		goto out;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
230

470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
231
232
  	while((tmp >>= 1UL) != 0UL)
  		shift++;
83c49523c   Duane Griffin   jbd2: eliminate d...
233
234
235
  	table->hash_size = hash_size;
  	table->hash_shift = shift;
  	table->hash_table =
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
236
  		kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
83c49523c   Duane Griffin   jbd2: eliminate d...
237
238
239
240
  	if (!table->hash_table) {
  		kmem_cache_free(jbd2_revoke_table_cache, table);
  		table = NULL;
  		goto out;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
241
242
243
  	}
  
  	for (tmp = 0; tmp < hash_size; tmp++)
83c49523c   Duane Griffin   jbd2: eliminate d...
244
  		INIT_LIST_HEAD(&table->hash_table[tmp]);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
245

83c49523c   Duane Griffin   jbd2: eliminate d...
246
247
248
249
250
251
252
253
254
255
256
257
  out:
  	return table;
  }
  
  /*
   * Free a revoke table and its bucket array.  Every bucket is asserted to
   * be empty first.
   */
  static void jbd2_journal_destroy_revoke_table(struct jbd2_revoke_table_s *table)
  {
  	int bucket = 0;

  	while (bucket < table->hash_size) {
  		J_ASSERT(list_empty(&table->hash_table[bucket]));
  		bucket++;
  	}

  	kfree(table->hash_table);
  	kmem_cache_free(jbd2_revoke_table_cache, table);
  }
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
262

83c49523c   Duane Griffin   jbd2: eliminate d...
263
264
265
266
  /*
   * Initialise the revoke tables for a given journal to a given size.
   *
   * Allocates both tables with @hash_size buckets each (@hash_size must be
   * a power of two), makes j_revoke_table[1] the current table and
   * initialises j_revoke_lock.
   *
   * Returns 0 on success or -ENOMEM if a table cannot be allocated.  On
   * failure no table is left allocated and j_revoke_table[0] is reset to
   * NULL, so a later jbd2_journal_destroy_revoke() does not free it again.
   */
  int jbd2_journal_init_revoke(journal_t *journal, int hash_size)
  {
  	J_ASSERT(journal->j_revoke_table[0] == NULL);
  	J_ASSERT(is_power_of_2(hash_size));

  	journal->j_revoke_table[0] = jbd2_journal_init_revoke_table(hash_size);
  	if (!journal->j_revoke_table[0])
  		goto fail0;

  	journal->j_revoke_table[1] = jbd2_journal_init_revoke_table(hash_size);
  	if (!journal->j_revoke_table[1])
  		goto fail1;

  	journal->j_revoke = journal->j_revoke_table[1];

  	spin_lock_init(&journal->j_revoke_lock);

  	return 0;

  fail1:
  	jbd2_journal_destroy_revoke_table(journal->j_revoke_table[0]);
  	/* Do not leave a dangling pointer to the table just freed. */
  	journal->j_revoke_table[0] = NULL;
  fail0:
  	return -ENOMEM;
  }
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
288

83c49523c   Duane Griffin   jbd2: eliminate d...
289
  /* Destroy a journal's revoke table.  The table must already be empty! */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
290
  void jbd2_journal_destroy_revoke(journal_t *journal)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
291
  {
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
292
  	journal->j_revoke = NULL;
83c49523c   Duane Griffin   jbd2: eliminate d...
293
294
295
296
  	if (journal->j_revoke_table[0])
  		jbd2_journal_destroy_revoke_table(journal->j_revoke_table[0]);
  	if (journal->j_revoke_table[1])
  		jbd2_journal_destroy_revoke_table(journal->j_revoke_table[1]);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
297
298
299
300
301
302
  }
  
  
  #ifdef __KERNEL__
  
  /*
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
303
   * jbd2_journal_revoke: revoke a given buffer_head from the journal.  This
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
304
305
306
307
308
309
310
311
312
313
314
   * prevents the block from being replayed during recovery if we take a
   * crash after this current transaction commits.  Any subsequent
   * metadata writes of the buffer in this transaction cancel the
   * revoke.
   *
   * Note that this call may block --- it is up to the caller to make
   * sure that there are no further calls to journal_write_metadata
   * before the revoke is complete.  In ext3, this implies calling the
   * revoke before clearing the block bitmap when we are deleting
   * metadata.
   *
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
315
   * Revoke performs a jbd2_journal_forget on any buffer_head passed in as a
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
316
317
318
319
320
321
   * parameter, but does _not_ forget the buffer_head if the bh was only
   * found implicitly.
   *
   * bh_in may not be a journalled buffer - it may have come off
   * the hash tables without an attached journal_head.
   *
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
322
   * If bh_in is non-zero, jbd2_journal_revoke() will decrement its b_count
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
323
324
   * by one.
   */
18eba7aae   Mingming Cao   [PATCH] jbd2: swi...
325
  int jbd2_journal_revoke(handle_t *handle, unsigned long long blocknr,
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
326
327
328
329
330
331
332
333
334
335
336
337
  		   struct buffer_head *bh_in)
  {
  	struct buffer_head *bh = NULL;
  	journal_t *journal;
  	struct block_device *bdev;
  	int err;
  
  	might_sleep();
  	if (bh_in)
  		BUFFER_TRACE(bh_in, "enter");
  
  	journal = handle->h_transaction->t_journal;
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
338
  	if (!jbd2_journal_set_features(journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)){
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
339
340
341
342
343
344
345
346
347
348
349
350
  		J_ASSERT (!"Cannot set revoke feature!");
  		return -EINVAL;
  	}
  
  	bdev = journal->j_fs_dev;
  	bh = bh_in;
  
  	if (!bh) {
  		bh = __find_get_block(bdev, blocknr, journal->j_blocksize);
  		if (bh)
  			BUFFER_TRACE(bh, "found on hash");
  	}
cd02ff0b1   Mingming Cao   jbd2: JBD_XXX to ...
351
  #ifdef JBD2_EXPENSIVE_CHECKING
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
  	else {
  		struct buffer_head *bh2;
  
  		/* If there is a different buffer_head lying around in
  		 * memory anywhere... */
  		bh2 = __find_get_block(bdev, blocknr, journal->j_blocksize);
  		if (bh2) {
  			/* ... and it has RevokeValid status... */
  			if (bh2 != bh && buffer_revokevalid(bh2))
  				/* ...then it better be revoked too,
  				 * since it's illegal to create a revoke
  				 * record against a buffer_head which is
  				 * not marked revoked --- that would
  				 * risk missing a subsequent revoke
  				 * cancel. */
  				J_ASSERT_BH(bh2, buffer_revoked(bh2));
  			put_bh(bh2);
  		}
  	}
  #endif
  
  	/* We really ought not ever to revoke twice in a row without
             first having the revoke cancelled: it's illegal to free a
             block twice without allocating it in between! */
  	if (bh) {
  		if (!J_EXPECT_BH(bh, !buffer_revoked(bh),
  				 "inconsistent data on disk")) {
  			if (!bh_in)
  				brelse(bh);
  			return -EIO;
  		}
  		set_buffer_revoked(bh);
  		set_buffer_revokevalid(bh);
  		if (bh_in) {
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
386
387
  			BUFFER_TRACE(bh_in, "call jbd2_journal_forget");
  			jbd2_journal_forget(handle, bh_in);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
388
389
390
391
392
  		} else {
  			BUFFER_TRACE(bh, "call brelse");
  			__brelse(bh);
  		}
  	}
299717696   Mingming Cao   [PATCH] jbd2: sec...
393
394
  	jbd_debug(2, "insert revoke for block %llu, bh_in=%p
  ",blocknr, bh_in);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
395
396
397
398
399
400
401
402
  	err = insert_revoke_hash(journal, blocknr,
  				handle->h_transaction->t_tid);
  	BUFFER_TRACE(bh_in, "exit");
  	return err;
  }
  
  /*
   * Cancel an outstanding revoke.  For use only internally by the
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
403
   * journaling code (called from jbd2_journal_get_write_access).
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
404
405
406
407
408
409
410
411
412
413
414
   *
   * We trust buffer_revoked() on the buffer if the buffer is already
   * being journaled: if there is no revoke pending on the buffer, then we
   * don't do anything here.
   *
   * This would break if it were possible for a buffer to be revoked and
   * discarded, and then reallocated within the same transaction.  In such
   * a case we would have lost the revoked bit, but when we arrived here
   * the second time we would still have a pending revoke to cancel.  So,
   * do not trust the Revoked bit on buffers unless RevokeValid is also
   * set.
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
415
   */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
416
  int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
417
  {
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
418
  	struct jbd2_revoke_record_s *record;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
  	journal_t *journal = handle->h_transaction->t_journal;
  	int need_cancel;
  	int did_revoke = 0;	/* akpm: debug */
  	struct buffer_head *bh = jh2bh(jh);
  
  	jbd_debug(4, "journal_head %p, cancelling revoke
  ", jh);
  
  	/* Is the existing Revoke bit valid?  If so, we trust it, and
  	 * only perform the full cancel if the revoke bit is set.  If
  	 * not, we can't trust the revoke bit, and we need to do the
  	 * full search for a revoke record. */
  	if (test_set_buffer_revokevalid(bh)) {
  		need_cancel = test_clear_buffer_revoked(bh);
  	} else {
  		need_cancel = 1;
  		clear_buffer_revoked(bh);
  	}
  
  	if (need_cancel) {
  		record = find_revoke_record(journal, bh->b_blocknr);
  		if (record) {
  			jbd_debug(4, "cancelled existing revoke on "
  				  "blocknr %llu
  ", (unsigned long long)bh->b_blocknr);
  			spin_lock(&journal->j_revoke_lock);
  			list_del(&record->hash);
  			spin_unlock(&journal->j_revoke_lock);
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
447
  			kmem_cache_free(jbd2_revoke_record_cache, record);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
448
449
450
  			did_revoke = 1;
  		}
  	}
cd02ff0b1   Mingming Cao   jbd2: JBD_XXX to ...
451
  #ifdef JBD2_EXPENSIVE_CHECKING
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
  	/* There better not be one left behind by now! */
  	record = find_revoke_record(journal, bh->b_blocknr);
  	J_ASSERT_JH(jh, record == NULL);
  #endif
  
  	/* Finally, have we just cleared revoke on an unhashed
  	 * buffer_head?  If so, we'd better make sure we clear the
  	 * revoked status on any hashed alias too, otherwise the revoke
  	 * state machine will get very upset later on. */
  	if (need_cancel) {
  		struct buffer_head *bh2;
  		bh2 = __find_get_block(bh->b_bdev, bh->b_blocknr, bh->b_size);
  		if (bh2) {
  			if (bh2 != bh)
  				clear_buffer_revoked(bh2);
  			__brelse(bh2);
  		}
  	}
  	return did_revoke;
  }
  
  /* journal_switch_revoke table select j_revoke for next transaction
   * we do not want to suspend any processing until all revokes are
   * written -bzzz
   */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
477
  void jbd2_journal_switch_revoke_table(journal_t *journal)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
  {
  	int i;
  
  	if (journal->j_revoke == journal->j_revoke_table[0])
  		journal->j_revoke = journal->j_revoke_table[1];
  	else
  		journal->j_revoke = journal->j_revoke_table[0];
  
  	for (i = 0; i < journal->j_revoke->hash_size; i++)
  		INIT_LIST_HEAD(&journal->j_revoke->hash_table[i]);
  }
  
  /*
   * Write revoke records to the journal for all entries in the current
   * revoke hash, deleting the entries as we go.
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
493
   */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
494
  void jbd2_journal_write_revoke_records(journal_t *journal,
67c457a8c   Theodore Ts'o   jbd2: use SWRITE_...
495
496
  				       transaction_t *transaction,
  				       int write_op)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
497
498
  {
  	struct journal_head *descriptor;
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
499
500
  	struct jbd2_revoke_record_s *record;
  	struct jbd2_revoke_table_s *revoke;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
  	struct list_head *hash_list;
  	int i, offset, count;
  
  	descriptor = NULL;
  	offset = 0;
  	count = 0;
  
  	/* select revoke table for committing transaction */
  	revoke = journal->j_revoke == journal->j_revoke_table[0] ?
  		journal->j_revoke_table[1] : journal->j_revoke_table[0];
  
  	for (i = 0; i < revoke->hash_size; i++) {
  		hash_list = &revoke->hash_table[i];
  
  		while (!list_empty(hash_list)) {
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
516
  			record = (struct jbd2_revoke_record_s *)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
517
518
519
  				hash_list->next;
  			write_one_revoke_record(journal, transaction,
  						&descriptor, &offset,
67c457a8c   Theodore Ts'o   jbd2: use SWRITE_...
520
  						record, write_op);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
521
522
  			count++;
  			list_del(&record->hash);
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
523
  			kmem_cache_free(jbd2_revoke_record_cache, record);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
524
525
526
  		}
  	}
  	if (descriptor)
67c457a8c   Theodore Ts'o   jbd2: use SWRITE_...
527
  		flush_descriptor(journal, descriptor, offset, write_op);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
528
529
530
531
532
533
534
535
536
537
538
539
540
  	jbd_debug(1, "Wrote %d revoke records
  ", count);
  }
  
  /*
   * Append one revoke record to the current revoke descriptor block.  A new
   * descriptor is obtained when none exists yet or when the current one is
   * full; a full one is flushed first.  *descriptorp and *offsetp carry the
   * descriptor and the write position from one call to the next.
   */

  static void write_one_revoke_record(journal_t *journal,
  				    transaction_t *transaction,
  				    struct journal_head **descriptorp,
  				    int *offsetp,
  				    struct jbd2_revoke_record_s *record,
  				    int write_op)
  {
  	struct journal_head *desc;
  	journal_header_t *hdr;
  	void *slot;
  	int pos;

  	/* If we are already aborting, this all becomes a noop.  The caller,
  	   jbd2_journal_write_revoke_records, still goes round its loop in
  	   order to free all of the revoke records: only the IO to the
  	   journal is omitted. */
  	if (is_journal_aborted(journal))
  		return;

  	desc = *descriptorp;
  	pos = *offsetp;

  	/* A descriptor with no space left is written out and replaced. */
  	if (desc && pos == journal->j_blocksize) {
  		flush_descriptor(journal, desc, pos, write_op);
  		desc = NULL;
  	}

  	if (!desc) {
  		desc = jbd2_journal_get_descriptor_buffer(journal);
  		if (!desc)
  			return;

  		hdr = (journal_header_t *) &jh2bh(desc)->b_data[0];
  		hdr->h_magic     = cpu_to_be32(JBD2_MAGIC_NUMBER);
  		hdr->h_blocktype = cpu_to_be32(JBD2_REVOKE_BLOCK);
  		hdr->h_sequence  = cpu_to_be32(transaction->t_tid);

  		/* Record it so that we can wait for IO completion later */
  		JBUFFER_TRACE(desc, "file as BJ_LogCtl");
  		jbd2_journal_file_buffer(desc, transaction, BJ_LogCtl);

  		/* Records start right after the revoke header. */
  		pos = sizeof(jbd2_journal_revoke_header_t);
  		*descriptorp = desc;
  	}

  	/* Block numbers take 8 bytes with the 64BIT feature, 4 without. */
  	slot = &jh2bh(desc)->b_data[pos];
  	if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) {
  		*(__be64 *)slot = cpu_to_be64(record->blocknr);
  		pos += 8;
  	} else {
  		*(__be32 *)slot = cpu_to_be32(record->blocknr);
  		pos += 4;
  	}

  	*offsetp = pos;
  }
  
  /*
   * Flush a revoke descriptor out to the journal.  If the journal is
   * aborted, only the buffer reference is dropped; otherwise @offset is
   * stored in the header's r_count field and the buffer is marked jwrite
   * and dirty and submitted with @write_op.  It has to be waited for
   * during commit.
   */

  static void flush_descriptor(journal_t *journal,
  			     struct journal_head *descriptor,
  			     int offset, int write_op)
  {
  	struct buffer_head *bh = jh2bh(descriptor);
  	jbd2_journal_revoke_header_t *rhdr;

  	if (is_journal_aborted(journal)) {
  		put_bh(bh);
  		return;
  	}

  	rhdr = (jbd2_journal_revoke_header_t *) bh->b_data;
  	rhdr->r_count = cpu_to_be32(offset);

  	set_buffer_jwrite(bh);
  	BUFFER_TRACE(bh, "write");
  	set_buffer_dirty(bh);
  	write_dirty_buffer(bh, write_op);
  }
  #endif
  
  /*
   * Revoke support for recovery.
   *
   * Recovery needs to be able to:
   *
   *  record all revoke records, including the tid of the latest instance
   *  of each revoke in the journal
   *
   *  check whether a given block in a given transaction should be replayed
   *  (ie. has not been revoked by a revoke record in that or a subsequent
   *  transaction)
   *
   *  empty the revoke table after recovery.
   */
  
  /*
   * First, setting revoke records.  We create a new revoke record for
   * every block ever revoked in the log as we scan it for recovery, and
   * we update the existing records if we find multiple revokes for a
   * single block.
   */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
643
  int jbd2_journal_set_revoke(journal_t *journal,
18eba7aae   Mingming Cao   [PATCH] jbd2: swi...
644
  		       unsigned long long blocknr,
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
645
646
  		       tid_t sequence)
  {
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
647
  	struct jbd2_revoke_record_s *record;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
  
  	record = find_revoke_record(journal, blocknr);
  	if (record) {
  		/* If we have multiple occurrences, only record the
  		 * latest sequence number in the hashed record */
  		if (tid_gt(sequence, record->sequence))
  			record->sequence = sequence;
  		return 0;
  	}
  	return insert_revoke_hash(journal, blocknr, sequence);
  }
  
  /*
   * Test revoke records.  For a given block referenced in the log, has
   * that block been revoked?  A revoke record with a given transaction
   * sequence number revokes all blocks in that transaction and earlier
   * ones, but later transactions still need replayed.
   */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
666
  int jbd2_journal_test_revoke(journal_t *journal,
18eba7aae   Mingming Cao   [PATCH] jbd2: swi...
667
  			unsigned long long blocknr,
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
668
669
  			tid_t sequence)
  {
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
670
  	struct jbd2_revoke_record_s *record;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
671
672
673
674
675
676
677
678
679
680
681
682
683
  
  	record = find_revoke_record(journal, blocknr);
  	if (!record)
  		return 0;
  	if (tid_gt(sequence, record->sequence))
  		return 0;
  	return 1;
  }
  
  /*
   * Finally, once recovery is over, we need to clear the revoke table so
   * that it can be reused by the running filesystem.
   */
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
684
  void jbd2_journal_clear_revoke(journal_t *journal)
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
685
686
687
  {
  	int i;
  	struct list_head *hash_list;
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
688
689
  	struct jbd2_revoke_record_s *record;
  	struct jbd2_revoke_table_s *revoke;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
690
691
692
693
694
695
  
  	revoke = journal->j_revoke;
  
  	for (i = 0; i < revoke->hash_size; i++) {
  		hash_list = &revoke->hash_table[i];
  		while (!list_empty(hash_list)) {
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
696
  			record = (struct jbd2_revoke_record_s*) hash_list->next;
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
697
  			list_del(&record->hash);
f7f4bccb7   Mingming Cao   [PATCH] jbd2: ren...
698
  			kmem_cache_free(jbd2_revoke_record_cache, record);
470decc61   Dave Kleikamp   [PATCH] jbd2: ini...
699
700
701
  		}
  	}
  }