Blame view

fs/ocfs2/journal.c 57.2 KB
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
  /* -*- mode: c; c-basic-offset: 8; -*-
   * vim: noexpandtab sw=8 ts=8 sts=0:
   *
   * journal.c
   *
   * Defines functions of journalling api
   *
   * Copyright (C) 2003, 2004 Oracle.  All rights reserved.
   *
   * This program is free software; you can redistribute it and/or
   * modify it under the terms of the GNU General Public
   * License as published by the Free Software Foundation; either
   * version 2 of the License, or (at your option) any later version.
   *
   * This program is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   * General Public License for more details.
   *
   * You should have received a copy of the GNU General Public
   * License along with this program; if not, write to the
   * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   * Boston, MA 02111-1307, USA.
   */
  
  #include <linux/fs.h>
  #include <linux/types.h>
  #include <linux/slab.h>
  #include <linux/highmem.h>
  #include <linux/kthread.h>
83273932f   Srinivas Eeda   ocfs2: timer to q...
31
32
  #include <linux/time.h>
  #include <linux/random.h>
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
33

ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
34
35
36
37
38
  #include <cluster/masklog.h>
  
  #include "ocfs2.h"
  
  #include "alloc.h"
50655ae9e   Joel Becker   ocfs2: Add journa...
39
  #include "blockcheck.h"
316f4b9f9   Mark Fasheh   ocfs2: Move direc...
40
  #include "dir.h"
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
41
42
43
44
45
46
  #include "dlmglue.h"
  #include "extent_map.h"
  #include "heartbeat.h"
  #include "inode.h"
  #include "journal.h"
  #include "localalloc.h"
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
47
48
  #include "slot_map.h"
  #include "super.h"
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
49
  #include "sysfile.h"
0cf2f7632   Joel Becker   ocfs2: Pass struc...
50
  #include "uptodate.h"
2205363dc   Jan Kara   ocfs2: Implement ...
51
  #include "quota.h"
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
52
53
  
  #include "buffer_head_io.h"
b41079504   Tao Ma   ocfs2: Remove mas...
54
  #include "ocfs2_trace.h"
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
55

34af946a2   Ingo Molnar   [PATCH] spin/rwlo...
56
  DEFINE_SPINLOCK(trans_inc_lock);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
57

83273932f   Srinivas Eeda   ocfs2: timer to q...
58
  #define ORPHAN_SCAN_SCHEDULE_TIMEOUT 300000
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
59
60
  static int ocfs2_force_read_journal(struct inode *inode);
  static int ocfs2_recover_node(struct ocfs2_super *osb,
2205363dc   Jan Kara   ocfs2: Implement ...
61
  			      int node_num, int slot_num);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
62
63
  static int __ocfs2_recovery_thread(void *arg);
  static int ocfs2_commit_cache(struct ocfs2_super *osb);
19ece546a   Jan Kara   ocfs2: Enable quo...
64
  static int __ocfs2_wait_on_mount(struct ocfs2_super *osb, int quota);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
65
  static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
66
  				      int dirty, int replayed);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
67
68
69
70
71
  static int ocfs2_trylock_journal(struct ocfs2_super *osb,
  				 int slot_num);
  static int ocfs2_recover_orphans(struct ocfs2_super *osb,
  				 int slot);
  static int ocfs2_commit_thread(void *arg);
9140db04e   Srinivas Eeda   ocfs2: recover or...
72
73
74
75
76
  static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
  					    int slot_num,
  					    struct ocfs2_dinode *la_dinode,
  					    struct ocfs2_dinode *tl_dinode,
  					    struct ocfs2_quota_recovery *qrec);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
77

19ece546a   Jan Kara   ocfs2: Enable quo...
78
79
80
81
82
83
84
85
86
  /* Thin wrapper: calls __ocfs2_wait_on_mount() with its quota argument set to 0. */
  static inline int ocfs2_wait_on_mount(struct ocfs2_super *osb)
  {
  	const int quota = 0;
  
  	return __ocfs2_wait_on_mount(osb, quota);
  }
  
  /* Thin wrapper: calls __ocfs2_wait_on_mount() with its quota argument set to 1. */
  static inline int ocfs2_wait_on_quotas(struct ocfs2_super *osb)
  {
  	const int quota = 1;
  
  	return __ocfs2_wait_on_mount(osb, quota);
  }
9140db04e   Srinivas Eeda   ocfs2: recover or...
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
  /*
   * This replay_map is to track online/offline slots, so we could recover
   * offline slots during recovery and mount
   */
  
  /* State of a replay map; see the rm_state field of struct ocfs2_replay_map. */
  enum ocfs2_replay_state {
  	/* Replay is not needed, so ignore this map */
  	REPLAY_UNNEEDED = 0,
  	/* Replay slots marked in rm_replay_slots */
  	REPLAY_NEEDED = 1,
  	/* Replay was already queued */
  	REPLAY_DONE = 2
  };
  
  /*
   * Per-super replay bookkeeping. Allocated in one piece with room for
   * rm_slots trailing bytes (see ocfs2_compute_replay_slots()).
   */
  struct ocfs2_replay_map {
  	/* Number of entries in rm_replay_slots; copied from osb->max_slots. */
  	unsigned int rm_slots;
  	/* Whether the marked slots still need to be queued for replay. */
  	enum ocfs2_replay_state rm_state;
  	/*
  	 * One byte per slot, non-zero when the slot must be replayed.
  	 * Declared as a C99 flexible array member instead of the GNU
  	 * zero-length array; sizeof(struct ocfs2_replay_map) is unchanged.
  	 */
  	unsigned char rm_replay_slots[];
  };
  
  void ocfs2_replay_map_set_state(struct ocfs2_super *osb, int state)
  {
  	if (!osb->replay_map)
  		return;
  
  	/* If we've already queued the replay, we don't have any more to do */
  	if (osb->replay_map->rm_state == REPLAY_DONE)
  		return;
  
  	osb->replay_map->rm_state = state;
  }
  
  int ocfs2_compute_replay_slots(struct ocfs2_super *osb)
  {
  	struct ocfs2_replay_map *replay_map;
  	int i, node_num;
  
  	/* If replay map is already set, we don't do it again */
  	if (osb->replay_map)
  		return 0;
  
  	replay_map = kzalloc(sizeof(struct ocfs2_replay_map) +
  			     (osb->max_slots * sizeof(char)), GFP_KERNEL);
  
  	if (!replay_map) {
  		mlog_errno(-ENOMEM);
  		return -ENOMEM;
  	}
  
  	spin_lock(&osb->osb_lock);
  
  	replay_map->rm_slots = osb->max_slots;
  	replay_map->rm_state = REPLAY_UNNEEDED;
  
  	/* set rm_replay_slots for offline slot(s) */
  	for (i = 0; i < replay_map->rm_slots; i++) {
  		if (ocfs2_slot_to_node_num_locked(osb, i, &node_num) == -ENOENT)
  			replay_map->rm_replay_slots[i] = 1;
  	}
  
  	osb->replay_map = replay_map;
  	spin_unlock(&osb->osb_lock);
  	return 0;
  }
  
  /*
   * Queue recovery completion for every slot flagged in the replay map,
   * then mark the map REPLAY_DONE.
   *
   * Does nothing unless a map exists and its state is REPLAY_NEEDED.
   */
  void ocfs2_queue_replay_slots(struct ocfs2_super *osb)
  {
  	struct ocfs2_replay_map *map = osb->replay_map;
  	int slot;
  
  	if (!map || map->rm_state != REPLAY_NEEDED)
  		return;
  
  	for (slot = 0; slot < map->rm_slots; slot++) {
  		if (!map->rm_replay_slots[slot])
  			continue;
  
  		ocfs2_queue_recovery_completion(osb->journal, slot, NULL,
  						NULL, NULL);
  	}
  
  	map->rm_state = REPLAY_DONE;
  }
  
  /* Free osb->replay_map, if one exists, and reset the pointer to NULL. */
  void ocfs2_free_replay_slots(struct ocfs2_super *osb)
  {
  	if (osb->replay_map) {
  		kfree(osb->replay_map);
  		osb->replay_map = NULL;
  	}
  }
553abd046   Joel Becker   ocfs2: Change the...
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
  /*
   * Initialise the recovery state of 'osb': lock, wait queue, flags and
   * a zeroed recovery map with room for osb->max_slots entries.
   *
   * Returns 0 on success or -ENOMEM if the map cannot be allocated.
   */
  int ocfs2_recovery_init(struct ocfs2_super *osb)
  {
  	struct ocfs2_recovery_map *map;
  
  	mutex_init(&osb->recovery_lock);
  	osb->disable_recovery = 0;
  	osb->recovery_thread_task = NULL;
  	init_waitqueue_head(&osb->recovery_event);
  
  	/* One allocation: the map header followed by the entry array. */
  	map = kzalloc(sizeof(struct ocfs2_recovery_map) +
  		      osb->max_slots * sizeof(unsigned int),
  		      GFP_KERNEL);
  	if (map == NULL) {
  		mlog_errno(-ENOMEM);
  		return -ENOMEM;
  	}
  
  	/* rm_entries points just past the header, inside the same buffer. */
  	map->rm_entries = (unsigned int *)(map + 1);
  	osb->recovery_map = map;
  
  	return 0;
  }
  
  /*
   * The sem lock cannot be taken from inside wait_event, so a memory
   * barrier is used instead to make sure the NULL task is seen before
   * being woken up.
   *
   * Returns non-zero while osb->recovery_thread_task is set.
   */
  static int ocfs2_recovery_thread_running(struct ocfs2_super *osb)
  {
  	int running;
  
  	mb();
  	running = (osb->recovery_thread_task != NULL);
  
  	return running;
  }
  
  /*
   * Tear down recovery for 'osb': block new recovery threads, wait for a
   * running one, flush the completion workqueue and free the recovery map.
   */
  void ocfs2_recovery_exit(struct ocfs2_super *osb)
  {
  	/* Forbid new recovery threads, then wait for any running one to
  	 * exit. Do this before setting the vol_state. */
  	mutex_lock(&osb->recovery_lock);
  	osb->disable_recovery = 1;
  	mutex_unlock(&osb->recovery_lock);
  	wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb));
  
  	/* No more recovery threads can be launched from here on, so
  	 * wait for any recovery completion work to complete. */
  	flush_workqueue(ocfs2_wq);
  
  	/*
  	 * Now that recovery is shut down, and the osb is about to be
  	 * freed, the osb_lock is not taken here.
  	 */
  	/* XXX: Should we bug if there are dirty entries? */
  	kfree(osb->recovery_map);
  }
  
  /*
   * Return 1 if 'node_num' is present in the recovery map, 0 otherwise.
   * The caller must hold osb->osb_lock.
   */
  static int __ocfs2_recovery_map_test(struct ocfs2_super *osb,
  				     unsigned int node_num)
  {
  	struct ocfs2_recovery_map *map = osb->recovery_map;
  	int idx = 0;
  
  	assert_spin_locked(&osb->osb_lock);
  
  	while (idx < map->rm_used) {
  		if (map->rm_entries[idx] == node_num)
  			return 1;
  		idx++;
  	}
  
  	return 0;
  }
  
  /*
   * Test-and-set: append 'node_num' to the recovery map unless it is
   * already there. Returns the previous membership (1 if it was already
   * present, 0 if it has just been added).
   */
  static int ocfs2_recovery_map_set(struct ocfs2_super *osb,
  				  unsigned int node_num)
  {
  	struct ocfs2_recovery_map *map = osb->recovery_map;
  	int was_set;
  
  	spin_lock(&osb->osb_lock);
  
  	was_set = __ocfs2_recovery_map_test(osb, node_num) ? 1 : 0;
  	if (!was_set) {
  		/* XXX: Can this be exploited? Not from o2dlm... */
  		BUG_ON(map->rm_used >= osb->max_slots);
  
  		map->rm_entries[map->rm_used] = node_num;
  		map->rm_used++;
  	}
  
  	spin_unlock(&osb->osb_lock);
  
  	return was_set;
  }
  
  /*
   * Remove the first entry equal to 'node_num' from the recovery map,
   * shifting the following entries down by one. No-op if it is absent.
   */
  static void ocfs2_recovery_map_clear(struct ocfs2_super *osb,
  				     unsigned int node_num)
  {
  	struct ocfs2_recovery_map *map = osb->recovery_map;
  	int pos = 0;
  
  	spin_lock(&osb->osb_lock);
  
  	while (pos < map->rm_used && map->rm_entries[pos] != node_num)
  		pos++;
  
  	if (pos < map->rm_used) {
  		/* XXX: be careful with the pointer math */
  		memmove(&map->rm_entries[pos], &map->rm_entries[pos + 1],
  			(map->rm_used - pos - 1) * sizeof(unsigned int));
  		map->rm_used--;
  	}
  
  	spin_unlock(&osb->osb_lock);
  }
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
296
297
298
299
  static int ocfs2_commit_cache(struct ocfs2_super *osb)
  {
  	int status = 0;
  	unsigned int flushed;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
300
  	struct ocfs2_journal *journal = NULL;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
301
302
303
304
  	journal = osb->journal;
  
  	/* Flush all pending commits and checkpoint the journal. */
  	down_write(&journal->j_trans_barrier);
b41079504   Tao Ma   ocfs2: Remove mas...
305
306
307
  	flushed = atomic_read(&journal->j_num_trans);
  	trace_ocfs2_commit_cache_begin(flushed);
  	if (flushed == 0) {
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
308
  		up_write(&journal->j_trans_barrier);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
309
310
  		goto finally;
  	}
2b4e30fbd   Joel Becker   ocfs2: Switch ove...
311
312
313
  	jbd2_journal_lock_updates(journal->j_journal);
  	status = jbd2_journal_flush(journal->j_journal);
  	jbd2_journal_unlock_updates(journal->j_journal);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
314
315
316
317
318
  	if (status < 0) {
  		up_write(&journal->j_trans_barrier);
  		mlog_errno(status);
  		goto finally;
  	}
f9c57ada3   Tao Ma   ocfs2: Remove unu...
319
  	ocfs2_inc_trans_id(journal);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
320
321
322
323
  
  	flushed = atomic_read(&journal->j_num_trans);
  	atomic_set(&journal->j_num_trans, 0);
  	up_write(&journal->j_trans_barrier);
b41079504   Tao Ma   ocfs2: Remove mas...
324
  	trace_ocfs2_commit_cache_end(journal->j_trans_id, flushed);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
325

34d024f84   Mark Fasheh   ocfs2: Remove mou...
326
  	ocfs2_wake_downconvert_thread(osb);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
327
328
  	wake_up(&journal->j_checkpointed);
  finally:
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
329
330
  	return status;
  }
1fabe1481   Mark Fasheh   ocfs2: Remove str...
331
  handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs)
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
332
  {
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
333
  	journal_t *journal = osb->journal->j_journal;
1fabe1481   Mark Fasheh   ocfs2: Remove str...
334
  	handle_t *handle;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
335

ebdec83ba   Eric Sesterhenn / snakebyte   [PATCH] BUG_ON() ...
336
  	BUG_ON(!osb || !osb->journal->j_journal);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
337

65eff9ccf   Mark Fasheh   ocfs2: remove han...
338
339
  	if (ocfs2_is_hard_readonly(osb))
  		return ERR_PTR(-EROFS);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
340
341
342
  
  	BUG_ON(osb->journal->j_state == OCFS2_JOURNAL_FREE);
  	BUG_ON(max_buffs <= 0);
90e86a63e   Jan Kara   ocfs2: Support ne...
343
344
345
  	/* Nested transaction? Just return the handle... */
  	if (journal_current_handle())
  		return jbd2_journal_start(journal, max_buffs);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
346

ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
347
  	down_read(&osb->journal->j_trans_barrier);
2b4e30fbd   Joel Becker   ocfs2: Switch ove...
348
  	handle = jbd2_journal_start(journal, max_buffs);
1fabe1481   Mark Fasheh   ocfs2: Remove str...
349
  	if (IS_ERR(handle)) {
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
350
  		up_read(&osb->journal->j_trans_barrier);
1fabe1481   Mark Fasheh   ocfs2: Remove str...
351
  		mlog_errno(PTR_ERR(handle));
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
352
353
354
  
  		if (is_journal_aborted(journal)) {
  			ocfs2_abort(osb->sb, "Detected aborted journal");
1fabe1481   Mark Fasheh   ocfs2: Remove str...
355
  			handle = ERR_PTR(-EROFS);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
356
  		}
c271c5c22   Sunil Mushran   ocfs2: local mounts
357
358
359
360
  	} else {
  		if (!ocfs2_mount_local(osb))
  			atomic_inc(&(osb->journal->j_num_trans));
  	}
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
361

ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
362
  	return handle;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
363
  }
1fabe1481   Mark Fasheh   ocfs2: Remove str...
364
365
  /*
   * Stop the transaction behind 'handle' and return the result of
   * jbd2_journal_stop() (negative values are logged).
   *
   * j_trans_barrier is released unless the handle was nested
   * (h_ref > 1 on entry).
   */
  int ocfs2_commit_trans(struct ocfs2_super *osb,
  		       handle_t *handle)
  {
  	struct ocfs2_journal *jrnl = osb->journal;
  	int rc;
  	int outermost;
  
  	BUG_ON(!handle);
  
  	/*
  	 * Sample h_ref before stopping.
  	 * NOTE(review): presumably the handle must not be touched after
  	 * jbd2_journal_stop() - confirm against the jbd2 API.
  	 */
  	outermost = !(handle->h_ref > 1);
  
  	rc = jbd2_journal_stop(handle);
  	if (rc < 0)
  		mlog_errno(rc);
  
  	if (outermost)
  		up_read(&jrnl->j_trans_barrier);
  
  	return rc;
  }
  
  /*
   * 'nblocks' is what you want to add to the current transaction.
   *
   * This might call jbd2_journal_restart() which will commit dirty buffers
   * and then restart the transaction. Before calling
   * ocfs2_extend_trans(), any changed blocks should have been
   * dirtied. After calling it, all blocks which need to be changed must
   * go through another set of journal_access/journal_dirty calls.
   *
   * WARNING: This will not release any semaphores or disk locks taken
   * during the transaction, so make sure they were taken *before*
   * start_trans or we'll have ordering deadlocks.
   *
   * WARNING2: Note that we do *not* drop j_trans_barrier here. This is
   * good because transaction ids haven't yet been recorded on the
   * cluster locks associated with this handle.
   */
1fc581467   Mark Fasheh   ocfs2: have ocfs2...
399
  /*
   * Add 'nblocks' credits to 'handle'.
   *
   * Tries jbd2_journal_extend() first; when that returns a positive
   * value the transaction is restarted with the old credit count plus
   * nblocks. With CONFIG_OCFS2_DEBUG_FS the extend step is skipped and
   * the restart path is always taken.
   *
   * Returns 0 on success (including nblocks == 0) or the negative error
   * from jbd2, which is also logged.
   */
  int ocfs2_extend_trans(handle_t *handle, int nblocks)
  {
  	int rc, credits_before;
  
  	BUG_ON(!handle);
  	BUG_ON(nblocks < 0);
  
  	if (nblocks == 0)
  		return 0;
  
  	credits_before = handle->h_buffer_credits;
  
  	trace_ocfs2_extend_trans(credits_before, nblocks);
  
  #ifdef CONFIG_OCFS2_DEBUG_FS
  	rc = 1;
  #else
  	rc = jbd2_journal_extend(handle, nblocks);
  	if (rc < 0) {
  		mlog_errno(rc);
  		return rc;
  	}
  #endif
  
  	/* Extend succeeded in place: nothing more to do. */
  	if (rc <= 0)
  		return 0;
  
  	trace_ocfs2_extend_trans_restart(credits_before + nblocks);
  	rc = jbd2_journal_restart(handle, credits_before + nblocks);
  	if (rc < 0) {
  		mlog_errno(rc);
  		return rc;
  	}
  
  	return 0;
  }
50655ae9e   Joel Becker   ocfs2: Add journa...
437
438
439
440
441
442
443
444
445
  /*
   * A jbd2 trigger set plus the byte offset, within the block, of the
   * check structure that ocfs2_frozen_trigger() recomputes.
   */
  struct ocfs2_triggers {
  	/* Embedded jbd2 triggers; to_ocfs2_trigger() maps this back to the wrapper. */
  	struct jbd2_buffer_trigger_type	ot_triggers;
  	/* Offset added to the block data pointer to locate the check field. */
  	int				ot_offset;
  };
  
  /* Map an embedded ot_triggers member back to its enclosing struct ocfs2_triggers. */
  static inline struct ocfs2_triggers *to_ocfs2_trigger(struct jbd2_buffer_trigger_type *triggers)
  {
  	struct ocfs2_triggers *outer;
  
  	outer = container_of(triggers, struct ocfs2_triggers, ot_triggers);
  
  	return outer;
  }
13ceef099   Jan Kara   jbd2/ocfs2: Fix b...
446
  /*
   * Frozen trigger for block types whose check structure sits at a fixed
   * offset (ot_offset) from the start of the block data.
   */
  static void ocfs2_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
  				 struct buffer_head *bh,
  				 void *data, size_t size)
  {
  	struct ocfs2_triggers *ot =
  		container_of(triggers, struct ocfs2_triggers, ot_triggers);
  	void *check = data + ot->ot_offset;
  
  	/*
  	 * We aren't guaranteed to have the superblock here, so we
  	 * must unconditionally compute the ecc data.
  	 * __ocfs2_journal_access() will only set the triggers if
  	 * metaecc is enabled.
  	 */
  	ocfs2_block_check_compute(data, size, check);
  }
  
  /*
   * Quota blocks have their own trigger because the struct ocfs2_block_check
   * offset depends on the blocksize.
   */
13ceef099   Jan Kara   jbd2/ocfs2: Fix b...
465
  /*
   * Frozen trigger for quota blocks: the check structure is located via
   * ocfs2_block_dqtrailer() from the block size and data.
   */
  static void ocfs2_dq_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
  				    struct buffer_head *bh,
  				    void *data, size_t size)
  {
  	struct ocfs2_disk_dqtrailer *trailer;
  
  	trailer = ocfs2_block_dqtrailer(size, data);
  
  	/*
  	 * We aren't guaranteed to have the superblock here, so we
  	 * must unconditionally compute the ecc data.
  	 * __ocfs2_journal_access() will only set the triggers if
  	 * metaecc is enabled.
  	 */
  	ocfs2_block_check_compute(data, size, &trailer->dq_check);
  }
c175a518b   Joel Becker   ocfs2: Checksum a...
480
481
482
483
  /*
   * Directory blocks also have their own trigger because the
   * struct ocfs2_block_check offset depends on the blocksize.
   */
13ceef099   Jan Kara   jbd2/ocfs2: Fix b...
484
  /*
   * Frozen trigger for directory blocks: the check structure is located
   * via ocfs2_dir_trailer_from_size() from the block size and data.
   */
  static void ocfs2_db_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
  				    struct buffer_head *bh,
  				    void *data, size_t size)
  {
  	struct ocfs2_dir_block_trailer *dir_trailer;
  
  	dir_trailer = ocfs2_dir_trailer_from_size(size, data);
  
  	/*
  	 * We aren't guaranteed to have the superblock here, so we
  	 * must unconditionally compute the ecc data.
  	 * __ocfs2_journal_access() will only set the triggers if
  	 * metaecc is enabled.
  	 */
  	ocfs2_block_check_compute(data, size, &dir_trailer->db_check);
  }
50655ae9e   Joel Becker   ocfs2: Add journa...
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
  /*
   * Abort trigger shared by all ocfs2 trigger sets: logs the buffer head
   * address and block number, then reports a file system error through
   * the super block reached via bh->b_assoc_map->host->i_sb.
   */
  static void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers,
  				struct buffer_head *bh)
  {
  	mlog(ML_ERROR,
  	     "ocfs2_abort_trigger called by JBD2.  bh = 0x%lx, "
  	     "bh->b_blocknr = %llu\n",
  	     (unsigned long)bh,
  	     (unsigned long long)bh->b_blocknr);
  
  	/* We aren't guaranteed to have the superblock here - but if we
  	 * don't, it'll just crash. */
  	ocfs2_error(bh->b_assoc_map->host->i_sb,
  		    "JBD2 has aborted our journal, ocfs2 cannot continue\n");
  }
  
  static struct ocfs2_triggers di_triggers = {
  	.ot_triggers = {
13ceef099   Jan Kara   jbd2/ocfs2: Fix b...
518
  		.t_frozen = ocfs2_frozen_trigger,
50655ae9e   Joel Becker   ocfs2: Add journa...
519
520
521
522
523
524
525
  		.t_abort = ocfs2_abort_trigger,
  	},
  	.ot_offset	= offsetof(struct ocfs2_dinode, i_check),
  };
  
  static struct ocfs2_triggers eb_triggers = {
  	.ot_triggers = {
13ceef099   Jan Kara   jbd2/ocfs2: Fix b...
526
  		.t_frozen = ocfs2_frozen_trigger,
50655ae9e   Joel Becker   ocfs2: Add journa...
527
528
529
530
  		.t_abort = ocfs2_abort_trigger,
  	},
  	.ot_offset	= offsetof(struct ocfs2_extent_block, h_check),
  };
93c97087a   Tao Ma   ocfs2: Add metaec...
531
532
  static struct ocfs2_triggers rb_triggers = {
  	.ot_triggers = {
13ceef099   Jan Kara   jbd2/ocfs2: Fix b...
533
  		.t_frozen = ocfs2_frozen_trigger,
93c97087a   Tao Ma   ocfs2: Add metaec...
534
535
536
537
  		.t_abort = ocfs2_abort_trigger,
  	},
  	.ot_offset	= offsetof(struct ocfs2_refcount_block, rf_check),
  };
50655ae9e   Joel Becker   ocfs2: Add journa...
538
539
  static struct ocfs2_triggers gd_triggers = {
  	.ot_triggers = {
13ceef099   Jan Kara   jbd2/ocfs2: Fix b...
540
  		.t_frozen = ocfs2_frozen_trigger,
50655ae9e   Joel Becker   ocfs2: Add journa...
541
542
543
544
  		.t_abort = ocfs2_abort_trigger,
  	},
  	.ot_offset	= offsetof(struct ocfs2_group_desc, bg_check),
  };
c175a518b   Joel Becker   ocfs2: Checksum a...
545
546
  static struct ocfs2_triggers db_triggers = {
  	.ot_triggers = {
13ceef099   Jan Kara   jbd2/ocfs2: Fix b...
547
  		.t_frozen = ocfs2_db_frozen_trigger,
c175a518b   Joel Becker   ocfs2: Checksum a...
548
549
550
  		.t_abort = ocfs2_abort_trigger,
  	},
  };
50655ae9e   Joel Becker   ocfs2: Add journa...
551
552
  static struct ocfs2_triggers xb_triggers = {
  	.ot_triggers = {
13ceef099   Jan Kara   jbd2/ocfs2: Fix b...
553
  		.t_frozen = ocfs2_frozen_trigger,
50655ae9e   Joel Becker   ocfs2: Add journa...
554
555
556
557
558
559
560
  		.t_abort = ocfs2_abort_trigger,
  	},
  	.ot_offset	= offsetof(struct ocfs2_xattr_block, xb_check),
  };
  
  static struct ocfs2_triggers dq_triggers = {
  	.ot_triggers = {
13ceef099   Jan Kara   jbd2/ocfs2: Fix b...
561
  		.t_frozen = ocfs2_dq_frozen_trigger,
50655ae9e   Joel Becker   ocfs2: Add journa...
562
563
564
  		.t_abort = ocfs2_abort_trigger,
  	},
  };
9b7895efa   Mark Fasheh   ocfs2: Add a name...
565
566
  static struct ocfs2_triggers dr_triggers = {
  	.ot_triggers = {
13ceef099   Jan Kara   jbd2/ocfs2: Fix b...
567
  		.t_frozen = ocfs2_frozen_trigger,
9b7895efa   Mark Fasheh   ocfs2: Add a name...
568
569
570
571
572
573
574
  		.t_abort = ocfs2_abort_trigger,
  	},
  	.ot_offset	= offsetof(struct ocfs2_dx_root_block, dr_check),
  };
  
  static struct ocfs2_triggers dl_triggers = {
  	.ot_triggers = {
13ceef099   Jan Kara   jbd2/ocfs2: Fix b...
575
  		.t_frozen = ocfs2_frozen_trigger,
9b7895efa   Mark Fasheh   ocfs2: Add a name...
576
577
578
579
  		.t_abort = ocfs2_abort_trigger,
  	},
  	.ot_offset	= offsetof(struct ocfs2_dx_leaf, dl_check),
  };
50655ae9e   Joel Becker   ocfs2: Add journa...
580
  /*
   * Common implementation behind the ocfs2_journal_access*() wrappers.
   *
   * Obtains jbd2 access to 'bh' on 'handle' according to 'type':
   *   OCFS2_JOURNAL_ACCESS_CREATE / _WRITE -> jbd2_journal_get_write_access()
   *   OCFS2_JOURNAL_ACCESS_UNDO            -> jbd2_journal_get_undo_access()
   *   anything else                        -> -EINVAL (logged)
   *
   * When access was granted, metaecc is enabled and 'triggers' is
   * non-NULL, the triggers are attached to the buffer. The access call
   * and the trigger setup run under the metadata cache io lock of 'ci'.
   *
   * Returns the jbd2 status; negative values are logged. BUGs on NULL
   * ci/ci_ops/handle/bh and on a buffer that is not uptodate.
   */
  static int __ocfs2_journal_access(handle_t *handle,
  				  struct ocfs2_caching_info *ci,
  				  struct buffer_head *bh,
  				  struct ocfs2_triggers *triggers,
  				  int type)
  {
  	int status;
  	struct ocfs2_super *osb;
  
  	BUG_ON(!ci || !ci->ci_ops);
  	BUG_ON(!handle);
  	BUG_ON(!bh);
  
  	/* Look up the super block only after ci has been validated. */
  	osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci));
  
  	trace_ocfs2_journal_access(
  		(unsigned long long)ocfs2_metadata_cache_owner(ci),
  		(unsigned long long)bh->b_blocknr, type, bh->b_size);
  
  	/* we can safely remove this assertion after testing. */
  	if (!buffer_uptodate(bh)) {
  		mlog(ML_ERROR, "giving me a buffer that's not uptodate!\n");
  		mlog(ML_ERROR, "b_blocknr=%llu\n",
  		     (unsigned long long)bh->b_blocknr);
  		BUG();
  	}
  
  	/* Set the current transaction information on the ci so
  	 * that the locking code knows whether it can drop it's locks
  	 * on this ci or not. We're protected from the commit
  	 * thread updating the current transaction id until
  	 * ocfs2_commit_trans() because ocfs2_start_trans() took
  	 * j_trans_barrier for us. */
  	ocfs2_set_ci_lock_trans(osb->journal, ci);
  
  	ocfs2_metadata_cache_io_lock(ci);
  	switch (type) {
  	case OCFS2_JOURNAL_ACCESS_CREATE:
  	case OCFS2_JOURNAL_ACCESS_WRITE:
  		status = jbd2_journal_get_write_access(handle, bh);
  		break;
  
  	case OCFS2_JOURNAL_ACCESS_UNDO:
  		status = jbd2_journal_get_undo_access(handle, bh);
  		break;
  
  	default:
  		status = -EINVAL;
  		mlog(ML_ERROR, "Unknown access type!\n");
  	}
  	if (!status && ocfs2_meta_ecc(osb) && triggers)
  		jbd2_journal_set_triggers(bh, &triggers->ot_triggers);
  	ocfs2_metadata_cache_io_unlock(ci);
  
  	if (status < 0)
  		mlog(ML_ERROR, "Error %d getting %d access to buffer!\n",
  		     status, type);
  
  	return status;
  }
0cf2f7632   Joel Becker   ocfs2: Pass struc...
641
642
  /* Journal access for a buffer, attaching di_triggers. */
  int ocfs2_journal_access_di(handle_t *handle, struct ocfs2_caching_info *ci,
  			    struct buffer_head *bh, int type)
  {
  	struct ocfs2_triggers *trig = &di_triggers;
  
  	return __ocfs2_journal_access(handle, ci, bh, trig, type);
  }
0cf2f7632   Joel Becker   ocfs2: Pass struc...
646
  /* Journal access for a buffer, attaching eb_triggers. */
  int ocfs2_journal_access_eb(handle_t *handle, struct ocfs2_caching_info *ci,
  			    struct buffer_head *bh, int type)
  {
  	struct ocfs2_triggers *trig = &eb_triggers;
  
  	return __ocfs2_journal_access(handle, ci, bh, trig, type);
  }
93c97087a   Tao Ma   ocfs2: Add metaec...
651
652
653
654
655
656
  /* Journal access for a buffer, attaching rb_triggers. */
  int ocfs2_journal_access_rb(handle_t *handle, struct ocfs2_caching_info *ci,
  			    struct buffer_head *bh, int type)
  {
  	struct ocfs2_triggers *trig = &rb_triggers;
  
  	return __ocfs2_journal_access(handle, ci, bh, trig, type);
  }
0cf2f7632   Joel Becker   ocfs2: Pass struc...
657
  /* Journal access for a buffer, attaching gd_triggers. */
  int ocfs2_journal_access_gd(handle_t *handle, struct ocfs2_caching_info *ci,
  			    struct buffer_head *bh, int type)
  {
  	struct ocfs2_triggers *trig = &gd_triggers;
  
  	return __ocfs2_journal_access(handle, ci, bh, trig, type);
  }
0cf2f7632   Joel Becker   ocfs2: Pass struc...
662
  /* Journal access for a buffer, attaching db_triggers. */
  int ocfs2_journal_access_db(handle_t *handle, struct ocfs2_caching_info *ci,
  			    struct buffer_head *bh, int type)
  {
  	struct ocfs2_triggers *trig = &db_triggers;
  
  	return __ocfs2_journal_access(handle, ci, bh, trig, type);
  }
0cf2f7632   Joel Becker   ocfs2: Pass struc...
667
  /* Journal access for a buffer, attaching xb_triggers. */
  int ocfs2_journal_access_xb(handle_t *handle, struct ocfs2_caching_info *ci,
  			    struct buffer_head *bh, int type)
  {
  	struct ocfs2_triggers *trig = &xb_triggers;
  
  	return __ocfs2_journal_access(handle, ci, bh, trig, type);
  }
0cf2f7632   Joel Becker   ocfs2: Pass struc...
672
  /* Journal access for a buffer, attaching dq_triggers. */
  int ocfs2_journal_access_dq(handle_t *handle, struct ocfs2_caching_info *ci,
  			    struct buffer_head *bh, int type)
  {
  	struct ocfs2_triggers *trig = &dq_triggers;
  
  	return __ocfs2_journal_access(handle, ci, bh, trig, type);
  }
0cf2f7632   Joel Becker   ocfs2: Pass struc...
677
  /* Journal access for a buffer, attaching dr_triggers. */
  int ocfs2_journal_access_dr(handle_t *handle, struct ocfs2_caching_info *ci,
  			    struct buffer_head *bh, int type)
  {
  	struct ocfs2_triggers *trig = &dr_triggers;
  
  	return __ocfs2_journal_access(handle, ci, bh, trig, type);
  }
0cf2f7632   Joel Becker   ocfs2: Pass struc...
682
  /* Journal access for a buffer, attaching dl_triggers. */
  int ocfs2_journal_access_dl(handle_t *handle, struct ocfs2_caching_info *ci,
  			    struct buffer_head *bh, int type)
  {
  	struct ocfs2_triggers *trig = &dl_triggers;
  
  	return __ocfs2_journal_access(handle, ci, bh, trig, type);
  }
0cf2f7632   Joel Becker   ocfs2: Pass struc...
687
  /*
   * Journal-access wrapper that passes no trigger set (NULL) to
   * __ocfs2_journal_access().  Every other argument is forwarded
   * untouched and the callee's result is returned as-is.
   */
  int ocfs2_journal_access(handle_t *handle, struct ocfs2_caching_info *ci,
  			 struct buffer_head *bh, int type)
  {
  	int ret;

  	ret = __ocfs2_journal_access(handle, ci, bh, NULL, type);
  	return ret;
  }
ec20cec7a   Joel Becker   ocfs2: Make ocfs2...
692
  /*
   * Mark @bh as dirty metadata in the transaction behind @handle.
   *
   * The function returns nothing: any non-zero result from
   * jbd2_journal_dirty_metadata() trips BUG_ON().
   */
  void ocfs2_journal_dirty(handle_t *handle, struct buffer_head *bh)
  {
  	int err;

  	trace_ocfs2_journal_dirty((unsigned long long)bh->b_blocknr);

  	err = jbd2_journal_dirty_metadata(handle, bh);
  	BUG_ON(err);
  }
2b4e30fbd   Joel Becker   ocfs2: Switch ove...
700
  #define OCFS2_DEFAULT_COMMIT_INTERVAL	(HZ * JBD2_DEFAULT_MAX_COMMIT_AGE)
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
701
702
703
704
  
  void ocfs2_set_journal_params(struct ocfs2_super *osb)
  {
  	journal_t *journal = osb->journal->j_journal;
d147b3d63   Mark Fasheh   ocfs2: Support co...
705
706
707
708
  	unsigned long commit_interval = OCFS2_DEFAULT_COMMIT_INTERVAL;
  
  	if (osb->osb_commit_interval)
  		commit_interval = osb->osb_commit_interval;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
709

a931da6ac   Theodore Ts'o   jbd2: Change j_st...
710
  	write_lock(&journal->j_state_lock);
d147b3d63   Mark Fasheh   ocfs2: Support co...
711
  	journal->j_commit_interval = commit_interval;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
712
  	if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER)
2b4e30fbd   Joel Becker   ocfs2: Switch ove...
713
  		journal->j_flags |= JBD2_BARRIER;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
714
  	else
2b4e30fbd   Joel Becker   ocfs2: Switch ove...
715
  		journal->j_flags &= ~JBD2_BARRIER;
a931da6ac   Theodore Ts'o   jbd2: Change j_st...
716
  	write_unlock(&journal->j_state_lock);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
717
718
719
720
721
722
723
724
725
726
  }
  
  int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty)
  {
  	int status = -1;
  	struct inode *inode = NULL; /* the journal inode */
  	journal_t *j_journal = NULL;
  	struct ocfs2_dinode *di = NULL;
  	struct buffer_head *bh = NULL;
  	struct ocfs2_super *osb;
e63aecb65   Mark Fasheh   ocfs2: Rename ocf...
727
  	int inode_lock = 0;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
728

ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
  	BUG_ON(!journal);
  
  	osb = journal->j_osb;
  
  	/* already have the inode for our journal */
  	inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
  					    osb->slot_num);
  	if (inode == NULL) {
  		status = -EACCES;
  		mlog_errno(status);
  		goto done;
  	}
  	if (is_bad_inode(inode)) {
  		mlog(ML_ERROR, "access error (bad inode)
  ");
  		iput(inode);
  		inode = NULL;
  		status = -EACCES;
  		goto done;
  	}
  
  	SET_INODE_JOURNAL(inode);
  	OCFS2_I(inode)->ip_open_count++;
6eff5790d   Mark Fasheh   [PATCH] ocfs2: do...
752
753
754
  	/* Skip recovery waits here - journal inode metadata never
  	 * changes in a live cluster so it can be considered an
  	 * exception to the rule. */
e63aecb65   Mark Fasheh   ocfs2: Rename ocf...
755
  	status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
756
757
758
759
760
761
  	if (status < 0) {
  		if (status != -ERESTARTSYS)
  			mlog(ML_ERROR, "Could not get lock on journal!
  ");
  		goto done;
  	}
e63aecb65   Mark Fasheh   ocfs2: Rename ocf...
762
  	inode_lock = 1;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
763
764
765
766
767
768
769
770
771
  	di = (struct ocfs2_dinode *)bh->b_data;
  
  	if (inode->i_size <  OCFS2_MIN_JOURNAL_SIZE) {
  		mlog(ML_ERROR, "Journal file size (%lld) is too small!
  ",
  		     inode->i_size);
  		status = -EINVAL;
  		goto done;
  	}
b41079504   Tao Ma   ocfs2: Remove mas...
772
773
774
  	trace_ocfs2_journal_init(inode->i_size,
  				 (unsigned long long)inode->i_blocks,
  				 OCFS2_I(inode)->ip_clusters);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
775
776
  
  	/* call the kernels journal init function now */
2b4e30fbd   Joel Becker   ocfs2: Switch ove...
777
  	j_journal = jbd2_journal_init_inode(inode);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
778
779
780
781
782
783
  	if (j_journal == NULL) {
  		mlog(ML_ERROR, "Linux journal layer error
  ");
  		status = -EINVAL;
  		goto done;
  	}
b41079504   Tao Ma   ocfs2: Remove mas...
784
  	trace_ocfs2_journal_init_maxlen(j_journal->j_maxlen);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
  
  	*dirty = (le32_to_cpu(di->id1.journal1.ij_flags) &
  		  OCFS2_JOURNAL_DIRTY_FL);
  
  	journal->j_journal = j_journal;
  	journal->j_inode = inode;
  	journal->j_bh = bh;
  
  	ocfs2_set_journal_params(osb);
  
  	journal->j_state = OCFS2_JOURNAL_LOADED;
  
  	status = 0;
  done:
  	if (status < 0) {
e63aecb65   Mark Fasheh   ocfs2: Rename ocf...
800
801
  		if (inode_lock)
  			ocfs2_inode_unlock(inode, 1);
a81cb88b6   Mark Fasheh   ocfs2: Don't chec...
802
  		brelse(bh);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
803
804
805
806
807
  		if (inode) {
  			OCFS2_I(inode)->ip_open_count--;
  			iput(inode);
  		}
  	}
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
808
809
  	return status;
  }
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
810
811
812
813
814
815
816
817
818
  /* Add one to the little-endian recovery generation stored in @di. */
  static void ocfs2_bump_recovery_generation(struct ocfs2_dinode *di)
  {
  	le32_add_cpu(&di->id1.journal1.ij_recovery_generation, 1);
  }
  
  /* Return the recovery generation stored in @di, converted to CPU order. */
  static u32 ocfs2_get_recovery_generation(struct ocfs2_dinode *di)
  {
  	u32 generation = le32_to_cpu(di->id1.journal1.ij_recovery_generation);

  	return generation;
  }
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
819
  static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
820
  				      int dirty, int replayed)
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
821
822
823
824
825
826
  {
  	int status;
  	unsigned int flags;
  	struct ocfs2_journal *journal = osb->journal;
  	struct buffer_head *bh = journal->j_bh;
  	struct ocfs2_dinode *fe;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
827
  	fe = (struct ocfs2_dinode *)bh->b_data;
10995aa24   Joel Becker   ocfs2: Morph the ...
828
829
830
831
832
  
  	/* The journal bh on the osb always comes from ocfs2_journal_init()
  	 * and was validated there inside ocfs2_inode_lock_full().  It's a
  	 * code bug if we mess it up. */
  	BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
833
834
835
836
837
838
839
  
  	flags = le32_to_cpu(fe->id1.journal1.ij_flags);
  	if (dirty)
  		flags |= OCFS2_JOURNAL_DIRTY_FL;
  	else
  		flags &= ~OCFS2_JOURNAL_DIRTY_FL;
  	fe->id1.journal1.ij_flags = cpu_to_le32(flags);
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
840
841
  	if (replayed)
  		ocfs2_bump_recovery_generation(fe);
13723d00e   Joel Becker   ocfs2: Use metada...
842
  	ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check);
8cb471e8f   Joel Becker   ocfs2: Take the i...
843
  	status = ocfs2_write_block(osb, bh, INODE_CACHE(journal->j_inode));
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
844
845
  	if (status < 0)
  		mlog_errno(status);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
846
847
848
849
850
851
852
853
854
855
856
857
858
  	return status;
  }
  
  /*
   * If the journal has been kmalloc'd it needs to be freed after this
   * call.
   */
  void ocfs2_journal_shutdown(struct ocfs2_super *osb)
  {
  	struct ocfs2_journal *journal = NULL;
  	int status = 0;
  	struct inode *inode = NULL;
  	int num_running_trans = 0;
ebdec83ba   Eric Sesterhenn / snakebyte   [PATCH] BUG_ON() ...
859
  	BUG_ON(!osb);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
860
861
862
863
864
865
866
867
868
  
  	journal = osb->journal;
  	if (!journal)
  		goto done;
  
  	inode = journal->j_inode;
  
  	if (journal->j_state != OCFS2_JOURNAL_LOADED)
  		goto done;
2b4e30fbd   Joel Becker   ocfs2: Switch ove...
869
  	/* need to inc inode use count - jbd2_journal_destroy will iput. */
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
870
871
872
873
  	if (!igrab(inode))
  		BUG();
  
  	num_running_trans = atomic_read(&(osb->journal->j_num_trans));
b41079504   Tao Ma   ocfs2: Remove mas...
874
  	trace_ocfs2_journal_shutdown(num_running_trans);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
875
876
877
878
879
880
881
882
883
884
885
886
  
  	/* Do a commit_cache here. It will flush our journal, *and*
  	 * release any locks that are still held.
  	 * set the SHUTDOWN flag and release the trans lock.
  	 * the commit thread will take the trans lock for us below. */
  	journal->j_state = OCFS2_JOURNAL_IN_SHUTDOWN;
  
  	/* The OCFS2_JOURNAL_IN_SHUTDOWN will signal to commit_cache to not
  	 * drop the trans_lock (which we want to hold until we
  	 * completely destroy the journal. */
  	if (osb->commit_task) {
  		/* Wait for the commit thread */
b41079504   Tao Ma   ocfs2: Remove mas...
887
  		trace_ocfs2_journal_shutdown_wait(osb->commit_task);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
888
889
890
891
892
  		kthread_stop(osb->commit_task);
  		osb->commit_task = NULL;
  	}
  
  	BUG_ON(atomic_read(&(osb->journal->j_num_trans)) != 0);
c271c5c22   Sunil Mushran   ocfs2: local mounts
893
  	if (ocfs2_mount_local(osb)) {
2b4e30fbd   Joel Becker   ocfs2: Switch ove...
894
895
896
  		jbd2_journal_lock_updates(journal->j_journal);
  		status = jbd2_journal_flush(journal->j_journal);
  		jbd2_journal_unlock_updates(journal->j_journal);
c271c5c22   Sunil Mushran   ocfs2: local mounts
897
898
899
900
901
902
903
904
905
  		if (status < 0)
  			mlog_errno(status);
  	}
  
  	if (status == 0) {
  		/*
  		 * Do not toggle if flush was unsuccessful otherwise
  		 * will leave dirty metadata in a "clean" journal
  		 */
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
906
  		status = ocfs2_journal_toggle_dirty(osb, 0, 0);
c271c5c22   Sunil Mushran   ocfs2: local mounts
907
908
909
  		if (status < 0)
  			mlog_errno(status);
  	}
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
910
911
  
  	/* Shutdown the kernel journal system */
2b4e30fbd   Joel Becker   ocfs2: Switch ove...
912
  	jbd2_journal_destroy(journal->j_journal);
ae0dff683   Sunil Mushran   ocfs2: Set journa...
913
  	journal->j_journal = NULL;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
914
915
916
917
  
  	OCFS2_I(inode)->ip_open_count--;
  
  	/* unlock our journal */
e63aecb65   Mark Fasheh   ocfs2: Rename ocf...
918
  	ocfs2_inode_unlock(inode, 1);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
919
920
921
922
923
924
925
926
927
928
  
  	brelse(journal->j_bh);
  	journal->j_bh = NULL;
  
  	journal->j_state = OCFS2_JOURNAL_FREE;
  
  //	up_write(&journal->j_trans_barrier);
  done:
  	if (inode)
  		iput(inode);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
929
930
931
932
933
934
935
  }
  
  /*
   * If jbd2 has an error recorded for @journal, log it together with the
   * device name from @sb and the @slot number, then acknowledge and
   * clear it.  Does nothing when no error is recorded.
   */
  static void ocfs2_clear_journal_error(struct super_block *sb,
  				      journal_t *journal,
  				      int slot)
  {
  	int olderr = jbd2_journal_errno(journal);

  	if (!olderr)
  		return;

  	mlog(ML_ERROR, "File system error %d recorded in "
  	     "journal %u.\n", olderr, slot);
  	mlog(ML_ERROR, "File system on device %s needs checking.\n",
  	     sb->s_id);

  	jbd2_journal_ack_err(journal);
  	jbd2_journal_clear_err(journal);
  }
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
948
  int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed)
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
949
950
951
  {
  	int status = 0;
  	struct ocfs2_super *osb;
b1f3550fa   Julia Lawall   ocfs2: Use BUG_ON
952
  	BUG_ON(!journal);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
953
954
  
  	osb = journal->j_osb;
2b4e30fbd   Joel Becker   ocfs2: Switch ove...
955
  	status = jbd2_journal_load(journal->j_journal);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
956
957
958
959
960
961
962
  	if (status < 0) {
  		mlog(ML_ERROR, "Failed to load journal!
  ");
  		goto done;
  	}
  
  	ocfs2_clear_journal_error(osb->sb, journal->j_journal, osb->slot_num);
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
963
  	status = ocfs2_journal_toggle_dirty(osb, 1, replayed);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
964
965
966
967
968
969
  	if (status < 0) {
  		mlog_errno(status);
  		goto done;
  	}
  
  	/* Launch the commit thread */
c271c5c22   Sunil Mushran   ocfs2: local mounts
970
971
972
973
974
975
976
977
978
979
980
  	if (!local) {
  		osb->commit_task = kthread_run(ocfs2_commit_thread, osb,
  					       "ocfs2cmt");
  		if (IS_ERR(osb->commit_task)) {
  			status = PTR_ERR(osb->commit_task);
  			osb->commit_task = NULL;
  			mlog(ML_ERROR, "unable to launch ocfs2commit thread, "
  			     "error=%d", status);
  			goto done;
  		}
  	} else
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
981
  		osb->commit_task = NULL;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
982
983
  
  done:
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
984
985
986
987
988
989
990
991
992
  	return status;
  }
  
  
  /* 'full' flag tells us whether we clear out all blocks or if we just
   * mark the journal clean */
  int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full)
  {
  	int status;
ebdec83ba   Eric Sesterhenn / snakebyte   [PATCH] BUG_ON() ...
993
  	BUG_ON(!journal);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
994

2b4e30fbd   Joel Becker   ocfs2: Switch ove...
995
  	status = jbd2_journal_wipe(journal->j_journal, full);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
996
997
998
999
  	if (status < 0) {
  		mlog_errno(status);
  		goto bail;
  	}
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1000
  	status = ocfs2_journal_toggle_dirty(journal->j_osb, 0, 0);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1001
1002
1003
1004
  	if (status < 0)
  		mlog_errno(status);
  
  bail:
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1005
1006
  	return status;
  }
553abd046   Joel Becker   ocfs2: Change the...
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
  /*
   * Return non-zero when the recovery map of @osb has no entries in use.
   * rm_used is sampled under osb->osb_lock.
   */
  static int ocfs2_recovery_completed(struct ocfs2_super *osb)
  {
  	struct ocfs2_recovery_map *rm = osb->recovery_map;
  	int done;

  	spin_lock(&osb->osb_lock);
  	done = !rm->rm_used;
  	spin_unlock(&osb->osb_lock);

  	return done;
  }
  
  /*
   * Sleep on osb->recovery_event until ocfs2_recovery_completed() reports
   * that the recovery map is empty.
   */
  void ocfs2_wait_for_recovery(struct ocfs2_super *osb)
  {
  	wait_event(osb->recovery_event, ocfs2_recovery_completed(osb));
  }
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
  /*
   * JBD Might read a cached version of another nodes journal file. We
   * don't want this as this file changes often and we get no
   * notification on those changes. The only way to be sure that we've
   * got the most up to date version of those blocks then is to force
   * read them off disk. Just searching through the buffer cache won't
   * work as there may be pages backing this file which are still marked
   * up to date. We know things can't change on this file underneath us
   * as we have the lock by now :)
   */
  static int ocfs2_force_read_journal(struct inode *inode)
  {
  	int status = 0;
4f902c377   Mark Fasheh   ocfs2: Fix extent...
1036
  	int i;
8110b073a   Mark Fasheh   ocfs2: Fix up i_b...
1037
  	u64 v_blkno, p_blkno, p_blocks, num_blocks;
4f902c377   Mark Fasheh   ocfs2: Fix extent...
1038
  #define CONCURRENT_JOURNAL_FILL 32ULL
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1039
  	struct buffer_head *bhs[CONCURRENT_JOURNAL_FILL];
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1040
  	memset(bhs, 0, sizeof(struct buffer_head *) * CONCURRENT_JOURNAL_FILL);
8110b073a   Mark Fasheh   ocfs2: Fix up i_b...
1041
  	num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, inode->i_size);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1042
  	v_blkno = 0;
8110b073a   Mark Fasheh   ocfs2: Fix up i_b...
1043
  	while (v_blkno < num_blocks) {
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1044
  		status = ocfs2_extent_map_get_blocks(inode, v_blkno,
49cb8d2d4   Mark Fasheh   ocfs2: Read from ...
1045
  						     &p_blkno, &p_blocks, NULL);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1046
1047
1048
1049
1050
1051
1052
  		if (status < 0) {
  			mlog_errno(status);
  			goto bail;
  		}
  
  		if (p_blocks > CONCURRENT_JOURNAL_FILL)
  			p_blocks = CONCURRENT_JOURNAL_FILL;
dd4a2c2bf   Mark Fasheh   ocfs2: Don't popu...
1053
1054
  		/* We are reading journal data which should not
  		 * be put in the uptodate cache */
da1e90985   Joel Becker   ocfs2: Separate o...
1055
1056
  		status = ocfs2_read_blocks_sync(OCFS2_SB(inode->i_sb),
  						p_blkno, p_blocks, bhs);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
  		if (status < 0) {
  			mlog_errno(status);
  			goto bail;
  		}
  
  		for(i = 0; i < p_blocks; i++) {
  			brelse(bhs[i]);
  			bhs[i] = NULL;
  		}
  
  		v_blkno += p_blocks;
  	}
  
  bail:
  	for(i = 0; i < CONCURRENT_JOURNAL_FILL; i++)
a81cb88b6   Mark Fasheh   ocfs2: Don't chec...
1072
  		brelse(bhs[i]);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1073
1074
1075
1076
1077
1078
1079
1080
  	return status;
  }
  
  /*
   * One unit of deferred recovery work.  Items are queued on
   * journal->j_la_cleanups by ocfs2_queue_recovery_completion() and
   * consumed (and freed) by ocfs2_complete_recovery().
   */
  struct ocfs2_la_recovery_item {
  	/* Link in journal->j_la_cleanups. */
  	struct list_head	lri_list;
  	/* Slot the work is for; also used for orphan recovery. */
  	int			lri_slot;
  	/* Passed to local alloc recovery and then kfree'd; may be NULL. */
  	struct ocfs2_dinode	*lri_la_dinode;
  	/* Passed to truncate log recovery and then kfree'd; may be NULL. */
  	struct ocfs2_dinode	*lri_tl_dinode;
  	/* Handed to ocfs2_finish_quota_recovery(); may be NULL. */
  	struct ocfs2_quota_recovery *lri_qrec;
  };
  
  /* Does the second half of the recovery process. By this point, the
   * node is marked clean and can actually be considered recovered,
   * hence it's no longer in the recovery map, but there's still some
   * cleanup we can do which shouldn't happen within the recovery thread
   * as locking in that context becomes very difficult if we are to take
   * recovering nodes into account.
   *
   * NOTE: This function can and will sleep on recovery of other nodes
   * during cluster locking, just like any other ocfs2 process.
   */
c4028958b   David Howells   WorkStruct: make ...
1094
  void ocfs2_complete_recovery(struct work_struct *work)
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1095
  {
b41079504   Tao Ma   ocfs2: Remove mas...
1096
  	int ret = 0;
c4028958b   David Howells   WorkStruct: make ...
1097
1098
1099
  	struct ocfs2_journal *journal =
  		container_of(work, struct ocfs2_journal, j_recovery_work);
  	struct ocfs2_super *osb = journal->j_osb;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1100
  	struct ocfs2_dinode *la_dinode, *tl_dinode;
800deef3f   Christoph Hellwig   [PATCH] ocfs2: us...
1101
  	struct ocfs2_la_recovery_item *item, *n;
2205363dc   Jan Kara   ocfs2: Implement ...
1102
  	struct ocfs2_quota_recovery *qrec;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1103
  	LIST_HEAD(tmp_la_list);
b41079504   Tao Ma   ocfs2: Remove mas...
1104
1105
  	trace_ocfs2_complete_recovery(
  		(unsigned long long)OCFS2_I(journal->j_inode)->ip_blkno);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1106
1107
1108
1109
  
  	spin_lock(&journal->j_lock);
  	list_splice_init(&journal->j_la_cleanups, &tmp_la_list);
  	spin_unlock(&journal->j_lock);
800deef3f   Christoph Hellwig   [PATCH] ocfs2: us...
1110
  	list_for_each_entry_safe(item, n, &tmp_la_list, lri_list) {
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1111
  		list_del_init(&item->lri_list);
19ece546a   Jan Kara   ocfs2: Enable quo...
1112
  		ocfs2_wait_on_quotas(osb);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1113
  		la_dinode = item->lri_la_dinode;
b41079504   Tao Ma   ocfs2: Remove mas...
1114
1115
  		tl_dinode = item->lri_tl_dinode;
  		qrec = item->lri_qrec;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1116

b41079504   Tao Ma   ocfs2: Remove mas...
1117
1118
1119
1120
1121
1122
  		trace_ocfs2_complete_recovery_slot(item->lri_slot,
  			la_dinode ? le64_to_cpu(la_dinode->i_blkno) : 0,
  			tl_dinode ? le64_to_cpu(tl_dinode->i_blkno) : 0,
  			qrec);
  
  		if (la_dinode) {
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1123
1124
1125
1126
1127
1128
1129
  			ret = ocfs2_complete_local_alloc_recovery(osb,
  								  la_dinode);
  			if (ret < 0)
  				mlog_errno(ret);
  
  			kfree(la_dinode);
  		}
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1130
  		if (tl_dinode) {
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
  			ret = ocfs2_complete_truncate_log_recovery(osb,
  								   tl_dinode);
  			if (ret < 0)
  				mlog_errno(ret);
  
  			kfree(tl_dinode);
  		}
  
  		ret = ocfs2_recover_orphans(osb, item->lri_slot);
  		if (ret < 0)
  			mlog_errno(ret);
2205363dc   Jan Kara   ocfs2: Implement ...
1142
  		if (qrec) {
2205363dc   Jan Kara   ocfs2: Implement ...
1143
1144
1145
1146
1147
1148
  			ret = ocfs2_finish_quota_recovery(osb, qrec,
  							  item->lri_slot);
  			if (ret < 0)
  				mlog_errno(ret);
  			/* Recovery info is already freed now */
  		}
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1149
1150
  		kfree(item);
  	}
b41079504   Tao Ma   ocfs2: Remove mas...
1151
  	trace_ocfs2_complete_recovery_end(ret);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1152
1153
1154
1155
1156
1157
1158
1159
  }
  
  /* NOTE: This function always eats your references to la_dinode and
   * tl_dinode, either manually on error, or by passing them to
   * ocfs2_complete_recovery */
  static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
  					    int slot_num,
  					    struct ocfs2_dinode *la_dinode,
2205363dc   Jan Kara   ocfs2: Implement ...
1160
1161
  					    struct ocfs2_dinode *tl_dinode,
  					    struct ocfs2_quota_recovery *qrec)
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1162
1163
  {
  	struct ocfs2_la_recovery_item *item;
afae00ab4   Sunil Mushran   ocfs2: fix gfp ma...
1164
  	item = kmalloc(sizeof(struct ocfs2_la_recovery_item), GFP_NOFS);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1165
1166
1167
1168
1169
1170
1171
1172
1173
  	if (!item) {
  		/* Though we wish to avoid it, we are in fact safe in
  		 * skipping local alloc cleanup as fsck.ocfs2 is more
  		 * than capable of reclaiming unused space. */
  		if (la_dinode)
  			kfree(la_dinode);
  
  		if (tl_dinode)
  			kfree(tl_dinode);
2205363dc   Jan Kara   ocfs2: Implement ...
1174
1175
  		if (qrec)
  			ocfs2_free_quota_recovery(qrec);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1176
1177
1178
1179
1180
1181
1182
1183
  		mlog_errno(-ENOMEM);
  		return;
  	}
  
  	INIT_LIST_HEAD(&item->lri_list);
  	item->lri_la_dinode = la_dinode;
  	item->lri_slot = slot_num;
  	item->lri_tl_dinode = tl_dinode;
2205363dc   Jan Kara   ocfs2: Implement ...
1184
  	item->lri_qrec = qrec;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1185
1186
1187
1188
1189
1190
1191
1192
  
  	spin_lock(&journal->j_lock);
  	list_add_tail(&item->lri_list, &journal->j_la_cleanups);
  	queue_work(ocfs2_wq, &journal->j_recovery_work);
  	spin_unlock(&journal->j_lock);
  }
  
  /* Called by the mount code to queue recovery the last part of
9140db04e   Srinivas Eeda   ocfs2: recover or...
1193
   * recovery for it's own and offline slot(s). */
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1194
1195
1196
  void ocfs2_complete_mount_recovery(struct ocfs2_super *osb)
  {
  	struct ocfs2_journal *journal = osb->journal;
10b3dd761   Sunil Mushran   ocfs2: Skip mount...
1197
1198
  	if (ocfs2_is_hard_readonly(osb))
  		return;
9140db04e   Srinivas Eeda   ocfs2: recover or...
1199
1200
1201
1202
1203
  	/* No need to queue up our truncate_log as regular cleanup will catch
  	 * that */
  	ocfs2_queue_recovery_completion(journal, osb->slot_num,
  					osb->local_alloc_copy, NULL, NULL);
  	ocfs2_schedule_truncate_log_flush(osb, 0);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1204

9140db04e   Srinivas Eeda   ocfs2: recover or...
1205
1206
1207
1208
1209
1210
1211
  	osb->local_alloc_copy = NULL;
  	osb->dirty = 0;
  
  	/* queue to recover orphan slots for all offline slots */
  	ocfs2_replay_map_set_state(osb, REPLAY_NEEDED);
  	ocfs2_queue_replay_slots(osb);
  	ocfs2_free_replay_slots(osb);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1212
  }
2205363dc   Jan Kara   ocfs2: Implement ...
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
  /*
   * Queue osb->quota_rec, if set, for deferred quota recovery on our
   * own slot and clear the pointer; the queued item takes it over.
   */
  void ocfs2_complete_quota_recovery(struct ocfs2_super *osb)
  {
  	if (!osb->quota_rec)
  		return;

  	ocfs2_queue_recovery_completion(osb->journal, osb->slot_num,
  					NULL, NULL, osb->quota_rec);
  	osb->quota_rec = NULL;
  }
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1224
1225
  static int __ocfs2_recovery_thread(void *arg)
  {
2205363dc   Jan Kara   ocfs2: Implement ...
1226
  	int status, node_num, slot_num;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1227
  	struct ocfs2_super *osb = arg;
553abd046   Joel Becker   ocfs2: Change the...
1228
  	struct ocfs2_recovery_map *rm = osb->recovery_map;
2205363dc   Jan Kara   ocfs2: Implement ...
1229
1230
1231
  	int *rm_quota = NULL;
  	int rm_quota_used = 0, i;
  	struct ocfs2_quota_recovery *qrec;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1232

ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1233
1234
1235
1236
  	status = ocfs2_wait_on_mount(osb);
  	if (status < 0) {
  		goto bail;
  	}
2205363dc   Jan Kara   ocfs2: Implement ...
1237
1238
1239
1240
1241
  	rm_quota = kzalloc(osb->max_slots * sizeof(int), GFP_NOFS);
  	if (!rm_quota) {
  		status = -ENOMEM;
  		goto bail;
  	}
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1242
1243
1244
1245
1246
1247
  restart:
  	status = ocfs2_super_lock(osb, 1);
  	if (status < 0) {
  		mlog_errno(status);
  		goto bail;
  	}
9140db04e   Srinivas Eeda   ocfs2: recover or...
1248
1249
1250
1251
1252
1253
1254
  	status = ocfs2_compute_replay_slots(osb);
  	if (status < 0)
  		mlog_errno(status);
  
  	/* queue recovery for our own slot */
  	ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL,
  					NULL, NULL);
553abd046   Joel Becker   ocfs2: Change the...
1255
1256
1257
1258
1259
1260
  	spin_lock(&osb->osb_lock);
  	while (rm->rm_used) {
  		/* It's always safe to remove entry zero, as we won't
  		 * clear it until ocfs2_recover_node() has succeeded. */
  		node_num = rm->rm_entries[0];
  		spin_unlock(&osb->osb_lock);
2205363dc   Jan Kara   ocfs2: Implement ...
1261
  		slot_num = ocfs2_node_num_to_slot(osb, node_num);
b41079504   Tao Ma   ocfs2: Remove mas...
1262
  		trace_ocfs2_recovery_thread_node(node_num, slot_num);
2205363dc   Jan Kara   ocfs2: Implement ...
1263
1264
  		if (slot_num == -ENOENT) {
  			status = 0;
2205363dc   Jan Kara   ocfs2: Implement ...
1265
1266
  			goto skip_recovery;
  		}
2205363dc   Jan Kara   ocfs2: Implement ...
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
  
  		/* It is a bit subtle with quota recovery. We cannot do it
  		 * immediately because we have to obtain cluster locks from
  		 * quota files and we also don't want to just skip it because
  		 * then quota usage would be out of sync until some node takes
  		 * the slot. So we remember which nodes need quota recovery
  		 * and when everything else is done, we recover quotas. */
  		for (i = 0; i < rm_quota_used && rm_quota[i] != slot_num; i++);
  		if (i == rm_quota_used)
  			rm_quota[rm_quota_used++] = slot_num;
  
  		status = ocfs2_recover_node(osb, node_num, slot_num);
  skip_recovery:
553abd046   Joel Becker   ocfs2: Change the...
1280
1281
1282
  		if (!status) {
  			ocfs2_recovery_map_clear(osb, node_num);
  		} else {
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1283
1284
1285
1286
1287
1288
1289
  			mlog(ML_ERROR,
  			     "Error %d recovering node %d on device (%u,%u)!
  ",
  			     status, node_num,
  			     MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
  			mlog(ML_ERROR, "Volume requires unmount.
  ");
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1290
  		}
553abd046   Joel Becker   ocfs2: Change the...
1291
  		spin_lock(&osb->osb_lock);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1292
  	}
553abd046   Joel Becker   ocfs2: Change the...
1293
  	spin_unlock(&osb->osb_lock);
b41079504   Tao Ma   ocfs2: Remove mas...
1294
  	trace_ocfs2_recovery_thread_end(status);
553abd046   Joel Becker   ocfs2: Change the...
1295

539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1296
1297
1298
1299
1300
  	/* Refresh all journal recovery generations from disk */
  	status = ocfs2_check_journals_nolocks(osb);
  	status = (status == -EROFS) ? 0 : status;
  	if (status < 0)
  		mlog_errno(status);
2205363dc   Jan Kara   ocfs2: Implement ...
1301
  	/* Now it is right time to recover quotas... We have to do this under
25985edce   Lucas De Marchi   Fix common misspe...
1302
  	 * superblock lock so that no one can start using the slot (and crash)
2205363dc   Jan Kara   ocfs2: Implement ...
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
  	 * before we recover it */
  	for (i = 0; i < rm_quota_used; i++) {
  		qrec = ocfs2_begin_quota_recovery(osb, rm_quota[i]);
  		if (IS_ERR(qrec)) {
  			status = PTR_ERR(qrec);
  			mlog_errno(status);
  			continue;
  		}
  		ocfs2_queue_recovery_completion(osb->journal, rm_quota[i],
  						NULL, NULL, qrec);
  	}
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1314
  	ocfs2_super_unlock(osb, 1);
9140db04e   Srinivas Eeda   ocfs2: recover or...
1315
1316
  	/* queue recovery for offline slots */
  	ocfs2_queue_replay_slots(osb);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1317
1318
  
  bail:
c74ec2f77   Arjan van de Ven   [PATCH] ocfs2: Se...
1319
  	mutex_lock(&osb->recovery_lock);
553abd046   Joel Becker   ocfs2: Change the...
1320
  	if (!status && !ocfs2_recovery_completed(osb)) {
c74ec2f77   Arjan van de Ven   [PATCH] ocfs2: Se...
1321
  		mutex_unlock(&osb->recovery_lock);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1322
1323
  		goto restart;
  	}
9140db04e   Srinivas Eeda   ocfs2: recover or...
1324
  	ocfs2_free_replay_slots(osb);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1325
1326
1327
  	osb->recovery_thread_task = NULL;
  	mb(); /* sync with ocfs2_recovery_thread_running */
  	wake_up(&osb->recovery_event);
c74ec2f77   Arjan van de Ven   [PATCH] ocfs2: Se...
1328
  	mutex_unlock(&osb->recovery_lock);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1329

2205363dc   Jan Kara   ocfs2: Implement ...
1330
1331
  	if (rm_quota)
  		kfree(rm_quota);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1332
1333
1334
1335
1336
1337
1338
1339
1340
  	/* no one is callint kthread_stop() for us so the kthread() api
  	 * requires that we call do_exit().  And it isn't exported, but
  	 * complete_and_exit() seems to be a minimal wrapper around it. */
  	complete_and_exit(NULL, status);
  	return status;
  }
  
  void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num)
  {
c74ec2f77   Arjan van de Ven   [PATCH] ocfs2: Se...
1341
  	mutex_lock(&osb->recovery_lock);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1342

b41079504   Tao Ma   ocfs2: Remove mas...
1343
1344
1345
1346
  	trace_ocfs2_recovery_thread(node_num, osb->node_num,
  		osb->disable_recovery, osb->recovery_thread_task,
  		osb->disable_recovery ?
  		-1 : ocfs2_recovery_map_set(osb, node_num));
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1347

b41079504   Tao Ma   ocfs2: Remove mas...
1348
1349
  	if (osb->disable_recovery)
  		goto out;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1350
1351
1352
1353
1354
  
  	if (osb->recovery_thread_task)
  		goto out;
  
  	osb->recovery_thread_task =  kthread_run(__ocfs2_recovery_thread, osb,
784270435   Mark Fasheh   ocfs2: clean up s...
1355
  						 "ocfs2rec");
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1356
1357
1358
1359
1360
1361
  	if (IS_ERR(osb->recovery_thread_task)) {
  		mlog_errno((int)PTR_ERR(osb->recovery_thread_task));
  		osb->recovery_thread_task = NULL;
  	}
  
  out:
c74ec2f77   Arjan van de Ven   [PATCH] ocfs2: Se...
1362
  	mutex_unlock(&osb->recovery_lock);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1363
  	wake_up(&osb->recovery_event);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1364
  }
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
  static int ocfs2_read_journal_inode(struct ocfs2_super *osb,
  				    int slot_num,
  				    struct buffer_head **bh,
  				    struct inode **ret_inode)
  {
  	int status = -EACCES;
  	struct inode *inode = NULL;
  
  	BUG_ON(slot_num >= osb->max_slots);
  
  	inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
  					    slot_num);
  	if (!inode || is_bad_inode(inode)) {
  		mlog_errno(status);
  		goto bail;
  	}
  	SET_INODE_JOURNAL(inode);
b657c95c1   Joel Becker   ocfs2: Wrap inode...
1382
  	status = ocfs2_read_inode_block_full(inode, bh, OCFS2_BH_IGNORE_CACHE);
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
  	if (status < 0) {
  		mlog_errno(status);
  		goto bail;
  	}
  
  	status = 0;
  
  bail:
  	if (inode) {
  		if (status || !ret_inode)
  			iput(inode);
  		else
  			*ret_inode = inode;
  	}
  	return status;
  }
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
  /* Does the actual journal replay and marks the journal inode as
   * clean. Will only replay if the journal inode is marked dirty. */
  static int ocfs2_replay_journal(struct ocfs2_super *osb,
  				int node_num,
  				int slot_num)
  {
  	int status;
  	int got_lock = 0;
  	unsigned int flags;
  	struct inode *inode = NULL;
  	struct ocfs2_dinode *fe;
  	journal_t *journal = NULL;
  	struct buffer_head *bh = NULL;
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1412
  	u32 slot_reco_gen;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1413

539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1414
1415
  	status = ocfs2_read_journal_inode(osb, slot_num, &bh, &inode);
  	if (status) {
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1416
1417
1418
  		mlog_errno(status);
  		goto done;
  	}
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
  
  	fe = (struct ocfs2_dinode *)bh->b_data;
  	slot_reco_gen = ocfs2_get_recovery_generation(fe);
  	brelse(bh);
  	bh = NULL;
  
  	/*
  	 * As the fs recovery is asynchronous, there is a small chance that
  	 * another node mounted (and recovered) the slot before the recovery
  	 * thread could get the lock. To handle that, we dirty read the journal
  	 * inode for that slot to get the recovery generation. If it is
  	 * different than what we expected, the slot has been recovered.
  	 * If not, it needs recovery.
  	 */
  	if (osb->slot_recovery_generations[slot_num] != slot_reco_gen) {
b41079504   Tao Ma   ocfs2: Remove mas...
1434
  		trace_ocfs2_replay_journal_recovered(slot_num,
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1435
1436
1437
  		     osb->slot_recovery_generations[slot_num], slot_reco_gen);
  		osb->slot_recovery_generations[slot_num] = slot_reco_gen;
  		status = -EBUSY;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1438
1439
  		goto done;
  	}
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1440
1441
  
  	/* Continue with recovery as the journal has not yet been recovered */
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1442

e63aecb65   Mark Fasheh   ocfs2: Rename ocf...
1443
  	status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1444
  	if (status < 0) {
b41079504   Tao Ma   ocfs2: Remove mas...
1445
  		trace_ocfs2_replay_journal_lock_err(status);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
  		if (status != -ERESTARTSYS)
  			mlog(ML_ERROR, "Could not lock journal!
  ");
  		goto done;
  	}
  	got_lock = 1;
  
  	fe = (struct ocfs2_dinode *) bh->b_data;
  
  	flags = le32_to_cpu(fe->id1.journal1.ij_flags);
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1456
  	slot_reco_gen = ocfs2_get_recovery_generation(fe);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1457
1458
  
  	if (!(flags & OCFS2_JOURNAL_DIRTY_FL)) {
b41079504   Tao Ma   ocfs2: Remove mas...
1459
  		trace_ocfs2_replay_journal_skip(node_num);
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1460
1461
  		/* Refresh recovery generation for the slot */
  		osb->slot_recovery_generations[slot_num] = slot_reco_gen;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1462
1463
  		goto done;
  	}
9140db04e   Srinivas Eeda   ocfs2: recover or...
1464
1465
  	/* we need to run complete recovery for offline orphan slots */
  	ocfs2_replay_map_set_state(osb, REPLAY_NEEDED);
619c200de   Sunil Mushran   ocfs2: Clean up m...
1466
1467
1468
1469
  	printk(KERN_NOTICE "ocfs2: Begin replay journal (node %d, slot %d) on "\
  	       "device (%u,%u)
  ", node_num, slot_num, MAJOR(osb->sb->s_dev),
  	       MINOR(osb->sb->s_dev));
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1470
1471
1472
1473
1474
1475
1476
1477
  
  	OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
  
  	status = ocfs2_force_read_journal(inode);
  	if (status < 0) {
  		mlog_errno(status);
  		goto done;
  	}
2b4e30fbd   Joel Becker   ocfs2: Switch ove...
1478
  	journal = jbd2_journal_init_inode(inode);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1479
1480
1481
1482
1483
1484
  	if (journal == NULL) {
  		mlog(ML_ERROR, "Linux journal layer error
  ");
  		status = -EIO;
  		goto done;
  	}
2b4e30fbd   Joel Becker   ocfs2: Switch ove...
1485
  	status = jbd2_journal_load(journal);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1486
1487
1488
1489
  	if (status < 0) {
  		mlog_errno(status);
  		if (!igrab(inode))
  			BUG();
2b4e30fbd   Joel Becker   ocfs2: Switch ove...
1490
  		jbd2_journal_destroy(journal);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1491
1492
1493
1494
1495
1496
  		goto done;
  	}
  
  	ocfs2_clear_journal_error(osb->sb, journal, slot_num);
  
  	/* wipe the journal */
2b4e30fbd   Joel Becker   ocfs2: Switch ove...
1497
1498
1499
  	jbd2_journal_lock_updates(journal);
  	status = jbd2_journal_flush(journal);
  	jbd2_journal_unlock_updates(journal);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1500
1501
1502
1503
1504
1505
1506
  	if (status < 0)
  		mlog_errno(status);
  
  	/* This will mark the node clean */
  	flags = le32_to_cpu(fe->id1.journal1.ij_flags);
  	flags &= ~OCFS2_JOURNAL_DIRTY_FL;
  	fe->id1.journal1.ij_flags = cpu_to_le32(flags);
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1507
1508
1509
1510
  	/* Increment recovery generation to indicate successful recovery */
  	ocfs2_bump_recovery_generation(fe);
  	osb->slot_recovery_generations[slot_num] =
  					ocfs2_get_recovery_generation(fe);
13723d00e   Joel Becker   ocfs2: Use metada...
1511
  	ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check);
8cb471e8f   Joel Becker   ocfs2: Take the i...
1512
  	status = ocfs2_write_block(osb, bh, INODE_CACHE(inode));
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1513
1514
1515
1516
1517
  	if (status < 0)
  		mlog_errno(status);
  
  	if (!igrab(inode))
  		BUG();
2b4e30fbd   Joel Becker   ocfs2: Switch ove...
1518
  	jbd2_journal_destroy(journal);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1519

619c200de   Sunil Mushran   ocfs2: Clean up m...
1520
1521
1522
1523
  	printk(KERN_NOTICE "ocfs2: End replay journal (node %d, slot %d) on "\
  	       "device (%u,%u)
  ", node_num, slot_num, MAJOR(osb->sb->s_dev),
  	       MINOR(osb->sb->s_dev));
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1524
1525
1526
  done:
  	/* drop the lock on this nodes journal */
  	if (got_lock)
e63aecb65   Mark Fasheh   ocfs2: Rename ocf...
1527
  		ocfs2_inode_unlock(inode, 1);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1528
1529
1530
  
  	if (inode)
  		iput(inode);
a81cb88b6   Mark Fasheh   ocfs2: Don't chec...
1531
  	brelse(bh);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1532

ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
  	return status;
  }
  
  /*
   * Do the most important parts of node recovery:
   *  - Replay it's journal
   *  - Stamp a clean local allocator file
   *  - Stamp a clean truncate log
   *  - Mark the node clean
   *
   * If this function completes without error, a node in OCFS2 can be
   * said to have been safely recovered. As a result, failure during the
   * second part of a nodes recovery process (local alloc recovery) is
   * far less concerning.
   */
  static int ocfs2_recover_node(struct ocfs2_super *osb,
2205363dc   Jan Kara   ocfs2: Implement ...
1549
  			      int node_num, int slot_num)
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1550
1551
  {
  	int status = 0;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1552
1553
  	struct ocfs2_dinode *la_copy = NULL;
  	struct ocfs2_dinode *tl_copy = NULL;
b41079504   Tao Ma   ocfs2: Remove mas...
1554
  	trace_ocfs2_recover_node(node_num, slot_num, osb->node_num);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1555
1556
1557
  
  	/* Should not ever be called to recover ourselves -- in that
  	 * case we should've called ocfs2_journal_load instead. */
ebdec83ba   Eric Sesterhenn / snakebyte   [PATCH] BUG_ON() ...
1558
  	BUG_ON(osb->node_num == node_num);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1559

ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1560
1561
  	status = ocfs2_replay_journal(osb, node_num, slot_num);
  	if (status < 0) {
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1562
  		if (status == -EBUSY) {
b41079504   Tao Ma   ocfs2: Remove mas...
1563
  			trace_ocfs2_recover_node_skip(slot_num, node_num);
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1564
1565
1566
  			status = 0;
  			goto done;
  		}
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
  		mlog_errno(status);
  		goto done;
  	}
  
  	/* Stamp a clean local alloc file AFTER recovering the journal... */
  	status = ocfs2_begin_local_alloc_recovery(osb, slot_num, &la_copy);
  	if (status < 0) {
  		mlog_errno(status);
  		goto done;
  	}
  
  	/* An error from begin_truncate_log_recovery is not
  	 * serious enough to warrant halting the rest of
  	 * recovery. */
  	status = ocfs2_begin_truncate_log_recovery(osb, slot_num, &tl_copy);
  	if (status < 0)
  		mlog_errno(status);
  
  	/* Likewise, this would be a strange but ultimately not so
  	 * harmful place to get an error... */
8e8a4603b   Mark Fasheh   ocfs2: Move slot ...
1587
  	status = ocfs2_clear_slot(osb, slot_num);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1588
1589
1590
1591
1592
  	if (status < 0)
  		mlog_errno(status);
  
  	/* This will kfree the memory pointed to by la_copy and tl_copy */
  	ocfs2_queue_recovery_completion(osb->journal, slot_num, la_copy,
2205363dc   Jan Kara   ocfs2: Implement ...
1593
  					tl_copy, NULL);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1594
1595
1596
  
  	status = 0;
  done:
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
  	return status;
  }
  
  /* Test node liveness by trylocking his journal. If we get the lock,
   * we drop it here. Return 0 if we got the lock, -EAGAIN if node is
   * still alive (we couldn't get the lock) and < 0 on error. */
  static int ocfs2_trylock_journal(struct ocfs2_super *osb,
  				 int slot_num)
  {
  	int status, flags;
  	struct inode *inode = NULL;
  
  	inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
  					    slot_num);
  	if (inode == NULL) {
  		mlog(ML_ERROR, "access error
  ");
  		status = -EACCES;
  		goto bail;
  	}
  	if (is_bad_inode(inode)) {
  		mlog(ML_ERROR, "access error (bad inode)
  ");
  		iput(inode);
  		inode = NULL;
  		status = -EACCES;
  		goto bail;
  	}
  	SET_INODE_JOURNAL(inode);
  
  	flags = OCFS2_META_LOCK_RECOVERY | OCFS2_META_LOCK_NOQUEUE;
e63aecb65   Mark Fasheh   ocfs2: Rename ocf...
1628
  	status = ocfs2_inode_lock_full(inode, NULL, 1, flags);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1629
1630
1631
1632
1633
  	if (status < 0) {
  		if (status != -EAGAIN)
  			mlog_errno(status);
  		goto bail;
  	}
e63aecb65   Mark Fasheh   ocfs2: Rename ocf...
1634
  	ocfs2_inode_unlock(inode, 1);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
  bail:
  	if (inode)
  		iput(inode);
  
  	return status;
  }
  
  /* Call this underneath ocfs2_super_lock. It also assumes that the
   * slot info struct has been updated from disk. */
  int ocfs2_mark_dead_nodes(struct ocfs2_super *osb)
  {
d85b20e4b   Joel Becker   ocfs2: Make ocfs2...
1646
1647
  	unsigned int node_num;
  	int status, i;
a1af7d15a   Mark Fasheh   ocfs2: Fix sleep-...
1648
  	u32 gen;
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1649
1650
  	struct buffer_head *bh = NULL;
  	struct ocfs2_dinode *di;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1651
1652
1653
  
  	/* This is called with the super block cluster lock, so we
  	 * know that the slot map can't change underneath us. */
d85b20e4b   Joel Becker   ocfs2: Make ocfs2...
1654
  	for (i = 0; i < osb->max_slots; i++) {
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1655
1656
1657
1658
1659
1660
1661
  		/* Read journal inode to get the recovery generation */
  		status = ocfs2_read_journal_inode(osb, i, &bh, NULL);
  		if (status) {
  			mlog_errno(status);
  			goto bail;
  		}
  		di = (struct ocfs2_dinode *)bh->b_data;
a1af7d15a   Mark Fasheh   ocfs2: Fix sleep-...
1662
  		gen = ocfs2_get_recovery_generation(di);
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1663
1664
  		brelse(bh);
  		bh = NULL;
a1af7d15a   Mark Fasheh   ocfs2: Fix sleep-...
1665
1666
  		spin_lock(&osb->osb_lock);
  		osb->slot_recovery_generations[i] = gen;
b41079504   Tao Ma   ocfs2: Remove mas...
1667
1668
  		trace_ocfs2_mark_dead_nodes(i,
  					    osb->slot_recovery_generations[i]);
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1669

a1af7d15a   Mark Fasheh   ocfs2: Fix sleep-...
1670
1671
  		if (i == osb->slot_num) {
  			spin_unlock(&osb->osb_lock);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1672
  			continue;
a1af7d15a   Mark Fasheh   ocfs2: Fix sleep-...
1673
  		}
d85b20e4b   Joel Becker   ocfs2: Make ocfs2...
1674
1675
  
  		status = ocfs2_slot_to_node_num_locked(osb, i, &node_num);
a1af7d15a   Mark Fasheh   ocfs2: Fix sleep-...
1676
1677
  		if (status == -ENOENT) {
  			spin_unlock(&osb->osb_lock);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1678
  			continue;
a1af7d15a   Mark Fasheh   ocfs2: Fix sleep-...
1679
  		}
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1680

a1af7d15a   Mark Fasheh   ocfs2: Fix sleep-...
1681
1682
  		if (__ocfs2_recovery_map_test(osb, node_num)) {
  			spin_unlock(&osb->osb_lock);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1683
  			continue;
a1af7d15a   Mark Fasheh   ocfs2: Fix sleep-...
1684
  		}
d85b20e4b   Joel Becker   ocfs2: Make ocfs2...
1685
  		spin_unlock(&osb->osb_lock);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
  
  		/* Ok, we have a slot occupied by another node which
  		 * is not in the recovery map. We trylock his journal
  		 * file here to test if he's alive. */
  		status = ocfs2_trylock_journal(osb, i);
  		if (!status) {
  			/* Since we're called from mount, we know that
  			 * the recovery thread can't race us on
  			 * setting / checking the recovery bits. */
  			ocfs2_recovery_thread(osb, node_num);
  		} else if ((status < 0) && (status != -EAGAIN)) {
  			mlog_errno(status);
  			goto bail;
  		}
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1700
  	}
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1701
1702
1703
  
  	status = 0;
  bail:
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1704
1705
  	return status;
  }
83273932f   Srinivas Eeda   ocfs2: timer to q...
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
  /*
   * Compute the delay, in jiffies, until the next orphan scan.  The base
   * interval is ORPHAN_SCAN_SCHEDULE_TIMEOUT milliseconds; a random extra
   * of up to just under 5000 ms is added so that multiple nodes are less
   * likely to fire their timers at the same moment.
   */
  static inline unsigned long ocfs2_orphan_scan_timeout(void)
  {
  	unsigned long jitter;

  	get_random_bytes(&jitter, sizeof(jitter));
  	jitter %= 5000;

  	return msecs_to_jiffies(ORPHAN_SCAN_SCHEDULE_TIMEOUT + jitter);
  }
  
  /*
   * ocfs2_queue_orphan_scan calls ocfs2_queue_recovery_completion for
   * every slot, queuing a recovery of the slot on the ocfs2_wq thread. This
   * is done to catch any orphans that are left over in orphan directories.
   *
a035bff6b   Sunil Mushran   ocfs2: Add commen...
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
   * It scans all slots, even ones that are in use. It does so to handle the
   * case described below:
   *
   *   Node 1 has an inode it was using. The dentry went away due to memory
   *   pressure.  Node 1 closes the inode, but it's on the free list. The node
   *   has the open lock.
   *   Node 2 unlinks the inode. It grabs the dentry lock to notify others,
   *   but node 1 has no dentry and doesn't get the message. It trylocks the
   *   open lock, sees that another node has a PR, and does nothing.
   *   Later node 2 runs its orphan dir. It igets the inode, trylocks the
   *   open lock, sees the PR still, and does nothing.
   *   Basically, we have to trigger an orphan iput on node 1. The only way
   *   for this to happen is if node 1 runs node 2's orphan dir.
   *
83273932f   Srinivas Eeda   ocfs2: timer to q...
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
   * ocfs2_queue_orphan_scan gets called every ORPHAN_SCAN_SCHEDULE_TIMEOUT
   * seconds.  It gets an EX lock on os_lockres and checks sequence number
   * stored in LVB. If the sequence number has changed, it means some other
   * node has done the scan.  This node skips the scan and tracks the
   * sequence number.  If the sequence number didn't change, it means a scan
   * hasn't happened.  The node queues a scan and increments the
   * sequence number in the LVB.
   */
  void ocfs2_queue_orphan_scan(struct ocfs2_super *osb)
  {
  	struct ocfs2_orphan_scan *os;
  	int status, i;
  	u32 seqno = 0;
  
  	os = &osb->osb_orphan_scan;
692684e19   Sunil Mushran   ocfs2: Stop orpha...
1754
1755
  	if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
  		goto out;
b41079504   Tao Ma   ocfs2: Remove mas...
1756
1757
  	trace_ocfs2_queue_orphan_scan_begin(os->os_count, os->os_seqno,
  					    atomic_read(&os->os_state));
df152c241   Sunil Mushran   ocfs2: Disable or...
1758
  	status = ocfs2_orphan_scan_lock(osb, &seqno);
83273932f   Srinivas Eeda   ocfs2: timer to q...
1759
1760
1761
1762
1763
  	if (status < 0) {
  		if (status != -EAGAIN)
  			mlog_errno(status);
  		goto out;
  	}
692684e19   Sunil Mushran   ocfs2: Stop orpha...
1764
1765
1766
  	/* Do no queue the tasks if the volume is being umounted */
  	if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
  		goto unlock;
83273932f   Srinivas Eeda   ocfs2: timer to q...
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
  	if (os->os_seqno != seqno) {
  		os->os_seqno = seqno;
  		goto unlock;
  	}
  
  	for (i = 0; i < osb->max_slots; i++)
  		ocfs2_queue_recovery_completion(osb->journal, i, NULL, NULL,
  						NULL);
  	/*
  	 * We queued a recovery on orphan slots, increment the sequence
  	 * number and update LVB so other node will skip the scan for a while
  	 */
  	seqno++;
15633a220   Srinivas Eeda   ocfs2 patch to tr...
1780
1781
  	os->os_count++;
  	os->os_scantime = CURRENT_TIME;
83273932f   Srinivas Eeda   ocfs2: timer to q...
1782
  unlock:
df152c241   Sunil Mushran   ocfs2: Disable or...
1783
  	ocfs2_orphan_scan_unlock(osb, seqno);
83273932f   Srinivas Eeda   ocfs2: timer to q...
1784
  out:
b41079504   Tao Ma   ocfs2: Remove mas...
1785
1786
  	trace_ocfs2_queue_orphan_scan_end(os->os_count, os->os_seqno,
  					  atomic_read(&os->os_state));
83273932f   Srinivas Eeda   ocfs2: timer to q...
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
  	return;
  }
  
  /* Worker task that gets fired every ORPHAN_SCAN_SCHEDULE_TIMEOUT millsec */
  void ocfs2_orphan_scan_work(struct work_struct *work)
  {
  	struct ocfs2_orphan_scan *os;
  	struct ocfs2_super *osb;
  
  	os = container_of(work, struct ocfs2_orphan_scan,
  			  os_orphan_scan_work.work);
  	osb = os->os_osb;
  
  	mutex_lock(&os->os_lock);
  	ocfs2_queue_orphan_scan(osb);
692684e19   Sunil Mushran   ocfs2: Stop orpha...
1802
  	if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE)
40f165f41   Tao Ma   ocfs2: Move orpha...
1803
  		queue_delayed_work(ocfs2_wq, &os->os_orphan_scan_work,
692684e19   Sunil Mushran   ocfs2: Stop orpha...
1804
  				      ocfs2_orphan_scan_timeout());
83273932f   Srinivas Eeda   ocfs2: timer to q...
1805
1806
1807
1808
1809
1810
1811
1812
  	mutex_unlock(&os->os_lock);
  }
  
  void ocfs2_orphan_scan_stop(struct ocfs2_super *osb)
  {
  	struct ocfs2_orphan_scan *os;
  
  	os = &osb->osb_orphan_scan;
df152c241   Sunil Mushran   ocfs2: Disable or...
1813
1814
1815
1816
1817
1818
  	if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE) {
  		atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
  		mutex_lock(&os->os_lock);
  		cancel_delayed_work(&os->os_orphan_scan_work);
  		mutex_unlock(&os->os_lock);
  	}
83273932f   Srinivas Eeda   ocfs2: timer to q...
1819
  }
df152c241   Sunil Mushran   ocfs2: Disable or...
1820
  void ocfs2_orphan_scan_init(struct ocfs2_super *osb)
83273932f   Srinivas Eeda   ocfs2: timer to q...
1821
1822
1823
1824
1825
  {
  	struct ocfs2_orphan_scan *os;
  
  	os = &osb->osb_orphan_scan;
  	os->os_osb = osb;
15633a220   Srinivas Eeda   ocfs2 patch to tr...
1826
  	os->os_count = 0;
3211949f8   Sunil Mushran   ocfs2: Do not ini...
1827
  	os->os_seqno = 0;
83273932f   Srinivas Eeda   ocfs2: timer to q...
1828
  	mutex_init(&os->os_lock);
df152c241   Sunil Mushran   ocfs2: Disable or...
1829
  	INIT_DELAYED_WORK(&os->os_orphan_scan_work, ocfs2_orphan_scan_work);
8b712cd58   Jeff Mahoney   ocfs2: Fixup orph...
1830
  }
83273932f   Srinivas Eeda   ocfs2: timer to q...
1831

8b712cd58   Jeff Mahoney   ocfs2: Fixup orph...
1832
1833
1834
1835
1836
1837
  void ocfs2_orphan_scan_start(struct ocfs2_super *osb)
  {
  	struct ocfs2_orphan_scan *os;
  
  	os = &osb->osb_orphan_scan;
  	os->os_scantime = CURRENT_TIME;
df152c241   Sunil Mushran   ocfs2: Disable or...
1838
1839
1840
1841
  	if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb))
  		atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
  	else {
  		atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE);
40f165f41   Tao Ma   ocfs2: Move orpha...
1842
1843
  		queue_delayed_work(ocfs2_wq, &os->os_orphan_scan_work,
  				   ocfs2_orphan_scan_timeout());
df152c241   Sunil Mushran   ocfs2: Disable or...
1844
  	}
83273932f   Srinivas Eeda   ocfs2: timer to q...
1845
  }
5eae5b96f   Mark Fasheh   ocfs2: Remove ope...
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
  /*
   * Private cursor handed to ocfs2_orphan_filldir() through
   * ocfs2_dir_foreach() while an orphan directory is being walked.
   */
  struct ocfs2_orphan_filldir_priv {
  	/* Most recently queued orphan inode; the list is chained through
  	 * each inode's ip_next_orphan. */
  	struct inode		*head;
  	/* Super block passed to ocfs2_iget() for each directory entry. */
  	struct ocfs2_super	*osb;
  };
  
  /*
   * Directory iteration callback used while walking an orphan directory.
   *
   * @priv is a struct ocfs2_orphan_filldir_priv.  The "." and ".." entries
   * are ignored.  For every other entry the inode @ino is looked up with
   * OCFS2_FI_FLAG_ORPHAN_RECOVERY and pushed onto the front of the list
   * headed by priv->head (linked through ip_next_orphan).  Inodes that
   * cannot be obtained are silently skipped.
   *
   * Always returns 0.
   */
  static int ocfs2_orphan_filldir(void *priv, const char *name, int name_len,
  				loff_t pos, u64 ino, unsigned type)
  {
  	struct ocfs2_orphan_filldir_priv *p = priv;
  	struct inode *iter;

  	if (name_len == 1 && !strncmp(".", name, 1))
  		return 0;
  	if (name_len == 2 && !strncmp("..", name, 2))
  		return 0;

  	/* Skip bad inodes so that recovery can continue */
  	iter = ocfs2_iget(p->osb, ino,
  			  OCFS2_FI_FLAG_ORPHAN_RECOVERY, 0);
  	if (IS_ERR(iter))
  		return 0;

  	trace_ocfs2_orphan_filldir((unsigned long long)OCFS2_I(iter)->ip_blkno);
  	/* No locking is required for the next_orphan queue as there
  	 * is only ever a single process doing orphan recovery. */
  	OCFS2_I(iter)->ip_next_orphan = p->head;
  	p->head = iter;

  	return 0;
  }
b4df6ed8d   Mark Fasheh   [PATCH] ocfs2: fi...
1875
1876
1877
  static int ocfs2_queue_orphans(struct ocfs2_super *osb,
  			       int slot,
  			       struct inode **head)
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1878
  {
b4df6ed8d   Mark Fasheh   [PATCH] ocfs2: fi...
1879
  	int status;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1880
  	struct inode *orphan_dir_inode = NULL;
5eae5b96f   Mark Fasheh   ocfs2: Remove ope...
1881
1882
1883
1884
1885
  	struct ocfs2_orphan_filldir_priv priv;
  	loff_t pos = 0;
  
  	priv.osb = osb;
  	priv.head = *head;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1886
1887
1888
1889
1890
1891
1892
  
  	orphan_dir_inode = ocfs2_get_system_file_inode(osb,
  						       ORPHAN_DIR_SYSTEM_INODE,
  						       slot);
  	if  (!orphan_dir_inode) {
  		status = -ENOENT;
  		mlog_errno(status);
b4df6ed8d   Mark Fasheh   [PATCH] ocfs2: fi...
1893
  		return status;
2bd632165   Sunil Mushran   ocfs2/trivial: Re...
1894
  	}
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1895

1b1dcc1b5   Jes Sorensen   [PATCH] mutex sub...
1896
  	mutex_lock(&orphan_dir_inode->i_mutex);
e63aecb65   Mark Fasheh   ocfs2: Rename ocf...
1897
  	status = ocfs2_inode_lock(orphan_dir_inode, NULL, 0);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1898
  	if (status < 0) {
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1899
1900
1901
  		mlog_errno(status);
  		goto out;
  	}
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1902

5eae5b96f   Mark Fasheh   ocfs2: Remove ope...
1903
1904
1905
1906
  	status = ocfs2_dir_foreach(orphan_dir_inode, &pos, &priv,
  				   ocfs2_orphan_filldir);
  	if (status) {
  		mlog_errno(status);
a86370fbb   Mark Fasheh   ocfs2: fix exit-w...
1907
  		goto out_cluster;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1908
  	}
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1909

5eae5b96f   Mark Fasheh   ocfs2: Remove ope...
1910
  	*head = priv.head;
a86370fbb   Mark Fasheh   ocfs2: fix exit-w...
1911
  out_cluster:
e63aecb65   Mark Fasheh   ocfs2: Rename ocf...
1912
  	ocfs2_inode_unlock(orphan_dir_inode, 0);
b4df6ed8d   Mark Fasheh   [PATCH] ocfs2: fi...
1913
1914
  out:
  	mutex_unlock(&orphan_dir_inode->i_mutex);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1915
  	iput(orphan_dir_inode);
b4df6ed8d   Mark Fasheh   [PATCH] ocfs2: fi...
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
  	return status;
  }
  
  /*
   * Wait condition for orphan recovery: returns non-zero once the wipe
   * counter for @slot, sampled under osb_lock, has dropped to zero.
   */
  static int ocfs2_orphan_recovery_can_continue(struct ocfs2_super *osb,
  					      int slot)
  {
  	int idle;

  	spin_lock(&osb->osb_lock);
  	idle = (osb->osb_orphan_wipes[slot] == 0);
  	spin_unlock(&osb->osb_lock);

  	return idle;
  }
  
  /*
   * Flag the orphan directory of @slot as being recovered, then block until
   * every orphan wipe already in flight on that slot has finished.
   */
  static void ocfs2_mark_recovering_orphan_dir(struct ocfs2_super *osb,
  					     int slot)
  {
  	spin_lock(&osb->osb_lock);

  	/* Advertise our state so that new processes entering
  	 * delete_inode() know to back off early. */
  	ocfs2_node_map_set_bit(osb, &osb->osb_recovering_orphan_dirs, slot);

  	for (;;) {
  		if (!osb->osb_orphan_wipes[slot])
  			break;

  		/* Somebody is still in the middle of an orphan wipe on
  		 * this dir: drop the spinlock, sleep until the counter
  		 * reaches zero, then re-check under the lock. */
  		spin_unlock(&osb->osb_lock);
  		wait_event_interruptible(osb->osb_wipe_event,
  					 ocfs2_orphan_recovery_can_continue(osb, slot));
  		spin_lock(&osb->osb_lock);
  	}

  	spin_unlock(&osb->osb_lock);
  }
  
  /*
   * Clear the bit for @slot in osb_recovering_orphan_dirs, i.e. the mark
   * that ocfs2_mark_recovering_orphan_dir() sets.
   */
  static void ocfs2_clear_recovering_orphan_dir(struct ocfs2_super *osb,
  					      int slot)
  {
  	ocfs2_node_map_clear_bit(osb, &osb->osb_recovering_orphan_dirs, slot);
  }
  
  /*
   * Orphan recovery. Each mounted node has it's own orphan dir which we
   * must run during recovery. Our strategy here is to build a list of
   * the inodes in the orphan dir and iget/iput them. The VFS does
   * (most) of the rest of the work.
   *
   * Orphan recovery can happen at any time, not just mount so we have a
   * couple of extra considerations.
   *
   * - We grab as many inodes as we can under the orphan dir lock -
   *   doing iget() outside the orphan dir risks getting a reference on
   *   an invalid inode.
   * - We must be sure not to deadlock with other processes on the
   *   system wanting to run delete_inode(). This can happen when they go
   *   to lock the orphan dir and the orphan recovery process attempts to
   *   iget() inside the orphan dir lock. This can be avoided by
   *   advertising our state to ocfs2_delete_inode().
   */
  static int ocfs2_recover_orphans(struct ocfs2_super *osb,
  				 int slot)
  {
  	int ret = 0;
  	struct inode *inode = NULL;
  	struct inode *iter;
  	struct ocfs2_inode_info *oi;
b41079504   Tao Ma   ocfs2: Remove mas...
1980
  	trace_ocfs2_recover_orphans(slot);
b4df6ed8d   Mark Fasheh   [PATCH] ocfs2: fi...
1981
1982
1983
1984
1985
1986
1987
1988
1989
  
  	ocfs2_mark_recovering_orphan_dir(osb, slot);
  	ret = ocfs2_queue_orphans(osb, slot, &inode);
  	ocfs2_clear_recovering_orphan_dir(osb, slot);
  
  	/* Error here should be noted, but we want to continue with as
  	 * many queued inodes as we've got. */
  	if (ret)
  		mlog_errno(ret);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1990
1991
1992
  
  	while (inode) {
  		oi = OCFS2_I(inode);
b41079504   Tao Ma   ocfs2: Remove mas...
1993
1994
  		trace_ocfs2_recover_orphans_iput(
  					(unsigned long long)oi->ip_blkno);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1995
1996
1997
1998
  
  		iter = oi->ip_next_orphan;
  
  		spin_lock(&oi->ip_lock);
34d024f84   Mark Fasheh   ocfs2: Remove mou...
1999
2000
2001
2002
  		/* The remote delete code may have set these on the
  		 * assumption that the other node would wipe them
  		 * successfully.  If they are still in the node's
  		 * orphan dir, we need to reset that state. */
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2003
2004
2005
2006
2007
  		oi->ip_flags &= ~(OCFS2_INODE_DELETED|OCFS2_INODE_SKIP_DELETE);
  
  		/* Set the proper information to get us going into
  		 * ocfs2_delete_inode. */
  		oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2008
2009
2010
2011
2012
2013
  		spin_unlock(&oi->ip_lock);
  
  		iput(inode);
  
  		inode = iter;
  	}
b4df6ed8d   Mark Fasheh   [PATCH] ocfs2: fi...
2014
  	return ret;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2015
  }
19ece546a   Jan Kara   ocfs2: Enable quo...
2016
  static int __ocfs2_wait_on_mount(struct ocfs2_super *osb, int quota)
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2017
2018
2019
2020
2021
  {
  	/* This check is good because ocfs2 will wait on our recovery
  	 * thread before changing it to something other than MOUNTED
  	 * or DISABLED. */
  	wait_event(osb->osb_mount_event,
19ece546a   Jan Kara   ocfs2: Enable quo...
2022
2023
  		  (!quota && atomic_read(&osb->vol_state) == VOLUME_MOUNTED) ||
  		   atomic_read(&osb->vol_state) == VOLUME_MOUNTED_QUOTAS ||
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2024
2025
2026
2027
2028
2029
  		   atomic_read(&osb->vol_state) == VOLUME_DISABLED);
  
  	/* If there's an error on mount, then we may never get to the
  	 * MOUNTED flag, but this is set right before
  	 * dismount_volume() so we can trust it. */
  	if (atomic_read(&osb->vol_state) == VOLUME_DISABLED) {
b41079504   Tao Ma   ocfs2: Remove mas...
2030
  		trace_ocfs2_wait_on_mount(VOLUME_DISABLED);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
  		mlog(0, "mount error, exiting!
  ");
  		return -EBUSY;
  	}
  
  	return 0;
  }
  
  /*
   * Thread function that repeatedly flushes the journal via
   * ocfs2_commit_cache().
   *
   * @arg: the struct ocfs2_super * this thread serves
   *
   * Runs until kthread_should_stop() is true AND journal->j_num_trans has
   * dropped to zero.  Always returns 0; commit failures are only logged.
   */
  static int ocfs2_commit_thread(void *arg)
  {
  	int status;
  	struct ocfs2_super *osb = arg;
  	struct ocfs2_journal *journal = osb->journal;

  	/* we can trust j_num_trans here because _should_stop() is only set in
  	 * shutdown and nobody other than ourselves should be able to start
  	 * transactions.  committing on shutdown might take a few iterations
  	 * as final transactions put deleted inodes on the list */
  	while (!(kthread_should_stop() &&
  		 atomic_read(&journal->j_num_trans) == 0)) {

  		/* Sleep until there is something to commit or we are asked
  		 * to stop.  The return value is not inspected; the loop
  		 * condition re-checks both predicates. */
  		wait_event_interruptible(osb->checkpoint_event,
  					 atomic_read(&journal->j_num_trans)
  					 || kthread_should_stop());

  		status = ocfs2_commit_cache(osb);
  		if (status < 0)
  			mlog_errno(status);

  		/* Stop was requested but transactions remain: note it and
  		 * go around again. */
  		if (kthread_should_stop() && atomic_read(&journal->j_num_trans)){
  			mlog(ML_KTHREAD,
  			     "commit_thread: %u transactions pending on "
  			     "shutdown\n",
  			     atomic_read(&journal->j_num_trans));
  		}
  	}

  	return 0;
  }
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
2070
2071
2072
2073
2074
  /* Reads all the journal inodes without taking any cluster locks. Used
   * for hard readonly access to determine whether any journal requires
   * recovery. Also used to refresh the recovery generation numbers after
   * a journal has been recovered by another node.
   */
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2075
2076
2077
2078
  int ocfs2_check_journals_nolocks(struct ocfs2_super *osb)
  {
  	int ret = 0;
  	unsigned int slot;
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
2079
  	struct buffer_head *di_bh = NULL;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2080
  	struct ocfs2_dinode *di;
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
2081
  	int journal_dirty = 0;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2082
2083
  
  	for(slot = 0; slot < osb->max_slots; slot++) {
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
2084
2085
  		ret = ocfs2_read_journal_inode(osb, slot, &di_bh, NULL);
  		if (ret) {
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2086
2087
2088
2089
2090
  			mlog_errno(ret);
  			goto out;
  		}
  
  		di = (struct ocfs2_dinode *) di_bh->b_data;
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
2091
2092
  		osb->slot_recovery_generations[slot] =
  					ocfs2_get_recovery_generation(di);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2093
2094
  		if (le32_to_cpu(di->id1.journal1.ij_flags) &
  		    OCFS2_JOURNAL_DIRTY_FL)
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
2095
  			journal_dirty = 1;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2096
2097
  
  		brelse(di_bh);
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
2098
  		di_bh = NULL;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2099
2100
2101
  	}
  
  out:
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
2102
2103
  	if (journal_dirty)
  		ret = -EROFS;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2104
2105
  	return ret;
  }