Blame view

fs/ocfs2/journal.c 57.8 KB
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
  /* -*- mode: c; c-basic-offset: 8; -*-
   * vim: noexpandtab sw=8 ts=8 sts=0:
   *
   * journal.c
   *
   * Defines functions of journalling api
   *
   * Copyright (C) 2003, 2004 Oracle.  All rights reserved.
   *
   * This program is free software; you can redistribute it and/or
   * modify it under the terms of the GNU General Public
   * License as published by the Free Software Foundation; either
   * version 2 of the License, or (at your option) any later version.
   *
   * This program is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   * General Public License for more details.
   *
   * You should have received a copy of the GNU General Public
   * License along with this program; if not, write to the
   * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   * Boston, MA 02111-1307, USA.
   */
  
  #include <linux/fs.h>
  #include <linux/types.h>
  #include <linux/slab.h>
  #include <linux/highmem.h>
  #include <linux/kthread.h>
83273932f   Srinivas Eeda   ocfs2: timer to q...
31
32
  #include <linux/time.h>
  #include <linux/random.h>
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
33

ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
34
35
36
37
38
  #include <cluster/masklog.h>
  
  #include "ocfs2.h"
  
  #include "alloc.h"
50655ae9e   Joel Becker   ocfs2: Add journa...
39
  #include "blockcheck.h"
316f4b9f9   Mark Fasheh   ocfs2: Move direc...
40
  #include "dir.h"
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
41
42
43
44
45
46
  #include "dlmglue.h"
  #include "extent_map.h"
  #include "heartbeat.h"
  #include "inode.h"
  #include "journal.h"
  #include "localalloc.h"
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
47
48
  #include "slot_map.h"
  #include "super.h"
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
49
  #include "sysfile.h"
0cf2f7632   Joel Becker   ocfs2: Pass struc...
50
  #include "uptodate.h"
2205363dc   Jan Kara   ocfs2: Implement ...
51
  #include "quota.h"
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
52
53
  
  #include "buffer_head_io.h"
b41079504   Tao Ma   ocfs2: Remove mas...
54
  #include "ocfs2_trace.h"
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
55

34af946a2   Ingo Molnar   [PATCH] spin/rwlo...
56
  /*
   * NOTE(review): no user of this lock is visible in this part of the file;
   * presumably it serializes transaction id increments -- confirm at the
   * definition of ocfs2_inc_trans_id().
   */
  DEFINE_SPINLOCK(trans_inc_lock);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
57

83273932f   Srinivas Eeda   ocfs2: timer to q...
58
  /*
   * NOTE(review): presumably the orphan scan interval in milliseconds;
   * the users of this macro are not visible here -- confirm the unit.
   */
  #define ORPHAN_SCAN_SCHEDULE_TIMEOUT 300000
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
59
60
  static int ocfs2_force_read_journal(struct inode *inode);
  static int ocfs2_recover_node(struct ocfs2_super *osb,
2205363dc   Jan Kara   ocfs2: Implement ...
61
  			      int node_num, int slot_num);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
62
63
  static int __ocfs2_recovery_thread(void *arg);
  static int ocfs2_commit_cache(struct ocfs2_super *osb);
19ece546a   Jan Kara   ocfs2: Enable quo...
64
  static int __ocfs2_wait_on_mount(struct ocfs2_super *osb, int quota);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
65
  static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
66
  				      int dirty, int replayed);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
67
68
69
70
71
  static int ocfs2_trylock_journal(struct ocfs2_super *osb,
  				 int slot_num);
  static int ocfs2_recover_orphans(struct ocfs2_super *osb,
  				 int slot);
  static int ocfs2_commit_thread(void *arg);
9140db04e   Srinivas Eeda   ocfs2: recover or...
72
73
74
75
76
  static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
  					    int slot_num,
  					    struct ocfs2_dinode *la_dinode,
  					    struct ocfs2_dinode *tl_dinode,
  					    struct ocfs2_quota_recovery *qrec);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
77

19ece546a   Jan Kara   ocfs2: Enable quo...
78
79
80
81
82
83
84
85
86
  /* Calls __ocfs2_wait_on_mount() with its 'quota' argument set to 0. */
  static inline int ocfs2_wait_on_mount(struct ocfs2_super *osb)
  {
  	const int wait_for_quota = 0;

  	return __ocfs2_wait_on_mount(osb, wait_for_quota);
  }
  
  /* Calls __ocfs2_wait_on_mount() with its 'quota' argument set to 1. */
  static inline int ocfs2_wait_on_quotas(struct ocfs2_super *osb)
  {
  	const int wait_for_quota = 1;

  	return __ocfs2_wait_on_mount(osb, wait_for_quota);
  }
9140db04e   Srinivas Eeda   ocfs2: recover or...
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
  /*
   * This replay_map is to track online/offline slots, so we could recover
   * offline slots during recovery and mount
   */
  
  enum ocfs2_replay_state {
  	/* No replay is required; the map can be ignored. */
  	REPLAY_UNNEEDED = 0,
  	/* The slots flagged in rm_replay_slots have to be replayed. */
  	REPLAY_NEEDED = 1,
  	/* The replay has already been queued. */
  	REPLAY_DONE = 2
  };
  
  /*
   * Per-mount replay bookkeeping.  The structure is allocated together
   * with one trailing byte per slot (see ocfs2_compute_replay_slots()).
   */
  struct ocfs2_replay_map {
  	/* Number of entries in rm_replay_slots. */
  	unsigned int rm_slots;
  	/* Whether the flagged slots still need to be queued. */
  	enum ocfs2_replay_state rm_state;
  	/*
  	 * One flag per slot; non-zero means the slot is queued for
  	 * recovery completion by ocfs2_queue_replay_slots().
  	 * Declared as a C99 flexible array member rather than the
  	 * GNU zero-length array extension; sizeof() is unchanged.
  	 */
  	unsigned char rm_replay_slots[];
  };
  
  void ocfs2_replay_map_set_state(struct ocfs2_super *osb, int state)
  {
  	if (!osb->replay_map)
  		return;
  
  	/* If we've already queued the replay, we don't have any more to do */
  	if (osb->replay_map->rm_state == REPLAY_DONE)
  		return;
  
  	osb->replay_map->rm_state = state;
  }
  
  /*
   * Build osb->replay_map, flagging every slot for which
   * ocfs2_slot_to_node_num_locked() reports -ENOENT.
   *
   * Returns 0 on success (including when a map already exists) and
   * -ENOMEM when the map cannot be allocated.
   */
  int ocfs2_compute_replay_slots(struct ocfs2_super *osb)
  {
  	struct ocfs2_replay_map *map;
  	int slot, node_num;

  	/* A map computed earlier is kept as is. */
  	if (osb->replay_map)
  		return 0;

  	/* Header plus one flag byte per slot, zero-filled. */
  	map = kzalloc(sizeof(struct ocfs2_replay_map) +
  		      (osb->max_slots * sizeof(char)), GFP_KERNEL);
  	if (!map) {
  		mlog_errno(-ENOMEM);
  		return -ENOMEM;
  	}

  	spin_lock(&osb->osb_lock);

  	map->rm_state = REPLAY_UNNEEDED;
  	map->rm_slots = osb->max_slots;

  	/* Mark the offline slot(s). */
  	for (slot = 0; slot < map->rm_slots; slot++) {
  		if (ocfs2_slot_to_node_num_locked(osb, slot, &node_num) != -ENOENT)
  			continue;
  		map->rm_replay_slots[slot] = 1;
  	}

  	osb->replay_map = map;
  	spin_unlock(&osb->osb_lock);

  	return 0;
  }
  
  /*
   * Queue recovery completion for every slot flagged in the replay map,
   * then mark the map REPLAY_DONE.  Does nothing unless a map exists
   * and its state is REPLAY_NEEDED.
   */
  void ocfs2_queue_replay_slots(struct ocfs2_super *osb)
  {
  	struct ocfs2_replay_map *map = osb->replay_map;
  	int slot;

  	if (!map || map->rm_state != REPLAY_NEEDED)
  		return;

  	for (slot = 0; slot < map->rm_slots; slot++) {
  		if (!map->rm_replay_slots[slot])
  			continue;
  		ocfs2_queue_recovery_completion(osb->journal, slot, NULL,
  						NULL, NULL);
  	}

  	map->rm_state = REPLAY_DONE;
  }
  
  /* Release the replay map, if any, and clear the pointer on the osb. */
  void ocfs2_free_replay_slots(struct ocfs2_super *osb)
  {
  	if (osb->replay_map) {
  		kfree(osb->replay_map);
  		osb->replay_map = NULL;
  	}
  }
553abd046   Joel Becker   ocfs2: Change the...
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
  /*
   * Set up the recovery state of a mount: lock, wait queue, flags and
   * the recovery map.
   *
   * Returns 0 on success, -ENOMEM if the recovery map cannot be allocated.
   */
  int ocfs2_recovery_init(struct ocfs2_super *osb)
  {
  	struct ocfs2_recovery_map *map;

  	mutex_init(&osb->recovery_lock);
  	init_waitqueue_head(&osb->recovery_event);
  	osb->recovery_thread_task = NULL;
  	osb->disable_recovery = 0;

  	/* Header followed by max_slots entries, in a single allocation. */
  	map = kzalloc(sizeof(struct ocfs2_recovery_map) +
  		      osb->max_slots * sizeof(unsigned int),
  		      GFP_KERNEL);
  	if (!map) {
  		mlog_errno(-ENOMEM);
  		return -ENOMEM;
  	}

  	/* The entry array lives immediately after the header. */
  	map->rm_entries = (unsigned int *)(map + 1);
  	osb->recovery_map = map;

  	return 0;
  }
  
  /*
   * Report whether a recovery thread task is currently recorded on the osb.
   *
   * This is evaluated from inside wait_event(), where the recovery lock
   * cannot be taken, so a full memory barrier is issued before the
   * pointer is read to make sure a cleared task pointer is observed.
   */
  static int ocfs2_recovery_thread_running(struct ocfs2_super *osb)
  {
  	mb();

  	return osb->recovery_thread_task ? 1 : 0;
  }
  
  /*
   * Shut down recovery for this mount: forbid new recovery threads, wait
   * for a running one to finish, drain ocfs2_wq and free the recovery map.
   */
  void ocfs2_recovery_exit(struct ocfs2_super *osb)
  {
  	struct ocfs2_recovery_map *rm;
  
  	/* disable any new recovery threads and wait for any currently
  	 * running ones to exit. Do this before setting the vol_state. */
  	mutex_lock(&osb->recovery_lock);
  	osb->disable_recovery = 1;
  	mutex_unlock(&osb->recovery_lock);
  	wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb));
  
  	/* At this point, we know that no more recovery threads can be
  	 * launched, so wait for any recovery completion work to
  	 * complete. */
  	flush_workqueue(ocfs2_wq);
  
  	/*
  	 * Now that recovery is shut down, and the osb is about to be
  	 * freed,  the osb_lock is not taken here.
  	 */
  	rm = osb->recovery_map;
  	/* XXX: Should we bug if there are dirty entries? */
  
  	/*
  	 * NOTE(review): osb->recovery_map is not reset to NULL, so it
  	 * keeps pointing at freed memory after this call -- confirm that
  	 * nothing reads it between here and the osb being freed.
  	 */
  	kfree(rm);
  }
  
  /*
   * Return 1 if node_num is present in the recovery map, 0 otherwise.
   * The caller must hold osb->osb_lock.
   */
  static int __ocfs2_recovery_map_test(struct ocfs2_super *osb,
  				     unsigned int node_num)
  {
  	struct ocfs2_recovery_map *map = osb->recovery_map;
  	int idx = 0;

  	assert_spin_locked(&osb->osb_lock);

  	while (idx < map->rm_used) {
  		if (map->rm_entries[idx] == node_num)
  			return 1;
  		idx++;
  	}

  	return 0;
  }
  
  /*
   * Test-and-set on the recovery map: append node_num unless it is
   * already recorded.  Returns the previous state (1 if the node was
   * already in the map, 0 if it has just been added).  Takes osb_lock.
   */
  static int ocfs2_recovery_map_set(struct ocfs2_super *osb,
  				  unsigned int node_num)
  {
  	struct ocfs2_recovery_map *map = osb->recovery_map;
  	int was_set;

  	spin_lock(&osb->osb_lock);

  	was_set = __ocfs2_recovery_map_test(osb, node_num);
  	if (!was_set) {
  		/* XXX: Can this be exploited? Not from o2dlm... */
  		BUG_ON(map->rm_used >= osb->max_slots);

  		map->rm_entries[map->rm_used++] = node_num;
  	}

  	spin_unlock(&osb->osb_lock);

  	return was_set;
  }
  
  /*
   * Remove the first occurrence of node_num from the recovery map, if
   * present, closing the gap by shifting the following entries down.
   * Takes osb_lock.
   */
  static void ocfs2_recovery_map_clear(struct ocfs2_super *osb,
  				     unsigned int node_num)
  {
  	struct ocfs2_recovery_map *map = osb->recovery_map;
  	int pos;

  	spin_lock(&osb->osb_lock);

  	for (pos = 0; pos < map->rm_used; pos++) {
  		if (map->rm_entries[pos] != node_num)
  			continue;

  		/* XXX: be careful with the pointer math */
  		memmove(&map->rm_entries[pos], &map->rm_entries[pos + 1],
  			(map->rm_used - pos - 1) * sizeof(unsigned int));
  		map->rm_used--;
  		break;
  	}

  	spin_unlock(&osb->osb_lock);
  }
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
296
297
298
299
  static int ocfs2_commit_cache(struct ocfs2_super *osb)
  {
  	int status = 0;
  	unsigned int flushed;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
300
  	struct ocfs2_journal *journal = NULL;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
301
302
303
304
  	journal = osb->journal;
  
  	/* Flush all pending commits and checkpoint the journal. */
  	down_write(&journal->j_trans_barrier);
b41079504   Tao Ma   ocfs2: Remove mas...
305
306
307
  	flushed = atomic_read(&journal->j_num_trans);
  	trace_ocfs2_commit_cache_begin(flushed);
  	if (flushed == 0) {
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
308
  		up_write(&journal->j_trans_barrier);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
309
310
  		goto finally;
  	}
2b4e30fbd   Joel Becker   ocfs2: Switch ove...
311
312
313
  	jbd2_journal_lock_updates(journal->j_journal);
  	status = jbd2_journal_flush(journal->j_journal);
  	jbd2_journal_unlock_updates(journal->j_journal);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
314
315
316
317
318
  	if (status < 0) {
  		up_write(&journal->j_trans_barrier);
  		mlog_errno(status);
  		goto finally;
  	}
f9c57ada3   Tao Ma   ocfs2: Remove unu...
319
  	ocfs2_inc_trans_id(journal);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
320
321
322
323
  
  	flushed = atomic_read(&journal->j_num_trans);
  	atomic_set(&journal->j_num_trans, 0);
  	up_write(&journal->j_trans_barrier);
b41079504   Tao Ma   ocfs2: Remove mas...
324
  	trace_ocfs2_commit_cache_end(journal->j_trans_id, flushed);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
325

34d024f84   Mark Fasheh   ocfs2: Remove mou...
326
  	ocfs2_wake_downconvert_thread(osb);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
327
328
  	wake_up(&journal->j_checkpointed);
  finally:
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
329
330
  	return status;
  }
1fabe1481   Mark Fasheh   ocfs2: Remove str...
331
  handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs)
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
332
  {
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
333
  	journal_t *journal = osb->journal->j_journal;
1fabe1481   Mark Fasheh   ocfs2: Remove str...
334
  	handle_t *handle;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
335

ebdec83ba   Eric Sesterhenn / snakebyte   [PATCH] BUG_ON() ...
336
  	BUG_ON(!osb || !osb->journal->j_journal);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
337

65eff9ccf   Mark Fasheh   ocfs2: remove han...
338
339
  	if (ocfs2_is_hard_readonly(osb))
  		return ERR_PTR(-EROFS);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
340
341
342
  
  	BUG_ON(osb->journal->j_state == OCFS2_JOURNAL_FREE);
  	BUG_ON(max_buffs <= 0);
90e86a63e   Jan Kara   ocfs2: Support ne...
343
344
345
  	/* Nested transaction? Just return the handle... */
  	if (journal_current_handle())
  		return jbd2_journal_start(journal, max_buffs);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
346

fef6925cd   Jan Kara   ocfs2: Convert to...
347
  	sb_start_intwrite(osb->sb);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
348
  	down_read(&osb->journal->j_trans_barrier);
2b4e30fbd   Joel Becker   ocfs2: Switch ove...
349
  	handle = jbd2_journal_start(journal, max_buffs);
1fabe1481   Mark Fasheh   ocfs2: Remove str...
350
  	if (IS_ERR(handle)) {
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
351
  		up_read(&osb->journal->j_trans_barrier);
fef6925cd   Jan Kara   ocfs2: Convert to...
352
  		sb_end_intwrite(osb->sb);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
353

1fabe1481   Mark Fasheh   ocfs2: Remove str...
354
  		mlog_errno(PTR_ERR(handle));
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
355
356
357
  
  		if (is_journal_aborted(journal)) {
  			ocfs2_abort(osb->sb, "Detected aborted journal");
1fabe1481   Mark Fasheh   ocfs2: Remove str...
358
  			handle = ERR_PTR(-EROFS);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
359
  		}
c271c5c22   Sunil Mushran   ocfs2: local mounts
360
361
362
363
  	} else {
  		if (!ocfs2_mount_local(osb))
  			atomic_inc(&(osb->journal->j_num_trans));
  	}
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
364

ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
365
  	return handle;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
366
  }
1fabe1481   Mark Fasheh   ocfs2: Remove str...
367
368
  /*
   * Stop the transaction behind 'handle'.
   *
   * When the handle is not nested (h_ref was not above 1 before the
   * stop), j_trans_barrier and the intwrite protection are released too.
   *
   * Returns the result of jbd2_journal_stop(); negative values are logged.
   */
  int ocfs2_commit_trans(struct ocfs2_super *osb,
  		       handle_t *handle)
  {
  	int ret, nested;
  	struct ocfs2_journal *journal = osb->journal;

  	BUG_ON(!handle);

  	/* Sample the reference count before the handle is stopped. */
  	nested = handle->h_ref > 1;
  	ret = jbd2_journal_stop(handle);
  	if (ret < 0)
  		mlog_errno(ret);

  	if (!nested) {
  		up_read(&journal->j_trans_barrier);
  		sb_end_intwrite(osb->sb);
  	}

  	return ret;
  }
  
  /*
c901fb007   Tao Ma   ocfs2: Make ocfs2...
388
   * 'nblocks' is what you want to add to the current transaction.
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
389
   *
2b4e30fbd   Joel Becker   ocfs2: Switch ove...
390
   * This might call jbd2_journal_restart() which will commit dirty buffers
e8aed3450   Mark Fasheh   ocfs2: Re-journal...
391
392
393
394
395
   * and then restart the transaction. Before calling
   * ocfs2_extend_trans(), any changed blocks should have been
   * dirtied. After calling it, all blocks which need to be changed must
   * go through another set of journal_access/journal_dirty calls.
   *
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
396
397
398
399
400
401
402
403
   * WARNING: This will not release any semaphores or disk locks taken
   * during the transaction, so make sure they were taken *before*
   * start_trans or we'll have ordering deadlocks.
   *
   * WARNING2: Note that we do *not* drop j_trans_barrier here. This is
   * good because transaction ids haven't yet been recorded on the
   * cluster locks associated with this handle.
   */
1fc581467   Mark Fasheh   ocfs2: have ocfs2...
404
  /*
   * Add 'nblocks' credits to the running transaction.
   *
   * jbd2_journal_extend() is tried first; when it reports that the
   * handle could not be extended (positive return), the transaction is
   * restarted with the old credits plus nblocks.  With
   * CONFIG_OCFS2_DEBUG_FS the extend step is skipped and the restart
   * path is always taken.
   *
   * Returns 0 on success (or when nblocks is 0) and a negative error
   * from jbd2 otherwise.
   */
  int ocfs2_extend_trans(handle_t *handle, int nblocks)
  {
  	int status, old_nblocks;

  	BUG_ON(!handle);
  	BUG_ON(nblocks < 0);

  	if (!nblocks)
  		return 0;

  	old_nblocks = handle->h_buffer_credits;

  	trace_ocfs2_extend_trans(old_nblocks, nblocks);

  #ifdef CONFIG_OCFS2_DEBUG_FS
  	status = 1;
  #else
  	status = jbd2_journal_extend(handle, nblocks);
  	if (status < 0) {
  		mlog_errno(status);
  		goto bail;
  	}
  #endif

  	if (status > 0) {
  		trace_ocfs2_extend_trans_restart(old_nblocks + nblocks);
  		status = jbd2_journal_restart(handle,
  					      old_nblocks + nblocks);
  		if (status < 0) {
  			mlog_errno(status);
  			goto bail;
  		}
  	}

  	status = 0;
  bail:
  	return status;
  }
2b1e55c38   Younger Liu   ocfs2: lighten up...
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
  /*
   * Extend the transaction by OCFS2_MAX_TRANS_DATA credits; if the
   * extension cannot be granted, restart the transaction with
   * OCFS2_MAX_TRANS_DATA credits instead.
   * Taken from Ext4: extend_or_restart_transaction()
   *
   * The function returns 0 without touching the handle when the handle
   * currently holds fewer than 'thresh' credits.
   *
   * NOTE(review): the previous comment said the extension happens when
   * there are *fewer* than thresh credits, which is the opposite of what
   * the code does.  Behaviour is left untouched here; confirm the
   * intended direction of the comparison against the callers.
   *
   * Returns 0 on success or a negative jbd2 error.
   */
  int ocfs2_allocate_extend_trans(handle_t *handle, int thresh)
  {
  	int credits, ret;

  	BUG_ON(!handle);

  	credits = handle->h_buffer_credits;
  	trace_ocfs2_allocate_extend_trans(credits, thresh);

  	if (credits < thresh)
  		return 0;

  	ret = jbd2_journal_extend(handle, OCFS2_MAX_TRANS_DATA);
  	if (ret == 0)
  		return 0;
  	if (ret < 0) {
  		mlog_errno(ret);
  		return ret;
  	}

  	/* Positive return: could not extend, so restart instead. */
  	ret = jbd2_journal_restart(handle, OCFS2_MAX_TRANS_DATA);
  	if (ret < 0)
  		mlog_errno(ret);

  	return ret;
  }
50655ae9e   Joel Becker   ocfs2: Add journa...
475
476
477
478
479
480
481
482
483
  /*
   * A set of jbd2 buffer triggers together with the byte offset of the
   * check field inside the metadata block type they are used for.
   */
  struct ocfs2_triggers {
  	struct jbd2_buffer_trigger_type	ot_triggers;	/* embedded; to_ocfs2_trigger() maps it back to this struct */
  	int				ot_offset;	/* added to the block data pointer in ocfs2_frozen_trigger() */
  };
  
  /* Map an embedded jbd2 trigger type back to its struct ocfs2_triggers. */
  static inline struct ocfs2_triggers *to_ocfs2_trigger(struct jbd2_buffer_trigger_type *triggers)
  {
  	struct ocfs2_triggers *ot;

  	ot = container_of(triggers, struct ocfs2_triggers, ot_triggers);

  	return ot;
  }
13ceef099   Jan Kara   jbd2/ocfs2: Fix b...
484
  /*
   * Frozen trigger for block types whose check field sits at a fixed
   * offset: computes the block check over 'data' (size bytes) and stores
   * it at data + ot_offset of the enclosing struct ocfs2_triggers.
   */
  static void ocfs2_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
  				 struct buffer_head *bh,
  				 void *data, size_t size)
  {
  	struct ocfs2_triggers *ot = to_ocfs2_trigger(triggers);

  	/*
  	 * We aren't guaranteed to have the superblock here, so we
  	 * must unconditionally compute the ecc data.
  	 * __ocfs2_journal_access() will only set the triggers if
  	 * metaecc is enabled.
  	 */
  	ocfs2_block_check_compute(data, size, data + ot->ot_offset);
  }
  
  /*
   * Quota blocks have their own trigger because the struct ocfs2_block_check
   * offset depends on the blocksize.
   */
13ceef099   Jan Kara   jbd2/ocfs2: Fix b...
503
  /*
   * Frozen trigger for quota blocks: the check structure lives in the
   * trailer located by ocfs2_block_dqtrailer(size, data).
   */
  static void ocfs2_dq_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
  				 struct buffer_head *bh,
  				 void *data, size_t size)
  {
  	struct ocfs2_disk_dqtrailer *dqt =
  		ocfs2_block_dqtrailer(size, data);

  	/*
  	 * We aren't guaranteed to have the superblock here, so we
  	 * must unconditionally compute the ecc data.
  	 * __ocfs2_journal_access() will only set the triggers if
  	 * metaecc is enabled.
  	 */
  	ocfs2_block_check_compute(data, size, &dqt->dq_check);
  }
c175a518b   Joel Becker   ocfs2: Checksum a...
518
519
520
521
  /*
   * Directory blocks also have their own trigger because the
   * struct ocfs2_block_check offset depends on the blocksize.
   */
13ceef099   Jan Kara   jbd2/ocfs2: Fix b...
522
  /*
   * Frozen trigger for directory blocks: the check structure lives in the
   * trailer located by ocfs2_dir_trailer_from_size(size, data).
   */
  static void ocfs2_db_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
  				 struct buffer_head *bh,
  				 void *data, size_t size)
  {
  	struct ocfs2_dir_block_trailer *trailer =
  		ocfs2_dir_trailer_from_size(size, data);

  	/*
  	 * We aren't guaranteed to have the superblock here, so we
  	 * must unconditionally compute the ecc data.
  	 * __ocfs2_journal_access() will only set the triggers if
  	 * metaecc is enabled.
  	 */
  	ocfs2_block_check_compute(data, size, &trailer->db_check);
  }
50655ae9e   Joel Becker   ocfs2: Add journa...
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
  /*
   * Abort trigger shared by all ocfs2 trigger sets: logs the affected
   * buffer and raises an ocfs2 error on the superblock reached through
   * bh->b_assoc_map->host.
   */
  static void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers,
  				struct buffer_head *bh)
  {
  	mlog(ML_ERROR,
  	     "ocfs2_abort_trigger called by JBD2.  bh = 0x%lx, "
  	     "bh->b_blocknr = %llu\n",
  	     (unsigned long)bh,
  	     (unsigned long long)bh->b_blocknr);

  	/* We aren't guaranteed to have the superblock here - but if we
  	 * don't, it'll just crash. */
  	ocfs2_error(bh->b_assoc_map->host->i_sb,
  		    "JBD2 has aborted our journal, ocfs2 cannot continue\n");
  }
  
  static struct ocfs2_triggers di_triggers = {
  	.ot_triggers = {
13ceef099   Jan Kara   jbd2/ocfs2: Fix b...
556
  		.t_frozen = ocfs2_frozen_trigger,
50655ae9e   Joel Becker   ocfs2: Add journa...
557
558
559
560
561
562
563
  		.t_abort = ocfs2_abort_trigger,
  	},
  	.ot_offset	= offsetof(struct ocfs2_dinode, i_check),
  };
  
  static struct ocfs2_triggers eb_triggers = {
  	.ot_triggers = {
13ceef099   Jan Kara   jbd2/ocfs2: Fix b...
564
  		.t_frozen = ocfs2_frozen_trigger,
50655ae9e   Joel Becker   ocfs2: Add journa...
565
566
567
568
  		.t_abort = ocfs2_abort_trigger,
  	},
  	.ot_offset	= offsetof(struct ocfs2_extent_block, h_check),
  };
93c97087a   Tao Ma   ocfs2: Add metaec...
569
570
  static struct ocfs2_triggers rb_triggers = {
  	.ot_triggers = {
13ceef099   Jan Kara   jbd2/ocfs2: Fix b...
571
  		.t_frozen = ocfs2_frozen_trigger,
93c97087a   Tao Ma   ocfs2: Add metaec...
572
573
574
575
  		.t_abort = ocfs2_abort_trigger,
  	},
  	.ot_offset	= offsetof(struct ocfs2_refcount_block, rf_check),
  };
50655ae9e   Joel Becker   ocfs2: Add journa...
576
577
  static struct ocfs2_triggers gd_triggers = {
  	.ot_triggers = {
13ceef099   Jan Kara   jbd2/ocfs2: Fix b...
578
  		.t_frozen = ocfs2_frozen_trigger,
50655ae9e   Joel Becker   ocfs2: Add journa...
579
580
581
582
  		.t_abort = ocfs2_abort_trigger,
  	},
  	.ot_offset	= offsetof(struct ocfs2_group_desc, bg_check),
  };
c175a518b   Joel Becker   ocfs2: Checksum a...
583
584
  static struct ocfs2_triggers db_triggers = {
  	.ot_triggers = {
13ceef099   Jan Kara   jbd2/ocfs2: Fix b...
585
  		.t_frozen = ocfs2_db_frozen_trigger,
c175a518b   Joel Becker   ocfs2: Checksum a...
586
587
588
  		.t_abort = ocfs2_abort_trigger,
  	},
  };
50655ae9e   Joel Becker   ocfs2: Add journa...
589
590
  static struct ocfs2_triggers xb_triggers = {
  	.ot_triggers = {
13ceef099   Jan Kara   jbd2/ocfs2: Fix b...
591
  		.t_frozen = ocfs2_frozen_trigger,
50655ae9e   Joel Becker   ocfs2: Add journa...
592
593
594
595
596
597
598
  		.t_abort = ocfs2_abort_trigger,
  	},
  	.ot_offset	= offsetof(struct ocfs2_xattr_block, xb_check),
  };
  
  static struct ocfs2_triggers dq_triggers = {
  	.ot_triggers = {
13ceef099   Jan Kara   jbd2/ocfs2: Fix b...
599
  		.t_frozen = ocfs2_dq_frozen_trigger,
50655ae9e   Joel Becker   ocfs2: Add journa...
600
601
602
  		.t_abort = ocfs2_abort_trigger,
  	},
  };
9b7895efa   Mark Fasheh   ocfs2: Add a name...
603
604
  static struct ocfs2_triggers dr_triggers = {
  	.ot_triggers = {
13ceef099   Jan Kara   jbd2/ocfs2: Fix b...
605
  		.t_frozen = ocfs2_frozen_trigger,
9b7895efa   Mark Fasheh   ocfs2: Add a name...
606
607
608
609
610
611
612
  		.t_abort = ocfs2_abort_trigger,
  	},
  	.ot_offset	= offsetof(struct ocfs2_dx_root_block, dr_check),
  };
  
  static struct ocfs2_triggers dl_triggers = {
  	.ot_triggers = {
13ceef099   Jan Kara   jbd2/ocfs2: Fix b...
613
  		.t_frozen = ocfs2_frozen_trigger,
9b7895efa   Mark Fasheh   ocfs2: Add a name...
614
615
616
617
  		.t_abort = ocfs2_abort_trigger,
  	},
  	.ot_offset	= offsetof(struct ocfs2_dx_leaf, dl_check),
  };
50655ae9e   Joel Becker   ocfs2: Add journa...
618
  /*
   * Common body of the ocfs2_journal_access*() helpers.
   *
   * Obtains jbd2 write access (OCFS2_JOURNAL_ACCESS_CREATE / _WRITE) or
   * undo access (OCFS2_JOURNAL_ACCESS_UNDO) to 'bh' under the metadata
   * cache io lock of 'ci'.  When access was granted, metaecc is enabled
   * and 'triggers' is not NULL, the triggers are attached to the buffer.
   *
   * Returns 0 on success, -EINVAL for an unknown access type, or the
   * error returned by jbd2.
   */
  static int __ocfs2_journal_access(handle_t *handle,
  				  struct ocfs2_caching_info *ci,
  				  struct buffer_head *bh,
  				  struct ocfs2_triggers *triggers,
  				  int type)
  {
  	int status;
  	struct ocfs2_super *osb;

  	BUG_ON(!ci || !ci->ci_ops);
  	BUG_ON(!handle);
  	BUG_ON(!bh);

  	/* Only look the superblock up once ci has been validated. */
  	osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci));

  	trace_ocfs2_journal_access(
  		(unsigned long long)ocfs2_metadata_cache_owner(ci),
  		(unsigned long long)bh->b_blocknr, type, bh->b_size);

  	/* we can safely remove this assertion after testing. */
  	if (!buffer_uptodate(bh)) {
  		mlog(ML_ERROR, "giving me a buffer that's not uptodate!\n");
  		mlog(ML_ERROR, "b_blocknr=%llu\n",
  		     (unsigned long long)bh->b_blocknr);
  		BUG();
  	}

  	/* Set the current transaction information on the ci so
  	 * that the locking code knows whether it can drop its locks
  	 * on this ci or not. We're protected from the commit
  	 * thread updating the current transaction id until
  	 * ocfs2_commit_trans() because ocfs2_start_trans() took
  	 * j_trans_barrier for us. */
  	ocfs2_set_ci_lock_trans(osb->journal, ci);

  	ocfs2_metadata_cache_io_lock(ci);
  	switch (type) {
  	case OCFS2_JOURNAL_ACCESS_CREATE:
  	case OCFS2_JOURNAL_ACCESS_WRITE:
  		status = jbd2_journal_get_write_access(handle, bh);
  		break;

  	case OCFS2_JOURNAL_ACCESS_UNDO:
  		status = jbd2_journal_get_undo_access(handle, bh);
  		break;

  	default:
  		status = -EINVAL;
  		mlog(ML_ERROR, "Unknown access type!\n");
  	}
  	if (!status && ocfs2_meta_ecc(osb) && triggers)
  		jbd2_journal_set_triggers(bh, &triggers->ot_triggers);
  	ocfs2_metadata_cache_io_unlock(ci);

  	if (status < 0)
  		mlog(ML_ERROR, "Error %d getting %d access to buffer!\n",
  		     status, type);

  	return status;
  }
0cf2f7632   Joel Becker   ocfs2: Pass struc...
679
680
  /* Journal access for dinode blocks: __ocfs2_journal_access() with di_triggers. */
  int ocfs2_journal_access_di(handle_t *handle, struct ocfs2_caching_info *ci,
  			    struct buffer_head *bh, int type)
  {
  	return __ocfs2_journal_access(handle, ci, bh, &di_triggers, type);
  }
0cf2f7632   Joel Becker   ocfs2: Pass struc...
684
  /* Journal access for extent blocks: __ocfs2_journal_access() with eb_triggers. */
  int ocfs2_journal_access_eb(handle_t *handle, struct ocfs2_caching_info *ci,
  			    struct buffer_head *bh, int type)
  {
  	return __ocfs2_journal_access(handle, ci, bh, &eb_triggers, type);
  }
93c97087a   Tao Ma   ocfs2: Add metaec...
689
690
691
692
693
694
  /* Journal access for refcount blocks: __ocfs2_journal_access() with rb_triggers. */
  int ocfs2_journal_access_rb(handle_t *handle, struct ocfs2_caching_info *ci,
  			    struct buffer_head *bh, int type)
  {
  	struct ocfs2_triggers *rb = &rb_triggers;

  	return __ocfs2_journal_access(handle, ci, bh, rb, type);
  }
0cf2f7632   Joel Becker   ocfs2: Pass struc...
695
  /* Journal access for group descriptors: __ocfs2_journal_access() with gd_triggers. */
  int ocfs2_journal_access_gd(handle_t *handle, struct ocfs2_caching_info *ci,
  			    struct buffer_head *bh, int type)
  {
  	return __ocfs2_journal_access(handle, ci, bh, &gd_triggers, type);
  }
0cf2f7632   Joel Becker   ocfs2: Pass struc...
700
  /* Journal access for directory blocks: __ocfs2_journal_access() with db_triggers. */
  int ocfs2_journal_access_db(handle_t *handle, struct ocfs2_caching_info *ci,
  			    struct buffer_head *bh, int type)
  {
  	return __ocfs2_journal_access(handle, ci, bh, &db_triggers, type);
  }
0cf2f7632   Joel Becker   ocfs2: Pass struc...
705
  /* Journal access for xattr blocks: __ocfs2_journal_access() with xb_triggers. */
  int ocfs2_journal_access_xb(handle_t *handle, struct ocfs2_caching_info *ci,
  			    struct buffer_head *bh, int type)
  {
  	return __ocfs2_journal_access(handle, ci, bh, &xb_triggers, type);
  }
0cf2f7632   Joel Becker   ocfs2: Pass struc...
710
  /*
   * Thin wrapper: forwards to __ocfs2_journal_access(), passing
   * &dq_triggers as the fourth argument, and returns its result.
   */
  int ocfs2_journal_access_dq(handle_t *handle, struct ocfs2_caching_info *ci,
  			    struct buffer_head *bh, int type)
  {
  	int status;

  	status = __ocfs2_journal_access(handle, ci, bh, &dq_triggers, type);

  	return status;
  }
0cf2f7632   Joel Becker   ocfs2: Pass struc...
715
  /*
   * Thin wrapper: forwards to __ocfs2_journal_access(), passing
   * &dr_triggers as the fourth argument, and returns its result.
   */
  int ocfs2_journal_access_dr(handle_t *handle, struct ocfs2_caching_info *ci,
  			    struct buffer_head *bh, int type)
  {
  	int status;

  	status = __ocfs2_journal_access(handle, ci, bh, &dr_triggers, type);

  	return status;
  }
0cf2f7632   Joel Becker   ocfs2: Pass struc...
720
  /*
   * Thin wrapper: forwards to __ocfs2_journal_access(), passing
   * &dl_triggers as the fourth argument, and returns its result.
   */
  int ocfs2_journal_access_dl(handle_t *handle, struct ocfs2_caching_info *ci,
  			    struct buffer_head *bh, int type)
  {
  	int status;

  	status = __ocfs2_journal_access(handle, ci, bh, &dl_triggers, type);

  	return status;
  }
0cf2f7632   Joel Becker   ocfs2: Pass struc...
725
  /*
   * Thin wrapper: forwards to __ocfs2_journal_access() with NULL as the
   * fourth (triggers) argument, and returns its result.
   */
  int ocfs2_journal_access(handle_t *handle, struct ocfs2_caching_info *ci,
  			 struct buffer_head *bh, int type)
  {
  	int status;

  	status = __ocfs2_journal_access(handle, ci, bh, NULL, type);

  	return status;
  }
ec20cec7a   Joel Becker   ocfs2: Make ocfs2...
730
  /*
   * Hand @bh to jbd2_journal_dirty_metadata() under @handle.
   *
   * The block number is traced first.  Any non-zero return from jbd2 is
   * treated as fatal (BUG_ON), which is why this function returns void.
   */
  void ocfs2_journal_dirty(handle_t *handle, struct buffer_head *bh)
  {
  	int err;

  	trace_ocfs2_journal_dirty((unsigned long long)bh->b_blocknr);

  	err = jbd2_journal_dirty_metadata(handle, bh);
  	BUG_ON(err != 0);
  }
2b4e30fbd   Joel Becker   ocfs2: Switch ove...
738
  #define OCFS2_DEFAULT_COMMIT_INTERVAL	(HZ * JBD2_DEFAULT_MAX_COMMIT_AGE)
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
739
740
741
742
  
  void ocfs2_set_journal_params(struct ocfs2_super *osb)
  {
  	journal_t *journal = osb->journal->j_journal;
d147b3d63   Mark Fasheh   ocfs2: Support co...
743
744
745
746
  	unsigned long commit_interval = OCFS2_DEFAULT_COMMIT_INTERVAL;
  
  	if (osb->osb_commit_interval)
  		commit_interval = osb->osb_commit_interval;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
747

a931da6ac   Theodore Ts'o   jbd2: Change j_st...
748
  	write_lock(&journal->j_state_lock);
d147b3d63   Mark Fasheh   ocfs2: Support co...
749
  	journal->j_commit_interval = commit_interval;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
750
  	if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER)
2b4e30fbd   Joel Becker   ocfs2: Switch ove...
751
  		journal->j_flags |= JBD2_BARRIER;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
752
  	else
2b4e30fbd   Joel Becker   ocfs2: Switch ove...
753
  		journal->j_flags &= ~JBD2_BARRIER;
a931da6ac   Theodore Ts'o   jbd2: Change j_st...
754
  	write_unlock(&journal->j_state_lock);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
755
756
757
758
759
760
761
762
763
764
  }
  
  int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty)
  {
  	int status = -1;
  	struct inode *inode = NULL; /* the journal inode */
  	journal_t *j_journal = NULL;
  	struct ocfs2_dinode *di = NULL;
  	struct buffer_head *bh = NULL;
  	struct ocfs2_super *osb;
e63aecb65   Mark Fasheh   ocfs2: Rename ocf...
765
  	int inode_lock = 0;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
766

ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
  	BUG_ON(!journal);
  
  	osb = journal->j_osb;
  
  	/* already have the inode for our journal */
  	inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
  					    osb->slot_num);
  	if (inode == NULL) {
  		status = -EACCES;
  		mlog_errno(status);
  		goto done;
  	}
  	if (is_bad_inode(inode)) {
  		mlog(ML_ERROR, "access error (bad inode)
  ");
  		iput(inode);
  		inode = NULL;
  		status = -EACCES;
  		goto done;
  	}
  
  	SET_INODE_JOURNAL(inode);
  	OCFS2_I(inode)->ip_open_count++;
6eff5790d   Mark Fasheh   [PATCH] ocfs2: do...
790
791
792
  	/* Skip recovery waits here - journal inode metadata never
  	 * changes in a live cluster so it can be considered an
  	 * exception to the rule. */
e63aecb65   Mark Fasheh   ocfs2: Rename ocf...
793
  	status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
794
795
796
797
798
799
  	if (status < 0) {
  		if (status != -ERESTARTSYS)
  			mlog(ML_ERROR, "Could not get lock on journal!
  ");
  		goto done;
  	}
e63aecb65   Mark Fasheh   ocfs2: Rename ocf...
800
  	inode_lock = 1;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
801
  	di = (struct ocfs2_dinode *)bh->b_data;
f17c20dd2   Junxiao Bi   ocfs2: use i_size...
802
  	if (i_size_read(inode) <  OCFS2_MIN_JOURNAL_SIZE) {
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
803
804
  		mlog(ML_ERROR, "Journal file size (%lld) is too small!
  ",
f17c20dd2   Junxiao Bi   ocfs2: use i_size...
805
  		     i_size_read(inode));
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
806
807
808
  		status = -EINVAL;
  		goto done;
  	}
f17c20dd2   Junxiao Bi   ocfs2: use i_size...
809
  	trace_ocfs2_journal_init(i_size_read(inode),
b41079504   Tao Ma   ocfs2: Remove mas...
810
811
  				 (unsigned long long)inode->i_blocks,
  				 OCFS2_I(inode)->ip_clusters);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
812
813
  
  	/* call the kernels journal init function now */
2b4e30fbd   Joel Becker   ocfs2: Switch ove...
814
  	j_journal = jbd2_journal_init_inode(inode);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
815
816
817
818
819
820
  	if (j_journal == NULL) {
  		mlog(ML_ERROR, "Linux journal layer error
  ");
  		status = -EINVAL;
  		goto done;
  	}
b41079504   Tao Ma   ocfs2: Remove mas...
821
  	trace_ocfs2_journal_init_maxlen(j_journal->j_maxlen);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
  
  	*dirty = (le32_to_cpu(di->id1.journal1.ij_flags) &
  		  OCFS2_JOURNAL_DIRTY_FL);
  
  	journal->j_journal = j_journal;
  	journal->j_inode = inode;
  	journal->j_bh = bh;
  
  	ocfs2_set_journal_params(osb);
  
  	journal->j_state = OCFS2_JOURNAL_LOADED;
  
  	status = 0;
  done:
  	if (status < 0) {
e63aecb65   Mark Fasheh   ocfs2: Rename ocf...
837
838
  		if (inode_lock)
  			ocfs2_inode_unlock(inode, 1);
a81cb88b6   Mark Fasheh   ocfs2: Don't chec...
839
  		brelse(bh);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
840
841
842
843
844
  		if (inode) {
  			OCFS2_I(inode)->ip_open_count--;
  			iput(inode);
  		}
  	}
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
845
846
  	return status;
  }
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
847
848
849
850
851
852
853
854
855
  /* Add one to the little-endian ij_recovery_generation field of @di. */
  static void ocfs2_bump_recovery_generation(struct ocfs2_dinode *di)
  {
  	le32_add_cpu(&di->id1.journal1.ij_recovery_generation, 1);
  }
  
  /* Return ij_recovery_generation of @di converted to CPU byte order. */
  static u32 ocfs2_get_recovery_generation(struct ocfs2_dinode *di)
  {
  	u32 generation = le32_to_cpu(di->id1.journal1.ij_recovery_generation);

  	return generation;
  }
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
856
  static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
857
  				      int dirty, int replayed)
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
858
859
860
861
862
863
  {
  	int status;
  	unsigned int flags;
  	struct ocfs2_journal *journal = osb->journal;
  	struct buffer_head *bh = journal->j_bh;
  	struct ocfs2_dinode *fe;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
864
  	fe = (struct ocfs2_dinode *)bh->b_data;
10995aa24   Joel Becker   ocfs2: Morph the ...
865
866
867
868
869
  
  	/* The journal bh on the osb always comes from ocfs2_journal_init()
  	 * and was validated there inside ocfs2_inode_lock_full().  It's a
  	 * code bug if we mess it up. */
  	BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
870
871
872
873
874
875
876
  
  	flags = le32_to_cpu(fe->id1.journal1.ij_flags);
  	if (dirty)
  		flags |= OCFS2_JOURNAL_DIRTY_FL;
  	else
  		flags &= ~OCFS2_JOURNAL_DIRTY_FL;
  	fe->id1.journal1.ij_flags = cpu_to_le32(flags);
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
877
878
  	if (replayed)
  		ocfs2_bump_recovery_generation(fe);
13723d00e   Joel Becker   ocfs2: Use metada...
879
  	ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check);
8cb471e8f   Joel Becker   ocfs2: Take the i...
880
  	status = ocfs2_write_block(osb, bh, INODE_CACHE(journal->j_inode));
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
881
882
  	if (status < 0)
  		mlog_errno(status);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
883
884
885
886
887
888
889
890
891
892
893
894
895
  	return status;
  }
  
  /*
   * If the journal has been kmalloc'd it needs to be freed after this
   * call.
   */
  void ocfs2_journal_shutdown(struct ocfs2_super *osb)
  {
  	struct ocfs2_journal *journal = NULL;
  	int status = 0;
  	struct inode *inode = NULL;
  	int num_running_trans = 0;
ebdec83ba   Eric Sesterhenn / snakebyte   [PATCH] BUG_ON() ...
896
  	BUG_ON(!osb);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
897
898
899
900
901
902
903
904
905
  
  	journal = osb->journal;
  	if (!journal)
  		goto done;
  
  	inode = journal->j_inode;
  
  	if (journal->j_state != OCFS2_JOURNAL_LOADED)
  		goto done;
2b4e30fbd   Joel Becker   ocfs2: Switch ove...
906
  	/* need to inc inode use count - jbd2_journal_destroy will iput. */
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
907
908
909
910
  	if (!igrab(inode))
  		BUG();
  
  	num_running_trans = atomic_read(&(osb->journal->j_num_trans));
b41079504   Tao Ma   ocfs2: Remove mas...
911
  	trace_ocfs2_journal_shutdown(num_running_trans);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
912
913
914
915
916
917
918
919
920
921
922
923
  
  	/* Do a commit_cache here. It will flush our journal, *and*
  	 * release any locks that are still held.
  	 * set the SHUTDOWN flag and release the trans lock.
  	 * the commit thread will take the trans lock for us below. */
  	journal->j_state = OCFS2_JOURNAL_IN_SHUTDOWN;
  
  	/* The OCFS2_JOURNAL_IN_SHUTDOWN will signal to commit_cache to not
  	 * drop the trans_lock (which we want to hold until we
  	 * completely destroy the journal. */
  	if (osb->commit_task) {
  		/* Wait for the commit thread */
b41079504   Tao Ma   ocfs2: Remove mas...
924
  		trace_ocfs2_journal_shutdown_wait(osb->commit_task);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
925
926
927
928
929
  		kthread_stop(osb->commit_task);
  		osb->commit_task = NULL;
  	}
  
  	BUG_ON(atomic_read(&(osb->journal->j_num_trans)) != 0);
c271c5c22   Sunil Mushran   ocfs2: local mounts
930
  	if (ocfs2_mount_local(osb)) {
2b4e30fbd   Joel Becker   ocfs2: Switch ove...
931
932
933
  		jbd2_journal_lock_updates(journal->j_journal);
  		status = jbd2_journal_flush(journal->j_journal);
  		jbd2_journal_unlock_updates(journal->j_journal);
c271c5c22   Sunil Mushran   ocfs2: local mounts
934
935
936
937
938
939
940
941
942
  		if (status < 0)
  			mlog_errno(status);
  	}
  
  	if (status == 0) {
  		/*
  		 * Do not toggle if flush was unsuccessful otherwise
  		 * will leave dirty metadata in a "clean" journal
  		 */
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
943
  		status = ocfs2_journal_toggle_dirty(osb, 0, 0);
c271c5c22   Sunil Mushran   ocfs2: local mounts
944
945
946
  		if (status < 0)
  			mlog_errno(status);
  	}
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
947
948
  
  	/* Shutdown the kernel journal system */
2b4e30fbd   Joel Becker   ocfs2: Switch ove...
949
  	jbd2_journal_destroy(journal->j_journal);
ae0dff683   Sunil Mushran   ocfs2: Set journa...
950
  	journal->j_journal = NULL;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
951
952
953
954
  
  	OCFS2_I(inode)->ip_open_count--;
  
  	/* unlock our journal */
e63aecb65   Mark Fasheh   ocfs2: Rename ocf...
955
  	ocfs2_inode_unlock(inode, 1);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
956
957
958
959
960
961
962
963
964
965
  
  	brelse(journal->j_bh);
  	journal->j_bh = NULL;
  
  	journal->j_state = OCFS2_JOURNAL_FREE;
  
  //	up_write(&journal->j_trans_barrier);
  done:
  	if (inode)
  		iput(inode);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
966
967
968
969
970
971
972
  }
  
  /*
   * If jbd2 has an error recorded for @journal, log it together with the
   * device id from @sb and the @slot number, then acknowledge and clear it.
   * Does nothing when jbd2_journal_errno() reports zero.
   */
  static void ocfs2_clear_journal_error(struct super_block *sb,
  				      journal_t *journal,
  				      int slot)
  {
  	int recorded = jbd2_journal_errno(journal);

  	if (!recorded)
  		return;

  	mlog(ML_ERROR, "File system error %d recorded in "
  	     "journal %u.\n", recorded, slot);
  	mlog(ML_ERROR, "File system on device %s needs checking.\n",
  	     sb->s_id);

  	jbd2_journal_ack_err(journal);
  	jbd2_journal_clear_err(journal);
  }
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
985
  int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed)
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
986
987
988
  {
  	int status = 0;
  	struct ocfs2_super *osb;
b1f3550fa   Julia Lawall   ocfs2: Use BUG_ON
989
  	BUG_ON(!journal);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
990
991
  
  	osb = journal->j_osb;
2b4e30fbd   Joel Becker   ocfs2: Switch ove...
992
  	status = jbd2_journal_load(journal->j_journal);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
993
994
995
996
997
998
999
  	if (status < 0) {
  		mlog(ML_ERROR, "Failed to load journal!
  ");
  		goto done;
  	}
  
  	ocfs2_clear_journal_error(osb->sb, journal->j_journal, osb->slot_num);
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1000
  	status = ocfs2_journal_toggle_dirty(osb, 1, replayed);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1001
1002
1003
1004
1005
1006
  	if (status < 0) {
  		mlog_errno(status);
  		goto done;
  	}
  
  	/* Launch the commit thread */
c271c5c22   Sunil Mushran   ocfs2: local mounts
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
  	if (!local) {
  		osb->commit_task = kthread_run(ocfs2_commit_thread, osb,
  					       "ocfs2cmt");
  		if (IS_ERR(osb->commit_task)) {
  			status = PTR_ERR(osb->commit_task);
  			osb->commit_task = NULL;
  			mlog(ML_ERROR, "unable to launch ocfs2commit thread, "
  			     "error=%d", status);
  			goto done;
  		}
  	} else
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1018
  		osb->commit_task = NULL;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1019
1020
  
  done:
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1021
1022
1023
1024
1025
1026
1027
1028
1029
  	return status;
  }
  
  
  /* 'full' flag tells us whether we clear out all blocks or if we just
   * mark the journal clean */
  int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full)
  {
  	int status;
ebdec83ba   Eric Sesterhenn / snakebyte   [PATCH] BUG_ON() ...
1030
  	BUG_ON(!journal);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1031

2b4e30fbd   Joel Becker   ocfs2: Switch ove...
1032
  	status = jbd2_journal_wipe(journal->j_journal, full);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1033
1034
1035
1036
  	if (status < 0) {
  		mlog_errno(status);
  		goto bail;
  	}
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1037
  	status = ocfs2_journal_toggle_dirty(journal->j_osb, 0, 0);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1038
1039
1040
1041
  	if (status < 0)
  		mlog_errno(status);
  
  bail:
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1042
1043
  	return status;
  }
553abd046   Joel Becker   ocfs2: Change the...
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
  /*
   * Return non-zero when the recovery map of @osb has no entries in use
   * (rm_used == 0).  The counter is sampled under osb->osb_lock.
   */
  static int ocfs2_recovery_completed(struct ocfs2_super *osb)
  {
  	struct ocfs2_recovery_map *map = osb->recovery_map;
  	int nothing_pending;

  	spin_lock(&osb->osb_lock);
  	nothing_pending = (map->rm_used == 0);
  	spin_unlock(&osb->osb_lock);

  	return nothing_pending;
  }
  
  /*
   * Sleep on osb->recovery_event until ocfs2_recovery_completed() reports
   * that the recovery map is empty.
   */
  void ocfs2_wait_for_recovery(struct ocfs2_super *osb)
  {
  	wait_event(osb->recovery_event, ocfs2_recovery_completed(osb));
  }
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
  /*
   * JBD Might read a cached version of another nodes journal file. We
   * don't want this as this file changes often and we get no
   * notification on those changes. The only way to be sure that we've
   * got the most up to date version of those blocks then is to force
   * read them off disk. Just searching through the buffer cache won't
   * work as there may be pages backing this file which are still marked
   * up to date. We know things can't change on this file underneath us
   * as we have the lock by now :)
   */
  static int ocfs2_force_read_journal(struct inode *inode)
  {
  	int status = 0;
4f902c377   Mark Fasheh   ocfs2: Fix extent...
1073
  	int i;
8110b073a   Mark Fasheh   ocfs2: Fix up i_b...
1074
  	u64 v_blkno, p_blkno, p_blocks, num_blocks;
4f902c377   Mark Fasheh   ocfs2: Fix extent...
1075
  #define CONCURRENT_JOURNAL_FILL 32ULL
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1076
  	struct buffer_head *bhs[CONCURRENT_JOURNAL_FILL];
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1077
  	memset(bhs, 0, sizeof(struct buffer_head *) * CONCURRENT_JOURNAL_FILL);
f17c20dd2   Junxiao Bi   ocfs2: use i_size...
1078
  	num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1079
  	v_blkno = 0;
8110b073a   Mark Fasheh   ocfs2: Fix up i_b...
1080
  	while (v_blkno < num_blocks) {
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1081
  		status = ocfs2_extent_map_get_blocks(inode, v_blkno,
49cb8d2d4   Mark Fasheh   ocfs2: Read from ...
1082
  						     &p_blkno, &p_blocks, NULL);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1083
1084
1085
1086
1087
1088
1089
  		if (status < 0) {
  			mlog_errno(status);
  			goto bail;
  		}
  
  		if (p_blocks > CONCURRENT_JOURNAL_FILL)
  			p_blocks = CONCURRENT_JOURNAL_FILL;
dd4a2c2bf   Mark Fasheh   ocfs2: Don't popu...
1090
1091
  		/* We are reading journal data which should not
  		 * be put in the uptodate cache */
da1e90985   Joel Becker   ocfs2: Separate o...
1092
1093
  		status = ocfs2_read_blocks_sync(OCFS2_SB(inode->i_sb),
  						p_blkno, p_blocks, bhs);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
  		if (status < 0) {
  			mlog_errno(status);
  			goto bail;
  		}
  
  		for(i = 0; i < p_blocks; i++) {
  			brelse(bhs[i]);
  			bhs[i] = NULL;
  		}
  
  		v_blkno += p_blocks;
  	}
  
  bail:
  	for(i = 0; i < CONCURRENT_JOURNAL_FILL; i++)
a81cb88b6   Mark Fasheh   ocfs2: Don't chec...
1109
  		brelse(bhs[i]);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1110
1111
1112
1113
1114
1115
1116
1117
  	return status;
  }
  
  /*
   * One unit of deferred recovery work.  Items are linked on the journal's
   * j_la_cleanups list and consumed by ocfs2_complete_recovery().
   */
  struct ocfs2_la_recovery_item {
  	/* Link on journal->j_la_cleanups. */
  	struct list_head	lri_list;
  	/* Slot the work belongs to. */
  	int			lri_slot;
  	/* Local alloc dinode copy to recover; may be NULL. */
  	struct ocfs2_dinode	*lri_la_dinode;
  	/* Truncate log dinode copy to recover; may be NULL. */
  	struct ocfs2_dinode	*lri_tl_dinode;
  	/* Quota recovery state; may be NULL. */
  	struct ocfs2_quota_recovery *lri_qrec;
  };
  
  /* Does the second half of the recovery process. By this point, the
   * node is marked clean and can actually be considered recovered,
   * hence it's no longer in the recovery map, but there's still some
   * cleanup we can do which shouldn't happen within the recovery thread
   * as locking in that context becomes very difficult if we are to take
   * recovering nodes into account.
   *
   * NOTE: This function can and will sleep on recovery of other nodes
   * during cluster locking, just like any other ocfs2 process.
   */
c4028958b   David Howells   WorkStruct: make ...
1131
  void ocfs2_complete_recovery(struct work_struct *work)
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1132
  {
b41079504   Tao Ma   ocfs2: Remove mas...
1133
  	int ret = 0;
c4028958b   David Howells   WorkStruct: make ...
1134
1135
1136
  	struct ocfs2_journal *journal =
  		container_of(work, struct ocfs2_journal, j_recovery_work);
  	struct ocfs2_super *osb = journal->j_osb;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1137
  	struct ocfs2_dinode *la_dinode, *tl_dinode;
800deef3f   Christoph Hellwig   [PATCH] ocfs2: us...
1138
  	struct ocfs2_la_recovery_item *item, *n;
2205363dc   Jan Kara   ocfs2: Implement ...
1139
  	struct ocfs2_quota_recovery *qrec;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1140
  	LIST_HEAD(tmp_la_list);
b41079504   Tao Ma   ocfs2: Remove mas...
1141
1142
  	trace_ocfs2_complete_recovery(
  		(unsigned long long)OCFS2_I(journal->j_inode)->ip_blkno);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1143
1144
1145
1146
  
  	spin_lock(&journal->j_lock);
  	list_splice_init(&journal->j_la_cleanups, &tmp_la_list);
  	spin_unlock(&journal->j_lock);
800deef3f   Christoph Hellwig   [PATCH] ocfs2: us...
1147
  	list_for_each_entry_safe(item, n, &tmp_la_list, lri_list) {
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1148
  		list_del_init(&item->lri_list);
19ece546a   Jan Kara   ocfs2: Enable quo...
1149
  		ocfs2_wait_on_quotas(osb);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1150
  		la_dinode = item->lri_la_dinode;
b41079504   Tao Ma   ocfs2: Remove mas...
1151
1152
  		tl_dinode = item->lri_tl_dinode;
  		qrec = item->lri_qrec;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1153

b41079504   Tao Ma   ocfs2: Remove mas...
1154
1155
1156
1157
1158
1159
  		trace_ocfs2_complete_recovery_slot(item->lri_slot,
  			la_dinode ? le64_to_cpu(la_dinode->i_blkno) : 0,
  			tl_dinode ? le64_to_cpu(tl_dinode->i_blkno) : 0,
  			qrec);
  
  		if (la_dinode) {
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1160
1161
1162
1163
1164
1165
1166
  			ret = ocfs2_complete_local_alloc_recovery(osb,
  								  la_dinode);
  			if (ret < 0)
  				mlog_errno(ret);
  
  			kfree(la_dinode);
  		}
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1167
  		if (tl_dinode) {
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
  			ret = ocfs2_complete_truncate_log_recovery(osb,
  								   tl_dinode);
  			if (ret < 0)
  				mlog_errno(ret);
  
  			kfree(tl_dinode);
  		}
  
  		ret = ocfs2_recover_orphans(osb, item->lri_slot);
  		if (ret < 0)
  			mlog_errno(ret);
2205363dc   Jan Kara   ocfs2: Implement ...
1179
  		if (qrec) {
2205363dc   Jan Kara   ocfs2: Implement ...
1180
1181
1182
1183
1184
1185
  			ret = ocfs2_finish_quota_recovery(osb, qrec,
  							  item->lri_slot);
  			if (ret < 0)
  				mlog_errno(ret);
  			/* Recovery info is already freed now */
  		}
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1186
1187
  		kfree(item);
  	}
b41079504   Tao Ma   ocfs2: Remove mas...
1188
  	trace_ocfs2_complete_recovery_end(ret);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1189
1190
1191
1192
1193
1194
1195
1196
  }
  
  /* NOTE: This function always eats your references to la_dinode and
   * tl_dinode, either manually on error, or by passing them to
   * ocfs2_complete_recovery */
  static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
  					    int slot_num,
  					    struct ocfs2_dinode *la_dinode,
2205363dc   Jan Kara   ocfs2: Implement ...
1197
1198
  					    struct ocfs2_dinode *tl_dinode,
  					    struct ocfs2_quota_recovery *qrec)
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1199
1200
  {
  	struct ocfs2_la_recovery_item *item;
afae00ab4   Sunil Mushran   ocfs2: fix gfp ma...
1201
  	item = kmalloc(sizeof(struct ocfs2_la_recovery_item), GFP_NOFS);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1202
1203
1204
1205
  	if (!item) {
  		/* Though we wish to avoid it, we are in fact safe in
  		 * skipping local alloc cleanup as fsck.ocfs2 is more
  		 * than capable of reclaiming unused space. */
d787ab097   Tim Gardner   ocfs2: remove kfr...
1206
1207
  		kfree(la_dinode);
  		kfree(tl_dinode);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1208

2205363dc   Jan Kara   ocfs2: Implement ...
1209
1210
  		if (qrec)
  			ocfs2_free_quota_recovery(qrec);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1211
1212
1213
1214
1215
1216
1217
1218
  		mlog_errno(-ENOMEM);
  		return;
  	}
  
  	INIT_LIST_HEAD(&item->lri_list);
  	item->lri_la_dinode = la_dinode;
  	item->lri_slot = slot_num;
  	item->lri_tl_dinode = tl_dinode;
2205363dc   Jan Kara   ocfs2: Implement ...
1219
  	item->lri_qrec = qrec;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1220
1221
1222
1223
1224
1225
1226
1227
  
  	spin_lock(&journal->j_lock);
  	list_add_tail(&item->lri_list, &journal->j_la_cleanups);
  	queue_work(ocfs2_wq, &journal->j_recovery_work);
  	spin_unlock(&journal->j_lock);
  }
  
  /* Called by the mount code to queue recovery the last part of
9140db04e   Srinivas Eeda   ocfs2: recover or...
1228
   * recovery for it's own and offline slot(s). */
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1229
1230
1231
  void ocfs2_complete_mount_recovery(struct ocfs2_super *osb)
  {
  	struct ocfs2_journal *journal = osb->journal;
10b3dd761   Sunil Mushran   ocfs2: Skip mount...
1232
1233
  	if (ocfs2_is_hard_readonly(osb))
  		return;
9140db04e   Srinivas Eeda   ocfs2: recover or...
1234
1235
1236
1237
1238
  	/* No need to queue up our truncate_log as regular cleanup will catch
  	 * that */
  	ocfs2_queue_recovery_completion(journal, osb->slot_num,
  					osb->local_alloc_copy, NULL, NULL);
  	ocfs2_schedule_truncate_log_flush(osb, 0);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1239

9140db04e   Srinivas Eeda   ocfs2: recover or...
1240
1241
1242
1243
1244
1245
1246
  	osb->local_alloc_copy = NULL;
  	osb->dirty = 0;
  
  	/* queue to recover orphan slots for all offline slots */
  	ocfs2_replay_map_set_state(osb, REPLAY_NEEDED);
  	ocfs2_queue_replay_slots(osb);
  	ocfs2_free_replay_slots(osb);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1247
  }
2205363dc   Jan Kara   ocfs2: Implement ...
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
  /*
   * Queue osb->quota_rec (if set) for completion on this node's own slot
   * and clear the pointer; the queued item owns it from then on.
   */
  void ocfs2_complete_quota_recovery(struct ocfs2_super *osb)
  {
  	if (!osb->quota_rec)
  		return;

  	ocfs2_queue_recovery_completion(osb->journal, osb->slot_num,
  					NULL, NULL, osb->quota_rec);
  	osb->quota_rec = NULL;
  }
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1259
1260
  static int __ocfs2_recovery_thread(void *arg)
  {
2205363dc   Jan Kara   ocfs2: Implement ...
1261
  	int status, node_num, slot_num;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1262
  	struct ocfs2_super *osb = arg;
553abd046   Joel Becker   ocfs2: Change the...
1263
  	struct ocfs2_recovery_map *rm = osb->recovery_map;
2205363dc   Jan Kara   ocfs2: Implement ...
1264
1265
1266
  	int *rm_quota = NULL;
  	int rm_quota_used = 0, i;
  	struct ocfs2_quota_recovery *qrec;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1267

ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1268
1269
1270
1271
  	status = ocfs2_wait_on_mount(osb);
  	if (status < 0) {
  		goto bail;
  	}
2205363dc   Jan Kara   ocfs2: Implement ...
1272
1273
1274
1275
1276
  	rm_quota = kzalloc(osb->max_slots * sizeof(int), GFP_NOFS);
  	if (!rm_quota) {
  		status = -ENOMEM;
  		goto bail;
  	}
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1277
1278
1279
1280
1281
1282
  restart:
  	status = ocfs2_super_lock(osb, 1);
  	if (status < 0) {
  		mlog_errno(status);
  		goto bail;
  	}
9140db04e   Srinivas Eeda   ocfs2: recover or...
1283
1284
1285
1286
1287
1288
1289
  	status = ocfs2_compute_replay_slots(osb);
  	if (status < 0)
  		mlog_errno(status);
  
  	/* queue recovery for our own slot */
  	ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL,
  					NULL, NULL);
553abd046   Joel Becker   ocfs2: Change the...
1290
1291
1292
1293
1294
1295
  	spin_lock(&osb->osb_lock);
  	while (rm->rm_used) {
  		/* It's always safe to remove entry zero, as we won't
  		 * clear it until ocfs2_recover_node() has succeeded. */
  		node_num = rm->rm_entries[0];
  		spin_unlock(&osb->osb_lock);
2205363dc   Jan Kara   ocfs2: Implement ...
1296
  		slot_num = ocfs2_node_num_to_slot(osb, node_num);
b41079504   Tao Ma   ocfs2: Remove mas...
1297
  		trace_ocfs2_recovery_thread_node(node_num, slot_num);
2205363dc   Jan Kara   ocfs2: Implement ...
1298
1299
  		if (slot_num == -ENOENT) {
  			status = 0;
2205363dc   Jan Kara   ocfs2: Implement ...
1300
1301
  			goto skip_recovery;
  		}
2205363dc   Jan Kara   ocfs2: Implement ...
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
  
  		/* It is a bit subtle with quota recovery. We cannot do it
  		 * immediately because we have to obtain cluster locks from
  		 * quota files and we also don't want to just skip it because
  		 * then quota usage would be out of sync until some node takes
  		 * the slot. So we remember which nodes need quota recovery
  		 * and when everything else is done, we recover quotas. */
  		for (i = 0; i < rm_quota_used && rm_quota[i] != slot_num; i++);
  		if (i == rm_quota_used)
  			rm_quota[rm_quota_used++] = slot_num;
  
  		status = ocfs2_recover_node(osb, node_num, slot_num);
  skip_recovery:
553abd046   Joel Becker   ocfs2: Change the...
1315
1316
1317
  		if (!status) {
  			ocfs2_recovery_map_clear(osb, node_num);
  		} else {
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1318
1319
1320
1321
1322
1323
1324
  			mlog(ML_ERROR,
  			     "Error %d recovering node %d on device (%u,%u)!
  ",
  			     status, node_num,
  			     MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
  			mlog(ML_ERROR, "Volume requires unmount.
  ");
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1325
  		}
553abd046   Joel Becker   ocfs2: Change the...
1326
  		spin_lock(&osb->osb_lock);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1327
  	}
553abd046   Joel Becker   ocfs2: Change the...
1328
  	spin_unlock(&osb->osb_lock);
b41079504   Tao Ma   ocfs2: Remove mas...
1329
  	trace_ocfs2_recovery_thread_end(status);
553abd046   Joel Becker   ocfs2: Change the...
1330

539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1331
1332
1333
1334
1335
  	/* Refresh all journal recovery generations from disk */
  	status = ocfs2_check_journals_nolocks(osb);
  	status = (status == -EROFS) ? 0 : status;
  	if (status < 0)
  		mlog_errno(status);
2205363dc   Jan Kara   ocfs2: Implement ...
1336
  	/* Now it is right time to recover quotas... We have to do this under
25985edce   Lucas De Marchi   Fix common misspe...
1337
  	 * superblock lock so that no one can start using the slot (and crash)
2205363dc   Jan Kara   ocfs2: Implement ...
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
  	 * before we recover it */
  	for (i = 0; i < rm_quota_used; i++) {
  		qrec = ocfs2_begin_quota_recovery(osb, rm_quota[i]);
  		if (IS_ERR(qrec)) {
  			status = PTR_ERR(qrec);
  			mlog_errno(status);
  			continue;
  		}
  		ocfs2_queue_recovery_completion(osb->journal, rm_quota[i],
  						NULL, NULL, qrec);
  	}
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1349
  	ocfs2_super_unlock(osb, 1);
9140db04e   Srinivas Eeda   ocfs2: recover or...
1350
1351
  	/* queue recovery for offline slots */
  	ocfs2_queue_replay_slots(osb);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1352
1353
  
  bail:
c74ec2f77   Arjan van de Ven   [PATCH] ocfs2: Se...
1354
  	mutex_lock(&osb->recovery_lock);
553abd046   Joel Becker   ocfs2: Change the...
1355
  	if (!status && !ocfs2_recovery_completed(osb)) {
c74ec2f77   Arjan van de Ven   [PATCH] ocfs2: Se...
1356
  		mutex_unlock(&osb->recovery_lock);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1357
1358
  		goto restart;
  	}
9140db04e   Srinivas Eeda   ocfs2: recover or...
1359
  	ocfs2_free_replay_slots(osb);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1360
1361
1362
  	osb->recovery_thread_task = NULL;
  	mb(); /* sync with ocfs2_recovery_thread_running */
  	wake_up(&osb->recovery_event);
c74ec2f77   Arjan van de Ven   [PATCH] ocfs2: Se...
1363
  	mutex_unlock(&osb->recovery_lock);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1364

d787ab097   Tim Gardner   ocfs2: remove kfr...
1365
  	kfree(rm_quota);
2205363dc   Jan Kara   ocfs2: Implement ...
1366

ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1367
1368
1369
1370
1371
1372
1373
1374
1375
  	/* no one is calling kthread_stop() for us so the kthread() api
  	 * requires that we call do_exit().  And it isn't exported, but
  	 * complete_and_exit() seems to be a minimal wrapper around it. */
  	complete_and_exit(NULL, status);
  	return status;
  }
  
  void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num)
  {
c74ec2f77   Arjan van de Ven   [PATCH] ocfs2: Se...
1376
  	mutex_lock(&osb->recovery_lock);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1377

b41079504   Tao Ma   ocfs2: Remove mas...
1378
1379
1380
1381
  	trace_ocfs2_recovery_thread(node_num, osb->node_num,
  		osb->disable_recovery, osb->recovery_thread_task,
  		osb->disable_recovery ?
  		-1 : ocfs2_recovery_map_set(osb, node_num));
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1382

b41079504   Tao Ma   ocfs2: Remove mas...
1383
1384
  	if (osb->disable_recovery)
  		goto out;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1385
1386
1387
1388
1389
  
  	if (osb->recovery_thread_task)
  		goto out;
  
  	osb->recovery_thread_task =  kthread_run(__ocfs2_recovery_thread, osb,
784270435   Mark Fasheh   ocfs2: clean up s...
1390
  						 "ocfs2rec");
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1391
1392
1393
1394
1395
1396
  	if (IS_ERR(osb->recovery_thread_task)) {
  		mlog_errno((int)PTR_ERR(osb->recovery_thread_task));
  		osb->recovery_thread_task = NULL;
  	}
  
  out:
c74ec2f77   Arjan van de Ven   [PATCH] ocfs2: Se...
1397
  	mutex_unlock(&osb->recovery_lock);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1398
  	wake_up(&osb->recovery_event);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1399
  }
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
  static int ocfs2_read_journal_inode(struct ocfs2_super *osb,
  				    int slot_num,
  				    struct buffer_head **bh,
  				    struct inode **ret_inode)
  {
  	int status = -EACCES;
  	struct inode *inode = NULL;
  
  	BUG_ON(slot_num >= osb->max_slots);
  
  	inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
  					    slot_num);
  	if (!inode || is_bad_inode(inode)) {
  		mlog_errno(status);
  		goto bail;
  	}
  	SET_INODE_JOURNAL(inode);
b657c95c1   Joel Becker   ocfs2: Wrap inode...
1417
  	status = ocfs2_read_inode_block_full(inode, bh, OCFS2_BH_IGNORE_CACHE);
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
  	if (status < 0) {
  		mlog_errno(status);
  		goto bail;
  	}
  
  	status = 0;
  
  bail:
  	if (inode) {
  		if (status || !ret_inode)
  			iput(inode);
  		else
  			*ret_inode = inode;
  	}
  	return status;
  }
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
  /* Does the actual journal replay and marks the journal inode as
   * clean. Will only replay if the journal inode is marked dirty. */
  static int ocfs2_replay_journal(struct ocfs2_super *osb,
  				int node_num,
  				int slot_num)
  {
  	int status;
  	int got_lock = 0;
  	unsigned int flags;
  	struct inode *inode = NULL;
  	struct ocfs2_dinode *fe;
  	journal_t *journal = NULL;
  	struct buffer_head *bh = NULL;
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1447
  	u32 slot_reco_gen;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1448

539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1449
1450
  	status = ocfs2_read_journal_inode(osb, slot_num, &bh, &inode);
  	if (status) {
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1451
1452
1453
  		mlog_errno(status);
  		goto done;
  	}
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
  
  	fe = (struct ocfs2_dinode *)bh->b_data;
  	slot_reco_gen = ocfs2_get_recovery_generation(fe);
  	brelse(bh);
  	bh = NULL;
  
  	/*
  	 * As the fs recovery is asynchronous, there is a small chance that
  	 * another node mounted (and recovered) the slot before the recovery
  	 * thread could get the lock. To handle that, we dirty read the journal
  	 * inode for that slot to get the recovery generation. If it is
  	 * different than what we expected, the slot has been recovered.
  	 * If not, it needs recovery.
  	 */
  	if (osb->slot_recovery_generations[slot_num] != slot_reco_gen) {
b41079504   Tao Ma   ocfs2: Remove mas...
1469
  		trace_ocfs2_replay_journal_recovered(slot_num,
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1470
1471
1472
  		     osb->slot_recovery_generations[slot_num], slot_reco_gen);
  		osb->slot_recovery_generations[slot_num] = slot_reco_gen;
  		status = -EBUSY;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1473
1474
  		goto done;
  	}
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1475
1476
  
  	/* Continue with recovery as the journal has not yet been recovered */
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1477

e63aecb65   Mark Fasheh   ocfs2: Rename ocf...
1478
  	status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1479
  	if (status < 0) {
b41079504   Tao Ma   ocfs2: Remove mas...
1480
  		trace_ocfs2_replay_journal_lock_err(status);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
  		if (status != -ERESTARTSYS)
  			mlog(ML_ERROR, "Could not lock journal!
  ");
  		goto done;
  	}
  	got_lock = 1;
  
  	fe = (struct ocfs2_dinode *) bh->b_data;
  
  	flags = le32_to_cpu(fe->id1.journal1.ij_flags);
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1491
  	slot_reco_gen = ocfs2_get_recovery_generation(fe);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1492
1493
  
  	if (!(flags & OCFS2_JOURNAL_DIRTY_FL)) {
b41079504   Tao Ma   ocfs2: Remove mas...
1494
  		trace_ocfs2_replay_journal_skip(node_num);
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1495
1496
  		/* Refresh recovery generation for the slot */
  		osb->slot_recovery_generations[slot_num] = slot_reco_gen;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1497
1498
  		goto done;
  	}
9140db04e   Srinivas Eeda   ocfs2: recover or...
1499
1500
  	/* we need to run complete recovery for offline orphan slots */
  	ocfs2_replay_map_set_state(osb, REPLAY_NEEDED);
619c200de   Sunil Mushran   ocfs2: Clean up m...
1501
1502
1503
1504
  	printk(KERN_NOTICE "ocfs2: Begin replay journal (node %d, slot %d) on "\
  	       "device (%u,%u)
  ", node_num, slot_num, MAJOR(osb->sb->s_dev),
  	       MINOR(osb->sb->s_dev));
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1505
1506
1507
1508
1509
1510
1511
1512
  
  	OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
  
  	status = ocfs2_force_read_journal(inode);
  	if (status < 0) {
  		mlog_errno(status);
  		goto done;
  	}
2b4e30fbd   Joel Becker   ocfs2: Switch ove...
1513
  	journal = jbd2_journal_init_inode(inode);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1514
1515
1516
1517
1518
1519
  	if (journal == NULL) {
  		mlog(ML_ERROR, "Linux journal layer error
  ");
  		status = -EIO;
  		goto done;
  	}
2b4e30fbd   Joel Becker   ocfs2: Switch ove...
1520
  	status = jbd2_journal_load(journal);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1521
1522
1523
1524
  	if (status < 0) {
  		mlog_errno(status);
  		if (!igrab(inode))
  			BUG();
2b4e30fbd   Joel Becker   ocfs2: Switch ove...
1525
  		jbd2_journal_destroy(journal);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1526
1527
1528
1529
1530
1531
  		goto done;
  	}
  
  	ocfs2_clear_journal_error(osb->sb, journal, slot_num);
  
  	/* wipe the journal */
2b4e30fbd   Joel Becker   ocfs2: Switch ove...
1532
1533
1534
  	jbd2_journal_lock_updates(journal);
  	status = jbd2_journal_flush(journal);
  	jbd2_journal_unlock_updates(journal);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1535
1536
1537
1538
1539
1540
1541
  	if (status < 0)
  		mlog_errno(status);
  
  	/* This will mark the node clean */
  	flags = le32_to_cpu(fe->id1.journal1.ij_flags);
  	flags &= ~OCFS2_JOURNAL_DIRTY_FL;
  	fe->id1.journal1.ij_flags = cpu_to_le32(flags);
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1542
1543
1544
1545
  	/* Increment recovery generation to indicate successful recovery */
  	ocfs2_bump_recovery_generation(fe);
  	osb->slot_recovery_generations[slot_num] =
  					ocfs2_get_recovery_generation(fe);
13723d00e   Joel Becker   ocfs2: Use metada...
1546
  	ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check);
8cb471e8f   Joel Becker   ocfs2: Take the i...
1547
  	status = ocfs2_write_block(osb, bh, INODE_CACHE(inode));
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1548
1549
1550
1551
1552
  	if (status < 0)
  		mlog_errno(status);
  
  	if (!igrab(inode))
  		BUG();
2b4e30fbd   Joel Becker   ocfs2: Switch ove...
1553
  	jbd2_journal_destroy(journal);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1554

619c200de   Sunil Mushran   ocfs2: Clean up m...
1555
1556
1557
1558
  	printk(KERN_NOTICE "ocfs2: End replay journal (node %d, slot %d) on "\
  	       "device (%u,%u)
  ", node_num, slot_num, MAJOR(osb->sb->s_dev),
  	       MINOR(osb->sb->s_dev));
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1559
1560
1561
  done:
  	/* drop the lock on this nodes journal */
  	if (got_lock)
e63aecb65   Mark Fasheh   ocfs2: Rename ocf...
1562
  		ocfs2_inode_unlock(inode, 1);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1563
1564
1565
  
  	if (inode)
  		iput(inode);
a81cb88b6   Mark Fasheh   ocfs2: Don't chec...
1566
  	brelse(bh);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1567

ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
  	return status;
  }
  
  /*
   * Do the most important parts of node recovery:
   *  - Replay it's journal
   *  - Stamp a clean local allocator file
   *  - Stamp a clean truncate log
   *  - Mark the node clean
   *
   * If this function completes without error, a node in OCFS2 can be
   * said to have been safely recovered. As a result, failure during the
   * second part of a nodes recovery process (local alloc recovery) is
   * far less concerning.
   */
  static int ocfs2_recover_node(struct ocfs2_super *osb,
2205363dc   Jan Kara   ocfs2: Implement ...
1584
  			      int node_num, int slot_num)
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1585
1586
  {
  	int status = 0;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1587
1588
  	struct ocfs2_dinode *la_copy = NULL;
  	struct ocfs2_dinode *tl_copy = NULL;
b41079504   Tao Ma   ocfs2: Remove mas...
1589
  	trace_ocfs2_recover_node(node_num, slot_num, osb->node_num);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1590
1591
1592
  
  	/* Should not ever be called to recover ourselves -- in that
  	 * case we should've called ocfs2_journal_load instead. */
ebdec83ba   Eric Sesterhenn / snakebyte   [PATCH] BUG_ON() ...
1593
  	BUG_ON(osb->node_num == node_num);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1594

ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1595
1596
  	status = ocfs2_replay_journal(osb, node_num, slot_num);
  	if (status < 0) {
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1597
  		if (status == -EBUSY) {
b41079504   Tao Ma   ocfs2: Remove mas...
1598
  			trace_ocfs2_recover_node_skip(slot_num, node_num);
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1599
1600
1601
  			status = 0;
  			goto done;
  		}
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
  		mlog_errno(status);
  		goto done;
  	}
  
  	/* Stamp a clean local alloc file AFTER recovering the journal... */
  	status = ocfs2_begin_local_alloc_recovery(osb, slot_num, &la_copy);
  	if (status < 0) {
  		mlog_errno(status);
  		goto done;
  	}
  
  	/* An error from begin_truncate_log_recovery is not
  	 * serious enough to warrant halting the rest of
  	 * recovery. */
  	status = ocfs2_begin_truncate_log_recovery(osb, slot_num, &tl_copy);
  	if (status < 0)
  		mlog_errno(status);
  
  	/* Likewise, this would be a strange but ultimately not so
  	 * harmful place to get an error... */
8e8a4603b   Mark Fasheh   ocfs2: Move slot ...
1622
  	status = ocfs2_clear_slot(osb, slot_num);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1623
1624
1625
1626
1627
  	if (status < 0)
  		mlog_errno(status);
  
  	/* This will kfree the memory pointed to by la_copy and tl_copy */
  	ocfs2_queue_recovery_completion(osb->journal, slot_num, la_copy,
2205363dc   Jan Kara   ocfs2: Implement ...
1628
  					tl_copy, NULL);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1629
1630
1631
  
  	status = 0;
  done:
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
  	return status;
  }
  
  /* Test node liveness by trylocking his journal. If we get the lock,
   * we drop it here. Return 0 if we got the lock, -EAGAIN if node is
   * still alive (we couldn't get the lock) and < 0 on error. */
  static int ocfs2_trylock_journal(struct ocfs2_super *osb,
  				 int slot_num)
  {
  	int status, flags;
  	struct inode *inode = NULL;
  
  	inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
  					    slot_num);
  	if (inode == NULL) {
  		mlog(ML_ERROR, "access error
  ");
  		status = -EACCES;
  		goto bail;
  	}
  	if (is_bad_inode(inode)) {
  		mlog(ML_ERROR, "access error (bad inode)
  ");
  		iput(inode);
  		inode = NULL;
  		status = -EACCES;
  		goto bail;
  	}
  	SET_INODE_JOURNAL(inode);
  
  	flags = OCFS2_META_LOCK_RECOVERY | OCFS2_META_LOCK_NOQUEUE;
e63aecb65   Mark Fasheh   ocfs2: Rename ocf...
1663
  	status = ocfs2_inode_lock_full(inode, NULL, 1, flags);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1664
1665
1666
1667
1668
  	if (status < 0) {
  		if (status != -EAGAIN)
  			mlog_errno(status);
  		goto bail;
  	}
e63aecb65   Mark Fasheh   ocfs2: Rename ocf...
1669
  	ocfs2_inode_unlock(inode, 1);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
  bail:
  	if (inode)
  		iput(inode);
  
  	return status;
  }
  
  /* Call this underneath ocfs2_super_lock. It also assumes that the
   * slot info struct has been updated from disk. */
  int ocfs2_mark_dead_nodes(struct ocfs2_super *osb)
  {
d85b20e4b   Joel Becker   ocfs2: Make ocfs2...
1681
1682
  	unsigned int node_num;
  	int status, i;
a1af7d15a   Mark Fasheh   ocfs2: Fix sleep-...
1683
  	u32 gen;
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1684
1685
  	struct buffer_head *bh = NULL;
  	struct ocfs2_dinode *di;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1686
1687
1688
  
  	/* This is called with the super block cluster lock, so we
  	 * know that the slot map can't change underneath us. */
d85b20e4b   Joel Becker   ocfs2: Make ocfs2...
1689
  	for (i = 0; i < osb->max_slots; i++) {
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1690
1691
1692
1693
1694
1695
1696
  		/* Read journal inode to get the recovery generation */
  		status = ocfs2_read_journal_inode(osb, i, &bh, NULL);
  		if (status) {
  			mlog_errno(status);
  			goto bail;
  		}
  		di = (struct ocfs2_dinode *)bh->b_data;
a1af7d15a   Mark Fasheh   ocfs2: Fix sleep-...
1697
  		gen = ocfs2_get_recovery_generation(di);
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1698
1699
  		brelse(bh);
  		bh = NULL;
a1af7d15a   Mark Fasheh   ocfs2: Fix sleep-...
1700
1701
  		spin_lock(&osb->osb_lock);
  		osb->slot_recovery_generations[i] = gen;
b41079504   Tao Ma   ocfs2: Remove mas...
1702
1703
  		trace_ocfs2_mark_dead_nodes(i,
  					    osb->slot_recovery_generations[i]);
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1704

a1af7d15a   Mark Fasheh   ocfs2: Fix sleep-...
1705
1706
  		if (i == osb->slot_num) {
  			spin_unlock(&osb->osb_lock);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1707
  			continue;
a1af7d15a   Mark Fasheh   ocfs2: Fix sleep-...
1708
  		}
d85b20e4b   Joel Becker   ocfs2: Make ocfs2...
1709
1710
  
  		status = ocfs2_slot_to_node_num_locked(osb, i, &node_num);
a1af7d15a   Mark Fasheh   ocfs2: Fix sleep-...
1711
1712
  		if (status == -ENOENT) {
  			spin_unlock(&osb->osb_lock);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1713
  			continue;
a1af7d15a   Mark Fasheh   ocfs2: Fix sleep-...
1714
  		}
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1715

a1af7d15a   Mark Fasheh   ocfs2: Fix sleep-...
1716
1717
  		if (__ocfs2_recovery_map_test(osb, node_num)) {
  			spin_unlock(&osb->osb_lock);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1718
  			continue;
a1af7d15a   Mark Fasheh   ocfs2: Fix sleep-...
1719
  		}
d85b20e4b   Joel Becker   ocfs2: Make ocfs2...
1720
  		spin_unlock(&osb->osb_lock);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
  
  		/* Ok, we have a slot occupied by another node which
  		 * is not in the recovery map. We trylock his journal
  		 * file here to test if he's alive. */
  		status = ocfs2_trylock_journal(osb, i);
  		if (!status) {
  			/* Since we're called from mount, we know that
  			 * the recovery thread can't race us on
  			 * setting / checking the recovery bits. */
  			ocfs2_recovery_thread(osb, node_num);
  		} else if ((status < 0) && (status != -EAGAIN)) {
  			mlog_errno(status);
  			goto bail;
  		}
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1735
  	}
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1736
1737
1738
  
  	status = 0;
  bail:
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1739
1740
  	return status;
  }
83273932f   Srinivas Eeda   ocfs2: timer to q...
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
  /*
   * Delay until the next orphan scan, in jiffies.
   *
   * The base period is ORPHAN_SCAN_SCHEDULE_TIMEOUT milliseconds; a random
   * 0..4999 ms is added on top to minimize multiple nodes firing the timer
   * at the same time.
   */
  static inline unsigned long ocfs2_orphan_scan_timeout(void)
  {
  	unsigned long jitter;

  	get_random_bytes(&jitter, sizeof(jitter));
  	jitter %= 5000;

  	return msecs_to_jiffies(ORPHAN_SCAN_SCHEDULE_TIMEOUT + jitter);
  }
  
  /*
   * ocfs2_queue_orphan_scan calls ocfs2_queue_recovery_completion for
   * every slot, queuing a recovery of the slot on the ocfs2_wq thread. This
   * is done to catch any orphans that are left over in orphan directories.
   *
a035bff6b   Sunil Mushran   ocfs2: Add commen...
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
   * It scans all slots, even ones that are in use. It does so to handle the
   * case described below:
   *
   *   Node 1 has an inode it was using. The dentry went away due to memory
   *   pressure.  Node 1 closes the inode, but it's on the free list. The node
   *   has the open lock.
   *   Node 2 unlinks the inode. It grabs the dentry lock to notify others,
   *   but node 1 has no dentry and doesn't get the message. It trylocks the
   *   open lock, sees that another node has a PR, and does nothing.
   *   Later node 2 runs its orphan dir. It igets the inode, trylocks the
   *   open lock, sees the PR still, and does nothing.
   *   Basically, we have to trigger an orphan iput on node 1. The only way
   *   for this to happen is if node 1 runs node 2's orphan dir.
   *
83273932f   Srinivas Eeda   ocfs2: timer to q...
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
   * ocfs2_queue_orphan_scan gets called every ORPHAN_SCAN_SCHEDULE_TIMEOUT
   * seconds.  It gets an EX lock on os_lockres and checks sequence number
   * stored in LVB. If the sequence number has changed, it means some other
   * node has done the scan.  This node skips the scan and tracks the
   * sequence number.  If the sequence number didn't change, it means a scan
   * hasn't happened.  The node queues a scan and increments the
   * sequence number in the LVB.
   */
  void ocfs2_queue_orphan_scan(struct ocfs2_super *osb)
  {
  	struct ocfs2_orphan_scan *os;
  	int status, i;
  	u32 seqno = 0;
  
  	os = &osb->osb_orphan_scan;
692684e19   Sunil Mushran   ocfs2: Stop orpha...
1789
1790
  	if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
  		goto out;
b41079504   Tao Ma   ocfs2: Remove mas...
1791
1792
  	trace_ocfs2_queue_orphan_scan_begin(os->os_count, os->os_seqno,
  					    atomic_read(&os->os_state));
df152c241   Sunil Mushran   ocfs2: Disable or...
1793
  	status = ocfs2_orphan_scan_lock(osb, &seqno);
83273932f   Srinivas Eeda   ocfs2: timer to q...
1794
1795
1796
1797
1798
  	if (status < 0) {
  		if (status != -EAGAIN)
  			mlog_errno(status);
  		goto out;
  	}
692684e19   Sunil Mushran   ocfs2: Stop orpha...
1799
1800
1801
  	/* Do no queue the tasks if the volume is being umounted */
  	if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
  		goto unlock;
83273932f   Srinivas Eeda   ocfs2: timer to q...
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
  	if (os->os_seqno != seqno) {
  		os->os_seqno = seqno;
  		goto unlock;
  	}
  
  	for (i = 0; i < osb->max_slots; i++)
  		ocfs2_queue_recovery_completion(osb->journal, i, NULL, NULL,
  						NULL);
  	/*
  	 * We queued a recovery on orphan slots, increment the sequence
  	 * number and update LVB so other node will skip the scan for a while
  	 */
  	seqno++;
15633a220   Srinivas Eeda   ocfs2 patch to tr...
1815
1816
  	os->os_count++;
  	os->os_scantime = CURRENT_TIME;
83273932f   Srinivas Eeda   ocfs2: timer to q...
1817
  unlock:
df152c241   Sunil Mushran   ocfs2: Disable or...
1818
  	ocfs2_orphan_scan_unlock(osb, seqno);
83273932f   Srinivas Eeda   ocfs2: timer to q...
1819
  out:
b41079504   Tao Ma   ocfs2: Remove mas...
1820
1821
  	trace_ocfs2_queue_orphan_scan_end(os->os_count, os->os_seqno,
  					  atomic_read(&os->os_state));
83273932f   Srinivas Eeda   ocfs2: timer to q...
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
  	return;
  }
  
  /* Worker task that gets fired every ORPHAN_SCAN_SCHEDULE_TIMEOUT millsec */
  void ocfs2_orphan_scan_work(struct work_struct *work)
  {
  	struct ocfs2_orphan_scan *os;
  	struct ocfs2_super *osb;
  
  	os = container_of(work, struct ocfs2_orphan_scan,
  			  os_orphan_scan_work.work);
  	osb = os->os_osb;
  
  	mutex_lock(&os->os_lock);
  	ocfs2_queue_orphan_scan(osb);
692684e19   Sunil Mushran   ocfs2: Stop orpha...
1837
  	if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE)
40f165f41   Tao Ma   ocfs2: Move orpha...
1838
  		queue_delayed_work(ocfs2_wq, &os->os_orphan_scan_work,
692684e19   Sunil Mushran   ocfs2: Stop orpha...
1839
  				      ocfs2_orphan_scan_timeout());
83273932f   Srinivas Eeda   ocfs2: timer to q...
1840
1841
1842
1843
1844
1845
1846
1847
  	mutex_unlock(&os->os_lock);
  }
  
  void ocfs2_orphan_scan_stop(struct ocfs2_super *osb)
  {
  	struct ocfs2_orphan_scan *os;
  
  	os = &osb->osb_orphan_scan;
df152c241   Sunil Mushran   ocfs2: Disable or...
1848
1849
1850
1851
1852
1853
  	if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE) {
  		atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
  		mutex_lock(&os->os_lock);
  		cancel_delayed_work(&os->os_orphan_scan_work);
  		mutex_unlock(&os->os_lock);
  	}
83273932f   Srinivas Eeda   ocfs2: timer to q...
1854
  }
df152c241   Sunil Mushran   ocfs2: Disable or...
1855
  void ocfs2_orphan_scan_init(struct ocfs2_super *osb)
83273932f   Srinivas Eeda   ocfs2: timer to q...
1856
1857
1858
1859
1860
  {
  	struct ocfs2_orphan_scan *os;
  
  	os = &osb->osb_orphan_scan;
  	os->os_osb = osb;
15633a220   Srinivas Eeda   ocfs2 patch to tr...
1861
  	os->os_count = 0;
3211949f8   Sunil Mushran   ocfs2: Do not ini...
1862
  	os->os_seqno = 0;
83273932f   Srinivas Eeda   ocfs2: timer to q...
1863
  	mutex_init(&os->os_lock);
df152c241   Sunil Mushran   ocfs2: Disable or...
1864
  	INIT_DELAYED_WORK(&os->os_orphan_scan_work, ocfs2_orphan_scan_work);
8b712cd58   Jeff Mahoney   ocfs2: Fixup orph...
1865
  }
83273932f   Srinivas Eeda   ocfs2: timer to q...
1866

8b712cd58   Jeff Mahoney   ocfs2: Fixup orph...
1867
1868
1869
1870
1871
1872
  void ocfs2_orphan_scan_start(struct ocfs2_super *osb)
  {
  	struct ocfs2_orphan_scan *os;
  
  	os = &osb->osb_orphan_scan;
  	os->os_scantime = CURRENT_TIME;
df152c241   Sunil Mushran   ocfs2: Disable or...
1873
1874
1875
1876
  	if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb))
  		atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
  	else {
  		atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE);
40f165f41   Tao Ma   ocfs2: Move orpha...
1877
1878
  		queue_delayed_work(ocfs2_wq, &os->os_orphan_scan_work,
  				   ocfs2_orphan_scan_timeout());
df152c241   Sunil Mushran   ocfs2: Disable or...
1879
  	}
83273932f   Srinivas Eeda   ocfs2: timer to q...
1880
  }
5eae5b96f   Mark Fasheh   ocfs2: Remove ope...
1881
  struct ocfs2_orphan_filldir_priv {
3704412bd   Al Viro   [readdir] convert...
1882
  	struct dir_context	ctx;
5eae5b96f   Mark Fasheh   ocfs2: Remove ope...
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
  	struct inode		*head;
  	struct ocfs2_super	*osb;
  };
  
  /*
   * Directory actor used while walking an orphan directory.
   *
   * Skips "." and "..", looks up the inode for every other entry with
   * ocfs2_iget() (OCFS2_FI_FLAG_ORPHAN_RECOVERY) and pushes it onto the
   * front of the list rooted at priv->head via ip_next_orphan. The
   * reference returned by ocfs2_iget() stays with the list. Always
   * returns 0, so an inode that cannot be read does not stop the walk.
   */
  static int ocfs2_orphan_filldir(void *priv, const char *name, int name_len,
  				loff_t pos, u64 ino, unsigned type)
  {
  	struct ocfs2_orphan_filldir_priv *p = priv;
  	struct inode *iter;

  	if (name_len == 1 && !strncmp(".", name, 1))
  		return 0;
  	if (name_len == 2 && !strncmp("..", name, 2))
  		return 0;

  	/* Skip bad inodes so that recovery can continue */
  	iter = ocfs2_iget(p->osb, ino,
  			  OCFS2_FI_FLAG_ORPHAN_RECOVERY, 0);
  	if (IS_ERR(iter))
  		return 0;

  	trace_ocfs2_orphan_filldir((unsigned long long)OCFS2_I(iter)->ip_blkno);
  	/* No locking is required for the next_orphan queue as there
  	 * is only ever a single process doing orphan recovery. */
  	OCFS2_I(iter)->ip_next_orphan = p->head;
  	p->head = iter;

  	return 0;
  }
b4df6ed8d   Mark Fasheh   [PATCH] ocfs2: fi...
1911
1912
1913
  static int ocfs2_queue_orphans(struct ocfs2_super *osb,
  			       int slot,
  			       struct inode **head)
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1914
  {
b4df6ed8d   Mark Fasheh   [PATCH] ocfs2: fi...
1915
  	int status;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1916
  	struct inode *orphan_dir_inode = NULL;
3704412bd   Al Viro   [readdir] convert...
1917
1918
1919
1920
1921
  	struct ocfs2_orphan_filldir_priv priv = {
  		.ctx.actor = ocfs2_orphan_filldir,
  		.osb = osb,
  		.head = *head
  	};
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1922
1923
1924
1925
1926
1927
1928
  
  	orphan_dir_inode = ocfs2_get_system_file_inode(osb,
  						       ORPHAN_DIR_SYSTEM_INODE,
  						       slot);
  	if  (!orphan_dir_inode) {
  		status = -ENOENT;
  		mlog_errno(status);
b4df6ed8d   Mark Fasheh   [PATCH] ocfs2: fi...
1929
  		return status;
2bd632165   Sunil Mushran   ocfs2/trivial: Re...
1930
  	}
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1931

1b1dcc1b5   Jes Sorensen   [PATCH] mutex sub...
1932
  	mutex_lock(&orphan_dir_inode->i_mutex);
e63aecb65   Mark Fasheh   ocfs2: Rename ocf...
1933
  	status = ocfs2_inode_lock(orphan_dir_inode, NULL, 0);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1934
  	if (status < 0) {
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1935
1936
1937
  		mlog_errno(status);
  		goto out;
  	}
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1938

3704412bd   Al Viro   [readdir] convert...
1939
  	status = ocfs2_dir_foreach(orphan_dir_inode, &priv.ctx);
5eae5b96f   Mark Fasheh   ocfs2: Remove ope...
1940
1941
  	if (status) {
  		mlog_errno(status);
a86370fbb   Mark Fasheh   ocfs2: fix exit-w...
1942
  		goto out_cluster;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1943
  	}
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1944

5eae5b96f   Mark Fasheh   ocfs2: Remove ope...
1945
  	*head = priv.head;
a86370fbb   Mark Fasheh   ocfs2: fix exit-w...
1946
  out_cluster:
e63aecb65   Mark Fasheh   ocfs2: Rename ocf...
1947
  	ocfs2_inode_unlock(orphan_dir_inode, 0);
b4df6ed8d   Mark Fasheh   [PATCH] ocfs2: fi...
1948
1949
  out:
  	mutex_unlock(&orphan_dir_inode->i_mutex);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1950
  	iput(orphan_dir_inode);
b4df6ed8d   Mark Fasheh   [PATCH] ocfs2: fi...
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
  	return status;
  }
  
  /*
   * Wait condition for orphan recovery: returns non-zero once
   * osb->osb_orphan_wipes[@slot] has dropped to zero. The counter is
   * sampled under osb->osb_lock.
   */
  static int ocfs2_orphan_recovery_can_continue(struct ocfs2_super *osb,
  					      int slot)
  {
  	int no_wipes_pending;

  	spin_lock(&osb->osb_lock);
  	no_wipes_pending = (osb->osb_orphan_wipes[slot] == 0);
  	spin_unlock(&osb->osb_lock);

  	return no_wipes_pending;
  }
  
  /*
   * Flag the orphan dir of @slot as being recovered and wait until every
   * orphan wipe already in flight on it has finished.
   */
  static void ocfs2_mark_recovering_orphan_dir(struct ocfs2_super *osb,
  					     int slot)
  {
  	spin_lock(&osb->osb_lock);

  	/* Setting the bit tells new processes entering delete_inode()
  	 * to back off early. */
  	ocfs2_node_map_set_bit(osb, &osb->osb_recovering_orphan_dirs, slot);

  	for (;;) {
  		if (!osb->osb_orphan_wipes[slot])
  			break;

  		/* Somebody is still in the middle of an orphan wipe on
  		 * this dir: drop the spinlock, sleep until the count
  		 * reaches zero, then re-check under the lock. */
  		spin_unlock(&osb->osb_lock);
  		wait_event_interruptible(osb->osb_wipe_event,
  					 ocfs2_orphan_recovery_can_continue(osb, slot));
  		spin_lock(&osb->osb_lock);
  	}

  	spin_unlock(&osb->osb_lock);
  }
  
  /*
   * Counterpart of ocfs2_mark_recovering_orphan_dir(): clear the bit for
   * @slot in osb->osb_recovering_orphan_dirs.
   */
  static void ocfs2_clear_recovering_orphan_dir(struct ocfs2_super *osb,
  					      int slot)
  {
  	ocfs2_node_map_clear_bit(osb, &osb->osb_recovering_orphan_dirs, slot);
  }
  
  /*
   * Orphan recovery. Each mounted node has it's own orphan dir which we
   * must run during recovery. Our strategy here is to build a list of
   * the inodes in the orphan dir and iget/iput them. The VFS does
   * (most) of the rest of the work.
   *
   * Orphan recovery can happen at any time, not just mount so we have a
   * couple of extra considerations.
   *
   * - We grab as many inodes as we can under the orphan dir lock -
   *   doing iget() outside the orphan dir risks getting a reference on
   *   an invalid inode.
   * - We must be sure not to deadlock with other processes on the
   *   system wanting to run delete_inode(). This can happen when they go
   *   to lock the orphan dir and the orphan recovery process attempts to
   *   iget() inside the orphan dir lock. This can be avoided by
   *   advertising our state to ocfs2_delete_inode().
   */
  static int ocfs2_recover_orphans(struct ocfs2_super *osb,
  				 int slot)
  {
  	int ret = 0;
  	struct inode *inode = NULL;
  	struct inode *iter;
  	struct ocfs2_inode_info *oi;
b41079504   Tao Ma   ocfs2: Remove mas...
2015
  	trace_ocfs2_recover_orphans(slot);
b4df6ed8d   Mark Fasheh   [PATCH] ocfs2: fi...
2016
2017
2018
2019
2020
2021
2022
2023
2024
  
  	ocfs2_mark_recovering_orphan_dir(osb, slot);
  	ret = ocfs2_queue_orphans(osb, slot, &inode);
  	ocfs2_clear_recovering_orphan_dir(osb, slot);
  
  	/* Error here should be noted, but we want to continue with as
  	 * many queued inodes as we've got. */
  	if (ret)
  		mlog_errno(ret);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2025
2026
2027
  
  	while (inode) {
  		oi = OCFS2_I(inode);
b41079504   Tao Ma   ocfs2: Remove mas...
2028
2029
  		trace_ocfs2_recover_orphans_iput(
  					(unsigned long long)oi->ip_blkno);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2030
2031
2032
2033
  
  		iter = oi->ip_next_orphan;
  
  		spin_lock(&oi->ip_lock);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2034
2035
2036
  		/* Set the proper information to get us going into
  		 * ocfs2_delete_inode. */
  		oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2037
2038
2039
2040
2041
2042
  		spin_unlock(&oi->ip_lock);
  
  		iput(inode);
  
  		inode = iter;
  	}
b4df6ed8d   Mark Fasheh   [PATCH] ocfs2: fi...
2043
  	return ret;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2044
  }
19ece546a   Jan Kara   ocfs2: Enable quo...
2045
  static int __ocfs2_wait_on_mount(struct ocfs2_super *osb, int quota)
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2046
2047
2048
2049
2050
  {
  	/* This check is good because ocfs2 will wait on our recovery
  	 * thread before changing it to something other than MOUNTED
  	 * or DISABLED. */
  	wait_event(osb->osb_mount_event,
19ece546a   Jan Kara   ocfs2: Enable quo...
2051
2052
  		  (!quota && atomic_read(&osb->vol_state) == VOLUME_MOUNTED) ||
  		   atomic_read(&osb->vol_state) == VOLUME_MOUNTED_QUOTAS ||
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2053
2054
2055
2056
2057
2058
  		   atomic_read(&osb->vol_state) == VOLUME_DISABLED);
  
  	/* If there's an error on mount, then we may never get to the
  	 * MOUNTED flag, but this is set right before
  	 * dismount_volume() so we can trust it. */
  	if (atomic_read(&osb->vol_state) == VOLUME_DISABLED) {
b41079504   Tao Ma   ocfs2: Remove mas...
2059
  		trace_ocfs2_wait_on_mount(VOLUME_DISABLED);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
  		mlog(0, "mount error, exiting!
  ");
  		return -EBUSY;
  	}
  
  	return 0;
  }
  
  /*
   * Thread function that repeatedly commits the journal cache.
   *
   * @arg: the struct ocfs2_super of the volume to service.
   *
   * Each pass sleeps on osb->checkpoint_event until there is at least
   * one transaction counted in j_num_trans or the thread has been asked
   * to stop, then calls ocfs2_commit_cache(). A commit failure is logged
   * and the loop carries on. The loop only ends once a stop has been
   * requested and j_num_trans is zero.
   *
   * Always returns 0.
   */
  static int ocfs2_commit_thread(void *arg)
  {
  	int status;
  	struct ocfs2_super *osb = arg;
  	struct ocfs2_journal *journal = osb->journal;

  	/* we can trust j_num_trans here because _should_stop() is only set in
  	 * shutdown and nobody other than ourselves should be able to start
  	 * transactions.  committing on shutdown might take a few iterations
  	 * as final transactions put deleted inodes on the list */
  	while (!(kthread_should_stop() &&
  		 atomic_read(&journal->j_num_trans) == 0)) {

  		wait_event_interruptible(osb->checkpoint_event,
  					 atomic_read(&journal->j_num_trans)
  					 || kthread_should_stop());

  		status = ocfs2_commit_cache(osb);
  		if (status < 0)
  			mlog_errno(status);

  		/* Stop requested but work remains: note it and go round
  		 * again rather than exiting with transactions pending. */
  		if (kthread_should_stop() && atomic_read(&journal->j_num_trans)){
  			mlog(ML_KTHREAD,
  			     "commit_thread: %u transactions pending on "
  			     "shutdown\n",
  			     atomic_read(&journal->j_num_trans));
  		}
  	}

  	return 0;
  }
  
  /* Reads all the journal inodes without taking any cluster locks. Used
   * for hard readonly access to determine whether any journal requires
   * recovery. Also used to refresh the recovery generation numbers after
   * a journal has been recovered by another node.
   */
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2104
2105
2106
2107
  int ocfs2_check_journals_nolocks(struct ocfs2_super *osb)
  {
  	int ret = 0;
  	unsigned int slot;
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
2108
  	struct buffer_head *di_bh = NULL;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2109
  	struct ocfs2_dinode *di;
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
2110
  	int journal_dirty = 0;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2111
2112
  
  	for(slot = 0; slot < osb->max_slots; slot++) {
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
2113
2114
  		ret = ocfs2_read_journal_inode(osb, slot, &di_bh, NULL);
  		if (ret) {
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2115
2116
2117
2118
2119
  			mlog_errno(ret);
  			goto out;
  		}
  
  		di = (struct ocfs2_dinode *) di_bh->b_data;
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
2120
2121
  		osb->slot_recovery_generations[slot] =
  					ocfs2_get_recovery_generation(di);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2122
2123
  		if (le32_to_cpu(di->id1.journal1.ij_flags) &
  		    OCFS2_JOURNAL_DIRTY_FL)
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
2124
  			journal_dirty = 1;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2125
2126
  
  		brelse(di_bh);
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
2127
  		di_bh = NULL;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2128
2129
2130
  	}
  
  out:
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
2131
2132
  	if (journal_dirty)
  		ret = -EROFS;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2133
2134
  	return ret;
  }