Blame view

fs/ocfs2/journal.c 61.7 KB
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
  /* -*- mode: c; c-basic-offset: 8; -*-
   * vim: noexpandtab sw=8 ts=8 sts=0:
   *
   * journal.c
   *
   * Defines functions of journalling api
   *
   * Copyright (C) 2003, 2004 Oracle.  All rights reserved.
   *
   * This program is free software; you can redistribute it and/or
   * modify it under the terms of the GNU General Public
   * License as published by the Free Software Foundation; either
   * version 2 of the License, or (at your option) any later version.
   *
   * This program is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   * General Public License for more details.
   *
   * You should have received a copy of the GNU General Public
   * License along with this program; if not, write to the
   * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   * Boston, MA 02111-1307, USA.
   */
  
  #include <linux/fs.h>
  #include <linux/types.h>
  #include <linux/slab.h>
  #include <linux/highmem.h>
  #include <linux/kthread.h>
83273932f   Srinivas Eeda   ocfs2: timer to q...
31
32
  #include <linux/time.h>
  #include <linux/random.h>
55b465b66   Joseph Qi   ocfs2: limit prin...
33
  #include <linux/delay.h>
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
34

ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
35
36
37
38
39
  #include <cluster/masklog.h>
  
  #include "ocfs2.h"
  
  #include "alloc.h"
50655ae9e   Joel Becker   ocfs2: Add journa...
40
  #include "blockcheck.h"
316f4b9f9   Mark Fasheh   ocfs2: Move direc...
41
  #include "dir.h"
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
42
43
44
45
46
47
  #include "dlmglue.h"
  #include "extent_map.h"
  #include "heartbeat.h"
  #include "inode.h"
  #include "journal.h"
  #include "localalloc.h"
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
48
49
  #include "slot_map.h"
  #include "super.h"
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
50
  #include "sysfile.h"
0cf2f7632   Joel Becker   ocfs2: Pass struc...
51
  #include "uptodate.h"
2205363dc   Jan Kara   ocfs2: Implement ...
52
  #include "quota.h"
ed460cffc   Joseph Qi   ocfs2: add orphan...
53
54
  #include "file.h"
  #include "namei.h"
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
55
56
  
  #include "buffer_head_io.h"
b41079504   Tao Ma   ocfs2: Remove mas...
57
  #include "ocfs2_trace.h"
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
58

34af946a2   Ingo Molnar   [PATCH] spin/rwlo...
59
  DEFINE_SPINLOCK(trans_inc_lock);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
60

83273932f   Srinivas Eeda   ocfs2: timer to q...
61
  #define ORPHAN_SCAN_SCHEDULE_TIMEOUT 300000
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
62
63
  static int ocfs2_force_read_journal(struct inode *inode);
  static int ocfs2_recover_node(struct ocfs2_super *osb,
2205363dc   Jan Kara   ocfs2: Implement ...
64
  			      int node_num, int slot_num);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
65
66
  static int __ocfs2_recovery_thread(void *arg);
  static int ocfs2_commit_cache(struct ocfs2_super *osb);
19ece546a   Jan Kara   ocfs2: Enable quo...
67
  static int __ocfs2_wait_on_mount(struct ocfs2_super *osb, int quota);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
68
  static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
69
  				      int dirty, int replayed);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
70
71
72
  static int ocfs2_trylock_journal(struct ocfs2_super *osb,
  				 int slot_num);
  static int ocfs2_recover_orphans(struct ocfs2_super *osb,
ed460cffc   Joseph Qi   ocfs2: add orphan...
73
74
  				 int slot,
  				 enum ocfs2_orphan_reco_type orphan_reco_type);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
75
  static int ocfs2_commit_thread(void *arg);
9140db04e   Srinivas Eeda   ocfs2: recover or...
76
77
78
79
  static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
  					    int slot_num,
  					    struct ocfs2_dinode *la_dinode,
  					    struct ocfs2_dinode *tl_dinode,
ed460cffc   Joseph Qi   ocfs2: add orphan...
80
81
  					    struct ocfs2_quota_recovery *qrec,
  					    enum ocfs2_orphan_reco_type orphan_reco_type);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
82

19ece546a   Jan Kara   ocfs2: Enable quo...
83
84
85
86
87
88
89
90
91
  /*
   * Convenience wrapper: calls __ocfs2_wait_on_mount() with the quota
   * flag cleared and hands back its result unchanged.
   */
  static inline int ocfs2_wait_on_mount(struct ocfs2_super *osb)
  {
  	const int quota = 0;

  	return __ocfs2_wait_on_mount(osb, quota);
  }
  
  /*
   * Convenience wrapper: calls __ocfs2_wait_on_mount() with the quota
   * flag set and hands back its result unchanged.
   */
  static inline int ocfs2_wait_on_quotas(struct ocfs2_super *osb)
  {
  	const int quota = 1;

  	return __ocfs2_wait_on_mount(osb, quota);
  }
9140db04e   Srinivas Eeda   ocfs2: recover or...
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
  /*
   * The replay map tracks online/offline slots, so that offline slots can
   * be recovered during recovery and mount.
   */

  enum ocfs2_replay_state {
  	REPLAY_UNNEEDED = 0,	/* Replay is not needed, so ignore this map */
  	REPLAY_NEEDED,		/* Replay slots marked in rm_replay_slots */
  	REPLAY_DONE		/* Replay was already queued */
  };

  /*
   * The map is allocated as a header followed by rm_slots one-byte flags.
   * A non-zero flag means the slot should have its replay queued.
   */
  struct ocfs2_replay_map {
  	unsigned int rm_slots;			/* number of flags in rm_replay_slots */
  	enum ocfs2_replay_state rm_state;
  	/* C99 flexible array member (replaces the GNU zero-length array) */
  	unsigned char rm_replay_slots[];
  };
b519ea6d9   Joseph Qi   ocfs2: mark local...
108
  static void ocfs2_replay_map_set_state(struct ocfs2_super *osb, int state)
9140db04e   Srinivas Eeda   ocfs2: recover or...
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
  {
  	if (!osb->replay_map)
  		return;
  
  	/* If we've already queued the replay, we don't have any more to do */
  	if (osb->replay_map->rm_state == REPLAY_DONE)
  		return;
  
  	osb->replay_map->rm_state = state;
  }
  
  /*
   * Allocate osb->replay_map and flag every slot for which
   * ocfs2_slot_to_node_num_locked() reports -ENOENT (an offline slot).
   * The new map starts out in the REPLAY_UNNEEDED state.
   *
   * Returns 0 on success, including when a map already exists, or
   * -ENOMEM when the allocation fails.
   */
  int ocfs2_compute_replay_slots(struct ocfs2_super *osb)
  {
  	struct ocfs2_replay_map *map;
  	int slot, node_num;

  	/* A map that was computed earlier is left untouched. */
  	if (osb->replay_map)
  		return 0;

  	/* Header plus one zeroed flag byte per slot. */
  	map = kzalloc(sizeof(*map) + (osb->max_slots * sizeof(char)),
  		      GFP_KERNEL);
  	if (!map) {
  		mlog_errno(-ENOMEM);
  		return -ENOMEM;
  	}

  	spin_lock(&osb->osb_lock);

  	map->rm_slots = osb->max_slots;
  	map->rm_state = REPLAY_UNNEEDED;

  	/* set rm_replay_slots for offline slot(s) */
  	for (slot = 0; slot < map->rm_slots; slot++) {
  		if (ocfs2_slot_to_node_num_locked(osb, slot, &node_num) != -ENOENT)
  			continue;
  		map->rm_replay_slots[slot] = 1;
  	}

  	osb->replay_map = map;
  	spin_unlock(&osb->osb_lock);

  	return 0;
  }
b519ea6d9   Joseph Qi   ocfs2: mark local...
152
  static void ocfs2_queue_replay_slots(struct ocfs2_super *osb,
ed460cffc   Joseph Qi   ocfs2: add orphan...
153
  		enum ocfs2_orphan_reco_type orphan_reco_type)
9140db04e   Srinivas Eeda   ocfs2: recover or...
154
155
156
157
158
159
160
161
162
163
164
165
166
  {
  	struct ocfs2_replay_map *replay_map = osb->replay_map;
  	int i;
  
  	if (!replay_map)
  		return;
  
  	if (replay_map->rm_state != REPLAY_NEEDED)
  		return;
  
  	for (i = 0; i < replay_map->rm_slots; i++)
  		if (replay_map->rm_replay_slots[i])
  			ocfs2_queue_recovery_completion(osb->journal, i, NULL,
ed460cffc   Joseph Qi   ocfs2: add orphan...
167
168
  							NULL, NULL,
  							orphan_reco_type);
9140db04e   Srinivas Eeda   ocfs2: recover or...
169
170
  	replay_map->rm_state = REPLAY_DONE;
  }
b519ea6d9   Joseph Qi   ocfs2: mark local...
171
  static void ocfs2_free_replay_slots(struct ocfs2_super *osb)
9140db04e   Srinivas Eeda   ocfs2: recover or...
172
173
174
175
176
177
178
179
180
  {
  	struct ocfs2_replay_map *replay_map = osb->replay_map;
  
  	if (!osb->replay_map)
  		return;
  
  	kfree(replay_map);
  	osb->replay_map = NULL;
  }
553abd046   Joel Becker   ocfs2: Change the...
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
  /*
   * Initialise the recovery bookkeeping on the osb: the recovery lock,
   * the wait queue, the thread pointer and the recovery map.
   *
   * Returns 0 on success or -ENOMEM if the map cannot be allocated.
   */
  int ocfs2_recovery_init(struct ocfs2_super *osb)
  {
  	struct ocfs2_recovery_map *map;

  	mutex_init(&osb->recovery_lock);
  	osb->disable_recovery = 0;
  	osb->recovery_thread_task = NULL;
  	init_waitqueue_head(&osb->recovery_event);

  	/* One allocation: the header followed by max_slots entries. */
  	map = kzalloc(sizeof(*map) + osb->max_slots * sizeof(unsigned int),
  		      GFP_KERNEL);
  	if (!map) {
  		mlog_errno(-ENOMEM);
  		return -ENOMEM;
  	}

  	/* The entry array starts right behind the header. */
  	map->rm_entries = (unsigned int *)(map + 1);
  	osb->recovery_map = map;

  	return 0;
  }
  
  /*
   * Report whether a recovery thread is currently recorded on the osb.
   *
   * This is used as the wait_event() condition in ocfs2_recovery_exit().
   * We can't grab the lock from inside wait_event(), so a memory barrier
   * is issued before recovery_thread_task is sampled, to make sure that
   * we'll see the NULL task before being woken up.
   *
   * Returns non-zero while osb->recovery_thread_task is non-NULL, 0
   * otherwise.
   */
  static int ocfs2_recovery_thread_running(struct ocfs2_super *osb)
  {
  	mb();
  	return osb->recovery_thread_task != NULL;
  }
  
  /*
   * Shut recovery down: forbid new recovery threads, wait for a running
   * one to finish, flush the ocfs2 workqueue and free the recovery map.
   */
  void ocfs2_recovery_exit(struct ocfs2_super *osb)
  {
  	/* disable any new recovery threads and wait for any currently
  	 * running ones to exit. Do this before setting the vol_state. */
  	mutex_lock(&osb->recovery_lock);
  	osb->disable_recovery = 1;
  	mutex_unlock(&osb->recovery_lock);
  	wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb));

  	/* At this point, we know that no more recovery threads can be
  	 * launched, so wait for any recovery completion work to
  	 * complete. */
  	flush_workqueue(osb->ocfs2_wq);

  	/*
  	 * Now that recovery is shut down, and the osb is about to be
  	 * freed,  the osb_lock is not taken here.
  	 */
  	/* XXX: Should we bug if there are dirty entries? */
  	kfree(osb->recovery_map);
  }
  
  /*
   * Return 1 if node_num is present in the recovery map, 0 otherwise.
   * The caller must hold osb->osb_lock (asserted below).
   */
  static int __ocfs2_recovery_map_test(struct ocfs2_super *osb,
  				     unsigned int node_num)
  {
  	struct ocfs2_recovery_map *map = osb->recovery_map;
  	int idx = 0;

  	assert_spin_locked(&osb->osb_lock);

  	while (idx < map->rm_used) {
  		if (map->rm_entries[idx] == node_num)
  			return 1;
  		idx++;
  	}

  	return 0;
  }
  
  /*
   * Test-and-set on the recovery map, performed under osb_lock.
   *
   * Returns 1 if node_num was already in the map (nothing is changed),
   * or 0 after appending it.
   */
  static int ocfs2_recovery_map_set(struct ocfs2_super *osb,
  				  unsigned int node_num)
  {
  	struct ocfs2_recovery_map *map = osb->recovery_map;
  	int already_set;

  	spin_lock(&osb->osb_lock);

  	already_set = __ocfs2_recovery_map_test(osb, node_num);
  	if (!already_set) {
  		/* XXX: Can this be exploited? Not from o2dlm... */
  		BUG_ON(map->rm_used >= osb->max_slots);

  		map->rm_entries[map->rm_used++] = node_num;
  	}

  	spin_unlock(&osb->osb_lock);

  	return already_set;
  }
  
  /*
   * Remove the first entry equal to node_num from the recovery map, if
   * there is one, closing the gap it leaves.  Runs under osb_lock.
   */
  static void ocfs2_recovery_map_clear(struct ocfs2_super *osb,
  				     unsigned int node_num)
  {
  	struct ocfs2_recovery_map *map = osb->recovery_map;
  	int idx;

  	spin_lock(&osb->osb_lock);

  	for (idx = 0; idx < map->rm_used; idx++) {
  		if (map->rm_entries[idx] != node_num)
  			continue;

  		/* XXX: be careful with the pointer math */
  		/* Slide the entries after idx down by one position. */
  		memmove(&map->rm_entries[idx], &map->rm_entries[idx + 1],
  			(map->rm_used - idx - 1) * sizeof(unsigned int));
  		map->rm_used--;
  		break;
  	}

  	spin_unlock(&osb->osb_lock);
  }
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
300
301
302
303
  static int ocfs2_commit_cache(struct ocfs2_super *osb)
  {
  	int status = 0;
  	unsigned int flushed;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
304
  	struct ocfs2_journal *journal = NULL;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
305
306
307
308
  	journal = osb->journal;
  
  	/* Flush all pending commits and checkpoint the journal. */
  	down_write(&journal->j_trans_barrier);
b41079504   Tao Ma   ocfs2: Remove mas...
309
310
311
  	flushed = atomic_read(&journal->j_num_trans);
  	trace_ocfs2_commit_cache_begin(flushed);
  	if (flushed == 0) {
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
312
  		up_write(&journal->j_trans_barrier);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
313
314
  		goto finally;
  	}
2b4e30fbd   Joel Becker   ocfs2: Switch ove...
315
316
317
  	jbd2_journal_lock_updates(journal->j_journal);
  	status = jbd2_journal_flush(journal->j_journal);
  	jbd2_journal_unlock_updates(journal->j_journal);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
318
319
320
321
322
  	if (status < 0) {
  		up_write(&journal->j_trans_barrier);
  		mlog_errno(status);
  		goto finally;
  	}
f9c57ada3   Tao Ma   ocfs2: Remove unu...
323
  	ocfs2_inc_trans_id(journal);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
324
325
326
327
  
  	flushed = atomic_read(&journal->j_num_trans);
  	atomic_set(&journal->j_num_trans, 0);
  	up_write(&journal->j_trans_barrier);
b41079504   Tao Ma   ocfs2: Remove mas...
328
  	trace_ocfs2_commit_cache_end(journal->j_trans_id, flushed);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
329

34d024f84   Mark Fasheh   ocfs2: Remove mou...
330
  	ocfs2_wake_downconvert_thread(osb);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
331
332
  	wake_up(&journal->j_checkpointed);
  finally:
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
333
334
  	return status;
  }
1fabe1481   Mark Fasheh   ocfs2: Remove str...
335
  handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs)
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
336
  {
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
337
  	journal_t *journal = osb->journal->j_journal;
1fabe1481   Mark Fasheh   ocfs2: Remove str...
338
  	handle_t *handle;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
339

ebdec83ba   Eric Sesterhenn / snakebyte   [PATCH] BUG_ON() ...
340
  	BUG_ON(!osb || !osb->journal->j_journal);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
341

65eff9ccf   Mark Fasheh   ocfs2: remove han...
342
343
  	if (ocfs2_is_hard_readonly(osb))
  		return ERR_PTR(-EROFS);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
344
345
346
  
  	BUG_ON(osb->journal->j_state == OCFS2_JOURNAL_FREE);
  	BUG_ON(max_buffs <= 0);
90e86a63e   Jan Kara   ocfs2: Support ne...
347
348
349
  	/* Nested transaction? Just return the handle... */
  	if (journal_current_handle())
  		return jbd2_journal_start(journal, max_buffs);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
350

fef6925cd   Jan Kara   ocfs2: Convert to...
351
  	sb_start_intwrite(osb->sb);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
352
  	down_read(&osb->journal->j_trans_barrier);
2b4e30fbd   Joel Becker   ocfs2: Switch ove...
353
  	handle = jbd2_journal_start(journal, max_buffs);
1fabe1481   Mark Fasheh   ocfs2: Remove str...
354
  	if (IS_ERR(handle)) {
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
355
  		up_read(&osb->journal->j_trans_barrier);
fef6925cd   Jan Kara   ocfs2: Convert to...
356
  		sb_end_intwrite(osb->sb);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
357

1fabe1481   Mark Fasheh   ocfs2: Remove str...
358
  		mlog_errno(PTR_ERR(handle));
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
359
360
  
  		if (is_journal_aborted(journal)) {
7ecef14ab   Joe Perches   ocfs2: neaten do_...
361
362
  			ocfs2_abort(osb->sb, "Detected aborted journal
  ");
1fabe1481   Mark Fasheh   ocfs2: Remove str...
363
  			handle = ERR_PTR(-EROFS);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
364
  		}
c271c5c22   Sunil Mushran   ocfs2: local mounts
365
366
367
368
  	} else {
  		if (!ocfs2_mount_local(osb))
  			atomic_inc(&(osb->journal->j_num_trans));
  	}
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
369

ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
370
  	return handle;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
371
  }
1fabe1481   Mark Fasheh   ocfs2: Remove str...
372
373
  /*
   * Stop a handle obtained from ocfs2_start_trans().
   *
   * For a non-nested handle (h_ref was not above 1) the read side of
   * j_trans_barrier and the sb_start_intwrite() reference are released
   * as well.
   *
   * Returns the result of jbd2_journal_stop(); negative values are logged.
   */
  int ocfs2_commit_trans(struct ocfs2_super *osb,
  		       handle_t *handle)
  {
  	struct ocfs2_journal *journal = osb->journal;
  	int status, is_nested;

  	BUG_ON(!handle);

  	/* h_ref is sampled before jbd2_journal_stop() is called on the handle. */
  	is_nested = handle->h_ref > 1;

  	status = jbd2_journal_stop(handle);
  	if (status < 0)
  		mlog_errno(status);

  	if (is_nested)
  		return status;

  	up_read(&journal->j_trans_barrier);
  	sb_end_intwrite(osb->sb);

  	return status;
  }
  
  /*
c901fb007   Tao Ma   ocfs2: Make ocfs2...
393
   * 'nblocks' is what you want to add to the current transaction.
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
394
   *
2b4e30fbd   Joel Becker   ocfs2: Switch ove...
395
   * This might call jbd2_journal_restart() which will commit dirty buffers
e8aed3450   Mark Fasheh   ocfs2: Re-journal...
396
397
398
399
400
   * and then restart the transaction. Before calling
   * ocfs2_extend_trans(), any changed blocks should have been
   * dirtied. After calling it, all blocks which need to be changed must
   * go through another set of journal_access/journal_dirty calls.
   *
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
401
402
403
404
405
406
407
408
   * WARNING: This will not release any semaphores or disk locks taken
   * during the transaction, so make sure they were taken *before*
   * start_trans or we'll have ordering deadlocks.
   *
   * WARNING2: Note that we do *not* drop j_trans_barrier here. This is
   * good because transaction ids haven't yet been recorded on the
   * cluster locks associated with this handle.
   */
1fc581467   Mark Fasheh   ocfs2: have ocfs2...
409
  /*
   * Add nblocks credits to a running handle.
   *
   * A request for zero blocks returns immediately.  Otherwise the handle
   * is extended with jbd2_journal_extend(); when that reports a positive
   * value the handle is restarted with the old credit count plus nblocks.
   * With CONFIG_OCFS2_DEBUG_FS set, the extend attempt is skipped and the
   * restart path is always taken.
   *
   * Returns 0 on success or the negative error from jbd2, which is logged.
   */
  int ocfs2_extend_trans(handle_t *handle, int nblocks)
  {
  	int status, old_nblocks;

  	BUG_ON(!handle);
  	BUG_ON(nblocks < 0);

  	if (nblocks == 0)
  		return 0;

  	old_nblocks = handle->h_buffer_credits;

  	trace_ocfs2_extend_trans(old_nblocks, nblocks);

  #ifdef CONFIG_OCFS2_DEBUG_FS
  	status = 1;
  #else
  	status = jbd2_journal_extend(handle, nblocks);
  	if (status < 0) {
  		mlog_errno(status);
  		return status;
  	}
  #endif

  	if (status > 0) {
  		int wanted = old_nblocks + nblocks;

  		trace_ocfs2_extend_trans_restart(wanted);
  		status = jbd2_journal_restart(handle, wanted);
  		if (status < 0) {
  			mlog_errno(status);
  			return status;
  		}
  	}

  	return 0;
  }
2b1e55c38   Younger Liu   ocfs2: lighten up...
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
  /*
   * Try to extend the handle by OCFS2_MAX_TRANS_DATA credits, restarting
   * the transaction when the extend request reports a positive value.
   * After a restart the caller has to regain write access for the buffer
   * head which is used for metadata modifications.
   * Taken from Ext4: extend_or_restart_transaction()
   *
   * As written, the function returns 0 without doing anything when the
   * handle currently holds fewer than thresh credits; the extend/restart
   * is only attempted when it holds thresh or more.
   *
   * NOTE(review): this guard is the reverse of what the name and the
   * ext4 function it was derived from suggest.  Callers are not visible
   * here and may depend on the current behaviour - confirm before
   * changing it.
   *
   * Returns 0 on success or the negative jbd2 error, which is logged.
   */
  int ocfs2_allocate_extend_trans(handle_t *handle, int thresh)
  {
  	int status, old_nblks;

  	BUG_ON(!handle);

  	old_nblks = handle->h_buffer_credits;
  	trace_ocfs2_allocate_extend_trans(old_nblks, thresh);

  	if (old_nblks < thresh)
  		return 0;

  	status = jbd2_journal_extend(handle, OCFS2_MAX_TRANS_DATA);
  	if (status > 0)
  		status = jbd2_journal_restart(handle, OCFS2_MAX_TRANS_DATA);

  	if (status < 0)
  		mlog_errno(status);

  	return status;
  }
50655ae9e   Joel Becker   ocfs2: Add journa...
480
481
482
483
484
485
486
487
488
  /*
   * A jbd2 trigger set bundled with the byte offset, inside the block
   * being journalled, at which the block-check data is stored.  The
   * embedded ot_triggers member is what gets handed to jbd2;
   * to_ocfs2_trigger() maps it back to this wrapper.
   */
  struct ocfs2_triggers {
  	struct jbd2_buffer_trigger_type	ot_triggers;
  	int				ot_offset;
  };
  
  /* Map an embedded ot_triggers member back to its struct ocfs2_triggers. */
  static inline struct ocfs2_triggers *to_ocfs2_trigger(struct jbd2_buffer_trigger_type *triggers)
  {
  	struct ocfs2_triggers *ot;

  	ot = container_of(triggers, struct ocfs2_triggers, ot_triggers);

  	return ot;
  }
13ceef099   Jan Kara   jbd2/ocfs2: Fix b...
489
  /*
   * t_frozen callback for block types whose check structure sits at a
   * fixed offset (ot_offset) from the start of the block.
   */
  static void ocfs2_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
  				 struct buffer_head *bh,
  				 void *data, size_t size)
  {
  	int check_offset = to_ocfs2_trigger(triggers)->ot_offset;

  	/*
  	 * We aren't guaranteed to have the superblock here, so we
  	 * must unconditionally compute the ecc data.
  	 * __ocfs2_journal_access() will only set the triggers if
  	 * metaecc is enabled.
  	 */
  	ocfs2_block_check_compute(data, size, data + check_offset);
  }
  
  /*
   * Quota blocks have their own trigger because the struct ocfs2_block_check
   * offset depends on the blocksize.
   */
13ceef099   Jan Kara   jbd2/ocfs2: Fix b...
508
  /*
   * t_frozen callback for quota blocks: the check structure lives in the
   * quota trailer, which is located from the block size.
   */
  static void ocfs2_dq_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
  				 struct buffer_head *bh,
  				 void *data, size_t size)
  {
  	struct ocfs2_disk_dqtrailer *trailer;

  	trailer = ocfs2_block_dqtrailer(size, data);

  	/*
  	 * We aren't guaranteed to have the superblock here, so we
  	 * must unconditionally compute the ecc data.
  	 * __ocfs2_journal_access() will only set the triggers if
  	 * metaecc is enabled.
  	 */
  	ocfs2_block_check_compute(data, size, &trailer->dq_check);
  }
c175a518b   Joel Becker   ocfs2: Checksum a...
523
524
525
526
  /*
   * Directory blocks also have their own trigger because the
   * struct ocfs2_block_check offset depends on the blocksize.
   */
13ceef099   Jan Kara   jbd2/ocfs2: Fix b...
527
  /*
   * t_frozen callback for directory blocks: the check structure lives in
   * the directory block trailer, which is located from the block size.
   */
  static void ocfs2_db_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
  				 struct buffer_head *bh,
  				 void *data, size_t size)
  {
  	struct ocfs2_dir_block_trailer *db_trailer;

  	db_trailer = ocfs2_dir_trailer_from_size(size, data);

  	/*
  	 * We aren't guaranteed to have the superblock here, so we
  	 * must unconditionally compute the ecc data.
  	 * __ocfs2_journal_access() will only set the triggers if
  	 * metaecc is enabled.
  	 */
  	ocfs2_block_check_compute(data, size, &db_trailer->db_check);
  }
50655ae9e   Joel Becker   ocfs2: Add journa...
542
543
544
545
546
547
548
549
550
  static void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers,
  				struct buffer_head *bh)
  {
  	mlog(ML_ERROR,
  	     "ocfs2_abort_trigger called by JBD2.  bh = 0x%lx, "
  	     "bh->b_blocknr = %llu
  ",
  	     (unsigned long)bh,
  	     (unsigned long long)bh->b_blocknr);
74e364ad1   Xue jiufei   ocfs2: fix NULL p...
551
  	ocfs2_error(bh->b_bdev->bd_super,
50655ae9e   Joel Becker   ocfs2: Add journa...
552
553
554
555
556
557
  		    "JBD2 has aborted our journal, ocfs2 cannot continue
  ");
  }
  
  static struct ocfs2_triggers di_triggers = {
  	.ot_triggers = {
13ceef099   Jan Kara   jbd2/ocfs2: Fix b...
558
  		.t_frozen = ocfs2_frozen_trigger,
50655ae9e   Joel Becker   ocfs2: Add journa...
559
560
561
562
563
564
565
  		.t_abort = ocfs2_abort_trigger,
  	},
  	.ot_offset	= offsetof(struct ocfs2_dinode, i_check),
  };
  
  static struct ocfs2_triggers eb_triggers = {
  	.ot_triggers = {
13ceef099   Jan Kara   jbd2/ocfs2: Fix b...
566
  		.t_frozen = ocfs2_frozen_trigger,
50655ae9e   Joel Becker   ocfs2: Add journa...
567
568
569
570
  		.t_abort = ocfs2_abort_trigger,
  	},
  	.ot_offset	= offsetof(struct ocfs2_extent_block, h_check),
  };
93c97087a   Tao Ma   ocfs2: Add metaec...
571
572
  static struct ocfs2_triggers rb_triggers = {
  	.ot_triggers = {
13ceef099   Jan Kara   jbd2/ocfs2: Fix b...
573
  		.t_frozen = ocfs2_frozen_trigger,
93c97087a   Tao Ma   ocfs2: Add metaec...
574
575
576
577
  		.t_abort = ocfs2_abort_trigger,
  	},
  	.ot_offset	= offsetof(struct ocfs2_refcount_block, rf_check),
  };
50655ae9e   Joel Becker   ocfs2: Add journa...
578
579
  static struct ocfs2_triggers gd_triggers = {
  	.ot_triggers = {
13ceef099   Jan Kara   jbd2/ocfs2: Fix b...
580
  		.t_frozen = ocfs2_frozen_trigger,
50655ae9e   Joel Becker   ocfs2: Add journa...
581
582
583
584
  		.t_abort = ocfs2_abort_trigger,
  	},
  	.ot_offset	= offsetof(struct ocfs2_group_desc, bg_check),
  };
c175a518b   Joel Becker   ocfs2: Checksum a...
585
586
  static struct ocfs2_triggers db_triggers = {
  	.ot_triggers = {
13ceef099   Jan Kara   jbd2/ocfs2: Fix b...
587
  		.t_frozen = ocfs2_db_frozen_trigger,
c175a518b   Joel Becker   ocfs2: Checksum a...
588
589
590
  		.t_abort = ocfs2_abort_trigger,
  	},
  };
50655ae9e   Joel Becker   ocfs2: Add journa...
591
592
  static struct ocfs2_triggers xb_triggers = {
  	.ot_triggers = {
13ceef099   Jan Kara   jbd2/ocfs2: Fix b...
593
  		.t_frozen = ocfs2_frozen_trigger,
50655ae9e   Joel Becker   ocfs2: Add journa...
594
595
596
597
598
599
600
  		.t_abort = ocfs2_abort_trigger,
  	},
  	.ot_offset	= offsetof(struct ocfs2_xattr_block, xb_check),
  };
  
  static struct ocfs2_triggers dq_triggers = {
  	.ot_triggers = {
13ceef099   Jan Kara   jbd2/ocfs2: Fix b...
601
  		.t_frozen = ocfs2_dq_frozen_trigger,
50655ae9e   Joel Becker   ocfs2: Add journa...
602
603
604
  		.t_abort = ocfs2_abort_trigger,
  	},
  };
9b7895efa   Mark Fasheh   ocfs2: Add a name...
605
606
  static struct ocfs2_triggers dr_triggers = {
  	.ot_triggers = {
13ceef099   Jan Kara   jbd2/ocfs2: Fix b...
607
  		.t_frozen = ocfs2_frozen_trigger,
9b7895efa   Mark Fasheh   ocfs2: Add a name...
608
609
610
611
612
613
614
  		.t_abort = ocfs2_abort_trigger,
  	},
  	.ot_offset	= offsetof(struct ocfs2_dx_root_block, dr_check),
  };
  
  static struct ocfs2_triggers dl_triggers = {
  	.ot_triggers = {
13ceef099   Jan Kara   jbd2/ocfs2: Fix b...
615
  		.t_frozen = ocfs2_frozen_trigger,
9b7895efa   Mark Fasheh   ocfs2: Add a name...
616
617
618
619
  		.t_abort = ocfs2_abort_trigger,
  	},
  	.ot_offset	= offsetof(struct ocfs2_dx_leaf, dl_check),
  };
50655ae9e   Joel Becker   ocfs2: Add journa...
620
  /*
   * Common implementation behind the ocfs2_journal_access_*() wrappers.
   *
   * @handle:   running transaction handle (must not be NULL)
   * @ci:       metadata cache the buffer belongs to (must not be NULL)
   * @bh:       buffer about to be modified (must not be NULL)
   * @triggers: trigger set to attach to the buffer, or NULL for none
   * @type:     OCFS2_JOURNAL_ACCESS_CREATE, _WRITE or _UNDO
   *
   * Requests write access (CREATE/WRITE) or undo access (UNDO) from jbd2
   * while holding the metadata cache io lock.  When access is granted,
   * metaecc is enabled and @triggers is non-NULL, the triggers are set on
   * the buffer.
   *
   * Returns 0 on success, -EINVAL for an unknown @type, the jbd2 error
   * otherwise, or the result of ocfs2_error() when the buffer is not
   * uptodate and carries a previous write I/O error.
   */
  static int __ocfs2_journal_access(handle_t *handle,
  				  struct ocfs2_caching_info *ci,
  				  struct buffer_head *bh,
  				  struct ocfs2_triggers *triggers,
  				  int type)
  {
  	int status;
  	struct ocfs2_super *osb;

  	BUG_ON(!ci || !ci->ci_ops);
  	BUG_ON(!handle);
  	BUG_ON(!bh);

  	/* Look the superblock up only after ci has been validated above. */
  	osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci));

  	trace_ocfs2_journal_access(
  		(unsigned long long)ocfs2_metadata_cache_owner(ci),
  		(unsigned long long)bh->b_blocknr, type, bh->b_size);

  	/* we can safely remove this assertion after testing. */
  	if (!buffer_uptodate(bh)) {
  		mlog(ML_ERROR, "giving me a buffer that's not uptodate!\n");
  		mlog(ML_ERROR, "b_blocknr=%llu, b_state=0x%lx\n",
  		     (unsigned long long)bh->b_blocknr, bh->b_state);

  		lock_buffer(bh);
  		/*
  		 * A previous transaction with a couple of buffer heads fail
  		 * to checkpoint, so all the bhs are marked as BH_Write_EIO.
  		 * For current transaction, the bh is just among those error
  		 * bhs which previous transaction handle. We can't just clear
  		 * its BH_Write_EIO and reuse directly, since other bhs are
  		 * not written to disk yet and that will cause metadata
  		 * inconsistency. So we should set fs read-only to avoid
  		 * further damage.
  		 */
  		if (buffer_write_io_error(bh) && !buffer_uptodate(bh)) {
  			unlock_buffer(bh);
  			return ocfs2_error(osb->sb, "A previous attempt to "
  					"write this buffer head failed\n");
  		}
  		unlock_buffer(bh);
  	}

  	/* Set the current transaction information on the ci so
  	 * that the locking code knows whether it can drop its locks
  	 * on this ci or not. We're protected from the commit
  	 * thread updating the current transaction id until
  	 * ocfs2_commit_trans() because ocfs2_start_trans() took
  	 * j_trans_barrier for us. */
  	ocfs2_set_ci_lock_trans(osb->journal, ci);

  	ocfs2_metadata_cache_io_lock(ci);
  	switch (type) {
  	case OCFS2_JOURNAL_ACCESS_CREATE:
  	case OCFS2_JOURNAL_ACCESS_WRITE:
  		status = jbd2_journal_get_write_access(handle, bh);
  		break;

  	case OCFS2_JOURNAL_ACCESS_UNDO:
  		status = jbd2_journal_get_undo_access(handle, bh);
  		break;

  	default:
  		status = -EINVAL;
  		mlog(ML_ERROR, "Unknown access type!\n");
  	}
  	if (!status && ocfs2_meta_ecc(osb) && triggers)
  		jbd2_journal_set_triggers(bh, &triggers->ot_triggers);
  	ocfs2_metadata_cache_io_unlock(ci);

  	if (status < 0)
  		mlog(ML_ERROR, "Error %d getting %d access to buffer!\n",
  		     status, type);

  	return status;
  }
0cf2f7632   Joel Becker   ocfs2: Pass struc...
699
700
  /*
   * Journal-access wrapper that uses the di_triggers trigger set.
   *
   * Forwards handle, ci, bh and type unchanged to __ocfs2_journal_access()
   * and returns its status.
   */
  int ocfs2_journal_access_di(handle_t *handle, struct ocfs2_caching_info *ci,
  			    struct buffer_head *bh, int type)
  {
  	return __ocfs2_journal_access(handle, ci, bh,
  				      &di_triggers, type);
  }
0cf2f7632   Joel Becker   ocfs2: Pass struc...
704
  /*
   * Journal-access wrapper that uses the eb_triggers trigger set.
   *
   * Forwards handle, ci, bh and type unchanged to __ocfs2_journal_access()
   * and returns its status.
   */
  int ocfs2_journal_access_eb(handle_t *handle, struct ocfs2_caching_info *ci,
  			    struct buffer_head *bh, int type)
  {
  	return __ocfs2_journal_access(handle, ci, bh,
  				      &eb_triggers, type);
  }
93c97087a   Tao Ma   ocfs2: Add metaec...
709
710
711
712
713
714
  /*
   * Journal-access wrapper that uses the rb_triggers trigger set.
   *
   * Forwards handle, ci, bh and type unchanged to __ocfs2_journal_access()
   * and returns its status.
   */
  int ocfs2_journal_access_rb(handle_t *handle, struct ocfs2_caching_info *ci,
  			    struct buffer_head *bh, int type)
  {
  	return __ocfs2_journal_access(handle, ci, bh, &rb_triggers, type);
  }
0cf2f7632   Joel Becker   ocfs2: Pass struc...
715
  /*
   * Journal-access wrapper that uses the gd_triggers trigger set.
   *
   * Forwards handle, ci, bh and type unchanged to __ocfs2_journal_access()
   * and returns its status.
   */
  int ocfs2_journal_access_gd(handle_t *handle, struct ocfs2_caching_info *ci,
  			    struct buffer_head *bh, int type)
  {
  	return __ocfs2_journal_access(handle, ci, bh,
  				      &gd_triggers, type);
  }
0cf2f7632   Joel Becker   ocfs2: Pass struc...
720
  /*
   * Journal-access wrapper that uses the db_triggers trigger set.
   *
   * Forwards handle, ci, bh and type unchanged to __ocfs2_journal_access()
   * and returns its status.
   */
  int ocfs2_journal_access_db(handle_t *handle, struct ocfs2_caching_info *ci,
  			    struct buffer_head *bh, int type)
  {
  	return __ocfs2_journal_access(handle, ci, bh,
  				      &db_triggers, type);
  }
0cf2f7632   Joel Becker   ocfs2: Pass struc...
725
  /*
   * Journal-access wrapper that uses the xb_triggers trigger set.
   *
   * Forwards handle, ci, bh and type unchanged to __ocfs2_journal_access()
   * and returns its status.
   */
  int ocfs2_journal_access_xb(handle_t *handle, struct ocfs2_caching_info *ci,
  			    struct buffer_head *bh, int type)
  {
  	return __ocfs2_journal_access(handle, ci, bh,
  				      &xb_triggers, type);
  }
0cf2f7632   Joel Becker   ocfs2: Pass struc...
730
  /*
   * Journal-access wrapper that uses the dq_triggers trigger set.
   *
   * Forwards handle, ci, bh and type unchanged to __ocfs2_journal_access()
   * and returns its status.
   */
  int ocfs2_journal_access_dq(handle_t *handle, struct ocfs2_caching_info *ci,
  			    struct buffer_head *bh, int type)
  {
  	return __ocfs2_journal_access(handle, ci, bh,
  				      &dq_triggers, type);
  }
0cf2f7632   Joel Becker   ocfs2: Pass struc...
735
  /*
   * Journal-access wrapper that uses the dr_triggers trigger set.
   *
   * Forwards handle, ci, bh and type unchanged to __ocfs2_journal_access()
   * and returns its status.
   */
  int ocfs2_journal_access_dr(handle_t *handle, struct ocfs2_caching_info *ci,
  			    struct buffer_head *bh, int type)
  {
  	return __ocfs2_journal_access(handle, ci, bh,
  				      &dr_triggers, type);
  }
0cf2f7632   Joel Becker   ocfs2: Pass struc...
740
  /*
   * Journal-access wrapper that uses the dl_triggers trigger set.
   *
   * Forwards handle, ci, bh and type unchanged to __ocfs2_journal_access()
   * and returns its status.
   */
  int ocfs2_journal_access_dl(handle_t *handle, struct ocfs2_caching_info *ci,
  			    struct buffer_head *bh, int type)
  {
  	return __ocfs2_journal_access(handle, ci, bh,
  				      &dl_triggers, type);
  }
0cf2f7632   Joel Becker   ocfs2: Pass struc...
745
  /*
   * Journal-access wrapper that installs no triggers.
   *
   * Passes a NULL trigger set to __ocfs2_journal_access(); handle, ci, bh
   * and type are forwarded unchanged and the callee's status is returned.
   */
  int ocfs2_journal_access(handle_t *handle, struct ocfs2_caching_info *ci,
  			 struct buffer_head *bh, int type)
  {
  	return __ocfs2_journal_access(handle, ci, bh,
  				      NULL, type);
  }
ec20cec7a   Joel Becker   ocfs2: Make ocfs2...
750
  /*
   * Mark bh dirty in the running transaction via
   * jbd2_journal_dirty_metadata().
   *
   * Nothing is returned.  If jbd2 reports an error it is logged; when the
   * handle has not been aborted yet, the error is recorded in
   * handle->h_err, the handle and its journal are aborted and
   * ocfs2_abort() is called on the super block backing bh.
   */
  void ocfs2_journal_dirty(handle_t *handle, struct buffer_head *bh)
  {
  	journal_t *jbd2_jnl;
  	struct super_block *sb;
  	int err;

  	trace_ocfs2_journal_dirty((unsigned long long)bh->b_blocknr);

  	err = jbd2_journal_dirty_metadata(handle, bh);
  	if (!err)
  		return;

  	mlog_errno(err);

  	/* Someone already aborted this handle: nothing more to tear down. */
  	if (is_handle_aborted(handle))
  		return;

  	jbd2_jnl = handle->h_transaction->t_journal;
  	sb = bh->b_bdev->bd_super;

  	mlog(ML_ERROR, "jbd2_journal_dirty_metadata failed. "
  			"Aborting transaction and journal.\n");
  	handle->h_err = err;
  	jbd2_journal_abort_handle(handle);
  	jbd2_journal_abort(jbd2_jnl, err);
  	ocfs2_abort(sb, "Journal already aborted.\n");
  }
2b4e30fbd   Joel Becker   ocfs2: Switch ove...
773
  #define OCFS2_DEFAULT_COMMIT_INTERVAL	(HZ * JBD2_DEFAULT_MAX_COMMIT_AGE)
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
774
775
776
777
  
  void ocfs2_set_journal_params(struct ocfs2_super *osb)
  {
  	journal_t *journal = osb->journal->j_journal;
d147b3d63   Mark Fasheh   ocfs2: Support co...
778
779
780
781
  	unsigned long commit_interval = OCFS2_DEFAULT_COMMIT_INTERVAL;
  
  	if (osb->osb_commit_interval)
  		commit_interval = osb->osb_commit_interval;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
782

a931da6ac   Theodore Ts'o   jbd2: Change j_st...
783
  	write_lock(&journal->j_state_lock);
d147b3d63   Mark Fasheh   ocfs2: Support co...
784
  	journal->j_commit_interval = commit_interval;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
785
  	if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER)
2b4e30fbd   Joel Becker   ocfs2: Switch ove...
786
  		journal->j_flags |= JBD2_BARRIER;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
787
  	else
2b4e30fbd   Joel Becker   ocfs2: Switch ove...
788
  		journal->j_flags &= ~JBD2_BARRIER;
a931da6ac   Theodore Ts'o   jbd2: Change j_st...
789
  	write_unlock(&journal->j_state_lock);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
790
791
792
793
794
795
796
797
798
799
  }
  
  int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty)
  {
  	int status = -1;
  	struct inode *inode = NULL; /* the journal inode */
  	journal_t *j_journal = NULL;
  	struct ocfs2_dinode *di = NULL;
  	struct buffer_head *bh = NULL;
  	struct ocfs2_super *osb;
e63aecb65   Mark Fasheh   ocfs2: Rename ocf...
800
  	int inode_lock = 0;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
801

ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
  	BUG_ON(!journal);
  
  	osb = journal->j_osb;
  
  	/* already have the inode for our journal */
  	inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
  					    osb->slot_num);
  	if (inode == NULL) {
  		status = -EACCES;
  		mlog_errno(status);
  		goto done;
  	}
  	if (is_bad_inode(inode)) {
  		mlog(ML_ERROR, "access error (bad inode)
  ");
  		iput(inode);
  		inode = NULL;
  		status = -EACCES;
  		goto done;
  	}
  
  	SET_INODE_JOURNAL(inode);
  	OCFS2_I(inode)->ip_open_count++;
6eff5790d   Mark Fasheh   [PATCH] ocfs2: do...
825
826
827
  	/* Skip recovery waits here - journal inode metadata never
  	 * changes in a live cluster so it can be considered an
  	 * exception to the rule. */
e63aecb65   Mark Fasheh   ocfs2: Rename ocf...
828
  	status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
829
830
831
832
833
834
  	if (status < 0) {
  		if (status != -ERESTARTSYS)
  			mlog(ML_ERROR, "Could not get lock on journal!
  ");
  		goto done;
  	}
e63aecb65   Mark Fasheh   ocfs2: Rename ocf...
835
  	inode_lock = 1;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
836
  	di = (struct ocfs2_dinode *)bh->b_data;
f17c20dd2   Junxiao Bi   ocfs2: use i_size...
837
  	if (i_size_read(inode) <  OCFS2_MIN_JOURNAL_SIZE) {
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
838
839
  		mlog(ML_ERROR, "Journal file size (%lld) is too small!
  ",
f17c20dd2   Junxiao Bi   ocfs2: use i_size...
840
  		     i_size_read(inode));
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
841
842
843
  		status = -EINVAL;
  		goto done;
  	}
f17c20dd2   Junxiao Bi   ocfs2: use i_size...
844
  	trace_ocfs2_journal_init(i_size_read(inode),
b41079504   Tao Ma   ocfs2: Remove mas...
845
846
  				 (unsigned long long)inode->i_blocks,
  				 OCFS2_I(inode)->ip_clusters);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
847
848
  
  	/* call the kernels journal init function now */
2b4e30fbd   Joel Becker   ocfs2: Switch ove...
849
  	j_journal = jbd2_journal_init_inode(inode);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
850
851
852
853
854
855
  	if (j_journal == NULL) {
  		mlog(ML_ERROR, "Linux journal layer error
  ");
  		status = -EINVAL;
  		goto done;
  	}
b41079504   Tao Ma   ocfs2: Remove mas...
856
  	trace_ocfs2_journal_init_maxlen(j_journal->j_maxlen);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
  
  	*dirty = (le32_to_cpu(di->id1.journal1.ij_flags) &
  		  OCFS2_JOURNAL_DIRTY_FL);
  
  	journal->j_journal = j_journal;
  	journal->j_inode = inode;
  	journal->j_bh = bh;
  
  	ocfs2_set_journal_params(osb);
  
  	journal->j_state = OCFS2_JOURNAL_LOADED;
  
  	status = 0;
  done:
  	if (status < 0) {
e63aecb65   Mark Fasheh   ocfs2: Rename ocf...
872
873
  		if (inode_lock)
  			ocfs2_inode_unlock(inode, 1);
a81cb88b6   Mark Fasheh   ocfs2: Don't chec...
874
  		brelse(bh);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
875
876
877
878
879
  		if (inode) {
  			OCFS2_I(inode)->ip_open_count--;
  			iput(inode);
  		}
  	}
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
880
881
  	return status;
  }
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
882
883
884
885
886
887
888
889
890
  /* Add one to the little-endian recovery generation stored in di. */
  static void ocfs2_bump_recovery_generation(struct ocfs2_dinode *di)
  {
  	le32_add_cpu(&di->id1.journal1.ij_recovery_generation, 1);
  }
  
  /* Return the recovery generation stored in di, in CPU byte order. */
  static u32 ocfs2_get_recovery_generation(struct ocfs2_dinode *di)
  {
  	u32 generation = le32_to_cpu(di->id1.journal1.ij_recovery_generation);

  	return generation;
  }
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
891
  static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
892
  				      int dirty, int replayed)
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
893
894
895
896
897
898
  {
  	int status;
  	unsigned int flags;
  	struct ocfs2_journal *journal = osb->journal;
  	struct buffer_head *bh = journal->j_bh;
  	struct ocfs2_dinode *fe;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
899
  	fe = (struct ocfs2_dinode *)bh->b_data;
10995aa24   Joel Becker   ocfs2: Morph the ...
900
901
902
903
904
  
  	/* The journal bh on the osb always comes from ocfs2_journal_init()
  	 * and was validated there inside ocfs2_inode_lock_full().  It's a
  	 * code bug if we mess it up. */
  	BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
905
906
907
908
909
910
911
  
  	flags = le32_to_cpu(fe->id1.journal1.ij_flags);
  	if (dirty)
  		flags |= OCFS2_JOURNAL_DIRTY_FL;
  	else
  		flags &= ~OCFS2_JOURNAL_DIRTY_FL;
  	fe->id1.journal1.ij_flags = cpu_to_le32(flags);
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
912
913
  	if (replayed)
  		ocfs2_bump_recovery_generation(fe);
13723d00e   Joel Becker   ocfs2: Use metada...
914
  	ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check);
8cb471e8f   Joel Becker   ocfs2: Take the i...
915
  	status = ocfs2_write_block(osb, bh, INODE_CACHE(journal->j_inode));
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
916
917
  	if (status < 0)
  		mlog_errno(status);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
918
919
920
921
922
923
924
925
926
927
928
929
930
  	return status;
  }
  
  /*
   * If the journal has been kmalloc'd it needs to be freed after this
   * call.
   */
  void ocfs2_journal_shutdown(struct ocfs2_super *osb)
  {
  	struct ocfs2_journal *journal = NULL;
  	int status = 0;
  	struct inode *inode = NULL;
  	int num_running_trans = 0;
ebdec83ba   Eric Sesterhenn / snakebyte   [PATCH] BUG_ON() ...
931
  	BUG_ON(!osb);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
932
933
934
935
936
937
938
939
940
  
  	journal = osb->journal;
  	if (!journal)
  		goto done;
  
  	inode = journal->j_inode;
  
  	if (journal->j_state != OCFS2_JOURNAL_LOADED)
  		goto done;
2b4e30fbd   Joel Becker   ocfs2: Switch ove...
941
  	/* need to inc inode use count - jbd2_journal_destroy will iput. */
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
942
943
944
945
  	if (!igrab(inode))
  		BUG();
  
  	num_running_trans = atomic_read(&(osb->journal->j_num_trans));
b41079504   Tao Ma   ocfs2: Remove mas...
946
  	trace_ocfs2_journal_shutdown(num_running_trans);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
947
948
949
950
951
952
953
954
955
956
957
958
  
  	/* Do a commit_cache here. It will flush our journal, *and*
  	 * release any locks that are still held.
  	 * set the SHUTDOWN flag and release the trans lock.
  	 * the commit thread will take the trans lock for us below. */
  	journal->j_state = OCFS2_JOURNAL_IN_SHUTDOWN;
  
  	/* The OCFS2_JOURNAL_IN_SHUTDOWN will signal to commit_cache to not
  	 * drop the trans_lock (which we want to hold until we
  	 * completely destroy the journal. */
  	if (osb->commit_task) {
  		/* Wait for the commit thread */
b41079504   Tao Ma   ocfs2: Remove mas...
959
  		trace_ocfs2_journal_shutdown_wait(osb->commit_task);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
960
961
962
963
964
  		kthread_stop(osb->commit_task);
  		osb->commit_task = NULL;
  	}
  
  	BUG_ON(atomic_read(&(osb->journal->j_num_trans)) != 0);
c271c5c22   Sunil Mushran   ocfs2: local mounts
965
  	if (ocfs2_mount_local(osb)) {
2b4e30fbd   Joel Becker   ocfs2: Switch ove...
966
967
968
  		jbd2_journal_lock_updates(journal->j_journal);
  		status = jbd2_journal_flush(journal->j_journal);
  		jbd2_journal_unlock_updates(journal->j_journal);
c271c5c22   Sunil Mushran   ocfs2: local mounts
969
970
971
972
973
974
975
976
977
  		if (status < 0)
  			mlog_errno(status);
  	}
  
  	if (status == 0) {
  		/*
  		 * Do not toggle if flush was unsuccessful otherwise
  		 * will leave dirty metadata in a "clean" journal
  		 */
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
978
  		status = ocfs2_journal_toggle_dirty(osb, 0, 0);
c271c5c22   Sunil Mushran   ocfs2: local mounts
979
980
981
  		if (status < 0)
  			mlog_errno(status);
  	}
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
982
983
  
  	/* Shutdown the kernel journal system */
2b4e30fbd   Joel Becker   ocfs2: Switch ove...
984
  	jbd2_journal_destroy(journal->j_journal);
ae0dff683   Sunil Mushran   ocfs2: Set journa...
985
  	journal->j_journal = NULL;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
986
987
988
989
  
  	OCFS2_I(inode)->ip_open_count--;
  
  	/* unlock our journal */
e63aecb65   Mark Fasheh   ocfs2: Rename ocf...
990
  	ocfs2_inode_unlock(inode, 1);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
991
992
993
994
995
996
997
998
  
  	brelse(journal->j_bh);
  	journal->j_bh = NULL;
  
  	journal->j_state = OCFS2_JOURNAL_FREE;
  
  //	up_write(&journal->j_trans_barrier);
  done:
72865d923   Joseph Qi   ocfs2: clean up r...
999
  	iput(inode);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1000
1001
1002
1003
1004
1005
1006
  }
  
  /*
   * Report and reset an error recorded in a jbd2 journal.
   *
   * If jbd2_journal_errno() returns non-zero, the error and slot are
   * logged together with a hint that the device named by sb->s_id needs
   * checking, and the error is acknowledged and cleared in the journal.
   * Does nothing when no error is recorded.
   */
  static void ocfs2_clear_journal_error(struct super_block *sb,
  				      journal_t *journal,
  				      int slot)
  {
  	int recorded = jbd2_journal_errno(journal);

  	if (!recorded)
  		return;

  	mlog(ML_ERROR, "File system error %d recorded in "
  	     "journal %u.\n", recorded, slot);
  	mlog(ML_ERROR, "File system on device %s needs checking.\n",
  	     sb->s_id);

  	jbd2_journal_ack_err(journal);
  	jbd2_journal_clear_err(journal);
  }
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1019
  int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed)
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1020
1021
1022
  {
  	int status = 0;
  	struct ocfs2_super *osb;
b1f3550fa   Julia Lawall   ocfs2: Use BUG_ON
1023
  	BUG_ON(!journal);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1024
1025
  
  	osb = journal->j_osb;
2b4e30fbd   Joel Becker   ocfs2: Switch ove...
1026
  	status = jbd2_journal_load(journal->j_journal);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1027
1028
1029
1030
1031
1032
1033
  	if (status < 0) {
  		mlog(ML_ERROR, "Failed to load journal!
  ");
  		goto done;
  	}
  
  	ocfs2_clear_journal_error(osb->sb, journal->j_journal, osb->slot_num);
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1034
  	status = ocfs2_journal_toggle_dirty(osb, 1, replayed);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1035
1036
1037
1038
1039
1040
  	if (status < 0) {
  		mlog_errno(status);
  		goto done;
  	}
  
  	/* Launch the commit thread */
c271c5c22   Sunil Mushran   ocfs2: local mounts
1041
1042
  	if (!local) {
  		osb->commit_task = kthread_run(ocfs2_commit_thread, osb,
5afc44e2e   Joseph Qi   ocfs2: add uuid t...
1043
  				"ocfs2cmt-%s", osb->uuid_str);
c271c5c22   Sunil Mushran   ocfs2: local mounts
1044
1045
1046
1047
1048
1049
1050
1051
  		if (IS_ERR(osb->commit_task)) {
  			status = PTR_ERR(osb->commit_task);
  			osb->commit_task = NULL;
  			mlog(ML_ERROR, "unable to launch ocfs2commit thread, "
  			     "error=%d", status);
  			goto done;
  		}
  	} else
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1052
  		osb->commit_task = NULL;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1053
1054
  
  done:
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1055
1056
1057
1058
1059
1060
1061
1062
1063
  	return status;
  }
  
  
  /* 'full' flag tells us whether we clear out all blocks or if we just
   * mark the journal clean */
  int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full)
  {
  	int status;
ebdec83ba   Eric Sesterhenn / snakebyte   [PATCH] BUG_ON() ...
1064
  	BUG_ON(!journal);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1065

2b4e30fbd   Joel Becker   ocfs2: Switch ove...
1066
  	status = jbd2_journal_wipe(journal->j_journal, full);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1067
1068
1069
1070
  	if (status < 0) {
  		mlog_errno(status);
  		goto bail;
  	}
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1071
  	status = ocfs2_journal_toggle_dirty(journal->j_osb, 0, 0);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1072
1073
1074
1075
  	if (status < 0)
  		mlog_errno(status);
  
  bail:
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1076
1077
  	return status;
  }
553abd046   Joel Becker   ocfs2: Change the...
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
  /*
   * Return 1 when the recovery map of osb has no entries in use
   * (rm_used == 0), 0 otherwise.  rm_used is sampled under osb_lock.
   */
  static int ocfs2_recovery_completed(struct ocfs2_super *osb)
  {
  	struct ocfs2_recovery_map *rm = osb->recovery_map;
  	int map_is_empty;

  	spin_lock(&osb->osb_lock);
  	map_is_empty = !rm->rm_used;
  	spin_unlock(&osb->osb_lock);

  	return map_is_empty;
  }
  
  /*
   * Sleep on osb->recovery_event until ocfs2_recovery_completed() reports
   * that the recovery map is empty.
   */
  void ocfs2_wait_for_recovery(struct ocfs2_super *osb)
  {
  	wait_event(osb->recovery_event,
  		   ocfs2_recovery_completed(osb));
  }
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
  /*
   * JBD Might read a cached version of another nodes journal file. We
   * don't want this as this file changes often and we get no
   * notification on those changes. The only way to be sure that we've
   * got the most up to date version of those blocks then is to force
   * read them off disk. Just searching through the buffer cache won't
   * work as there may be pages backing this file which are still marked
   * up to date. We know things can't change on this file underneath us
   * as we have the lock by now :)
   */
  static int ocfs2_force_read_journal(struct inode *inode)
  {
  	int status = 0;
4f902c377   Mark Fasheh   ocfs2: Fix extent...
1107
  	int i;
8110b073a   Mark Fasheh   ocfs2: Fix up i_b...
1108
  	u64 v_blkno, p_blkno, p_blocks, num_blocks;
0b492f68b   Junxiao Bi   ocfs2: improve re...
1109
1110
  	struct buffer_head *bh = NULL;
  	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1111

f17c20dd2   Junxiao Bi   ocfs2: use i_size...
1112
  	num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1113
  	v_blkno = 0;
8110b073a   Mark Fasheh   ocfs2: Fix up i_b...
1114
  	while (v_blkno < num_blocks) {
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1115
  		status = ocfs2_extent_map_get_blocks(inode, v_blkno,
49cb8d2d4   Mark Fasheh   ocfs2: Read from ...
1116
  						     &p_blkno, &p_blocks, NULL);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1117
1118
1119
1120
  		if (status < 0) {
  			mlog_errno(status);
  			goto bail;
  		}
0b492f68b   Junxiao Bi   ocfs2: improve re...
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
  		for (i = 0; i < p_blocks; i++, p_blkno++) {
  			bh = __find_get_block(osb->sb->s_bdev, p_blkno,
  					osb->sb->s_blocksize);
  			/* block not cached. */
  			if (!bh)
  				continue;
  
  			brelse(bh);
  			bh = NULL;
  			/* We are reading journal data which should not
  			 * be put in the uptodate cache.
  			 */
  			status = ocfs2_read_blocks_sync(osb, p_blkno, 1, &bh);
  			if (status < 0) {
  				mlog_errno(status);
  				goto bail;
  			}
  
  			brelse(bh);
  			bh = NULL;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1141
1142
1143
1144
1145
1146
  		}
  
  		v_blkno += p_blocks;
  	}
  
  bail:
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1147
1148
1149
1150
1151
1152
1153
1154
  	return status;
  }
  
  struct ocfs2_la_recovery_item {
  	struct list_head	lri_list;
  	int			lri_slot;
  	struct ocfs2_dinode	*lri_la_dinode;
  	struct ocfs2_dinode	*lri_tl_dinode;
2205363dc   Jan Kara   ocfs2: Implement ...
1155
  	struct ocfs2_quota_recovery *lri_qrec;
ed460cffc   Joseph Qi   ocfs2: add orphan...
1156
  	enum ocfs2_orphan_reco_type  lri_orphan_reco_type;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
  };
  
  /* Does the second half of the recovery process. By this point, the
   * node is marked clean and can actually be considered recovered,
   * hence it's no longer in the recovery map, but there's still some
   * cleanup we can do which shouldn't happen within the recovery thread
   * as locking in that context becomes very difficult if we are to take
   * recovering nodes into account.
   *
   * NOTE: This function can and will sleep on recovery of other nodes
   * during cluster locking, just like any other ocfs2 process.
   */
c4028958b   David Howells   WorkStruct: make ...
1169
  void ocfs2_complete_recovery(struct work_struct *work)
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1170
  {
b41079504   Tao Ma   ocfs2: Remove mas...
1171
  	int ret = 0;
c4028958b   David Howells   WorkStruct: make ...
1172
1173
1174
  	struct ocfs2_journal *journal =
  		container_of(work, struct ocfs2_journal, j_recovery_work);
  	struct ocfs2_super *osb = journal->j_osb;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1175
  	struct ocfs2_dinode *la_dinode, *tl_dinode;
800deef3f   Christoph Hellwig   [PATCH] ocfs2: us...
1176
  	struct ocfs2_la_recovery_item *item, *n;
2205363dc   Jan Kara   ocfs2: Implement ...
1177
  	struct ocfs2_quota_recovery *qrec;
ed460cffc   Joseph Qi   ocfs2: add orphan...
1178
  	enum ocfs2_orphan_reco_type orphan_reco_type;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1179
  	LIST_HEAD(tmp_la_list);
b41079504   Tao Ma   ocfs2: Remove mas...
1180
1181
  	trace_ocfs2_complete_recovery(
  		(unsigned long long)OCFS2_I(journal->j_inode)->ip_blkno);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1182
1183
1184
1185
  
  	spin_lock(&journal->j_lock);
  	list_splice_init(&journal->j_la_cleanups, &tmp_la_list);
  	spin_unlock(&journal->j_lock);
800deef3f   Christoph Hellwig   [PATCH] ocfs2: us...
1186
  	list_for_each_entry_safe(item, n, &tmp_la_list, lri_list) {
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1187
  		list_del_init(&item->lri_list);
19ece546a   Jan Kara   ocfs2: Enable quo...
1188
  		ocfs2_wait_on_quotas(osb);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1189
  		la_dinode = item->lri_la_dinode;
b41079504   Tao Ma   ocfs2: Remove mas...
1190
1191
  		tl_dinode = item->lri_tl_dinode;
  		qrec = item->lri_qrec;
ed460cffc   Joseph Qi   ocfs2: add orphan...
1192
  		orphan_reco_type = item->lri_orphan_reco_type;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1193

b41079504   Tao Ma   ocfs2: Remove mas...
1194
1195
1196
1197
1198
1199
  		trace_ocfs2_complete_recovery_slot(item->lri_slot,
  			la_dinode ? le64_to_cpu(la_dinode->i_blkno) : 0,
  			tl_dinode ? le64_to_cpu(tl_dinode->i_blkno) : 0,
  			qrec);
  
  		if (la_dinode) {
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1200
1201
1202
1203
1204
1205
1206
  			ret = ocfs2_complete_local_alloc_recovery(osb,
  								  la_dinode);
  			if (ret < 0)
  				mlog_errno(ret);
  
  			kfree(la_dinode);
  		}
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1207
  		if (tl_dinode) {
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1208
1209
1210
1211
1212
1213
1214
  			ret = ocfs2_complete_truncate_log_recovery(osb,
  								   tl_dinode);
  			if (ret < 0)
  				mlog_errno(ret);
  
  			kfree(tl_dinode);
  		}
ed460cffc   Joseph Qi   ocfs2: add orphan...
1215
1216
  		ret = ocfs2_recover_orphans(osb, item->lri_slot,
  				orphan_reco_type);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1217
1218
  		if (ret < 0)
  			mlog_errno(ret);
2205363dc   Jan Kara   ocfs2: Implement ...
1219
  		if (qrec) {
2205363dc   Jan Kara   ocfs2: Implement ...
1220
1221
1222
1223
1224
1225
  			ret = ocfs2_finish_quota_recovery(osb, qrec,
  							  item->lri_slot);
  			if (ret < 0)
  				mlog_errno(ret);
  			/* Recovery info is already freed now */
  		}
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1226
1227
  		kfree(item);
  	}
b41079504   Tao Ma   ocfs2: Remove mas...
1228
  	trace_ocfs2_complete_recovery_end(ret);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1229
1230
1231
1232
1233
1234
1235
1236
  }
  
  /* NOTE: This function always eats your references to la_dinode and
   * tl_dinode, either manually on error, or by passing them to
   * ocfs2_complete_recovery */
  static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
  					    int slot_num,
  					    struct ocfs2_dinode *la_dinode,
2205363dc   Jan Kara   ocfs2: Implement ...
1237
  					    struct ocfs2_dinode *tl_dinode,
ed460cffc   Joseph Qi   ocfs2: add orphan...
1238
1239
  					    struct ocfs2_quota_recovery *qrec,
  					    enum ocfs2_orphan_reco_type orphan_reco_type)
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1240
1241
  {
  	struct ocfs2_la_recovery_item *item;
afae00ab4   Sunil Mushran   ocfs2: fix gfp ma...
1242
  	item = kmalloc(sizeof(struct ocfs2_la_recovery_item), GFP_NOFS);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1243
1244
1245
1246
  	if (!item) {
  		/* Though we wish to avoid it, we are in fact safe in
  		 * skipping local alloc cleanup as fsck.ocfs2 is more
  		 * than capable of reclaiming unused space. */
d787ab097   Tim Gardner   ocfs2: remove kfr...
1247
1248
  		kfree(la_dinode);
  		kfree(tl_dinode);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1249

2205363dc   Jan Kara   ocfs2: Implement ...
1250
1251
  		if (qrec)
  			ocfs2_free_quota_recovery(qrec);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1252
1253
1254
1255
1256
1257
1258
1259
  		mlog_errno(-ENOMEM);
  		return;
  	}
  
  	INIT_LIST_HEAD(&item->lri_list);
  	item->lri_la_dinode = la_dinode;
  	item->lri_slot = slot_num;
  	item->lri_tl_dinode = tl_dinode;
2205363dc   Jan Kara   ocfs2: Implement ...
1260
  	item->lri_qrec = qrec;
ed460cffc   Joseph Qi   ocfs2: add orphan...
1261
  	item->lri_orphan_reco_type = orphan_reco_type;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1262
1263
1264
  
  	spin_lock(&journal->j_lock);
  	list_add_tail(&item->lri_list, &journal->j_la_cleanups);
35ddf78e4   jiangyiwen   ocfs2: fix occurr...
1265
  	queue_work(journal->j_osb->ocfs2_wq, &journal->j_recovery_work);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1266
1267
1268
1269
  	spin_unlock(&journal->j_lock);
  }
  
  /* Called by the mount code to queue recovery the last part of
9140db04e   Srinivas Eeda   ocfs2: recover or...
1270
   * recovery for it's own and offline slot(s). */
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1271
1272
1273
  void ocfs2_complete_mount_recovery(struct ocfs2_super *osb)
  {
  	struct ocfs2_journal *journal = osb->journal;
10b3dd761   Sunil Mushran   ocfs2: Skip mount...
1274
1275
  	if (ocfs2_is_hard_readonly(osb))
  		return;
9140db04e   Srinivas Eeda   ocfs2: recover or...
1276
1277
1278
  	/* No need to queue up our truncate_log as regular cleanup will catch
  	 * that */
  	ocfs2_queue_recovery_completion(journal, osb->slot_num,
ed460cffc   Joseph Qi   ocfs2: add orphan...
1279
1280
  					osb->local_alloc_copy, NULL, NULL,
  					ORPHAN_NEED_TRUNCATE);
9140db04e   Srinivas Eeda   ocfs2: recover or...
1281
  	ocfs2_schedule_truncate_log_flush(osb, 0);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1282

9140db04e   Srinivas Eeda   ocfs2: recover or...
1283
  	osb->local_alloc_copy = NULL;
9140db04e   Srinivas Eeda   ocfs2: recover or...
1284
1285
1286
  
  	/* queue to recover orphan slots for all offline slots */
  	ocfs2_replay_map_set_state(osb, REPLAY_NEEDED);
ed460cffc   Joseph Qi   ocfs2: add orphan...
1287
  	ocfs2_queue_replay_slots(osb, ORPHAN_NEED_TRUNCATE);
9140db04e   Srinivas Eeda   ocfs2: recover or...
1288
  	ocfs2_free_replay_slots(osb);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1289
  }
2205363dc   Jan Kara   ocfs2: Implement ...
1290
1291
1292
1293
1294
1295
1296
  void ocfs2_complete_quota_recovery(struct ocfs2_super *osb)
  {
  	if (osb->quota_rec) {
  		ocfs2_queue_recovery_completion(osb->journal,
  						osb->slot_num,
  						NULL,
  						NULL,
ed460cffc   Joseph Qi   ocfs2: add orphan...
1297
1298
  						osb->quota_rec,
  						ORPHAN_NEED_TRUNCATE);
2205363dc   Jan Kara   ocfs2: Implement ...
1299
1300
1301
  		osb->quota_rec = NULL;
  	}
  }
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1302
1303
  static int __ocfs2_recovery_thread(void *arg)
  {
2205363dc   Jan Kara   ocfs2: Implement ...
1304
  	int status, node_num, slot_num;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1305
  	struct ocfs2_super *osb = arg;
553abd046   Joel Becker   ocfs2: Change the...
1306
  	struct ocfs2_recovery_map *rm = osb->recovery_map;
2205363dc   Jan Kara   ocfs2: Implement ...
1307
1308
1309
  	int *rm_quota = NULL;
  	int rm_quota_used = 0, i;
  	struct ocfs2_quota_recovery *qrec;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1310

ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1311
1312
1313
1314
  	status = ocfs2_wait_on_mount(osb);
  	if (status < 0) {
  		goto bail;
  	}
2205363dc   Jan Kara   ocfs2: Implement ...
1315
1316
1317
1318
1319
  	rm_quota = kzalloc(osb->max_slots * sizeof(int), GFP_NOFS);
  	if (!rm_quota) {
  		status = -ENOMEM;
  		goto bail;
  	}
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1320
1321
1322
1323
1324
1325
  restart:
  	status = ocfs2_super_lock(osb, 1);
  	if (status < 0) {
  		mlog_errno(status);
  		goto bail;
  	}
9140db04e   Srinivas Eeda   ocfs2: recover or...
1326
1327
1328
1329
1330
1331
  	status = ocfs2_compute_replay_slots(osb);
  	if (status < 0)
  		mlog_errno(status);
  
  	/* queue recovery for our own slot */
  	ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL,
ed460cffc   Joseph Qi   ocfs2: add orphan...
1332
  					NULL, NULL, ORPHAN_NO_NEED_TRUNCATE);
9140db04e   Srinivas Eeda   ocfs2: recover or...
1333

553abd046   Joel Becker   ocfs2: Change the...
1334
1335
1336
1337
1338
1339
  	spin_lock(&osb->osb_lock);
  	while (rm->rm_used) {
  		/* It's always safe to remove entry zero, as we won't
  		 * clear it until ocfs2_recover_node() has succeeded. */
  		node_num = rm->rm_entries[0];
  		spin_unlock(&osb->osb_lock);
2205363dc   Jan Kara   ocfs2: Implement ...
1340
  		slot_num = ocfs2_node_num_to_slot(osb, node_num);
b41079504   Tao Ma   ocfs2: Remove mas...
1341
  		trace_ocfs2_recovery_thread_node(node_num, slot_num);
2205363dc   Jan Kara   ocfs2: Implement ...
1342
1343
  		if (slot_num == -ENOENT) {
  			status = 0;
2205363dc   Jan Kara   ocfs2: Implement ...
1344
1345
  			goto skip_recovery;
  		}
2205363dc   Jan Kara   ocfs2: Implement ...
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
  
  		/* It is a bit subtle with quota recovery. We cannot do it
  		 * immediately because we have to obtain cluster locks from
  		 * quota files and we also don't want to just skip it because
  		 * then quota usage would be out of sync until some node takes
  		 * the slot. So we remember which nodes need quota recovery
  		 * and when everything else is done, we recover quotas. */
  		for (i = 0; i < rm_quota_used && rm_quota[i] != slot_num; i++);
  		if (i == rm_quota_used)
  			rm_quota[rm_quota_used++] = slot_num;
  
  		status = ocfs2_recover_node(osb, node_num, slot_num);
  skip_recovery:
553abd046   Joel Becker   ocfs2: Change the...
1359
1360
1361
  		if (!status) {
  			ocfs2_recovery_map_clear(osb, node_num);
  		} else {
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1362
1363
1364
1365
1366
1367
1368
  			mlog(ML_ERROR,
  			     "Error %d recovering node %d on device (%u,%u)!
  ",
  			     status, node_num,
  			     MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
  			mlog(ML_ERROR, "Volume requires unmount.
  ");
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1369
  		}
553abd046   Joel Becker   ocfs2: Change the...
1370
  		spin_lock(&osb->osb_lock);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1371
  	}
553abd046   Joel Becker   ocfs2: Change the...
1372
  	spin_unlock(&osb->osb_lock);
b41079504   Tao Ma   ocfs2: Remove mas...
1373
  	trace_ocfs2_recovery_thread_end(status);
553abd046   Joel Becker   ocfs2: Change the...
1374

539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1375
1376
1377
1378
1379
  	/* Refresh all journal recovery generations from disk */
  	status = ocfs2_check_journals_nolocks(osb);
  	status = (status == -EROFS) ? 0 : status;
  	if (status < 0)
  		mlog_errno(status);
2205363dc   Jan Kara   ocfs2: Implement ...
1380
  	/* Now it is right time to recover quotas... We have to do this under
25985edce   Lucas De Marchi   Fix common misspe...
1381
  	 * superblock lock so that no one can start using the slot (and crash)
2205363dc   Jan Kara   ocfs2: Implement ...
1382
1383
1384
1385
1386
1387
1388
1389
1390
  	 * before we recover it */
  	for (i = 0; i < rm_quota_used; i++) {
  		qrec = ocfs2_begin_quota_recovery(osb, rm_quota[i]);
  		if (IS_ERR(qrec)) {
  			status = PTR_ERR(qrec);
  			mlog_errno(status);
  			continue;
  		}
  		ocfs2_queue_recovery_completion(osb->journal, rm_quota[i],
ed460cffc   Joseph Qi   ocfs2: add orphan...
1391
1392
  						NULL, NULL, qrec,
  						ORPHAN_NEED_TRUNCATE);
2205363dc   Jan Kara   ocfs2: Implement ...
1393
  	}
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1394
  	ocfs2_super_unlock(osb, 1);
9140db04e   Srinivas Eeda   ocfs2: recover or...
1395
  	/* queue recovery for offline slots */
ed460cffc   Joseph Qi   ocfs2: add orphan...
1396
  	ocfs2_queue_replay_slots(osb, ORPHAN_NEED_TRUNCATE);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1397
1398
  
  bail:
c74ec2f77   Arjan van de Ven   [PATCH] ocfs2: Se...
1399
  	mutex_lock(&osb->recovery_lock);
553abd046   Joel Becker   ocfs2: Change the...
1400
  	if (!status && !ocfs2_recovery_completed(osb)) {
c74ec2f77   Arjan van de Ven   [PATCH] ocfs2: Se...
1401
  		mutex_unlock(&osb->recovery_lock);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1402
1403
  		goto restart;
  	}
9140db04e   Srinivas Eeda   ocfs2: recover or...
1404
  	ocfs2_free_replay_slots(osb);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1405
1406
1407
  	osb->recovery_thread_task = NULL;
  	mb(); /* sync with ocfs2_recovery_thread_running */
  	wake_up(&osb->recovery_event);
c74ec2f77   Arjan van de Ven   [PATCH] ocfs2: Se...
1408
  	mutex_unlock(&osb->recovery_lock);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1409

d787ab097   Tim Gardner   ocfs2: remove kfr...
1410
  	kfree(rm_quota);
2205363dc   Jan Kara   ocfs2: Implement ...
1411

ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1412
1413
1414
1415
  	/* no one is callint kthread_stop() for us so the kthread() api
  	 * requires that we call do_exit().  And it isn't exported, but
  	 * complete_and_exit() seems to be a minimal wrapper around it. */
  	complete_and_exit(NULL, status);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1416
1417
1418
1419
  }
  
  void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num)
  {
c74ec2f77   Arjan van de Ven   [PATCH] ocfs2: Se...
1420
  	mutex_lock(&osb->recovery_lock);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1421

b41079504   Tao Ma   ocfs2: Remove mas...
1422
1423
1424
1425
  	trace_ocfs2_recovery_thread(node_num, osb->node_num,
  		osb->disable_recovery, osb->recovery_thread_task,
  		osb->disable_recovery ?
  		-1 : ocfs2_recovery_map_set(osb, node_num));
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1426

b41079504   Tao Ma   ocfs2: Remove mas...
1427
1428
  	if (osb->disable_recovery)
  		goto out;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1429
1430
1431
1432
1433
  
  	if (osb->recovery_thread_task)
  		goto out;
  
  	osb->recovery_thread_task =  kthread_run(__ocfs2_recovery_thread, osb,
5afc44e2e   Joseph Qi   ocfs2: add uuid t...
1434
  			"ocfs2rec-%s", osb->uuid_str);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1435
1436
1437
1438
1439
1440
  	if (IS_ERR(osb->recovery_thread_task)) {
  		mlog_errno((int)PTR_ERR(osb->recovery_thread_task));
  		osb->recovery_thread_task = NULL;
  	}
  
  out:
c74ec2f77   Arjan van de Ven   [PATCH] ocfs2: Se...
1441
  	mutex_unlock(&osb->recovery_lock);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1442
  	wake_up(&osb->recovery_event);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1443
  }
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
  static int ocfs2_read_journal_inode(struct ocfs2_super *osb,
  				    int slot_num,
  				    struct buffer_head **bh,
  				    struct inode **ret_inode)
  {
  	int status = -EACCES;
  	struct inode *inode = NULL;
  
  	BUG_ON(slot_num >= osb->max_slots);
  
  	inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
  					    slot_num);
  	if (!inode || is_bad_inode(inode)) {
  		mlog_errno(status);
  		goto bail;
  	}
  	SET_INODE_JOURNAL(inode);
b657c95c1   Joel Becker   ocfs2: Wrap inode...
1461
  	status = ocfs2_read_inode_block_full(inode, bh, OCFS2_BH_IGNORE_CACHE);
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
  	if (status < 0) {
  		mlog_errno(status);
  		goto bail;
  	}
  
  	status = 0;
  
  bail:
  	if (inode) {
  		if (status || !ret_inode)
  			iput(inode);
  		else
  			*ret_inode = inode;
  	}
  	return status;
  }
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
  /* Does the actual journal replay and marks the journal inode as
   * clean. Will only replay if the journal inode is marked dirty. */
  static int ocfs2_replay_journal(struct ocfs2_super *osb,
  				int node_num,
  				int slot_num)
  {
  	int status;
  	int got_lock = 0;
  	unsigned int flags;
  	struct inode *inode = NULL;
  	struct ocfs2_dinode *fe;
  	journal_t *journal = NULL;
  	struct buffer_head *bh = NULL;
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1491
  	u32 slot_reco_gen;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1492

539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1493
1494
  	status = ocfs2_read_journal_inode(osb, slot_num, &bh, &inode);
  	if (status) {
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1495
1496
1497
  		mlog_errno(status);
  		goto done;
  	}
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
  
  	fe = (struct ocfs2_dinode *)bh->b_data;
  	slot_reco_gen = ocfs2_get_recovery_generation(fe);
  	brelse(bh);
  	bh = NULL;
  
  	/*
  	 * As the fs recovery is asynchronous, there is a small chance that
  	 * another node mounted (and recovered) the slot before the recovery
  	 * thread could get the lock. To handle that, we dirty read the journal
  	 * inode for that slot to get the recovery generation. If it is
  	 * different than what we expected, the slot has been recovered.
  	 * If not, it needs recovery.
  	 */
  	if (osb->slot_recovery_generations[slot_num] != slot_reco_gen) {
b41079504   Tao Ma   ocfs2: Remove mas...
1513
  		trace_ocfs2_replay_journal_recovered(slot_num,
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1514
1515
1516
  		     osb->slot_recovery_generations[slot_num], slot_reco_gen);
  		osb->slot_recovery_generations[slot_num] = slot_reco_gen;
  		status = -EBUSY;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1517
1518
  		goto done;
  	}
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1519
1520
  
  	/* Continue with recovery as the journal has not yet been recovered */
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1521

e63aecb65   Mark Fasheh   ocfs2: Rename ocf...
1522
  	status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1523
  	if (status < 0) {
b41079504   Tao Ma   ocfs2: Remove mas...
1524
  		trace_ocfs2_replay_journal_lock_err(status);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
  		if (status != -ERESTARTSYS)
  			mlog(ML_ERROR, "Could not lock journal!
  ");
  		goto done;
  	}
  	got_lock = 1;
  
  	fe = (struct ocfs2_dinode *) bh->b_data;
  
  	flags = le32_to_cpu(fe->id1.journal1.ij_flags);
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1535
  	slot_reco_gen = ocfs2_get_recovery_generation(fe);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1536
1537
  
  	if (!(flags & OCFS2_JOURNAL_DIRTY_FL)) {
b41079504   Tao Ma   ocfs2: Remove mas...
1538
  		trace_ocfs2_replay_journal_skip(node_num);
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1539
1540
  		/* Refresh recovery generation for the slot */
  		osb->slot_recovery_generations[slot_num] = slot_reco_gen;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1541
1542
  		goto done;
  	}
9140db04e   Srinivas Eeda   ocfs2: recover or...
1543
1544
  	/* we need to run complete recovery for offline orphan slots */
  	ocfs2_replay_map_set_state(osb, REPLAY_NEEDED);
619c200de   Sunil Mushran   ocfs2: Clean up m...
1545
1546
1547
1548
  	printk(KERN_NOTICE "ocfs2: Begin replay journal (node %d, slot %d) on "\
  	       "device (%u,%u)
  ", node_num, slot_num, MAJOR(osb->sb->s_dev),
  	       MINOR(osb->sb->s_dev));
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1549
1550
1551
1552
1553
1554
1555
1556
  
  	OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
  
  	status = ocfs2_force_read_journal(inode);
  	if (status < 0) {
  		mlog_errno(status);
  		goto done;
  	}
2b4e30fbd   Joel Becker   ocfs2: Switch ove...
1557
  	journal = jbd2_journal_init_inode(inode);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1558
1559
1560
1561
1562
1563
  	if (journal == NULL) {
  		mlog(ML_ERROR, "Linux journal layer error
  ");
  		status = -EIO;
  		goto done;
  	}
2b4e30fbd   Joel Becker   ocfs2: Switch ove...
1564
  	status = jbd2_journal_load(journal);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1565
1566
1567
1568
  	if (status < 0) {
  		mlog_errno(status);
  		if (!igrab(inode))
  			BUG();
2b4e30fbd   Joel Becker   ocfs2: Switch ove...
1569
  		jbd2_journal_destroy(journal);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1570
1571
1572
1573
1574
1575
  		goto done;
  	}
  
  	ocfs2_clear_journal_error(osb->sb, journal, slot_num);
  
  	/* wipe the journal */
2b4e30fbd   Joel Becker   ocfs2: Switch ove...
1576
1577
1578
  	jbd2_journal_lock_updates(journal);
  	status = jbd2_journal_flush(journal);
  	jbd2_journal_unlock_updates(journal);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1579
1580
1581
1582
1583
1584
1585
  	if (status < 0)
  		mlog_errno(status);
  
  	/* This will mark the node clean */
  	flags = le32_to_cpu(fe->id1.journal1.ij_flags);
  	flags &= ~OCFS2_JOURNAL_DIRTY_FL;
  	fe->id1.journal1.ij_flags = cpu_to_le32(flags);
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1586
1587
1588
1589
  	/* Increment recovery generation to indicate successful recovery */
  	ocfs2_bump_recovery_generation(fe);
  	osb->slot_recovery_generations[slot_num] =
  					ocfs2_get_recovery_generation(fe);
13723d00e   Joel Becker   ocfs2: Use metada...
1590
  	ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check);
8cb471e8f   Joel Becker   ocfs2: Take the i...
1591
  	status = ocfs2_write_block(osb, bh, INODE_CACHE(inode));
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1592
1593
1594
1595
1596
  	if (status < 0)
  		mlog_errno(status);
  
  	if (!igrab(inode))
  		BUG();
2b4e30fbd   Joel Becker   ocfs2: Switch ove...
1597
  	jbd2_journal_destroy(journal);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1598

619c200de   Sunil Mushran   ocfs2: Clean up m...
1599
1600
1601
1602
  	printk(KERN_NOTICE "ocfs2: End replay journal (node %d, slot %d) on "\
  	       "device (%u,%u)
  ", node_num, slot_num, MAJOR(osb->sb->s_dev),
  	       MINOR(osb->sb->s_dev));
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1603
1604
1605
  done:
  	/* drop the lock on this nodes journal */
  	if (got_lock)
e63aecb65   Mark Fasheh   ocfs2: Rename ocf...
1606
  		ocfs2_inode_unlock(inode, 1);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1607

72865d923   Joseph Qi   ocfs2: clean up r...
1608
  	iput(inode);
a81cb88b6   Mark Fasheh   ocfs2: Don't chec...
1609
  	brelse(bh);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1610

ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
  	return status;
  }
  
  /*
   * Do the most important parts of node recovery:
   *  - Replay it's journal
   *  - Stamp a clean local allocator file
   *  - Stamp a clean truncate log
   *  - Mark the node clean
   *
   * If this function completes without error, a node in OCFS2 can be
   * said to have been safely recovered. As a result, failure during the
   * second part of a nodes recovery process (local alloc recovery) is
   * far less concerning.
   */
  static int ocfs2_recover_node(struct ocfs2_super *osb,
2205363dc   Jan Kara   ocfs2: Implement ...
1627
  			      int node_num, int slot_num)
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1628
1629
  {
  	int status = 0;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1630
1631
  	struct ocfs2_dinode *la_copy = NULL;
  	struct ocfs2_dinode *tl_copy = NULL;
b41079504   Tao Ma   ocfs2: Remove mas...
1632
  	trace_ocfs2_recover_node(node_num, slot_num, osb->node_num);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1633
1634
1635
  
  	/* Should not ever be called to recover ourselves -- in that
  	 * case we should've called ocfs2_journal_load instead. */
ebdec83ba   Eric Sesterhenn / snakebyte   [PATCH] BUG_ON() ...
1636
  	BUG_ON(osb->node_num == node_num);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1637

ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1638
1639
  	status = ocfs2_replay_journal(osb, node_num, slot_num);
  	if (status < 0) {
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1640
  		if (status == -EBUSY) {
b41079504   Tao Ma   ocfs2: Remove mas...
1641
  			trace_ocfs2_recover_node_skip(slot_num, node_num);
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1642
1643
1644
  			status = 0;
  			goto done;
  		}
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
  		mlog_errno(status);
  		goto done;
  	}
  
  	/* Stamp a clean local alloc file AFTER recovering the journal... */
  	status = ocfs2_begin_local_alloc_recovery(osb, slot_num, &la_copy);
  	if (status < 0) {
  		mlog_errno(status);
  		goto done;
  	}
  
  	/* An error from begin_truncate_log_recovery is not
  	 * serious enough to warrant halting the rest of
  	 * recovery. */
  	status = ocfs2_begin_truncate_log_recovery(osb, slot_num, &tl_copy);
  	if (status < 0)
  		mlog_errno(status);
  
  	/* Likewise, this would be a strange but ultimately not so
  	 * harmful place to get an error... */
8e8a4603b   Mark Fasheh   ocfs2: Move slot ...
1665
  	status = ocfs2_clear_slot(osb, slot_num);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1666
1667
1668
1669
1670
  	if (status < 0)
  		mlog_errno(status);
  
  	/* This will kfree the memory pointed to by la_copy and tl_copy */
  	ocfs2_queue_recovery_completion(osb->journal, slot_num, la_copy,
ed460cffc   Joseph Qi   ocfs2: add orphan...
1671
  					tl_copy, NULL, ORPHAN_NEED_TRUNCATE);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1672
1673
1674
  
  	status = 0;
  done:
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
  	return status;
  }
  
  /* Test node liveness by trylocking his journal. If we get the lock,
   * we drop it here. Return 0 if we got the lock, -EAGAIN if node is
   * still alive (we couldn't get the lock) and < 0 on error. */
  static int ocfs2_trylock_journal(struct ocfs2_super *osb,
  				 int slot_num)
  {
  	int status, flags;
  	struct inode *inode = NULL;
  
  	inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
  					    slot_num);
  	if (inode == NULL) {
  		mlog(ML_ERROR, "access error
  ");
  		status = -EACCES;
  		goto bail;
  	}
  	if (is_bad_inode(inode)) {
  		mlog(ML_ERROR, "access error (bad inode)
  ");
  		iput(inode);
  		inode = NULL;
  		status = -EACCES;
  		goto bail;
  	}
  	SET_INODE_JOURNAL(inode);
  
  	flags = OCFS2_META_LOCK_RECOVERY | OCFS2_META_LOCK_NOQUEUE;
e63aecb65   Mark Fasheh   ocfs2: Rename ocf...
1706
  	status = ocfs2_inode_lock_full(inode, NULL, 1, flags);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1707
1708
1709
1710
1711
  	if (status < 0) {
  		if (status != -EAGAIN)
  			mlog_errno(status);
  		goto bail;
  	}
e63aecb65   Mark Fasheh   ocfs2: Rename ocf...
1712
  	ocfs2_inode_unlock(inode, 1);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1713
  bail:
72865d923   Joseph Qi   ocfs2: clean up r...
1714
  	iput(inode);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1715
1716
1717
1718
1719
1720
1721
1722
  
  	return status;
  }
  
  /* Call this underneath ocfs2_super_lock. It also assumes that the
   * slot info struct has been updated from disk. */
  int ocfs2_mark_dead_nodes(struct ocfs2_super *osb)
  {
d85b20e4b   Joel Becker   ocfs2: Make ocfs2...
1723
1724
  	unsigned int node_num;
  	int status, i;
a1af7d15a   Mark Fasheh   ocfs2: Fix sleep-...
1725
  	u32 gen;
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1726
1727
  	struct buffer_head *bh = NULL;
  	struct ocfs2_dinode *di;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1728
1729
1730
  
  	/* This is called with the super block cluster lock, so we
  	 * know that the slot map can't change underneath us. */
d85b20e4b   Joel Becker   ocfs2: Make ocfs2...
1731
  	for (i = 0; i < osb->max_slots; i++) {
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1732
1733
1734
1735
1736
1737
1738
  		/* Read journal inode to get the recovery generation */
  		status = ocfs2_read_journal_inode(osb, i, &bh, NULL);
  		if (status) {
  			mlog_errno(status);
  			goto bail;
  		}
  		di = (struct ocfs2_dinode *)bh->b_data;
a1af7d15a   Mark Fasheh   ocfs2: Fix sleep-...
1739
  		gen = ocfs2_get_recovery_generation(di);
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1740
1741
  		brelse(bh);
  		bh = NULL;
a1af7d15a   Mark Fasheh   ocfs2: Fix sleep-...
1742
1743
  		spin_lock(&osb->osb_lock);
  		osb->slot_recovery_generations[i] = gen;
b41079504   Tao Ma   ocfs2: Remove mas...
1744
1745
  		trace_ocfs2_mark_dead_nodes(i,
  					    osb->slot_recovery_generations[i]);
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
1746

a1af7d15a   Mark Fasheh   ocfs2: Fix sleep-...
1747
1748
  		if (i == osb->slot_num) {
  			spin_unlock(&osb->osb_lock);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1749
  			continue;
a1af7d15a   Mark Fasheh   ocfs2: Fix sleep-...
1750
  		}
d85b20e4b   Joel Becker   ocfs2: Make ocfs2...
1751
1752
  
  		status = ocfs2_slot_to_node_num_locked(osb, i, &node_num);
a1af7d15a   Mark Fasheh   ocfs2: Fix sleep-...
1753
1754
  		if (status == -ENOENT) {
  			spin_unlock(&osb->osb_lock);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1755
  			continue;
a1af7d15a   Mark Fasheh   ocfs2: Fix sleep-...
1756
  		}
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1757

a1af7d15a   Mark Fasheh   ocfs2: Fix sleep-...
1758
1759
  		if (__ocfs2_recovery_map_test(osb, node_num)) {
  			spin_unlock(&osb->osb_lock);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1760
  			continue;
a1af7d15a   Mark Fasheh   ocfs2: Fix sleep-...
1761
  		}
d85b20e4b   Joel Becker   ocfs2: Make ocfs2...
1762
  		spin_unlock(&osb->osb_lock);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
  
  		/* Ok, we have a slot occupied by another node which
  		 * is not in the recovery map. We trylock his journal
  		 * file here to test if he's alive. */
  		status = ocfs2_trylock_journal(osb, i);
  		if (!status) {
  			/* Since we're called from mount, we know that
  			 * the recovery thread can't race us on
  			 * setting / checking the recovery bits. */
  			ocfs2_recovery_thread(osb, node_num);
  		} else if ((status < 0) && (status != -EAGAIN)) {
  			mlog_errno(status);
  			goto bail;
  		}
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1777
  	}
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1778
1779
1780
  
  	status = 0;
  bail:
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1781
1782
  	return status;
  }
83273932f   Srinivas Eeda   ocfs2: timer to q...
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
  /*
   * Delay, in jiffies, until the next orphan scan: the base period
   * ORPHAN_SCAN_SCHEDULE_TIMEOUT plus a random 0..4999, all converted with
   * msecs_to_jiffies(). The random part reduces the chance of multiple
   * nodes firing their timers at the same moment.
   */
  static inline unsigned long ocfs2_orphan_scan_timeout(void)
  {
  	unsigned long jitter;

  	get_random_bytes(&jitter, sizeof(jitter));
  	jitter %= 5000;

  	return msecs_to_jiffies(ORPHAN_SCAN_SCHEDULE_TIMEOUT + jitter);
  }
  
  /*
   * ocfs2_queue_orphan_scan calls ocfs2_queue_recovery_completion for
   * every slot, queuing a recovery of the slot on the ocfs2_wq thread. This
   * is done to catch any orphans that are left over in orphan directories.
   *
a035bff6b   Sunil Mushran   ocfs2: Add commen...
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
   * It scans all slots, even ones that are in use. It does so to handle the
   * case described below:
   *
   *   Node 1 has an inode it was using. The dentry went away due to memory
   *   pressure.  Node 1 closes the inode, but it's on the free list. The node
   *   has the open lock.
   *   Node 2 unlinks the inode. It grabs the dentry lock to notify others,
   *   but node 1 has no dentry and doesn't get the message. It trylocks the
   *   open lock, sees that another node has a PR, and does nothing.
   *   Later node 2 runs its orphan dir. It igets the inode, trylocks the
   *   open lock, sees the PR still, and does nothing.
   *   Basically, we have to trigger an orphan iput on node 1. The only way
   *   for this to happen is if node 1 runs node 2's orphan dir.
   *
83273932f   Srinivas Eeda   ocfs2: timer to q...
1816
1817
1818
1819
1820
1821
1822
1823
   * ocfs2_queue_orphan_scan gets called every ORPHAN_SCAN_SCHEDULE_TIMEOUT
   * seconds.  It gets an EX lock on os_lockres and checks sequence number
   * stored in LVB. If the sequence number has changed, it means some other
   * node has done the scan.  This node skips the scan and tracks the
   * sequence number.  If the sequence number didn't change, it means a scan
   * hasn't happened.  The node queues a scan and increments the
   * sequence number in the LVB.
   */
b519ea6d9   Joseph Qi   ocfs2: mark local...
1824
  static void ocfs2_queue_orphan_scan(struct ocfs2_super *osb)
83273932f   Srinivas Eeda   ocfs2: timer to q...
1825
1826
1827
1828
1829
1830
  {
  	struct ocfs2_orphan_scan *os;
  	int status, i;
  	u32 seqno = 0;
  
  	os = &osb->osb_orphan_scan;
692684e19   Sunil Mushran   ocfs2: Stop orpha...
1831
1832
  	if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
  		goto out;
b41079504   Tao Ma   ocfs2: Remove mas...
1833
1834
  	trace_ocfs2_queue_orphan_scan_begin(os->os_count, os->os_seqno,
  					    atomic_read(&os->os_state));
df152c241   Sunil Mushran   ocfs2: Disable or...
1835
  	status = ocfs2_orphan_scan_lock(osb, &seqno);
83273932f   Srinivas Eeda   ocfs2: timer to q...
1836
1837
1838
1839
1840
  	if (status < 0) {
  		if (status != -EAGAIN)
  			mlog_errno(status);
  		goto out;
  	}
692684e19   Sunil Mushran   ocfs2: Stop orpha...
1841
1842
1843
  	/* Do no queue the tasks if the volume is being umounted */
  	if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
  		goto unlock;
83273932f   Srinivas Eeda   ocfs2: timer to q...
1844
1845
1846
1847
1848
1849
1850
  	if (os->os_seqno != seqno) {
  		os->os_seqno = seqno;
  		goto unlock;
  	}
  
  	for (i = 0; i < osb->max_slots; i++)
  		ocfs2_queue_recovery_completion(osb->journal, i, NULL, NULL,
ed460cffc   Joseph Qi   ocfs2: add orphan...
1851
  						NULL, ORPHAN_NO_NEED_TRUNCATE);
83273932f   Srinivas Eeda   ocfs2: timer to q...
1852
1853
1854
1855
1856
  	/*
  	 * We queued a recovery on orphan slots, increment the sequence
  	 * number and update LVB so other node will skip the scan for a while
  	 */
  	seqno++;
15633a220   Srinivas Eeda   ocfs2 patch to tr...
1857
  	os->os_count++;
395627b07   Deepa Dinamani   ocfs2: use time64...
1858
  	os->os_scantime = ktime_get_seconds();
83273932f   Srinivas Eeda   ocfs2: timer to q...
1859
  unlock:
df152c241   Sunil Mushran   ocfs2: Disable or...
1860
  	ocfs2_orphan_scan_unlock(osb, seqno);
83273932f   Srinivas Eeda   ocfs2: timer to q...
1861
  out:
b41079504   Tao Ma   ocfs2: Remove mas...
1862
1863
  	trace_ocfs2_queue_orphan_scan_end(os->os_count, os->os_seqno,
  					  atomic_read(&os->os_state));
83273932f   Srinivas Eeda   ocfs2: timer to q...
1864
1865
1866
1867
  	return;
  }
  
  /* Worker task that gets fired every ORPHAN_SCAN_SCHEDULE_TIMEOUT millsec */
b519ea6d9   Joseph Qi   ocfs2: mark local...
1868
  static void ocfs2_orphan_scan_work(struct work_struct *work)
83273932f   Srinivas Eeda   ocfs2: timer to q...
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
  {
  	struct ocfs2_orphan_scan *os;
  	struct ocfs2_super *osb;
  
  	os = container_of(work, struct ocfs2_orphan_scan,
  			  os_orphan_scan_work.work);
  	osb = os->os_osb;
  
  	mutex_lock(&os->os_lock);
  	ocfs2_queue_orphan_scan(osb);
692684e19   Sunil Mushran   ocfs2: Stop orpha...
1879
  	if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE)
35ddf78e4   jiangyiwen   ocfs2: fix occurr...
1880
  		queue_delayed_work(osb->ocfs2_wq, &os->os_orphan_scan_work,
692684e19   Sunil Mushran   ocfs2: Stop orpha...
1881
  				      ocfs2_orphan_scan_timeout());
83273932f   Srinivas Eeda   ocfs2: timer to q...
1882
1883
1884
1885
1886
1887
1888
1889
  	mutex_unlock(&os->os_lock);
  }
  
  void ocfs2_orphan_scan_stop(struct ocfs2_super *osb)
  {
  	struct ocfs2_orphan_scan *os;
  
  	os = &osb->osb_orphan_scan;
df152c241   Sunil Mushran   ocfs2: Disable or...
1890
1891
1892
1893
1894
1895
  	if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE) {
  		atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
  		mutex_lock(&os->os_lock);
  		cancel_delayed_work(&os->os_orphan_scan_work);
  		mutex_unlock(&os->os_lock);
  	}
83273932f   Srinivas Eeda   ocfs2: timer to q...
1896
  }
df152c241   Sunil Mushran   ocfs2: Disable or...
1897
  void ocfs2_orphan_scan_init(struct ocfs2_super *osb)
83273932f   Srinivas Eeda   ocfs2: timer to q...
1898
1899
1900
1901
1902
  {
  	struct ocfs2_orphan_scan *os;
  
  	os = &osb->osb_orphan_scan;
  	os->os_osb = osb;
15633a220   Srinivas Eeda   ocfs2 patch to tr...
1903
  	os->os_count = 0;
3211949f8   Sunil Mushran   ocfs2: Do not ini...
1904
  	os->os_seqno = 0;
83273932f   Srinivas Eeda   ocfs2: timer to q...
1905
  	mutex_init(&os->os_lock);
df152c241   Sunil Mushran   ocfs2: Disable or...
1906
  	INIT_DELAYED_WORK(&os->os_orphan_scan_work, ocfs2_orphan_scan_work);
8b712cd58   Jeff Mahoney   ocfs2: Fixup orph...
1907
  }
83273932f   Srinivas Eeda   ocfs2: timer to q...
1908

8b712cd58   Jeff Mahoney   ocfs2: Fixup orph...
1909
1910
1911
1912
1913
  void ocfs2_orphan_scan_start(struct ocfs2_super *osb)
  {
  	struct ocfs2_orphan_scan *os;
  
  	os = &osb->osb_orphan_scan;
395627b07   Deepa Dinamani   ocfs2: use time64...
1914
  	os->os_scantime = ktime_get_seconds();
df152c241   Sunil Mushran   ocfs2: Disable or...
1915
1916
1917
1918
  	if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb))
  		atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
  	else {
  		atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE);
35ddf78e4   jiangyiwen   ocfs2: fix occurr...
1919
  		queue_delayed_work(osb->ocfs2_wq, &os->os_orphan_scan_work,
40f165f41   Tao Ma   ocfs2: Move orpha...
1920
  				   ocfs2_orphan_scan_timeout());
df152c241   Sunil Mushran   ocfs2: Disable or...
1921
  	}
83273932f   Srinivas Eeda   ocfs2: timer to q...
1922
  }
5eae5b96f   Mark Fasheh   ocfs2: Remove ope...
1923
  /*
   * Private state for ocfs2_orphan_filldir(), which recovers this structure
   * from the dir_context it is handed via container_of().
   */
  struct ocfs2_orphan_filldir_priv {
3704412bd   Al Viro   [readdir] convert...
1924
  	struct dir_context	ctx;	/* embedded context; .actor is the filldir callback */
5eae5b96f   Mark Fasheh   ocfs2: Remove ope...
1925
1926
  	struct inode		*head;	/* head of the ip_next_orphan list being built */
  	struct ocfs2_super	*osb;	/* passed to ocfs2_iget() for each entry */
30edc43c7   Joseph Qi   ocfs2: do not inc...
1927
  	enum ocfs2_orphan_reco_type orphan_reco_type;	/* ORPHAN_NO_NEED_TRUNCATE skips dio entries */
5eae5b96f   Mark Fasheh   ocfs2: Remove ope...
1928
  };
ac7576f4b   Miklos Szeredi   vfs: make first a...
1929
1930
1931
  /*
   * dir_context actor used while reading an orphan directory. Each entry
   * that qualifies is looked up with ocfs2_iget() and pushed onto the
   * ip_next_orphan list headed by priv->head.
   *
   * Always returns 0: "." and "..", dio entries during a
   * no-truncate scan, inodes that fail to load and inodes already on a
   * recovery list are skipped without reporting an error.
   */
  static int ocfs2_orphan_filldir(struct dir_context *ctx, const char *name,
  				int name_len, loff_t pos, u64 ino,
  				unsigned type)
  {
  	struct ocfs2_orphan_filldir_priv *priv =
  		container_of(ctx, struct ocfs2_orphan_filldir_priv, ctx);
  	struct inode *orphan;

  	/* Ignore the "." and ".." entries. */
  	if ((name_len == 1 && name[0] == '.') ||
  	    (name_len == 2 && name[0] == '.' && name[1] == '.'))
  		return 0;

  	/* do not include dio entry in case of orphan scan */
  	if (priv->orphan_reco_type == ORPHAN_NO_NEED_TRUNCATE &&
  	    strncmp(name, OCFS2_DIO_ORPHAN_PREFIX,
  		    OCFS2_DIO_ORPHAN_PREFIX_LEN) == 0)
  		return 0;

  	/* Skip bad inodes so that recovery can continue */
  	orphan = ocfs2_iget(priv->osb, ino,
  			    OCFS2_FI_FLAG_ORPHAN_RECOVERY, 0);
  	if (IS_ERR(orphan))
  		return 0;

  	/* Tag inodes that were reached through a dio orphan entry. */
  	if (strncmp(name, OCFS2_DIO_ORPHAN_PREFIX,
  		    OCFS2_DIO_ORPHAN_PREFIX_LEN) == 0)
  		OCFS2_I(orphan)->ip_flags |= OCFS2_INODE_DIO_ORPHAN_ENTRY;

  	/* Skip inodes which are already added to recover list, since dio may
  	 * happen concurrently with unlink/rename */
  	if (OCFS2_I(orphan)->ip_next_orphan) {
  		iput(orphan);
  		return 0;
  	}

  	trace_ocfs2_orphan_filldir(
  		(unsigned long long)OCFS2_I(orphan)->ip_blkno);

  	/* No locking is required for the next_orphan queue as there
  	 * is only ever a single process doing orphan recovery. */
  	OCFS2_I(orphan)->ip_next_orphan = priv->head;
  	priv->head = orphan;

  	return 0;
  }
b4df6ed8d   Mark Fasheh   [PATCH] ocfs2: fi...
1968
1969
  static int ocfs2_queue_orphans(struct ocfs2_super *osb,
  			       int slot,
30edc43c7   Joseph Qi   ocfs2: do not inc...
1970
1971
  			       struct inode **head,
  			       enum ocfs2_orphan_reco_type orphan_reco_type)
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1972
  {
b4df6ed8d   Mark Fasheh   [PATCH] ocfs2: fi...
1973
  	int status;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1974
  	struct inode *orphan_dir_inode = NULL;
3704412bd   Al Viro   [readdir] convert...
1975
1976
1977
  	struct ocfs2_orphan_filldir_priv priv = {
  		.ctx.actor = ocfs2_orphan_filldir,
  		.osb = osb,
30edc43c7   Joseph Qi   ocfs2: do not inc...
1978
1979
  		.head = *head,
  		.orphan_reco_type = orphan_reco_type
3704412bd   Al Viro   [readdir] convert...
1980
  	};
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1981
1982
1983
1984
1985
1986
1987
  
  	orphan_dir_inode = ocfs2_get_system_file_inode(osb,
  						       ORPHAN_DIR_SYSTEM_INODE,
  						       slot);
  	if  (!orphan_dir_inode) {
  		status = -ENOENT;
  		mlog_errno(status);
b4df6ed8d   Mark Fasheh   [PATCH] ocfs2: fi...
1988
  		return status;
2bd632165   Sunil Mushran   ocfs2/trivial: Re...
1989
  	}
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1990

5955102c9   Al Viro   wrappers for ->i_...
1991
  	inode_lock(orphan_dir_inode);
e63aecb65   Mark Fasheh   ocfs2: Rename ocf...
1992
  	status = ocfs2_inode_lock(orphan_dir_inode, NULL, 0);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1993
  	if (status < 0) {
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1994
1995
1996
  		mlog_errno(status);
  		goto out;
  	}
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1997

3704412bd   Al Viro   [readdir] convert...
1998
  	status = ocfs2_dir_foreach(orphan_dir_inode, &priv.ctx);
5eae5b96f   Mark Fasheh   ocfs2: Remove ope...
1999
2000
  	if (status) {
  		mlog_errno(status);
a86370fbb   Mark Fasheh   ocfs2: fix exit-w...
2001
  		goto out_cluster;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2002
  	}
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2003

5eae5b96f   Mark Fasheh   ocfs2: Remove ope...
2004
  	*head = priv.head;
a86370fbb   Mark Fasheh   ocfs2: fix exit-w...
2005
  out_cluster:
e63aecb65   Mark Fasheh   ocfs2: Rename ocf...
2006
  	ocfs2_inode_unlock(orphan_dir_inode, 0);
b4df6ed8d   Mark Fasheh   [PATCH] ocfs2: fi...
2007
  out:
5955102c9   Al Viro   wrappers for ->i_...
2008
  	inode_unlock(orphan_dir_inode);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2009
  	iput(orphan_dir_inode);
b4df6ed8d   Mark Fasheh   [PATCH] ocfs2: fi...
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
  	return status;
  }
  
  /*
   * Wait condition for ocfs2_mark_recovering_orphan_dir(): returns non-zero
   * once the osb_orphan_wipes counter for @slot reads zero.  The counter is
   * sampled under osb_lock.
   */
  static int ocfs2_orphan_recovery_can_continue(struct ocfs2_super *osb,
  					      int slot)
  {
  	int no_wipes_pending;

  	spin_lock(&osb->osb_lock);
  	no_wipes_pending = (osb->osb_orphan_wipes[slot] == 0);
  	spin_unlock(&osb->osb_lock);

  	return no_wipes_pending;
  }
  
  /*
   * Flag the orphan dir of @slot as being recovered and then wait until no
   * orphan wipe is in flight on it any more (osb_orphan_wipes[slot] == 0).
   */
  static void ocfs2_mark_recovering_orphan_dir(struct ocfs2_super *osb,
  					     int slot)
  {
  	spin_lock(&osb->osb_lock);

  	/* Mark ourselves such that new processes in delete_inode()
  	 * know to quit early. */
  	ocfs2_node_map_set_bit(osb, &osb->osb_recovering_orphan_dirs, slot);

  	for (;;) {
  		if (!osb->osb_orphan_wipes[slot])
  			break;

  		/* If any processes are already in the middle of an
  		 * orphan wipe on this dir, then we need to wait for
  		 * them.  The lock is dropped while sleeping and the
  		 * counter is re-checked under it afterwards. */
  		spin_unlock(&osb->osb_lock);
  		wait_event_interruptible(osb->osb_wipe_event,
  					 ocfs2_orphan_recovery_can_continue(osb, slot));
  		spin_lock(&osb->osb_lock);
  	}

  	spin_unlock(&osb->osb_lock);
  }
  
  /*
   * Counterpart of ocfs2_mark_recovering_orphan_dir(): clear the bit for
   * @slot in the osb_recovering_orphan_dirs map.
   */
  static void ocfs2_clear_recovering_orphan_dir(struct ocfs2_super *osb,
  					      int slot)
  {
  	struct ocfs2_node_map *recovering = &osb->osb_recovering_orphan_dirs;

  	ocfs2_node_map_clear_bit(osb, recovering, slot);
  }
  
  /*
   * Orphan recovery. Each mounted node has it's own orphan dir which we
   * must run during recovery. Our strategy here is to build a list of
   * the inodes in the orphan dir and iget/iput them. The VFS does
   * (most) of the rest of the work.
   *
   * Orphan recovery can happen at any time, not just mount so we have a
   * couple of extra considerations.
   *
   * - We grab as many inodes as we can under the orphan dir lock -
   *   doing iget() outside the orphan dir risks getting a reference on
   *   an invalid inode.
   * - We must be sure not to deadlock with other processes on the
   *   system wanting to run delete_inode(). This can happen when they go
   *   to lock the orphan dir and the orphan recovery process attempts to
   *   iget() inside the orphan dir lock. This can be avoided by
   *   advertising our state to ocfs2_delete_inode().
   */
  static int ocfs2_recover_orphans(struct ocfs2_super *osb,
ed460cffc   Joseph Qi   ocfs2: add orphan...
2068
2069
  				 int slot,
  				 enum ocfs2_orphan_reco_type orphan_reco_type)
b4df6ed8d   Mark Fasheh   [PATCH] ocfs2: fi...
2070
2071
2072
2073
2074
  {
  	int ret = 0;
  	struct inode *inode = NULL;
  	struct inode *iter;
  	struct ocfs2_inode_info *oi;
cf1776a9e   Joseph Qi   ocfs2: fix a tiny...
2075
2076
  	struct buffer_head *di_bh = NULL;
  	struct ocfs2_dinode *di = NULL;
b4df6ed8d   Mark Fasheh   [PATCH] ocfs2: fi...
2077

b41079504   Tao Ma   ocfs2: Remove mas...
2078
  	trace_ocfs2_recover_orphans(slot);
b4df6ed8d   Mark Fasheh   [PATCH] ocfs2: fi...
2079
2080
  
  	ocfs2_mark_recovering_orphan_dir(osb, slot);
30edc43c7   Joseph Qi   ocfs2: do not inc...
2081
  	ret = ocfs2_queue_orphans(osb, slot, &inode, orphan_reco_type);
b4df6ed8d   Mark Fasheh   [PATCH] ocfs2: fi...
2082
2083
2084
2085
2086
2087
  	ocfs2_clear_recovering_orphan_dir(osb, slot);
  
  	/* Error here should be noted, but we want to continue with as
  	 * many queued inodes as we've got. */
  	if (ret)
  		mlog_errno(ret);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2088
2089
2090
  
  	while (inode) {
  		oi = OCFS2_I(inode);
b41079504   Tao Ma   ocfs2: Remove mas...
2091
2092
  		trace_ocfs2_recover_orphans_iput(
  					(unsigned long long)oi->ip_blkno);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2093
2094
  
  		iter = oi->ip_next_orphan;
ed460cffc   Joseph Qi   ocfs2: add orphan...
2095
  		oi->ip_next_orphan = NULL;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2096

93d911fcc   Joseph Qi   ocfs2: only take ...
2097
  		if (oi->ip_flags & OCFS2_INODE_DIO_ORPHAN_ENTRY) {
5955102c9   Al Viro   wrappers for ->i_...
2098
  			inode_lock(inode);
93d911fcc   Joseph Qi   ocfs2: only take ...
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
  			ret = ocfs2_rw_lock(inode, 1);
  			if (ret < 0) {
  				mlog_errno(ret);
  				goto unlock_mutex;
  			}
  			/*
  			 * We need to take and drop the inode lock to
  			 * force read inode from disk.
  			 */
  			ret = ocfs2_inode_lock(inode, &di_bh, 1);
  			if (ret) {
  				mlog_errno(ret);
  				goto unlock_rw;
  			}
  
  			di = (struct ocfs2_dinode *)di_bh->b_data;
  
  			if (di->i_flags & cpu_to_le32(OCFS2_DIO_ORPHANED_FL)) {
  				ret = ocfs2_truncate_file(inode, di_bh,
  						i_size_read(inode));
  				if (ret < 0) {
  					if (ret != -ENOSPC)
  						mlog_errno(ret);
  					goto unlock_inode;
  				}
cf1776a9e   Joseph Qi   ocfs2: fix a tiny...
2124

93d911fcc   Joseph Qi   ocfs2: only take ...
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
  				ret = ocfs2_del_inode_from_orphan(osb, inode,
  						di_bh, 0, 0);
  				if (ret)
  					mlog_errno(ret);
  			}
  unlock_inode:
  			ocfs2_inode_unlock(inode, 1);
  			brelse(di_bh);
  			di_bh = NULL;
  unlock_rw:
  			ocfs2_rw_unlock(inode, 1);
  unlock_mutex:
5955102c9   Al Viro   wrappers for ->i_...
2137
  			inode_unlock(inode);
ed460cffc   Joseph Qi   ocfs2: add orphan...
2138

93d911fcc   Joseph Qi   ocfs2: only take ...
2139
2140
2141
  			/* clear dio flag in ocfs2_inode_info */
  			oi->ip_flags &= ~OCFS2_INODE_DIO_ORPHAN_ENTRY;
  		} else {
ed460cffc   Joseph Qi   ocfs2: add orphan...
2142
2143
2144
2145
2146
  			spin_lock(&oi->ip_lock);
  			/* Set the proper information to get us going into
  			 * ocfs2_delete_inode. */
  			oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
  			spin_unlock(&oi->ip_lock);
ad6948212   Joseph Qi   ocfs2: fix race b...
2147
  		}
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2148
  		iput(inode);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2149
2150
  		inode = iter;
  	}
b4df6ed8d   Mark Fasheh   [PATCH] ocfs2: fi...
2151
  	return ret;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2152
  }
19ece546a   Jan Kara   ocfs2: Enable quo...
2153
  static int __ocfs2_wait_on_mount(struct ocfs2_super *osb, int quota)
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2154
2155
2156
2157
2158
  {
  	/* This check is good because ocfs2 will wait on our recovery
  	 * thread before changing it to something other than MOUNTED
  	 * or DISABLED. */
  	wait_event(osb->osb_mount_event,
19ece546a   Jan Kara   ocfs2: Enable quo...
2159
2160
  		  (!quota && atomic_read(&osb->vol_state) == VOLUME_MOUNTED) ||
  		   atomic_read(&osb->vol_state) == VOLUME_MOUNTED_QUOTAS ||
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2161
2162
2163
2164
2165
2166
  		   atomic_read(&osb->vol_state) == VOLUME_DISABLED);
  
  	/* If there's an error on mount, then we may never get to the
  	 * MOUNTED flag, but this is set right before
  	 * dismount_volume() so we can trust it. */
  	if (atomic_read(&osb->vol_state) == VOLUME_DISABLED) {
b41079504   Tao Ma   ocfs2: Remove mas...
2167
  		trace_ocfs2_wait_on_mount(VOLUME_DISABLED);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
  		mlog(0, "mount error, exiting!
  ");
  		return -EBUSY;
  	}
  
  	return 0;
  }
  
  static int ocfs2_commit_thread(void *arg)
  {
  	int status;
  	struct ocfs2_super *osb = arg;
  	struct ocfs2_journal *journal = osb->journal;
  
  	/* we can trust j_num_trans here because _should_stop() is only set in
  	 * shutdown and nobody other than ourselves should be able to start
  	 * transactions.  committing on shutdown might take a few iterations
  	 * as final transactions put deleted inodes on the list */
  	while (!(kthread_should_stop() &&
  		 atomic_read(&journal->j_num_trans) == 0)) {
745ae8ba2   Mark Fasheh   [PATCH] ocfs2: on...
2188
2189
2190
  		wait_event_interruptible(osb->checkpoint_event,
  					 atomic_read(&journal->j_num_trans)
  					 || kthread_should_stop());
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2191
2192
  
  		status = ocfs2_commit_cache(osb);
55b465b66   Joseph Qi   ocfs2: limit prin...
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
  		if (status < 0) {
  			static unsigned long abort_warn_time;
  
  			/* Warn about this once per minute */
  			if (printk_timed_ratelimit(&abort_warn_time, 60*HZ))
  				mlog(ML_ERROR, "status = %d, journal is "
  						"already aborted.
  ", status);
  			/*
  			 * After ocfs2_commit_cache() fails, j_num_trans has a
  			 * non-zero value.  Sleep here to avoid a busy-wait
  			 * loop.
  			 */
  			msleep_interruptible(1000);
  		}
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
  
  		if (kthread_should_stop() && atomic_read(&journal->j_num_trans)){
  			mlog(ML_KTHREAD,
  			     "commit_thread: %u transactions pending on "
  			     "shutdown
  ",
  			     atomic_read(&journal->j_num_trans));
  		}
  	}
  
  	return 0;
  }
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
2220
2221
2222
2223
2224
  /* Reads all the journal inodes without taking any cluster locks. Used
   * for hard readonly access to determine whether any journal requires
   * recovery. Also used to refresh the recovery generation numbers after
   * a journal has been recovered by another node.
   */
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2225
2226
2227
2228
  int ocfs2_check_journals_nolocks(struct ocfs2_super *osb)
  {
  	int ret = 0;
  	unsigned int slot;
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
2229
  	struct buffer_head *di_bh = NULL;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2230
  	struct ocfs2_dinode *di;
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
2231
  	int journal_dirty = 0;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2232
2233
  
  	for(slot = 0; slot < osb->max_slots; slot++) {
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
2234
2235
  		ret = ocfs2_read_journal_inode(osb, slot, &di_bh, NULL);
  		if (ret) {
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2236
2237
2238
2239
2240
  			mlog_errno(ret);
  			goto out;
  		}
  
  		di = (struct ocfs2_dinode *) di_bh->b_data;
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
2241
2242
  		osb->slot_recovery_generations[slot] =
  					ocfs2_get_recovery_generation(di);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2243
2244
  		if (le32_to_cpu(di->id1.journal1.ij_flags) &
  		    OCFS2_JOURNAL_DIRTY_FL)
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
2245
  			journal_dirty = 1;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2246
2247
  
  		brelse(di_bh);
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
2248
  		di_bh = NULL;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2249
2250
2251
  	}
  
  out:
539d82640   Sunil Mushran   [PATCH 2/2] ocfs2...
2252
2253
  	if (journal_dirty)
  		ret = -EROFS;
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
2254
2255
  	return ret;
  }