Blame view

fs/dlm/recover.c 20.2 KB
e7fd41792   David Teigland   [DLM] The core of...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
  /******************************************************************************
  *******************************************************************************
  **
  **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
  **  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
  **
  **  This copyrighted material is made available to anyone wishing to use,
  **  modify, copy, or redistribute it subject to the terms and conditions
  **  of the GNU General Public License v.2.
  **
  *******************************************************************************
  ******************************************************************************/
  
  #include "dlm_internal.h"
  #include "lockspace.h"
  #include "dir.h"
  #include "config.h"
  #include "ast.h"
  #include "memory.h"
  #include "rcom.h"
  #include "lock.h"
  #include "lowcomms.h"
  #include "member.h"
  #include "recover.h"
  
  
  /*
   * Recovery waiting routines: these functions wait for a particular reply from
   * a remote node, or for the remote node to report a certain status.  They need
   * to abort if the lockspace is stopped indicating a node has failed (perhaps
   * the one being waited for).
   */
  
  /*
   * Wait until given function returns non-zero or lockspace is stopped
   * (LS_RECOVERY_STOP set due to failure of a node in ls_nodes).  When another
   * function thinks it could have completed the waited-on task, they should wake
   * up ls_wait_general to get an immediate response rather than waiting for the
   * timer to detect the result.  A timer wakes us up periodically while waiting
   * to see if we should abort due to a node failure.  This should only be called
   * by the dlm_recoverd thread.
   */
  
/* Periodic timer callback armed by dlm_wait_function(): re-arm the timer
   and wake the waiter so it re-evaluates testfn()/recovery-stopped state
   even when no explicit wake_up arrives. */
static void dlm_wait_timer_fn(unsigned long data)
{
	struct dlm_ls *ls = (struct dlm_ls *) data;

	/* re-arm for the next poll interval (ci_recover_timer is in seconds) */
	mod_timer(&ls->ls_timer, jiffies + (dlm_config.ci_recover_timer * HZ));
	wake_up(&ls->ls_wait_general);
}
  
/* Sleep until testfn(ls) returns non-zero or recovery is stopped.
   Returns 0 when the condition was met, -EINTR if recovery was aborted.
   Only called by the dlm_recoverd thread (see comment above). */
int dlm_wait_function(struct dlm_ls *ls, int (*testfn) (struct dlm_ls *ls))
{
	int error = 0;

	/* set up the periodic poll timer; dlm_wait_timer_fn re-arms it */
	init_timer(&ls->ls_timer);
	ls->ls_timer.function = dlm_wait_timer_fn;
	ls->ls_timer.data = (long) ls;
	ls->ls_timer.expires = jiffies + (dlm_config.ci_recover_timer * HZ);
	add_timer(&ls->ls_timer);

	wait_event(ls->ls_wait_general, testfn(ls) || dlm_recovery_stopped(ls));
	/* del_timer_sync: the callback must not be running when we return */
	del_timer_sync(&ls->ls_timer);

	/* woke for either reason; distinguish abort from success */
	if (dlm_recovery_stopped(ls)) {
		log_debug(ls, "dlm_wait_function aborted");
		error = -EINTR;
	}
	return error;
}
  
  /*
   * An efficient way for all nodes to wait for all others to have a certain
   * status.  The node with the lowest nodeid polls all the others for their
   * status (wait_status_all) and all the others poll the node with the low id
   * for its accumulated result (wait_status_low).  When all nodes have set
   * status flag X, then status flag X_ALL will be set on the low nodeid.
   */
  
/* Snapshot ls_recover_status under ls_recover_lock. */
uint32_t dlm_recover_status(struct dlm_ls *ls)
{
	uint32_t status;
	spin_lock(&ls->ls_recover_lock);
	status = ls->ls_recover_status;
	spin_unlock(&ls->ls_recover_lock);
	return status;
}
757a42719   David Teigland   dlm: add node slo...
87
88
89
90
/* Lockless helper: caller must hold ls_recover_lock. */
static void _set_recover_status(struct dlm_ls *ls, uint32_t status)
{
	ls->ls_recover_status |= status;
}
e7fd41792   David Teigland   [DLM] The core of...
91
92
93
/* OR the given status flag(s) into ls_recover_status under the lock. */
void dlm_set_recover_status(struct dlm_ls *ls, uint32_t status)
{
	spin_lock(&ls->ls_recover_lock);
	_set_recover_status(ls, status);
	spin_unlock(&ls->ls_recover_lock);
}
757a42719   David Teigland   dlm: add node slo...
97
98
/* Run by the low-nodeid node: poll every member of ls_nodes until each
   reports wait_status via dlm_rcom_status().  When save_slots is set,
   also record each member's slot info from the status reply.
   Returns 0 on success, -EINTR if recovery is stopped, or an rcom error. */
static int wait_status_all(struct dlm_ls *ls, uint32_t wait_status,
			   int save_slots)
{
	/* status replies are received into the shared recover buffer */
	struct dlm_rcom *rc = ls->ls_recover_buf;
	struct dlm_member *memb;
	int error = 0, delay;

	list_for_each_entry(memb, &ls->ls_nodes, list) {
		delay = 0;
		for (;;) {
			if (dlm_recovery_stopped(ls)) {
				error = -EINTR;
				goto out;
			}

			error = dlm_rcom_status(ls, memb->nodeid, 0);
			if (error)
				goto out;

			if (save_slots)
				dlm_slot_save(ls, rc, memb);

			if (rc->rc_result & wait_status)
				break;
			/* linear backoff: +20ms per retry, capped at 1s */
			if (delay < 1000)
				delay += 20;
			msleep(delay);
		}
	}
 out:
	return error;
}
757a42719   David Teigland   dlm: add node slo...
126
127
/* Run by every node except the low-nodeid node: poll the low node until
   it reports the accumulated wait_status (an X_ALL flag).  status_flags
   is passed through in the status request (e.g. DLM_RSF_NEED_SLOTS).
   Returns 0 on success, -EINTR if recovery is stopped, or an rcom error. */
static int wait_status_low(struct dlm_ls *ls, uint32_t wait_status,
			   uint32_t status_flags)
{
	/* status replies are received into the shared recover buffer */
	struct dlm_rcom *rc = ls->ls_recover_buf;
	int error = 0, delay = 0, nodeid = ls->ls_low_nodeid;

	for (;;) {
		if (dlm_recovery_stopped(ls)) {
			error = -EINTR;
			goto out;
		}

		error = dlm_rcom_status(ls, nodeid, status_flags);
		if (error)
			break;

		if (rc->rc_result & wait_status)
			break;
		/* linear backoff: +20ms per retry, capped at 1s */
		if (delay < 1000)
			delay += 20;
		msleep(delay);
	}
 out:
	return error;
}
  
  static int wait_status(struct dlm_ls *ls, uint32_t status)
  {
  	uint32_t status_all = status << 1;
  	int error;
  
  	if (ls->ls_low_nodeid == dlm_our_nodeid()) {
757a42719   David Teigland   dlm: add node slo...
157
  		error = wait_status_all(ls, status, 0);
e7fd41792   David Teigland   [DLM] The core of...
158
159
160
  		if (!error)
  			dlm_set_recover_status(ls, status_all);
  	} else
757a42719   David Teigland   dlm: add node slo...
161
  		error = wait_status_low(ls, status_all, 0);
e7fd41792   David Teigland   [DLM] The core of...
162
163
164
165
166
167
  
  	return error;
  }
  
/* NODES-stage barrier with slot assignment.  The low-nodeid node gathers
   slot info from all members (wait_status_all with save_slots), assigns
   slots/generation, and publishes DLM_RS_NODES_ALL; other nodes wait for
   that flag and then copy in the slot assignments from the reply. */
int dlm_recover_members_wait(struct dlm_ls *ls)
{
	struct dlm_member *memb;
	struct dlm_slot *slots;
	int num_slots, slots_size;
	int error, rv;
	uint32_t gen;

	/* reset per-member slot state before collecting fresh values */
	list_for_each_entry(memb, &ls->ls_nodes, list) {
		memb->slot = -1;
		memb->generation = 0;
	}

	if (ls->ls_low_nodeid == dlm_our_nodeid()) {
		error = wait_status_all(ls, DLM_RS_NODES, 1);
		if (error)
			goto out;

		/* slots array is sparse, slots_size may be > num_slots */

		rv = dlm_slots_assign(ls, &num_slots, &slots_size, &slots, &gen);
		if (!rv) {
			/* publish slots and the NODES_ALL flag atomically
			   under ls_recover_lock */
			spin_lock(&ls->ls_recover_lock);
			_set_recover_status(ls, DLM_RS_NODES_ALL);
			ls->ls_num_slots = num_slots;
			ls->ls_slots_size = slots_size;
			ls->ls_slots = slots;
			ls->ls_generation = gen;
			spin_unlock(&ls->ls_recover_lock);
		} else {
			/* slot assignment failed; still release waiters */
			dlm_set_recover_status(ls, DLM_RS_NODES_ALL);
		}
	} else {
		error = wait_status_low(ls, DLM_RS_NODES_ALL, DLM_RSF_NEED_SLOTS);
		if (error)
			goto out;

		dlm_slots_copy_in(ls);
	}
 out:
	return error;
}
  
/* Barrier: all nodes have finished rebuilding the resource directory. */
int dlm_recover_directory_wait(struct dlm_ls *ls)
{
	return wait_status(ls, DLM_RS_DIR);
}
  
/* Barrier: all nodes have finished rebuilding remastered locks. */
int dlm_recover_locks_wait(struct dlm_ls *ls)
{
	return wait_status(ls, DLM_RS_LOCKS);
}
  
/* Barrier: all nodes have completed recovery. */
int dlm_recover_done_wait(struct dlm_ls *ls)
{
	return wait_status(ls, DLM_RS_DONE);
}
  
  /*
   * The recover_list contains all the rsb's for which we've requested the new
   * master nodeid.  As replies are returned from the resource directories the
   * rsb's are removed from the list.  When the list is empty we're done.
   *
   * The recover_list is later similarly used for all rsb's for which we've sent
   * new lkb's and need to receive new corresponding lkid's.
   *
   * We use the address of the rsb struct as a simple local identifier for the
   * rsb so we can match an rcom reply with the rsb it was sent for.
   */
  
  static int recover_list_empty(struct dlm_ls *ls)
  {
  	int empty;
  
  	spin_lock(&ls->ls_recover_list_lock);
  	empty = list_empty(&ls->ls_recover_list);
  	spin_unlock(&ls->ls_recover_list_lock);
  
  	return empty;
  }
  
/* Put r on the lockspace recover_list (idempotent: only if not already
   listed) and take a reference held until recover_list_del/clear. */
static void recover_list_add(struct dlm_rsb *r)
{
	struct dlm_ls *ls = r->res_ls;

	spin_lock(&ls->ls_recover_list_lock);
	if (list_empty(&r->res_recover_list)) {
		list_add_tail(&r->res_recover_list, &ls->ls_recover_list);
		ls->ls_recover_list_count++;
		dlm_hold_rsb(r);
	}
	spin_unlock(&ls->ls_recover_list_lock);
}
  
/* Remove r from the recover_list and drop the reference taken by
   recover_list_add().  The put is done outside the spinlock. */
static void recover_list_del(struct dlm_rsb *r)
{
	struct dlm_ls *ls = r->res_ls;

	spin_lock(&ls->ls_recover_list_lock);
	list_del_init(&r->res_recover_list);
	ls->ls_recover_list_count--;
	spin_unlock(&ls->ls_recover_list_lock);

	dlm_put_rsb(r);
}
  
/* Look up the rsb whose address was used as the rcom id (see file
   comment above: the rsb pointer doubles as the local identifier).
   Returns NULL if no listed rsb matches.
   NOTE(review): id is uint64_t but the pointer is cast through
   unsigned long — assumes ids were generated from pointers the same
   way on this node (they are, per the file comment); verify if ids
   ever originate elsewhere. */
static struct dlm_rsb *recover_list_find(struct dlm_ls *ls, uint64_t id)
{
	struct dlm_rsb *r = NULL;

	spin_lock(&ls->ls_recover_list_lock);

	list_for_each_entry(r, &ls->ls_recover_list, res_recover_list) {
		if (id == (unsigned long) r)
			goto out;
	}
	r = NULL;
 out:
	spin_unlock(&ls->ls_recover_list_lock);
	return r;
}
  
/* Abort path: empty the recover_list, dropping each rsb's reference and
   resetting its per-rsb recovery lock count.  Logs if the bookkeeping
   count disagrees with the list contents. */
static void recover_list_clear(struct dlm_ls *ls)
{
	struct dlm_rsb *r, *s;

	spin_lock(&ls->ls_recover_list_lock);
	list_for_each_entry_safe(r, s, &ls->ls_recover_list, res_recover_list) {
		list_del_init(&r->res_recover_list);
		r->res_recover_locks_count = 0;
		dlm_put_rsb(r);
		ls->ls_recover_list_count--;
	}

	/* count should hit exactly zero; anything else is a leak/bug */
	if (ls->ls_recover_list_count != 0) {
		log_error(ls, "warning: recover_list_count %d",
			  ls->ls_recover_list_count);
		ls->ls_recover_list_count = 0;
	}
	spin_unlock(&ls->ls_recover_list_lock);
}
  
  
  /* Master recovery: find new master node for rsb's that were
     mastered on nodes that have been removed.
  
     dlm_recover_masters
     recover_master
     dlm_send_rcom_lookup            ->  receive_rcom_lookup
                                         dlm_dir_lookup
     receive_rcom_lookup_reply       <-
     dlm_recover_master_reply
     set_new_master
     set_master_lkbs
     set_lock_master
  */
  
  /*
   * Set the lock master for all LKBs in a lock queue
   * If we are the new master of the rsb, we may have received new
   * MSTCPY locks from other nodes already which we need to ignore
   * when setting the new nodeid.
   */
  
  static void set_lock_master(struct list_head *queue, int nodeid)
  {
  	struct dlm_lkb *lkb;
  
  	list_for_each_entry(lkb, queue, lkb_statequeue)
  		if (!(lkb->lkb_flags & DLM_IFL_MSTCPY))
  			lkb->lkb_nodeid = nodeid;
  }
  
/* Propagate r->res_nodeid to all lkb's on the rsb's three queues. */
static void set_master_lkbs(struct dlm_rsb *r)
{
	set_lock_master(&r->res_grantqueue, r->res_nodeid);
	set_lock_master(&r->res_convertqueue, r->res_nodeid);
	set_lock_master(&r->res_waitqueue, r->res_nodeid);
}
  
  /*
25985edce   Lucas De Marchi   Fix common misspe...
347
   * Propagate the new master nodeid to locks
e7fd41792   David Teigland   [DLM] The core of...
348
   * The NEW_MASTER flag tells dlm_recover_locks() which rsb's to consider.
f7da790d7   David Teigland   [DLM] set purged ...
349
350
   * The NEW_MASTER2 flag tells recover_lvb() and set_locks_purged() which
   * rsb's to consider.
e7fd41792   David Teigland   [DLM] The core of...
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
   */
  
/* Record nodeid as the rsb's new master, update all its lkb's, and set
   the NEW_MASTER/NEW_MASTER2 flags consumed by later recovery stages
   (see the comment block above for which stage reads which flag). */
static void set_new_master(struct dlm_rsb *r, int nodeid)
{
	lock_rsb(r);
	r->res_nodeid = nodeid;
	set_master_lkbs(r);
	rsb_set_flag(r, RSB_NEW_MASTER);
	rsb_set_flag(r, RSB_NEW_MASTER2);
	unlock_rsb(r);
}
  
  /*
   * We do async lookups on rsb's that need new masters.  The rsb's
   * waiting for a lookup reply are kept on the recover_list.
   */
  
  static int recover_master(struct dlm_rsb *r)
  {
  	struct dlm_ls *ls = r->res_ls;
  	int error, dir_nodeid, ret_nodeid, our_nodeid = dlm_our_nodeid();
  
  	dir_nodeid = dlm_dir_nodeid(r);
  
  	if (dir_nodeid == our_nodeid) {
  		error = dlm_dir_lookup(ls, our_nodeid, r->res_name,
  				       r->res_length, &ret_nodeid);
  		if (error)
  			log_error(ls, "recover dir lookup error %d", error);
  
  		if (ret_nodeid == our_nodeid)
  			ret_nodeid = 0;
  		set_new_master(r, ret_nodeid);
  	} else {
  		recover_list_add(r);
  		error = dlm_send_rcom_lookup(r, dir_nodeid);
  	}
  
  	return error;
  }
  
  /*
   * When not using a directory, most resource names will hash to a new static
   * master nodeid and the resource will need to be remastered.
   */
  
/* No-directory case: the master is fixed by the name hash.  If the
   statically computed master differs from the current one, purge any
   MSTCPY locks we hold as (former) master and remaster the rsb.
   Returns 1 if the rsb was remastered, 0 otherwise. */
static int recover_master_static(struct dlm_rsb *r)
{
	int master = dlm_dir_nodeid(r);

	/* our own nodeid is represented as 0 (locally mastered) */
	if (master == dlm_our_nodeid())
		master = 0;

	if (r->res_nodeid != master) {
		if (is_master(r))
			dlm_purge_mstcpy_locks(r);
		set_new_master(r, master);
		return 1;
	}
	return 0;
}
  
  /*
   * Go through local root resources and for each rsb which has a master which
   * has departed, get the new master nodeid from the directory.  The dir will
   * assign mastery to the first node to look up the new master.  That means
   * we'll discover in this lookup if we're the new master of any rsb's.
   *
   * We fire off all the dir lookup requests individually and asynchronously to
   * the correct dir node.
   */
  
/* Walk the root list and (re)master every rsb whose master departed,
   then wait for all async directory lookups to complete.  On error the
   recover_list is cleared so no stale entries survive the abort. */
int dlm_recover_masters(struct dlm_ls *ls)
{
	struct dlm_rsb *r;
	int error = 0, count = 0;

	log_debug(ls, "dlm_recover_masters");

	down_read(&ls->ls_root_sem);
	list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
		if (dlm_recovery_stopped(ls)) {
			up_read(&ls->ls_root_sem);
			error = -EINTR;
			goto out;
		}

		if (dlm_no_directory(ls))
			count += recover_master_static(r);
		else if (!is_master(r) &&
			 (dlm_is_removed(ls, r->res_nodeid) ||
			  rsb_flag(r, RSB_NEW_MASTER))) {
			/* NOTE(review): recover_master()'s return value is
			   ignored here; a failed async lookup surfaces only
			   via the recover_list wait below */
			recover_master(r);
			count++;
		}

		/* long list: give the scheduler a chance each iteration */
		schedule();
	}
	up_read(&ls->ls_root_sem);

	log_debug(ls, "dlm_recover_masters %d resources", count);

	/* wait for all outstanding lookup replies (or abort) */
	error = dlm_wait_function(ls, &recover_list_empty);
 out:
	if (error)
		recover_list_clear(ls);
	return error;
}
  
/* Handle a directory-lookup reply: match the rcom id back to the rsb
   (the id is the rsb's address), set the new master from rc_result,
   and wake the waiter in dlm_recover_masters() when the list drains.
   Always returns 0; an unmatched id is only logged. */
int dlm_recover_master_reply(struct dlm_ls *ls, struct dlm_rcom *rc)
{
	struct dlm_rsb *r;
	int nodeid;

	r = recover_list_find(ls, rc->rc_id);
	if (!r) {
		log_error(ls, "dlm_recover_master_reply no id %llx",
			  (unsigned long long)rc->rc_id);
		goto out;
	}

	nodeid = rc->rc_result;
	/* our own nodeid is stored as 0 (locally mastered) */
	if (nodeid == dlm_our_nodeid())
		nodeid = 0;

	set_new_master(r, nodeid);
	recover_list_del(r);

	if (recover_list_empty(ls))
		wake_up(&ls->ls_wait_general);
 out:
	return 0;
}
  
  
  /* Lock recovery: rebuild the process-copy locks we hold on a
     remastered rsb on the new rsb master.
  
     dlm_recover_locks
     recover_locks
     recover_locks_queue
     dlm_send_rcom_lock              ->  receive_rcom_lock
                                         dlm_recover_master_copy
     receive_rcom_lock_reply         <-
     dlm_recover_process_copy
  */
  
  
  /*
   * keep a count of the number of lkb's we send to the new master; when we get
   * an equal number of replies then recovery for the rsb is done
   */
  
  static int recover_locks_queue(struct dlm_rsb *r, struct list_head *head)
  {
  	struct dlm_lkb *lkb;
  	int error = 0;
  
  	list_for_each_entry(lkb, head, lkb_statequeue) {
  	   	error = dlm_send_rcom_lock(r, lkb);
  		if (error)
  			break;
  		r->res_recover_locks_count++;
  	}
  
  	return error;
  }
e7fd41792   David Teigland   [DLM] The core of...
518
519
520
521
522
/* Send all of this rsb's locks (grant, convert, wait queues) to its new
   master.  If anything was sent, park the rsb on the recover_list until
   all replies arrive; if nothing needed sending, the rsb is done and
   NEW_MASTER is cleared immediately. */
static int recover_locks(struct dlm_rsb *r)
{
	int error = 0;

	lock_rsb(r);

	/* count must be zero here; a leftover count means a previous
	   recovery pass did not complete its bookkeeping */
	DLM_ASSERT(!r->res_recover_locks_count, dlm_dump_rsb(r););

	error = recover_locks_queue(r, &r->res_grantqueue);
	if (error)
		goto out;
	error = recover_locks_queue(r, &r->res_convertqueue);
	if (error)
		goto out;
	error = recover_locks_queue(r, &r->res_waitqueue);
	if (error)
		goto out;

	if (r->res_recover_locks_count)
		recover_list_add(r);
	else
		rsb_clear_flag(r, RSB_NEW_MASTER);
 out:
	unlock_rsb(r);
	return error;
}
  
/* For every remastered rsb we don't master (NEW_MASTER set), send our
   process-copy locks to the new master, then wait for all lkid replies.
   On error the recover_list is cleared before returning. */
int dlm_recover_locks(struct dlm_ls *ls)
{
	struct dlm_rsb *r;
	int error, count = 0;

	log_debug(ls, "dlm_recover_locks");

	down_read(&ls->ls_root_sem);
	list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
		/* we master it: nothing to send, just clear the flag */
		if (is_master(r)) {
			rsb_clear_flag(r, RSB_NEW_MASTER);
			continue;
		}

		if (!rsb_flag(r, RSB_NEW_MASTER))
			continue;

		if (dlm_recovery_stopped(ls)) {
			error = -EINTR;
			up_read(&ls->ls_root_sem);
			goto out;
		}

		error = recover_locks(r);
		if (error) {
			up_read(&ls->ls_root_sem);
			goto out;
		}

		count += r->res_recover_locks_count;
	}
	up_read(&ls->ls_root_sem);

	log_debug(ls, "dlm_recover_locks %d locks", count);

	/* wait until every sent lock has been acknowledged (or abort) */
	error = dlm_wait_function(ls, &recover_list_empty);
 out:
	if (error)
		recover_list_clear(ls);
	return error;
}
  
/* Called per lock reply: decrement the rsb's outstanding-send count;
   when it reaches zero the rsb is fully recovered, so clear NEW_MASTER,
   drop it from the recover_list, and wake the waiter if the list is now
   empty. */
void dlm_recovered_lock(struct dlm_rsb *r)
{
	/* replies are only expected for rsb's still flagged NEW_MASTER */
	DLM_ASSERT(rsb_flag(r, RSB_NEW_MASTER), dlm_dump_rsb(r););

	r->res_recover_locks_count--;
	if (!r->res_recover_locks_count) {
		rsb_clear_flag(r, RSB_NEW_MASTER);
		recover_list_del(r);
	}

	if (recover_list_empty(r->res_ls))
		wake_up(&r->res_ls->ls_wait_general);
}
  
  /*
   * The lvb needs to be recovered on all master rsb's.  This includes setting
   * the VALNOTVALID flag if necessary, and determining the correct lvb contents
   * based on the lvb's of the locks held on the rsb.
   *
   * RSB_VALNOTVALID is set if there are only NL/CR locks on the rsb.  If it
   * was already set prior to recovery, it's not cleared, regardless of locks.
   *
   * The LVB contents are only considered for changing when this is a new master
   * of the rsb (NEW_MASTER2).  Then, the rsb's lvb is taken from any lkb with
   * mode > CR.  If no lkb's exist with mode above CR, the lvb contents are taken
   * from the lkb with the largest lvb sequence number.
   */
  
/* Rebuild the rsb's LVB after remastering (see comment above for the
   full rules): set VALNOTVALID when only NL/CR locks hold it, and — only
   when we are the new master (NEW_MASTER2) — take the lvb from any lock
   with mode > CR, else from the lock with the highest lvb sequence. */
static void recover_lvb(struct dlm_rsb *r)
{
	struct dlm_lkb *lkb, *high_lkb = NULL;
	uint32_t high_seq = 0;
	int lock_lvb_exists = 0;
	int big_lock_exists = 0;
	int lvblen = r->res_ls->ls_lvblen;

	/* scan granted locks: any VALBLK lock above CR wins outright */
	list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) {
		if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
			continue;

		lock_lvb_exists = 1;

		if (lkb->lkb_grmode > DLM_LOCK_CR) {
			big_lock_exists = 1;
			goto setflag;
		}

		/* signed-difference compare handles lvbseq wraparound */
		if (((int)lkb->lkb_lvbseq - (int)high_seq) >= 0) {
			high_lkb = lkb;
			high_seq = lkb->lkb_lvbseq;
		}
	}

	/* same scan over the convert queue (grmode is what counts) */
	list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) {
		if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
			continue;

		lock_lvb_exists = 1;

		if (lkb->lkb_grmode > DLM_LOCK_CR) {
			big_lock_exists = 1;
			goto setflag;
		}

		if (((int)lkb->lkb_lvbseq - (int)high_seq) >= 0) {
			high_lkb = lkb;
			high_seq = lkb->lkb_lvbseq;
		}
	}

 setflag:
	/* no lock uses the lvb at all: leave everything as-is */
	if (!lock_lvb_exists)
		goto out;

	/* only NL/CR holders: lvb contents can't be trusted */
	if (!big_lock_exists)
		rsb_set_flag(r, RSB_VALNOTVALID);

	/* don't mess with the lvb unless we're the new master */
	if (!rsb_flag(r, RSB_NEW_MASTER2))
		goto out;

	if (!r->res_lvbptr) {
		r->res_lvbptr = dlm_allocate_lvb(r->res_ls);
		if (!r->res_lvbptr)
			goto out;
	}

	if (big_lock_exists) {
		/* lkb still points at the >CR lock that broke the scan */
		r->res_lvbseq = lkb->lkb_lvbseq;
		memcpy(r->res_lvbptr, lkb->lkb_lvbptr, lvblen);
	} else if (high_lkb) {
		r->res_lvbseq = high_lkb->lkb_lvbseq;
		memcpy(r->res_lvbptr, high_lkb->lkb_lvbptr, lvblen);
	} else {
		r->res_lvbseq = 0;
		memset(r->res_lvbptr, 0, lvblen);
	}
 out:
	return;
}
  
  /* All master rsb's flagged RECOVER_CONVERT need to be looked at.  The locks
     converting PR->CW or CW->PR need to have their lkb_grmode set. */
  
/* Repair grmode on converting locks after recovery: an in-flight
   PR<->CW conversion arrives with grmode IV.  If some granted lock
   holds PR or CW, inherit that mode; otherwise fall back to the lock's
   requested mode. */
static void recover_conversion(struct dlm_rsb *r)
{
	struct dlm_lkb *lkb;
	int grmode = -1;

	/* find a granted PR/CW mode to inherit, if any */
	list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) {
		if (lkb->lkb_grmode == DLM_LOCK_PR ||
		    lkb->lkb_grmode == DLM_LOCK_CW) {
			grmode = lkb->lkb_grmode;
			break;
		}
	}

	list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) {
		if (lkb->lkb_grmode != DLM_LOCK_IV)
			continue;
		if (grmode == -1)
			lkb->lkb_grmode = lkb->lkb_rqmode;
		else
			lkb->lkb_grmode = grmode;
	}
}
f7da790d7   David Teigland   [DLM] set purged ...
711
712
713
714
715
716
717
718
719
  /* We've become the new master for this rsb and waiting/converting locks may
     need to be granted in dlm_grant_after_purge() due to locks that may have
     existed from a removed node. */
  
  static void set_locks_purged(struct dlm_rsb *r)
  {
  	if (!list_empty(&r->res_waitqueue) || !list_empty(&r->res_convertqueue))
  		rsb_set_flag(r, RSB_LOCKS_PURGED);
  }
e7fd41792   David Teigland   [DLM] The core of...
720
721
722
723
724
725
726
727
728
729
730
731
732
/* Final per-rsb recovery pass over the root list: on rsb's we master,
   repair conversions, flag purged-lock regrants, and rebuild the lvb;
   on every rsb, clear the transient RECOVER_CONVERT/NEW_MASTER2 flags. */
void dlm_recover_rsbs(struct dlm_ls *ls)
{
	struct dlm_rsb *r;
	int count = 0;

	log_debug(ls, "dlm_recover_rsbs");

	down_read(&ls->ls_root_sem);
	list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
		lock_rsb(r);
		if (is_master(r)) {
			if (rsb_flag(r, RSB_RECOVER_CONVERT))
				recover_conversion(r);
			if (rsb_flag(r, RSB_NEW_MASTER2))
				set_locks_purged(r);
			recover_lvb(r);
			count++;
		}
		/* these flags are per-recovery-cycle; always reset them */
		rsb_clear_flag(r, RSB_RECOVER_CONVERT);
		rsb_clear_flag(r, RSB_NEW_MASTER2);
		unlock_rsb(r);
	}
	up_read(&ls->ls_root_sem);

	log_debug(ls, "dlm_recover_rsbs %d rsbs", count);
}
  
  /* Create a single list of all root rsb's to be used during recovery */
  
/* Build ls_root_list from every hash bucket's keep tree (and, when a
   directory is in use, the toss tree too) with a reference held on each
   rsb.  Fails with -EINVAL if a previous root list was never released. */
int dlm_create_root_list(struct dlm_ls *ls)
{
	struct rb_node *n;
	struct dlm_rsb *r;
	int i, error = 0;

	down_write(&ls->ls_root_sem);
	if (!list_empty(&ls->ls_root_list)) {
		log_error(ls, "root list not empty");
		error = -EINVAL;
		goto out;
	}

	for (i = 0; i < ls->ls_rsbtbl_size; i++) {
		spin_lock(&ls->ls_rsbtbl[i].lock);
		for (n = rb_first(&ls->ls_rsbtbl[i].keep); n; n = rb_next(n)) {
			r = rb_entry(n, struct dlm_rsb, res_hashnode);
			list_add(&r->res_root_list, &ls->ls_root_list);
			dlm_hold_rsb(r);
		}

		/* If we're using a directory, add tossed rsbs to the root
		   list; they'll have entries created in the new directory,
		   but no other recovery steps should do anything with them. */

		if (dlm_no_directory(ls)) {
			spin_unlock(&ls->ls_rsbtbl[i].lock);
			continue;
		}

		for (n = rb_first(&ls->ls_rsbtbl[i].toss); n; n = rb_next(n)) {
			r = rb_entry(n, struct dlm_rsb, res_hashnode);
			list_add(&r->res_root_list, &ls->ls_root_list);
			dlm_hold_rsb(r);
		}
		spin_unlock(&ls->ls_rsbtbl[i].lock);
	}
 out:
	up_write(&ls->ls_root_sem);
	return error;
}
  
/* Tear down ls_root_list, dropping the reference dlm_create_root_list()
   took on each rsb. */
void dlm_release_root_list(struct dlm_ls *ls)
{
	struct dlm_rsb *r, *safe;

	down_write(&ls->ls_root_sem);
	list_for_each_entry_safe(r, safe, &ls->ls_root_list, res_root_list) {
		list_del_init(&r->res_root_list);
		dlm_put_rsb(r);
	}
	up_write(&ls->ls_root_sem);
}
85f0379aa   David Teigland   dlm: keep cached ...
801
802
803
804
  /* If not using a directory, clear the entire toss list, there's no benefit to
     caching the master value since it's fixed.  If we are using a dir, keep the
     rsb's we're the master of.  Recovery will add them to the root list and from
     there they'll be entered in the rebuilt directory. */
e7fd41792   David Teigland   [DLM] The core of...
805
806
  void dlm_clear_toss_list(struct dlm_ls *ls)
  {
9beb3bf5a   Bob Peterson   dlm: convert rsb ...
807
808
  	struct rb_node *n, *next;
  	struct dlm_rsb *rsb;
e7fd41792   David Teigland   [DLM] The core of...
809
810
811
  	int i;
  
  	for (i = 0; i < ls->ls_rsbtbl_size; i++) {
c7be761a8   David Teigland   dlm: change rsbtb...
812
  		spin_lock(&ls->ls_rsbtbl[i].lock);
9beb3bf5a   Bob Peterson   dlm: convert rsb ...
813
814
815
816
817
818
  		for (n = rb_first(&ls->ls_rsbtbl[i].toss); n; n = next) {
  			next = rb_next(n);;
  			rsb = rb_entry(n, struct dlm_rsb, res_hashnode);
  			if (dlm_no_directory(ls) || !is_master(rsb)) {
  				rb_erase(n, &ls->ls_rsbtbl[i].toss);
  				dlm_free_rsb(rsb);
85f0379aa   David Teigland   dlm: keep cached ...
819
  			}
e7fd41792   David Teigland   [DLM] The core of...
820
  		}
c7be761a8   David Teigland   dlm: change rsbtb...
821
  		spin_unlock(&ls->ls_rsbtbl[i].lock);
e7fd41792   David Teigland   [DLM] The core of...
822
823
  	}
  }