Commit d4400156d415540086c34a06e5d233122d6bf56a
Committed by
Steven Whitehouse
1 parent
435618b75b
Exists in
master
and in
7 other branches
[DLM] fix requestqueue race
Red Hat BZ 211914 There's a race between dlm_recoverd (1) enabling locking and (2) clearing out the requestqueue, and dlm_recvd (1) checking if locking is enabled and (2) adding a message to the requestqueue. An order of recoverd(1), recvd(1), recvd(2), recoverd(2) will result in a message being left on the requestqueue. The fix is to have dlm_recvd check if dlm_recoverd has enabled locking after taking the mutex for the requestqueue and if it has processing the message instead of queueing it. Signed-off-by: David Teigland <teigland@redhat.com> Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Showing 3 changed files with 29 additions and 9 deletions Side-by-side Diff
fs/dlm/lock.c
... | ... | @@ -3028,10 +3028,17 @@ |
3028 | 3028 | |
3029 | 3029 | while (1) { |
3030 | 3030 | if (dlm_locking_stopped(ls)) { |
3031 | - if (!recovery) | |
3032 | - dlm_add_requestqueue(ls, nodeid, hd); | |
3033 | - error = -EINTR; | |
3034 | - goto out; | |
3031 | + if (recovery) { | |
3032 | + error = -EINTR; | |
3033 | + goto out; | |
3034 | + } | |
3035 | + error = dlm_add_requestqueue(ls, nodeid, hd); | |
3036 | + if (error == -EAGAIN) | |
3037 | + continue; | |
3038 | + else { | |
3039 | + error = -EINTR; | |
3040 | + goto out; | |
3041 | + } | |
3035 | 3042 | } |
3036 | 3043 | |
3037 | 3044 | if (lock_recovery_try(ls)) |
fs/dlm/requestqueue.c
... | ... | @@ -30,26 +30,39 @@ |
30 | 30 | * lockspace is enabled on some while still suspended on others. |
31 | 31 | */ |
32 | 32 | |
33 | -void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd) | |
33 | +int dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd) | |
34 | 34 | { |
35 | 35 | struct rq_entry *e; |
36 | 36 | int length = hd->h_length; |
37 | + int rv = 0; | |
37 | 38 | |
38 | 39 | if (dlm_is_removed(ls, nodeid)) |
39 | - return; | |
40 | + return 0; | |
40 | 41 | |
41 | 42 | e = kmalloc(sizeof(struct rq_entry) + length, GFP_KERNEL); |
42 | 43 | if (!e) { |
43 | 44 | log_print("dlm_add_requestqueue: out of memory\n"); |
44 | - return; | |
45 | + return 0; | |
45 | 46 | } |
46 | 47 | |
47 | 48 | e->nodeid = nodeid; |
48 | 49 | memcpy(e->request, hd, length); |
49 | 50 | |
51 | + /* We need to check dlm_locking_stopped() after taking the mutex to | |
52 | + avoid a race where dlm_recoverd enables locking and runs | |
53 | + process_requestqueue between our earlier dlm_locking_stopped check | |
54 | + and this addition to the requestqueue. */ | |
55 | + | |
50 | 56 | mutex_lock(&ls->ls_requestqueue_mutex); |
51 | - list_add_tail(&e->list, &ls->ls_requestqueue); | |
57 | + if (dlm_locking_stopped(ls)) | |
58 | + list_add_tail(&e->list, &ls->ls_requestqueue); | |
59 | + else { | |
60 | + log_debug(ls, "dlm_add_requestqueue skip from %d", nodeid); | |
61 | + kfree(e); | |
62 | + rv = -EAGAIN; | |
63 | + } | |
52 | 64 | mutex_unlock(&ls->ls_requestqueue_mutex); |
65 | + return rv; | |
53 | 66 | } |
54 | 67 | |
55 | 68 | int dlm_process_requestqueue(struct dlm_ls *ls) |
fs/dlm/requestqueue.h
... | ... | @@ -13,7 +13,7 @@ |
13 | 13 | #ifndef __REQUESTQUEUE_DOT_H__ |
14 | 14 | #define __REQUESTQUEUE_DOT_H__ |
15 | 15 | |
16 | -void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd); | |
16 | +int dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd); | |
17 | 17 | int dlm_process_requestqueue(struct dlm_ls *ls); |
18 | 18 | void dlm_wait_requestqueue(struct dlm_ls *ls); |
19 | 19 | void dlm_purge_requestqueue(struct dlm_ls *ls); |