Commit 98f176fb32f33795b6d0f83856008b932123ab38

Authored by David Teigland
Committed by Steven Whitehouse
1 parent 1babdb4531

[DLM] don't accept replies to old recovery messages

We often abort a recovery after sending a status request to a remote node.
We want to ignore any potential status reply we get from the remote node.
If we get one of these unwanted replies, we've often moved on to the next
recovery message and incremented the message sequence counter, so the
reply will be ignored due to the seq number.  In some cases, we've not
moved on to the next message so the seq number of the reply we want to
ignore is still correct, causing the reply to be accepted.  The next
recovery message will then mistake this old reply for a new one.

To fix this, we add the flag RCOM_WAIT to indicate when we can accept a
new reply.  We clear this flag if we abort recovery while waiting for a
reply.  Before the flag is set again (to allow new replies) we know that
any old replies will be rejected due to their sequence number.  We also
initialize the recovery-message sequence number to a random value when a
lockspace is first created.  This makes it clear when messages are being
rejected from an old instance of a lockspace that has since been
recreated.

Signed-off-by: David Teigland <teigland@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

Showing 3 changed files with 39 additions and 11 deletions.

fs/dlm/dlm_internal.h
... ... @@ -471,6 +471,7 @@
471 471 char *ls_recover_buf;
472 472 int ls_recover_nodeid; /* for debugging */
473 473 uint64_t ls_rcom_seq;
  474 + spinlock_t ls_rcom_spin;
474 475 struct list_head ls_recover_list;
475 476 spinlock_t ls_recover_list_lock;
476 477 int ls_recover_list_count;
... ... @@ -488,7 +489,8 @@
488 489 #define LSFL_RUNNING 1
489 490 #define LSFL_RECOVERY_STOP 2
490 491 #define LSFL_RCOM_READY 3
491   -#define LSFL_UEVENT_WAIT 4
  492 +#define LSFL_RCOM_WAIT 4
  493 +#define LSFL_UEVENT_WAIT 5
492 494  
493 495 /* much of this is just saving user space pointers associated with the
494 496 lock that we pass back to the user lib with an ast */
fs/dlm/lockspace.c
... ... @@ -479,6 +479,8 @@
479 479 ls->ls_recoverd_task = NULL;
480 480 mutex_init(&ls->ls_recoverd_active);
481 481 spin_lock_init(&ls->ls_recover_lock);
  482 + spin_lock_init(&ls->ls_rcom_spin);
  483 + get_random_bytes(&ls->ls_rcom_seq, sizeof(uint64_t));
482 484 ls->ls_recover_status = 0;
483 485 ls->ls_recover_seq = 0;
484 486 ls->ls_recover_args = NULL;
fs/dlm/rcom.c
... ... @@ -90,13 +90,28 @@
90 90 return 0;
91 91 }
92 92  
  93 +static void allow_sync_reply(struct dlm_ls *ls, uint64_t *new_seq)
  94 +{
  95 + spin_lock(&ls->ls_rcom_spin);
  96 + *new_seq = ++ls->ls_rcom_seq;
  97 + set_bit(LSFL_RCOM_WAIT, &ls->ls_flags);
  98 + spin_unlock(&ls->ls_rcom_spin);
  99 +}
  100 +
  101 +static void disallow_sync_reply(struct dlm_ls *ls)
  102 +{
  103 + spin_lock(&ls->ls_rcom_spin);
  104 + clear_bit(LSFL_RCOM_WAIT, &ls->ls_flags);
  105 + clear_bit(LSFL_RCOM_READY, &ls->ls_flags);
  106 + spin_unlock(&ls->ls_rcom_spin);
  107 +}
  108 +
93 109 int dlm_rcom_status(struct dlm_ls *ls, int nodeid)
94 110 {
95 111 struct dlm_rcom *rc;
96 112 struct dlm_mhandle *mh;
97 113 int error = 0;
98 114  
99   - memset(ls->ls_recover_buf, 0, dlm_config.buffer_size);
100 115 ls->ls_recover_nodeid = nodeid;
101 116  
102 117 if (nodeid == dlm_our_nodeid()) {
103 118  
104 119  
... ... @@ -108,12 +123,14 @@
108 123 error = create_rcom(ls, nodeid, DLM_RCOM_STATUS, 0, &rc, &mh);
109 124 if (error)
110 125 goto out;
111   - rc->rc_id = ++ls->ls_rcom_seq;
112 126  
  127 + allow_sync_reply(ls, &rc->rc_id);
  128 + memset(ls->ls_recover_buf, 0, dlm_config.buffer_size);
  129 +
113 130 send_rcom(ls, mh, rc);
114 131  
115 132 error = dlm_wait_function(ls, &rcom_response);
116   - clear_bit(LSFL_RCOM_READY, &ls->ls_flags);
  133 + disallow_sync_reply(ls);
117 134 if (error)
118 135 goto out;
119 136  
120 137  
121 138  
... ... @@ -150,14 +167,20 @@
150 167  
151 168 static void receive_sync_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
152 169 {
153   - if (rc_in->rc_id != ls->ls_rcom_seq) {
154   - log_debug(ls, "reject old reply %d got %llx wanted %llx",
155   - rc_in->rc_type, rc_in->rc_id, ls->ls_rcom_seq);
156   - return;
  170 + spin_lock(&ls->ls_rcom_spin);
  171 + if (!test_bit(LSFL_RCOM_WAIT, &ls->ls_flags) ||
  172 + rc_in->rc_id != ls->ls_rcom_seq) {
  173 + log_debug(ls, "reject reply %d from %d seq %llx expect %llx",
  174 + rc_in->rc_type, rc_in->rc_header.h_nodeid,
  175 + rc_in->rc_id, ls->ls_rcom_seq);
  176 + goto out;
157 177 }
158 178 memcpy(ls->ls_recover_buf, rc_in, rc_in->rc_header.h_length);
159 179 set_bit(LSFL_RCOM_READY, &ls->ls_flags);
  180 + clear_bit(LSFL_RCOM_WAIT, &ls->ls_flags);
160 181 wake_up(&ls->ls_wait_general);
  182 + out:
  183 + spin_unlock(&ls->ls_rcom_spin);
161 184 }
162 185  
163 186 static void receive_rcom_status_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
... ... @@ -171,7 +194,6 @@
171 194 struct dlm_mhandle *mh;
172 195 int error = 0, len = sizeof(struct dlm_rcom);
173 196  
174   - memset(ls->ls_recover_buf, 0, dlm_config.buffer_size);
175 197 ls->ls_recover_nodeid = nodeid;
176 198  
177 199 if (nodeid == dlm_our_nodeid()) {
178 200  
179 201  
... ... @@ -185,12 +207,14 @@
185 207 if (error)
186 208 goto out;
187 209 memcpy(rc->rc_buf, last_name, last_len);
188   - rc->rc_id = ++ls->ls_rcom_seq;
189 210  
  211 + allow_sync_reply(ls, &rc->rc_id);
  212 + memset(ls->ls_recover_buf, 0, dlm_config.buffer_size);
  213 +
190 214 send_rcom(ls, mh, rc);
191 215  
192 216 error = dlm_wait_function(ls, &rcom_response);
193   - clear_bit(LSFL_RCOM_READY, &ls->ls_flags);
  217 + disallow_sync_reply(ls);
194 218 out:
195 219 return error;
196 220 }