Commit a19128260107f951d1b4c421cf98b92f8092b069
Committed by
Joel Becker
1 parent
0b94a909eb
Exists in
master
and in
7 other branches
ocfs2: Prevent a livelock in dlmglue
There is possibility of a livelock in __ocfs2_cluster_lock(). If a node were to get an ast for an upconvert request, followed immediately by a bast, there is a small window where the fs may downconvert the lock before the process requesting the upconvert is able to take the lock. This patch adds a new flag to indicate that the upconvert is still in progress and that the dc thread should not downconvert it right now. Wengang Wang <wen.gang.wang@oracle.com> and Joel Becker <joel.becker@oracle.com> contributed heavily to this patch. Reported-by: David Teigland <teigland@redhat.com> Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com> Signed-off-by: Joel Becker <joel.becker@oracle.com>
Showing 2 changed files with 50 additions and 3 deletions Side-by-side Diff
fs/ocfs2/dlmglue.c
... | ... | @@ -875,6 +875,14 @@ |
875 | 875 | lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); |
876 | 876 | |
877 | 877 | lockres->l_level = lockres->l_requested; |
878 | + | |
879 | + /* | |
880 | + * We set the OCFS2_LOCK_UPCONVERT_FINISHING flag before clearing | |
881 | + * the OCFS2_LOCK_BUSY flag to prevent the dc thread from | |
882 | + * downconverting the lock before the upconvert has fully completed. | |
883 | + */ | |
884 | + lockres_or_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); | |
885 | + | |
878 | 886 | lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); |
879 | 887 | |
880 | 888 | mlog_exit_void(); |
... | ... | @@ -1134,6 +1142,7 @@ |
1134 | 1142 | mlog_entry_void(); |
1135 | 1143 | spin_lock_irqsave(&lockres->l_lock, flags); |
1136 | 1144 | lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); |
1145 | + lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); | |
1137 | 1146 | if (convert) |
1138 | 1147 | lockres->l_action = OCFS2_AST_INVALID; |
1139 | 1148 | else |
1140 | 1149 | |
1141 | 1150 | |
... | ... | @@ -1324,13 +1333,13 @@ |
1324 | 1333 | again: |
1325 | 1334 | wait = 0; |
1326 | 1335 | |
1336 | + spin_lock_irqsave(&lockres->l_lock, flags); | |
1337 | + | |
1327 | 1338 | if (catch_signals && signal_pending(current)) { |
1328 | 1339 | ret = -ERESTARTSYS; |
1329 | - goto out; | |
1340 | + goto unlock; | |
1330 | 1341 | } |
1331 | 1342 | |
1332 | - spin_lock_irqsave(&lockres->l_lock, flags); | |
1333 | - | |
1334 | 1343 | mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING, |
1335 | 1344 | "Cluster lock called on freeing lockres %s! flags " |
1336 | 1345 | "0x%lx\n", lockres->l_name, lockres->l_flags); |
... | ... | @@ -1347,6 +1356,25 @@ |
1347 | 1356 | goto unlock; |
1348 | 1357 | } |
1349 | 1358 | |
1359 | + if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) { | |
1360 | + /* | |
1361 | + * We've upconverted. If the lock now has a level we can | |
1362 | + * work with, we take it. If, however, the lock is not at the | |
1363 | + * required level, we go thru the full cycle. One way this could | |
1364 | + * happen is if a process requesting an upconvert to PR is | |
1365 | + * closely followed by another requesting upconvert to an EX. | |
1366 | + * If the process requesting EX lands here, we want it to | |
1367 | + * continue attempting to upconvert and let the process | |
1368 | + * requesting PR take the lock. | |
1369 | + * If multiple processes request upconvert to PR, the first one | |
1370 | + * here will take the lock. The others will have to go thru the | |
1371 | + * OCFS2_LOCK_BLOCKED check to ensure that there is no pending | |
1372 | + * downconvert request. | |
1373 | + */ | |
1374 | + if (level <= lockres->l_level) | |
1375 | + goto update_holders; | |
1376 | + } | |
1377 | + | |
1350 | 1378 | if (lockres->l_flags & OCFS2_LOCK_BLOCKED && |
1351 | 1379 | !ocfs2_may_continue_on_blocked_lock(lockres, level)) { |
1352 | 1380 | /* is the lock is currently blocked on behalf of |
1353 | 1381 | |
... | ... | @@ -1417,11 +1445,14 @@ |
1417 | 1445 | goto again; |
1418 | 1446 | } |
1419 | 1447 | |
1448 | +update_holders: | |
1420 | 1449 | /* Ok, if we get here then we're good to go. */ |
1421 | 1450 | ocfs2_inc_holders(lockres, level); |
1422 | 1451 | |
1423 | 1452 | ret = 0; |
1424 | 1453 | unlock: |
1454 | + lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); | |
1455 | + | |
1425 | 1456 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
1426 | 1457 | out: |
1427 | 1458 | /* |
... | ... | @@ -3401,6 +3432,18 @@ |
3401 | 3432 | } |
3402 | 3433 | goto leave; |
3403 | 3434 | } |
3435 | + | |
3436 | + /* | |
3437 | + * This prevents livelocks. OCFS2_LOCK_UPCONVERT_FINISHING flag is | |
3438 | + * set when the ast is received for an upconvert just before the | |
3439 | + * OCFS2_LOCK_BUSY flag is cleared. Now if the fs received a bast | |
3440 | + * on the heels of the ast, we want to delay the downconvert just | |
3441 | + * enough to allow the up requestor to do its task. Because this | |
3442 | + * lock is in the blocked queue, the lock will be downconverted | |
3443 | + * as soon as the requestor is done with the lock. | |
3444 | + */ | |
3445 | + if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) | |
3446 | + goto leave_requeue; | |
3404 | 3447 | |
3405 | 3448 | /* if we're blocking an exclusive and we have *any* holders, |
3406 | 3449 | * then requeue. */ |
fs/ocfs2/ocfs2.h
... | ... | @@ -136,6 +136,10 @@ |
136 | 136 | #define OCFS2_LOCK_PENDING (0x00000400) /* This lockres is pending a |
137 | 137 | call to dlm_lock. Only |
138 | 138 | exists with BUSY set. */ |
139 | +#define OCFS2_LOCK_UPCONVERT_FINISHING (0x00000800) /* blocks the dc thread | |
140 | + * from downconverting | |
141 | + * before the upconvert | |
142 | + * has completed */ | |
139 | 143 | |
140 | 144 | struct ocfs2_lock_res_ops; |
141 | 145 |