Commit f49f9baac8f63de9cbc17a0a84e04060496e8e76
Committed by
Trond Myklebust
1 parent
9f52c2525e
Exists in
master
and in
7 other branches
pnfs: fix pnfs lock inversion of i_lock and cl_lock
The pnfs code was using throughout the lock order i_lock, cl_lock. This conflicts with the nfs delegation code. Rework the pnfs code to avoid taking both locks simultaneously. Currently the code takes the double lock to add/remove the layout to a nfs_client list, while atomically checking that the list of lsegs is empty. To avoid this, we rely on existing serializations. When a layout is initialized with lseg count equal zero, LAYOUTGET's openstateid serialization is in effect, making it safe to assume it stays zero unless we change it. And once a layout's lseg count drops to zero, it is set as DESTROYED and so will stay at zero. Signed-off-by: Fred Isaman <iisaman@netapp.com> Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Showing 2 changed files with 26 additions and 18 deletions Side-by-side Diff
fs/nfs/callback_proc.c
... | ... | @@ -188,10 +188,10 @@ |
188 | 188 | rv = NFS4ERR_DELAY; |
189 | 189 | list_del_init(&lo->plh_bulk_recall); |
190 | 190 | spin_unlock(&ino->i_lock); |
191 | + pnfs_free_lseg_list(&free_me_list); | |
191 | 192 | put_layout_hdr(lo); |
192 | 193 | iput(ino); |
193 | 194 | } |
194 | - pnfs_free_lseg_list(&free_me_list); | |
195 | 195 | return rv; |
196 | 196 | } |
197 | 197 |
fs/nfs/pnfs.c
... | ... | @@ -247,13 +247,6 @@ |
247 | 247 | BUG_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); |
248 | 248 | list_del(&lseg->pls_list); |
249 | 249 | if (list_empty(&lseg->pls_layout->plh_segs)) { |
250 | - struct nfs_client *clp; | |
251 | - | |
252 | - clp = NFS_SERVER(ino)->nfs_client; | |
253 | - spin_lock(&clp->cl_lock); | |
254 | - /* List does not take a reference, so no need for put here */ | |
255 | - list_del_init(&lseg->pls_layout->plh_layouts); | |
256 | - spin_unlock(&clp->cl_lock); | |
257 | 250 | set_bit(NFS_LAYOUT_DESTROYED, &lseg->pls_layout->plh_flags); |
258 | 251 | /* Matched by initial refcount set in alloc_init_layout_hdr */ |
259 | 252 | put_layout_hdr_locked(lseg->pls_layout); |
260 | 253 | |
261 | 254 | |
... | ... | @@ -319,11 +312,27 @@ |
319 | 312 | return invalid - removed; |
320 | 313 | } |
321 | 314 | |
315 | +/* note free_me must contain lsegs from a single layout_hdr */ | |
322 | 316 | void |
323 | 317 | pnfs_free_lseg_list(struct list_head *free_me) |
324 | 318 | { |
325 | 319 | struct pnfs_layout_segment *lseg, *tmp; |
320 | + struct pnfs_layout_hdr *lo; | |
326 | 321 | |
322 | + if (list_empty(free_me)) | |
323 | + return; | |
324 | + | |
325 | + lo = list_first_entry(free_me, struct pnfs_layout_segment, | |
326 | + pls_list)->pls_layout; | |
327 | + | |
328 | + if (test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) { | |
329 | + struct nfs_client *clp; | |
330 | + | |
331 | + clp = NFS_SERVER(lo->plh_inode)->nfs_client; | |
332 | + spin_lock(&clp->cl_lock); | |
333 | + list_del_init(&lo->plh_layouts); | |
334 | + spin_unlock(&clp->cl_lock); | |
335 | + } | |
327 | 336 | list_for_each_entry_safe(lseg, tmp, free_me, pls_list) { |
328 | 337 | list_del(&lseg->pls_list); |
329 | 338 | free_lseg(lseg); |
... | ... | @@ -705,6 +714,7 @@ |
705 | 714 | struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; |
706 | 715 | struct pnfs_layout_hdr *lo; |
707 | 716 | struct pnfs_layout_segment *lseg = NULL; |
717 | + bool first = false; | |
708 | 718 | |
709 | 719 | if (!pnfs_enabled_sb(NFS_SERVER(ino))) |
710 | 720 | return NULL; |
... | ... | @@ -735,7 +745,10 @@ |
735 | 745 | atomic_inc(&lo->plh_outstanding); |
736 | 746 | |
737 | 747 | get_layout_hdr(lo); |
738 | - if (list_empty(&lo->plh_segs)) { | |
748 | + if (list_empty(&lo->plh_segs)) | |
749 | + first = true; | |
750 | + spin_unlock(&ino->i_lock); | |
751 | + if (first) { | |
739 | 752 | /* The lo must be on the clp list if there is any |
740 | 753 | * chance of a CB_LAYOUTRECALL(FILE) coming in. |
741 | 754 | */ |
742 | 755 | |
... | ... | @@ -744,17 +757,12 @@ |
744 | 757 | list_add_tail(&lo->plh_layouts, &clp->cl_layouts); |
745 | 758 | spin_unlock(&clp->cl_lock); |
746 | 759 | } |
747 | - spin_unlock(&ino->i_lock); | |
748 | 760 | |
749 | 761 | lseg = send_layoutget(lo, ctx, iomode); |
750 | - if (!lseg) { | |
751 | - spin_lock(&ino->i_lock); | |
752 | - if (list_empty(&lo->plh_segs)) { | |
753 | - spin_lock(&clp->cl_lock); | |
754 | - list_del_init(&lo->plh_layouts); | |
755 | - spin_unlock(&clp->cl_lock); | |
756 | - } | |
757 | - spin_unlock(&ino->i_lock); | |
762 | + if (!lseg && first) { | |
763 | + spin_lock(&clp->cl_lock); | |
764 | + list_del_init(&lo->plh_layouts); | |
765 | + spin_unlock(&clp->cl_lock); | |
758 | 766 | } |
759 | 767 | atomic_dec(&lo->plh_outstanding); |
760 | 768 | put_layout_hdr(lo); |