Commit 44ad37d69b2cc421d5b5c7ad7fed16230685b092
Committed by
Steven Whitehouse
1 parent
001e8e8df4
GFS2: filesystem hang caused by incorrect lock order
This patch fixes a deadlock in GFS2 where two processes are trying to reclaim an unlinked dinode: One holds the inode glock and calls gfs2_lookup_by_inum trying to look up the inode, which it can't, due to I_FREEING. The other has set I_FREEING from vfs and is at the beginning of gfs2_delete_inode waiting for the glock, which is held by the first. The solution is to add a new non_block parameter to the gfs2_iget function that causes it to return -ENOENT if the inode is being freed. Signed-off-by: Bob Peterson <rpeterso@redhat.com> Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Showing 6 changed files with 55 additions and 21 deletions Side-by-side Diff
fs/gfs2/dir.c
... | ... | @@ -1506,7 +1506,7 @@ |
1506 | 1506 | inode = gfs2_inode_lookup(dir->i_sb, |
1507 | 1507 | be16_to_cpu(dent->de_type), |
1508 | 1508 | be64_to_cpu(dent->de_inum.no_addr), |
1509 | - be64_to_cpu(dent->de_inum.no_formal_ino)); | |
1509 | + be64_to_cpu(dent->de_inum.no_formal_ino), 0); | |
1510 | 1510 | brelse(bh); |
1511 | 1511 | return inode; |
1512 | 1512 | } |
fs/gfs2/inode.c
... | ... | @@ -40,37 +40,61 @@ |
40 | 40 | u64 ir_length; |
41 | 41 | }; |
42 | 42 | |
43 | +struct gfs2_skip_data { | |
44 | + u64 no_addr; | |
45 | + int skipped; | |
46 | + int non_block; | |
47 | +}; | |
48 | + | |
43 | 49 | static int iget_test(struct inode *inode, void *opaque) |
44 | 50 | { |
45 | 51 | struct gfs2_inode *ip = GFS2_I(inode); |
46 | - u64 *no_addr = opaque; | |
52 | + struct gfs2_skip_data *data = opaque; | |
47 | 53 | |
48 | - if (ip->i_no_addr == *no_addr) | |
54 | + if (ip->i_no_addr == data->no_addr) { | |
55 | + if (data->non_block && | |
56 | + inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) { | |
57 | + data->skipped = 1; | |
58 | + return 0; | |
59 | + } | |
49 | 60 | return 1; |
50 | - | |
61 | + } | |
51 | 62 | return 0; |
52 | 63 | } |
53 | 64 | |
54 | 65 | static int iget_set(struct inode *inode, void *opaque) |
55 | 66 | { |
56 | 67 | struct gfs2_inode *ip = GFS2_I(inode); |
57 | - u64 *no_addr = opaque; | |
68 | + struct gfs2_skip_data *data = opaque; | |
58 | 69 | |
59 | - inode->i_ino = (unsigned long)*no_addr; | |
60 | - ip->i_no_addr = *no_addr; | |
70 | + if (data->skipped) | |
71 | + return -ENOENT; | |
72 | + inode->i_ino = (unsigned long)(data->no_addr); | |
73 | + ip->i_no_addr = data->no_addr; | |
61 | 74 | return 0; |
62 | 75 | } |
63 | 76 | |
64 | 77 | struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr) |
65 | 78 | { |
66 | 79 | unsigned long hash = (unsigned long)no_addr; |
67 | - return ilookup5(sb, hash, iget_test, &no_addr); | |
80 | + struct gfs2_skip_data data; | |
81 | + | |
82 | + data.no_addr = no_addr; | |
83 | + data.skipped = 0; | |
84 | + data.non_block = 0; | |
85 | + return ilookup5(sb, hash, iget_test, &data); | |
68 | 86 | } |
69 | 87 | |
70 | -static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr) | |
88 | +static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr, | |
89 | + int non_block) | |
71 | 90 | { |
91 | + struct gfs2_skip_data data; | |
72 | 92 | unsigned long hash = (unsigned long)no_addr; |
73 | - return iget5_locked(sb, hash, iget_test, iget_set, &no_addr); | |
93 | + | |
94 | + data.no_addr = no_addr; | |
95 | + data.skipped = 0; | |
96 | + data.non_block = non_block; | |
97 | + return iget5_locked(sb, hash, iget_test, iget_set, &data); | |
74 | 98 | } |
75 | 99 | |
76 | 100 | /** |
77 | 101 | |
78 | 102 | |
... | ... | @@ -111,19 +135,20 @@ |
111 | 135 | * @sb: The super block |
112 | 136 | * @no_addr: The inode number |
113 | 137 | * @type: The type of the inode |
138 | + * non_block: Can we block on inodes that are being freed? | |
114 | 139 | * |
115 | 140 | * Returns: A VFS inode, or an error |
116 | 141 | */ |
117 | 142 | |
118 | 143 | struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, |
119 | - u64 no_addr, u64 no_formal_ino) | |
144 | + u64 no_addr, u64 no_formal_ino, int non_block) | |
120 | 145 | { |
121 | 146 | struct inode *inode; |
122 | 147 | struct gfs2_inode *ip; |
123 | 148 | struct gfs2_glock *io_gl = NULL; |
124 | 149 | int error; |
125 | 150 | |
126 | - inode = gfs2_iget(sb, no_addr); | |
151 | + inode = gfs2_iget(sb, no_addr, non_block); | |
127 | 152 | ip = GFS2_I(inode); |
128 | 153 | |
129 | 154 | if (!inode) |
130 | 155 | |
131 | 156 | |
... | ... | @@ -185,11 +210,12 @@ |
185 | 210 | { |
186 | 211 | struct super_block *sb = sdp->sd_vfs; |
187 | 212 | struct gfs2_holder i_gh; |
188 | - struct inode *inode; | |
213 | + struct inode *inode = NULL; | |
189 | 214 | int error; |
190 | 215 | |
216 | + /* Must not read in block until block type is verified */ | |
191 | 217 | error = gfs2_glock_nq_num(sdp, no_addr, &gfs2_inode_glops, |
192 | - LM_ST_SHARED, LM_FLAG_ANY, &i_gh); | |
218 | + LM_ST_EXCLUSIVE, GL_SKIP, &i_gh); | |
193 | 219 | if (error) |
194 | 220 | return ERR_PTR(error); |
195 | 221 | |
... | ... | @@ -197,7 +223,7 @@ |
197 | 223 | if (error) |
198 | 224 | goto fail; |
199 | 225 | |
200 | - inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0); | |
226 | + inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0, 1); | |
201 | 227 | if (IS_ERR(inode)) |
202 | 228 | goto fail; |
203 | 229 | |
... | ... | @@ -843,7 +869,7 @@ |
843 | 869 | goto fail_gunlock2; |
844 | 870 | |
845 | 871 | inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode), inum.no_addr, |
846 | - inum.no_formal_ino); | |
872 | + inum.no_formal_ino, 0); | |
847 | 873 | if (IS_ERR(inode)) |
848 | 874 | goto fail_gunlock2; |
849 | 875 |
fs/gfs2/inode.h
... | ... | @@ -97,7 +97,8 @@ |
97 | 97 | } |
98 | 98 | |
99 | 99 | extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, |
100 | - u64 no_addr, u64 no_formal_ino); | |
100 | + u64 no_addr, u64 no_formal_ino, | |
101 | + int non_block); | |
101 | 102 | extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, |
102 | 103 | u64 *no_formal_ino, |
103 | 104 | unsigned int blktype); |
fs/gfs2/ops_fstype.c
... | ... | @@ -430,7 +430,7 @@ |
430 | 430 | struct dentry *dentry; |
431 | 431 | struct inode *inode; |
432 | 432 | |
433 | - inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0); | |
433 | + inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0); | |
434 | 434 | if (IS_ERR(inode)) { |
435 | 435 | fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode)); |
436 | 436 | return PTR_ERR(inode); |
fs/gfs2/rgrp.c
... | ... | @@ -945,7 +945,7 @@ |
945 | 945 | /* rgblk_search can return a block < goal, so we need to |
946 | 946 | keep it marching forward. */ |
947 | 947 | no_addr = block + rgd->rd_data0; |
948 | - goal++; | |
948 | + goal = max(block + 1, goal + 1); | |
949 | 949 | if (*last_unlinked != NO_BLOCK && no_addr <= *last_unlinked) |
950 | 950 | continue; |
951 | 951 | if (no_addr == skip) |
... | ... | @@ -971,7 +971,7 @@ |
971 | 971 | found++; |
972 | 972 | |
973 | 973 | /* Limit reclaim to sensible number of tasks */ |
974 | - if (found > 2*NR_CPUS) | |
974 | + if (found > NR_CPUS) | |
975 | 975 | return; |
976 | 976 | } |
977 | 977 |
fs/gfs2/super.c
... | ... | @@ -1327,7 +1327,8 @@ |
1327 | 1327 | if (inode->i_nlink || (sb->s_flags & MS_RDONLY)) |
1328 | 1328 | goto out; |
1329 | 1329 | |
1330 | - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); | |
1330 | + /* Must not read inode block until block type has been verified */ | |
1331 | + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, &gh); | |
1331 | 1332 | if (unlikely(error)) { |
1332 | 1333 | gfs2_glock_dq_uninit(&ip->i_iopen_gh); |
1333 | 1334 | goto out; |
... | ... | @@ -1336,6 +1337,12 @@ |
1336 | 1337 | error = gfs2_check_blk_type(sdp, ip->i_no_addr, GFS2_BLKST_UNLINKED); |
1337 | 1338 | if (error) |
1338 | 1339 | goto out_truncate; |
1340 | + | |
1341 | + if (test_bit(GIF_INVALID, &ip->i_flags)) { | |
1342 | + error = gfs2_inode_refresh(ip); | |
1343 | + if (error) | |
1344 | + goto out_truncate; | |
1345 | + } | |
1339 | 1346 | |
1340 | 1347 | ip->i_iopen_gh.gh_flags |= GL_NOCACHE; |
1341 | 1348 | gfs2_glock_dq_wait(&ip->i_iopen_gh); |