Commit 44ad37d69b2cc421d5b5c7ad7fed16230685b092

Authored by Bob Peterson
Committed by Steven Whitehouse
1 parent 001e8e8df4

GFS2: filesystem hang caused by incorrect lock order

This patch fixes a deadlock in GFS2 where two processes are trying
to reclaim an unlinked dinode:
One holds the inode glock and calls gfs2_lookup_by_inum trying to look
up the inode, which it can't, due to I_FREEING.  The other has set
I_FREEING from vfs and is at the beginning of gfs2_delete_inode
waiting for the glock, which is held by the first.  The solution is to
add a new non_block parameter to the gfs2_iget function that causes it
to return -ENOENT if the inode is being freed.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

Showing 6 changed files with 55 additions and 21 deletions Side-by-side Diff

... ... @@ -1506,7 +1506,7 @@
1506 1506 inode = gfs2_inode_lookup(dir->i_sb,
1507 1507 be16_to_cpu(dent->de_type),
1508 1508 be64_to_cpu(dent->de_inum.no_addr),
1509   - be64_to_cpu(dent->de_inum.no_formal_ino));
  1509 + be64_to_cpu(dent->de_inum.no_formal_ino), 0);
1510 1510 brelse(bh);
1511 1511 return inode;
1512 1512 }
... ... @@ -40,37 +40,61 @@
40 40 u64 ir_length;
41 41 };
42 42  
  43 +struct gfs2_skip_data {
  44 + u64 no_addr;
  45 + int skipped;
  46 + int non_block;
  47 +};
  48 +
43 49 static int iget_test(struct inode *inode, void *opaque)
44 50 {
45 51 struct gfs2_inode *ip = GFS2_I(inode);
46   - u64 *no_addr = opaque;
  52 + struct gfs2_skip_data *data = opaque;
47 53  
48   - if (ip->i_no_addr == *no_addr)
  54 + if (ip->i_no_addr == data->no_addr) {
  55 + if (data->non_block &&
  56 + inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) {
  57 + data->skipped = 1;
  58 + return 0;
  59 + }
49 60 return 1;
50   -
  61 + }
51 62 return 0;
52 63 }
53 64  
54 65 static int iget_set(struct inode *inode, void *opaque)
55 66 {
56 67 struct gfs2_inode *ip = GFS2_I(inode);
57   - u64 *no_addr = opaque;
  68 + struct gfs2_skip_data *data = opaque;
58 69  
59   - inode->i_ino = (unsigned long)*no_addr;
60   - ip->i_no_addr = *no_addr;
  70 + if (data->skipped)
  71 + return -ENOENT;
  72 + inode->i_ino = (unsigned long)(data->no_addr);
  73 + ip->i_no_addr = data->no_addr;
61 74 return 0;
62 75 }
63 76  
64 77 struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr)
65 78 {
66 79 unsigned long hash = (unsigned long)no_addr;
67   - return ilookup5(sb, hash, iget_test, &no_addr);
  80 + struct gfs2_skip_data data;
  81 +
  82 + data.no_addr = no_addr;
  83 + data.skipped = 0;
  84 + data.non_block = 0;
  85 + return ilookup5(sb, hash, iget_test, &data);
68 86 }
69 87  
70   -static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr)
  88 +static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr,
  89 + int non_block)
71 90 {
  91 + struct gfs2_skip_data data;
72 92 unsigned long hash = (unsigned long)no_addr;
73   - return iget5_locked(sb, hash, iget_test, iget_set, &no_addr);
  93 +
  94 + data.no_addr = no_addr;
  95 + data.skipped = 0;
  96 + data.non_block = non_block;
  97 + return iget5_locked(sb, hash, iget_test, iget_set, &data);
74 98 }
75 99  
76 100 /**
77 101  
78 102  
... ... @@ -111,19 +135,20 @@
111 135 * @sb: The super block
112 136 * @no_addr: The inode number
113 137 * @type: The type of the inode
  138 + * non_block: Can we block on inodes that are being freed?
114 139 *
115 140 * Returns: A VFS inode, or an error
116 141 */
117 142  
118 143 struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
119   - u64 no_addr, u64 no_formal_ino)
  144 + u64 no_addr, u64 no_formal_ino, int non_block)
120 145 {
121 146 struct inode *inode;
122 147 struct gfs2_inode *ip;
123 148 struct gfs2_glock *io_gl = NULL;
124 149 int error;
125 150  
126   - inode = gfs2_iget(sb, no_addr);
  151 + inode = gfs2_iget(sb, no_addr, non_block);
127 152 ip = GFS2_I(inode);
128 153  
129 154 if (!inode)
130 155  
131 156  
... ... @@ -185,11 +210,12 @@
185 210 {
186 211 struct super_block *sb = sdp->sd_vfs;
187 212 struct gfs2_holder i_gh;
188   - struct inode *inode;
  213 + struct inode *inode = NULL;
189 214 int error;
190 215  
  216 + /* Must not read in block until block type is verified */
191 217 error = gfs2_glock_nq_num(sdp, no_addr, &gfs2_inode_glops,
192   - LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
  218 + LM_ST_EXCLUSIVE, GL_SKIP, &i_gh);
193 219 if (error)
194 220 return ERR_PTR(error);
195 221  
... ... @@ -197,7 +223,7 @@
197 223 if (error)
198 224 goto fail;
199 225  
200   - inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0);
  226 + inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0, 1);
201 227 if (IS_ERR(inode))
202 228 goto fail;
203 229  
... ... @@ -843,7 +869,7 @@
843 869 goto fail_gunlock2;
844 870  
845 871 inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode), inum.no_addr,
846   - inum.no_formal_ino);
  872 + inum.no_formal_ino, 0);
847 873 if (IS_ERR(inode))
848 874 goto fail_gunlock2;
849 875  
... ... @@ -97,7 +97,8 @@
97 97 }
98 98  
99 99 extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type,
100   - u64 no_addr, u64 no_formal_ino);
  100 + u64 no_addr, u64 no_formal_ino,
  101 + int non_block);
101 102 extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
102 103 u64 *no_formal_ino,
103 104 unsigned int blktype);
fs/gfs2/ops_fstype.c
... ... @@ -430,7 +430,7 @@
430 430 struct dentry *dentry;
431 431 struct inode *inode;
432 432  
433   - inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0);
  433 + inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0);
434 434 if (IS_ERR(inode)) {
435 435 fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode));
436 436 return PTR_ERR(inode);
... ... @@ -945,7 +945,7 @@
945 945 /* rgblk_search can return a block < goal, so we need to
946 946 keep it marching forward. */
947 947 no_addr = block + rgd->rd_data0;
948   - goal++;
  948 + goal = max(block + 1, goal + 1);
949 949 if (*last_unlinked != NO_BLOCK && no_addr <= *last_unlinked)
950 950 continue;
951 951 if (no_addr == skip)
... ... @@ -971,7 +971,7 @@
971 971 found++;
972 972  
973 973 /* Limit reclaim to sensible number of tasks */
974   - if (found > 2*NR_CPUS)
  974 + if (found > NR_CPUS)
975 975 return;
976 976 }
977 977  
... ... @@ -1327,7 +1327,8 @@
1327 1327 if (inode->i_nlink || (sb->s_flags & MS_RDONLY))
1328 1328 goto out;
1329 1329  
1330   - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
  1330 + /* Must not read inode block until block type has been verified */
  1331 + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, &gh);
1331 1332 if (unlikely(error)) {
1332 1333 gfs2_glock_dq_uninit(&ip->i_iopen_gh);
1333 1334 goto out;
... ... @@ -1336,6 +1337,12 @@
1336 1337 error = gfs2_check_blk_type(sdp, ip->i_no_addr, GFS2_BLKST_UNLINKED);
1337 1338 if (error)
1338 1339 goto out_truncate;
  1340 +
  1341 + if (test_bit(GIF_INVALID, &ip->i_flags)) {
  1342 + error = gfs2_inode_refresh(ip);
  1343 + if (error)
  1344 + goto out_truncate;
  1345 + }
1339 1346  
1340 1347 ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
1341 1348 gfs2_glock_dq_wait(&ip->i_iopen_gh);