fs/xfs/xfs_sync.c

  /*
   * Copyright (c) 2000-2005 Silicon Graphics, Inc.
   * All Rights Reserved.
   *
   * This program is free software; you can redistribute it and/or
   * modify it under the terms of the GNU General Public License as
   * published by the Free Software Foundation.
   *
   * This program is distributed in the hope that it would be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   * GNU General Public License for more details.
   *
   * You should have received a copy of the GNU General Public License
   * along with this program; if not, write the Free Software Foundation,
   * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
   */
  #include "xfs.h"
  #include "xfs_fs.h"
  #include "xfs_types.h"
  #include "xfs_bit.h"
  #include "xfs_log.h"
  #include "xfs_inum.h"
  #include "xfs_trans.h"
  #include "xfs_trans_priv.h"
  #include "xfs_sb.h"
  #include "xfs_ag.h"
  #include "xfs_mount.h"
  #include "xfs_bmap_btree.h"
  #include "xfs_inode.h"
  #include "xfs_dinode.h"
  #include "xfs_error.h"
  #include "xfs_filestream.h"
  #include "xfs_vnodeops.h"
  #include "xfs_inode_item.h"
  #include "xfs_quota.h"
  #include "xfs_trace.h"
  #include "xfs_fsops.h"

  #include <linux/kthread.h>
  #include <linux/freezer.h>
  struct workqueue_struct	*xfs_syncd_wq;	/* sync workqueue */
  /*
   * The inode lookup is done in batches to keep the amount of lock traffic and
   * radix tree lookups to a minimum. The batch size is a trade off between
   * lookup reduction and stack usage. This is in the reclaim path, so we can't
   * be too greedy.
   */
  #define XFS_LOOKUP_BATCH	32
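
  /*
   * Grab a reference to an inode found during an AG walk so it can be passed
   * to the walk callback.  Returns 0 on success; ENOENT if the inode is newly
   * created, queued for reclaim or cannot be grabbed; EFSCORRUPTED if the
   * filesystem is shutting down.
   */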
  STATIC int
  xfs_inode_ag_walk_grab(
  	struct xfs_inode	*ip)
  {
  	struct inode		*inode = VFS_I(ip);
  	ASSERT(rcu_read_lock_held());
  
  	/*
  	 * check for stale RCU freed inode
  	 *
  	 * If the inode has been reallocated, it doesn't matter if it's not in
  	 * the AG we are walking - we are walking for writeback, so if it
  	 * passes all the "valid inode" checks and is dirty, then we'll write
  	 * it back anyway.  If it has been reallocated and is still being
  	 * initialised, the XFS_INEW check below will catch it.
  	 */
  	spin_lock(&ip->i_flags_lock);
  	if (!ip->i_ino)
  		goto out_unlock_noent;
  
  	/* avoid new or reclaimable inodes. Leave for reclaim code to flush */
  	if (__xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
  		goto out_unlock_noent;
  	spin_unlock(&ip->i_flags_lock);
  	/* nothing to sync during shutdown */
  	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
  		return EFSCORRUPTED;
  	/* If we can't grab the inode, it must be on its way to reclaim. */
  	if (!igrab(inode))
  		return ENOENT;
  
  	if (is_bad_inode(inode)) {
  		IRELE(ip);
  		return ENOENT;
  	}
  
  	/* inode is valid */
  	return 0;
  
  out_unlock_noent:
  	spin_unlock(&ip->i_flags_lock);
  	return ENOENT;
  }
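
  /*
   * Walk the incore inodes of one AG in batches of XFS_LOOKUP_BATCH and call
   * @execute on every inode we can grab.  If any call returns EAGAIN the whole
   * AG walk is restarted after a short delay; EFSCORRUPTED aborts the walk.
   */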
  STATIC int
  xfs_inode_ag_walk(
  	struct xfs_mount	*mp,
  	struct xfs_perag	*pag,
  	int			(*execute)(struct xfs_inode *ip,
  					   struct xfs_perag *pag, int flags),
  	int			flags)
  {
  	uint32_t		first_index;
  	int			last_error = 0;
  	int			skipped;
  	int			done;
  	int			nr_found;

  restart:
  	done = 0;
  	skipped = 0;
  	first_index = 0;
  	nr_found = 0;
  	do {
  		struct xfs_inode *batch[XFS_LOOKUP_BATCH];
  		int		error = 0;
  		int		i;

  		rcu_read_lock();
  		nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
  					(void **)batch, first_index,
  					XFS_LOOKUP_BATCH);
  		if (!nr_found) {
  			rcu_read_unlock();
  			break;
  		}

  		/*
  		 * Grab the inodes before we drop the lock. If we found
  		 * nothing, nr == 0 and the loop will be skipped.
  		 */
  		for (i = 0; i < nr_found; i++) {
  			struct xfs_inode *ip = batch[i];

  			if (done || xfs_inode_ag_walk_grab(ip))
  				batch[i] = NULL;

  			/*
  			 * Update the index for the next lookup. Catch
  			 * overflows into the next AG range which can occur if
  			 * we have inodes in the last block of the AG and we
  			 * are currently pointing to the last inode.
  			 *
  			 * Because we may see inodes that are from the wrong AG
  			 * due to RCU freeing and reallocation, only update the
  			 * index if it lies in this AG. It was a race that led
  			 * us to see this inode, so another lookup from the
  			 * same index will not find it again.
  			 */
  			if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno)
  				continue;
  			first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
  			if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
  				done = 1;
  		}

  		/* unlock now we've grabbed the inodes. */
  		rcu_read_unlock();

  		for (i = 0; i < nr_found; i++) {
  			if (!batch[i])
  				continue;
  			error = execute(batch[i], pag, flags);
  			IRELE(batch[i]);
  			if (error == EAGAIN) {
  				skipped++;
  				continue;
  			}
  			if (error && last_error != EFSCORRUPTED)
  				last_error = error;
  		}

  		/* bail out if the filesystem is corrupted.  */
  		if (error == EFSCORRUPTED)
  			break;
  		cond_resched();
  	} while (nr_found && !done);

  	if (skipped) {
  		delay(1);
  		goto restart;
  	}
  	return last_error;
  }
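
  /*
   * Apply @execute to every grabbable incore inode in the filesystem by
   * walking each AG in turn.
   */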
  int
  xfs_inode_ag_iterator(
  	struct xfs_mount	*mp,
  	int			(*execute)(struct xfs_inode *ip,
  					   struct xfs_perag *pag, int flags),
  	int			flags)
  {
  	struct xfs_perag	*pag;
  	int			error = 0;
  	int			last_error = 0;
  	xfs_agnumber_t		ag;

  	ag = 0;
  	while ((pag = xfs_perag_get(mp, ag))) {
  		ag = pag->pag_agno + 1;
  		error = xfs_inode_ag_walk(mp, pag, execute, flags);
  		xfs_perag_put(pag);
  		if (error) {
  			last_error = error;
  			if (error == EFSCORRUPTED)
  				break;
  		}
  	}
  	return XFS_ERROR(last_error);
  }
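
  /*
   * Write back dirty pagecache data for one inode.  SYNC_TRYLOCK skips inodes
   * whose iolock cannot be taken without blocking; SYNC_WAIT makes the page
   * flush synchronous instead of asynchronous.
   */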
  STATIC int
  xfs_sync_inode_data(
  	struct xfs_inode	*ip,
  	struct xfs_perag	*pag,
  	int			flags)
  {
  	struct inode		*inode = VFS_I(ip);
  	struct address_space *mapping = inode->i_mapping;
  	int			error = 0;

  	if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
  		return 0;

  	if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) {
  		if (flags & SYNC_TRYLOCK)
  			return 0;
  		xfs_ilock(ip, XFS_IOLOCK_SHARED);
  	}

  	error = xfs_flush_pages(ip, 0, -1, (flags & SYNC_WAIT) ?
  				0 : XBF_ASYNC, FI_NONE);
  	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
  	return error;
  }
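
  /*
   * Flush dirty inode metadata for one inode.  Without SYNC_WAIT the flush
   * lock is only trylocked, so inodes already under flush are skipped.
   */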
  STATIC int
  xfs_sync_inode_attr(
  	struct xfs_inode	*ip,
  	struct xfs_perag	*pag,
  	int			flags)
  {
  	int			error = 0;
  
  	xfs_ilock(ip, XFS_ILOCK_SHARED);
  	if (xfs_inode_clean(ip))
  		goto out_unlock;
  	if (!xfs_iflock_nowait(ip)) {
  		if (!(flags & SYNC_WAIT))
  			goto out_unlock;
  		xfs_iflock(ip);
  	}
  
  	if (xfs_inode_clean(ip)) {
  		xfs_ifunlock(ip);
  		goto out_unlock;
  	}
  	error = xfs_iflush(ip, flags);

  	/*
  	 * We don't want to try again on non-blocking flushes that can't run
  	 * again immediately. If an inode really must be written, then that's
  	 * what the SYNC_WAIT flag is for.
  	 */
  	if (error == EAGAIN) {
  		ASSERT(!(flags & SYNC_WAIT));
  		error = 0;
  	}
   out_unlock:
  	xfs_iunlock(ip, XFS_ILOCK_SHARED);
  	return error;
  }

  /*
   * Write out pagecache data for the whole filesystem.
   */
  STATIC int
  xfs_sync_data(
  	struct xfs_mount	*mp,
  	int			flags)
  {
  	int			error;

  	ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);

  	error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags);
  	if (error)
  		return XFS_ERROR(error);

  	xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0);
  	return 0;
  }

  /*
   * Write out inode metadata (attributes) for the whole filesystem.
   */
  STATIC int
  xfs_sync_attr(
  	struct xfs_mount	*mp,
  	int			flags)
  {
  	ASSERT((flags & ~SYNC_WAIT) == 0);

  	return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags);
  }
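
  /*
   * Write the superblock buffer to disk, forcing the log first if the buffer
   * is pinned so that the write does not get stuck waiting on the log.
   */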
  STATIC int
  xfs_sync_fsdata(
  	struct xfs_mount	*mp)
  {
  	struct xfs_buf		*bp;
  	int			error;

  	/*
  	 * If the buffer is pinned then push on the log so we won't get stuck
  	 * waiting in the write for someone, maybe ourselves, to flush the log.
  	 *
  	 * Even though we just pushed the log above, we did not have the
  	 * superblock buffer locked at that point so it can become pinned in
  	 * between there and here.
  	 */
  	bp = xfs_getsb(mp, 0);
  	if (xfs_buf_ispinned(bp))
  		xfs_log_force(mp, 0);

  	error = xfs_bwrite(bp);
  	xfs_buf_relse(bp);
  	return error;
  }
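
  /*
   * Log the core of an inode with pending unlogged updates (i_update_core) so
   * that timestamp and size changes make it into the log.  Called for every
   * inode via xfs_inode_ag_iterator() from xfs_quiesce_data().
   */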
  int
  xfs_log_dirty_inode(
  	struct xfs_inode	*ip,
  	struct xfs_perag	*pag,
  	int			flags)
  {
  	struct xfs_mount	*mp = ip->i_mount;
  	struct xfs_trans	*tp;
  	int			error;
  
  	if (!ip->i_update_core)
  		return 0;
  
  	tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
  	error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
  	if (error) {
  		xfs_trans_cancel(tp, 0);
  		return error;
  	}
  
  	xfs_ilock(ip, XFS_ILOCK_EXCL);
  	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
  	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
  	return xfs_trans_commit(tp, 0);
  }

  /*
   * When remounting a filesystem read-only or freezing the filesystem, we have
   * two phases to execute. This first phase is syncing the data before we
   * quiesce the filesystem, and the second is flushing all the inodes out after
   * we've waited for all the transactions created by the first phase to
   * complete. The second phase ensures that the inodes are written to their
   * location on disk rather than just existing in transactions in the log. This
   * means after a quiesce there is no log replay required to write the inodes to
   * disk (this is the main difference between a sync and a quiesce).
   */
  /*
   * First stage of freeze - no writers will make progress now we are here,
   * so we flush delwri and delalloc buffers here, then wait for all I/O to
   * complete.  Data is frozen at that point. Metadata is not frozen,
   * transactions can still occur here so don't bother flushing the buftarg
   * because it'll just get dirty again.
   */
  int
  xfs_quiesce_data(
  	struct xfs_mount	*mp)
  {
  	int			error, error2 = 0;

  	/*
  	 * Log all pending size and timestamp updates.  The vfs writeback
  	 * code is supposed to do this, but due to its overaggressive
  	 * livelock detection it will skip inodes where appending writes
  	 * were written out in the first non-blocking sync phase if their
  	 * completion took long enough that it happened after taking the
  	 * timestamp for the cut-off in the blocking phase.
  	 */
  	xfs_inode_ag_iterator(mp, xfs_log_dirty_inode, 0);
  	/* force out the log */
  	xfs_log_force(mp, XFS_LOG_SYNC);

  	/* write superblock and hoover up shutdown errors */
  	error = xfs_sync_fsdata(mp);

  	/* make sure all delwri buffers are written out */
  	xfs_flush_buftarg(mp->m_ddev_targp, 1);

  	/* mark the log as covered if needed */
  	if (xfs_log_need_covered(mp))
  		error2 = xfs_fs_log_dummy(mp);

  	/* flush data-only devices */
  	if (mp->m_rtdev_targp)
  		xfs_flush_buftarg(mp->m_rtdev_targp, 1);

  	return error ? error : error2;
  }
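
  /*
   * Reclaim inodes and push all dirty metadata buffers out to disk, repeating
   * (at least twice) until a flush pass completes with no pinned buffers.
   */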
  STATIC void
  xfs_quiesce_fs(
  	struct xfs_mount	*mp)
  {
  	int	count = 0, pincount;

  	xfs_reclaim_inodes(mp, 0);
  	xfs_flush_buftarg(mp->m_ddev_targp, 0);

  	/*
  	 * This loop must run at least twice.  The first instance of the loop
  	 * will flush most meta data but that will generate more meta data
  	 * (typically directory updates), which then must be flushed and
  	 * logged before we can write the unmount record. We also do sync
  	 * reclaim of inodes to catch any that the above delwri flush skipped.
  	 */
  	do {
  		xfs_reclaim_inodes(mp, SYNC_WAIT);
  		xfs_sync_attr(mp, SYNC_WAIT);
  		pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
  		if (!pincount) {
  			delay(50);
  			count++;
  		}
  	} while (count < 2);
  }
  
  /*
   * Second stage of a quiesce. The data is already synced, now we have to take
   * care of the metadata. New transactions are already blocked, so we need to
   * wait for any remaining transactions to drain out before proceeding.
   */
  void
  xfs_quiesce_attr(
  	struct xfs_mount	*mp)
  {
  	int	error = 0;
  
  	/* wait for all modifications to complete */
  	while (atomic_read(&mp->m_active_trans) > 0)
  		delay(100);
  
  	/* flush inodes and push all remaining buffers out to disk */
  	xfs_quiesce_fs(mp);
  	/*
  	 * Just warn here till VFS can correctly support
  	 * read-only remount without racing.
  	 */
  	WARN_ON(atomic_read(&mp->m_active_trans) != 0);

  	/* Push the superblock and write an unmount record */
  	error = xfs_log_sbcount(mp);
  	if (error)
  		xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. "
  				"Frozen image may not be consistent.");
  	xfs_log_unmount_write(mp);
  	xfs_unmountfs_writesb(mp);
  }
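
  /*
   * Schedule the next run of the periodic sync work, xfs_syncd_centisecs from
   * now.
   */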
  static void
  xfs_syncd_queue_sync(
  	struct xfs_mount        *mp)
  {
  	queue_delayed_work(xfs_syncd_wq, &mp->m_sync_work,
  				msecs_to_jiffies(xfs_syncd_centisecs * 10));
  }

  /*
   * Every sync period we need to unpin all items, reclaim inodes and sync
   * disk quotas.  We might need to cover the log to indicate that the
   * filesystem is idle and not frozen.
   */
  STATIC void
  xfs_sync_worker(
  	struct work_struct *work)
  {
  	struct xfs_mount *mp = container_of(to_delayed_work(work),
  					struct xfs_mount, m_sync_work);
  	int		error;

  	if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
  		/* dgc: errors ignored here */
  		if (mp->m_super->s_frozen == SB_UNFROZEN &&
  		    xfs_log_need_covered(mp))
  			error = xfs_fs_log_dummy(mp);
  		else
  			xfs_log_force(mp, 0);

  		/* start pushing all the metadata that is currently dirty */
  		xfs_ail_push_all(mp->m_ail);
  	}

  	/* queue us up again */
  	xfs_syncd_queue_sync(mp);
  }

  /*
   * Queue a new inode reclaim pass if there are reclaimable inodes and there
   * isn't a reclaim pass already in progress. By default it runs every 5s based
   * on the xfs syncd work default of 30s. Perhaps this should have its own
   * tunable, but that can be done if this method proves to be ineffective or too
   * aggressive.
   */
  static void
  xfs_syncd_queue_reclaim(
  	struct xfs_mount        *mp)
  {

  	/*
  	 * We can have inodes enter reclaim after we've shut down the syncd
  	 * workqueue during unmount, so don't allow reclaim work to be queued
  	 * during unmount.
  	 */
  	if (!(mp->m_super->s_flags & MS_ACTIVE))
  		return;

  	rcu_read_lock();
  	if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
  		queue_delayed_work(xfs_syncd_wq, &mp->m_reclaim_work,
  			msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
  	}
  	rcu_read_unlock();
  }

  /*
   * This is a fast pass over the inode cache to try to get reclaim moving on as
   * many inodes as possible in a short period of time. It kicks itself every few
   * seconds, as well as being kicked by the inode cache shrinker when memory
   * goes low. It scans as quickly as possible avoiding locked inodes or those
   * already being flushed, and once done schedules a future pass.
   */
  STATIC void
  xfs_reclaim_worker(
  	struct work_struct *work)
  {
  	struct xfs_mount *mp = container_of(to_delayed_work(work),
  					struct xfs_mount, m_reclaim_work);
  
  	xfs_reclaim_inodes(mp, SYNC_TRYLOCK);
  	xfs_syncd_queue_reclaim(mp);
  }
  
  /*
   * Flush delayed allocate data, attempting to free up reserved space
   * from existing allocations.  At this point a new allocation attempt
   * has failed with ENOSPC and we are in the process of scratching our
   * heads, looking about for more room.
   *
   * Queue a new data flush if there isn't one already in progress and
   * wait for completion of the flush. This means that we only ever have one
   * inode flush in progress no matter how many ENOSPC events are occurring and
   * so will prevent the system from bogging down due to every concurrent
   * ENOSPC event scanning all the active inodes in the system for writeback.
   */
  void
  xfs_flush_inodes(
  	struct xfs_inode	*ip)
  {
  	struct xfs_mount	*mp = ip->i_mount;
  
  	queue_work(xfs_syncd_wq, &mp->m_flush_work);
  	flush_work_sync(&mp->m_flush_work);
  }
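
  /*
   * Work function behind xfs_flush_inodes(): flush dirty data across the
   * filesystem, first non-blocking and then waiting for completion.
   */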
  
  STATIC void
  xfs_flush_worker(
  	struct work_struct *work)
  {
  	struct xfs_mount *mp = container_of(work,
  					struct xfs_mount, m_flush_work);
  
  	xfs_sync_data(mp, SYNC_TRYLOCK);
  	xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT);
  }
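
  /*
   * Set up the per-mount sync, reclaim and flush work items and kick off the
   * periodic sync and reclaim work.
   */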
  
  int
  xfs_syncd_init(
  	struct xfs_mount	*mp)
  {
  	INIT_WORK(&mp->m_flush_work, xfs_flush_worker);
  	INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker);
  	INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);

  	xfs_syncd_queue_sync(mp);
  	xfs_syncd_queue_reclaim(mp);

  	return 0;
  }
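
  /*
   * Cancel all outstanding sync, reclaim and flush work for this mount,
   * waiting for any running work items to finish.
   */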
  
  void
  xfs_syncd_stop(
  	struct xfs_mount	*mp)
  {
  	cancel_delayed_work_sync(&mp->m_sync_work);
  	cancel_delayed_work_sync(&mp->m_reclaim_work);
  	cancel_work_sync(&mp->m_flush_work);
  }
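
  /*
   * Tag an inode as reclaimable in the per-AG inode radix tree.  The first
   * reclaimable inode in an AG also propagates the tag into the per-mount
   * perag tree and kicks the background reclaim work.
   */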
  void
  __xfs_inode_set_reclaim_tag(
  	struct xfs_perag	*pag,
  	struct xfs_inode	*ip)
  {
  	radix_tree_tag_set(&pag->pag_ici_root,
  			   XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
  			   XFS_ICI_RECLAIM_TAG);
  
  	if (!pag->pag_ici_reclaimable) {
  		/* propagate the reclaim tag up into the perag radix tree */
  		spin_lock(&ip->i_mount->m_perag_lock);
  		radix_tree_tag_set(&ip->i_mount->m_perag_tree,
  				XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
  				XFS_ICI_RECLAIM_TAG);
  		spin_unlock(&ip->i_mount->m_perag_lock);
  
  		/* schedule periodic background inode reclaim */
  		xfs_syncd_queue_reclaim(ip->i_mount);
  		trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno,
  							-1, _RET_IP_);
  	}
  	pag->pag_ici_reclaimable++;
  }
  /*
   * We set the inode flag atomically with the radix tree tag.
   * Once we get tag lookups on the radix tree, this inode flag
   * can go away.
   */
  void
  xfs_inode_set_reclaim_tag(
  	xfs_inode_t	*ip)
  {
  	struct xfs_mount *mp = ip->i_mount;
  	struct xfs_perag *pag;

  	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
  	spin_lock(&pag->pag_ici_lock);
  	spin_lock(&ip->i_flags_lock);
  	__xfs_inode_set_reclaim_tag(pag, ip);
  	__xfs_iflags_set(ip, XFS_IRECLAIMABLE);
  	spin_unlock(&ip->i_flags_lock);
  	spin_unlock(&pag->pag_ici_lock);
  	xfs_perag_put(pag);
  }
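
  /*
   * Per-AG accounting for an inode leaving reclaim: drop the reclaimable
   * count and clear the perag tree reclaim tag once the AG has no more
   * reclaimable inodes.
   */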
  STATIC void
  __xfs_inode_clear_reclaim(
  	xfs_perag_t	*pag,
  	xfs_inode_t	*ip)
  {
  	pag->pag_ici_reclaimable--;
  	if (!pag->pag_ici_reclaimable) {
  		/* clear the reclaim tag from the perag radix tree */
  		spin_lock(&ip->i_mount->m_perag_lock);
  		radix_tree_tag_clear(&ip->i_mount->m_perag_tree,
  				XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
  				XFS_ICI_RECLAIM_TAG);
  		spin_unlock(&ip->i_mount->m_perag_lock);
  		trace_xfs_perag_clear_reclaim(ip->i_mount, pag->pag_agno,
  							-1, _RET_IP_);
  	}
  }
  void
  __xfs_inode_clear_reclaim_tag(
  	xfs_mount_t	*mp,
  	xfs_perag_t	*pag,
  	xfs_inode_t	*ip)
  {
  	radix_tree_tag_clear(&pag->pag_ici_root,
  			XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
  	__xfs_inode_clear_reclaim(pag, ip);
  }
  /*
   * Grab the inode for reclaim exclusively.
   * Return 0 if we grabbed it, non-zero otherwise.
   */
  STATIC int
  xfs_reclaim_inode_grab(
  	struct xfs_inode	*ip,
  	int			flags)
  {
  	ASSERT(rcu_read_lock_held());
  
  	/* quick check for stale RCU freed inode */
  	if (!ip->i_ino)
  		return 1;

  	/*
  	 * Do some unlocked checks first to avoid unnecessary lock traffic.
  	 * The first is a flush lock check, the second is an already-in-reclaim
  	 * check. Only do these checks if we are not going to block on locks.
  	 */
  	if ((flags & SYNC_TRYLOCK) &&
  	    (!ip->i_flush.done || __xfs_iflags_test(ip, XFS_IRECLAIM))) {
  		return 1;
  	}

  	/*
  	 * The radix tree lock here protects a thread in xfs_iget from racing
  	 * with us starting reclaim on the inode.  Once we have the
  	 * XFS_IRECLAIM flag set it will not touch us.
  	 *
  	 * Due to RCU lookup, we may find inodes that have been freed and only
  	 * have XFS_IRECLAIM set.  Indeed, we may see reallocated inodes that
  	 * aren't candidates for reclaim at all, so we must check the
  	 * XFS_IRECLAIMABLE is set first before proceeding to reclaim.
  	 */
  	spin_lock(&ip->i_flags_lock);
  	if (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) ||
  	    __xfs_iflags_test(ip, XFS_IRECLAIM)) {
  		/* not a reclaim candidate. */
  		spin_unlock(&ip->i_flags_lock);
  		return 1;
  	}
  	__xfs_iflags_set(ip, XFS_IRECLAIM);
  	spin_unlock(&ip->i_flags_lock);
  	return 0;
  }
  
  /*
   * Inodes in different states need to be treated differently, and the return
   * value of xfs_iflush is not sufficient to get this right. The following table
   * lists the inode states and the reclaim actions necessary for non-blocking
   * reclaim:
   *
   *
   *	inode state	     iflush ret		required action
   *      ---------------      ----------         ---------------
   *	bad			-		reclaim
   *	shutdown		EIO		unpin and reclaim
   *	clean, unpinned		0		reclaim
   *	stale, unpinned		0		reclaim
   *	clean, pinned(*)	0		requeue
   *	stale, pinned		EAGAIN		requeue
   *	dirty, delwri ok	0		requeue
   *	dirty, delwri blocked	EAGAIN		requeue
   *	dirty, sync flush	0		reclaim
   *
   * (*) dgc: I don't think the clean, pinned state is possible but it gets
   * handled anyway given the order of checks implemented.
   *
   * As can be seen from the table, the return value of xfs_iflush() is not
   * sufficient to correctly decide the reclaim action here. The checks in
   * xfs_iflush() might look like duplicates, but they are not.
   *
   * Also, because we get the flush lock first, we know that any inode that has
   * been flushed delwri has had the flush completed by the time we check that
   * the inode is clean. The clean inode check needs to be done before flushing
   * the inode delwri otherwise we would loop forever requeuing clean inodes as
   * we cannot tell apart a successful delwri flush and a clean inode from the
   * return value of xfs_iflush().
   *
   * Note that because the inode is flushed delayed write by background
   * writeback, the flush lock may already be held here and waiting on it can
   * result in very long latencies. Hence for sync reclaims, where we wait on the
   * flush lock, the caller should push out delayed write inodes first before
   * trying to reclaim them to minimise the amount of time spent waiting. For
   * background reclaim, we just requeue the inode for the next pass.
   *
   * Hence the order of actions after gaining the locks should be:
   *	bad		=> reclaim
   *	shutdown	=> unpin and reclaim
   *	pinned, delwri	=> requeue
   *	pinned, sync	=> unpin
   *	stale		=> reclaim
   *	clean		=> reclaim
   *	dirty, delwri	=> flush and requeue
   *	dirty, sync	=> flush, wait and reclaim
   */
  STATIC int
  xfs_reclaim_inode(
  	struct xfs_inode	*ip,
  	struct xfs_perag	*pag,
  	int			sync_mode)
  {
  	int	error;

  restart:
  	error = 0;
  	xfs_ilock(ip, XFS_ILOCK_EXCL);
  	if (!xfs_iflock_nowait(ip)) {
  		if (!(sync_mode & SYNC_WAIT))
  			goto out;
  
  		/*
  		 * If we only have a single dirty inode in a cluster there is
  		 * a fair chance that the AIL push may have pushed it into
  		 * the buffer, but xfsbufd won't touch it until 30 seconds
  		 * from now, and thus we will lock up here.
  		 *
  		 * Promote the inode buffer to the front of the delwri list
  		 * and wake up xfsbufd now.
  		 */
  		xfs_promote_inode(ip);
  		xfs_iflock(ip);
  	}

  	if (is_bad_inode(VFS_I(ip)))
  		goto reclaim;
  	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
  		xfs_iunpin_wait(ip);
  		goto reclaim;
  	}
  	if (xfs_ipincount(ip)) {
  		if (!(sync_mode & SYNC_WAIT)) {
  			xfs_ifunlock(ip);
  			goto out;
  		}
  		xfs_iunpin_wait(ip);
  	}
  	if (xfs_iflags_test(ip, XFS_ISTALE))
  		goto reclaim;
  	if (xfs_inode_clean(ip))
  		goto reclaim;
  	/*
  	 * Now we have an inode that needs flushing.
  	 *
  	 * We do a nonblocking flush here even if we are doing a SYNC_WAIT
  	 * reclaim as we can deadlock with inode cluster removal.
  	 * xfs_ifree_cluster() can lock the inode buffer before it locks the
  	 * ip->i_lock, and we are doing the exact opposite here. As a result,
  	 * doing a blocking xfs_itobp() to get the cluster buffer will result
  	 * in an ABBA deadlock with xfs_ifree_cluster().
  	 *
  	 * As xfs_ifree_cluster() must gather all inodes that are active in the
  	 * cache to mark them stale, if we hit this case we don't actually want
  	 * to do IO here - we want the inode marked stale so we can simply
  	 * reclaim it. Hence if we get an EAGAIN error on a SYNC_WAIT flush,
  	 * just unlock the inode, back off and try again. Hopefully the next
  	 * pass through will see the stale flag set on the inode.
  	 */
  	error = xfs_iflush(ip, SYNC_TRYLOCK | sync_mode);
  	if (sync_mode & SYNC_WAIT) {
  		if (error == EAGAIN) {
  			xfs_iunlock(ip, XFS_ILOCK_EXCL);
  			/* backoff longer than in xfs_ifree_cluster */
  			delay(2);
  			goto restart;
  		}
  		xfs_iflock(ip);
  		goto reclaim;
  	}
  	/*
  	 * When we have to flush an inode but don't have SYNC_WAIT set, we
  	 * flush the inode out using a delwri buffer and wait for the next
  	 * call into reclaim to find it in a clean state instead of waiting for
  	 * it now. We also don't return errors here - if the error is transient
  	 * then the next reclaim pass will flush the inode, and if the error
  	 * is permanent then the next sync reclaim will reclaim the inode and
  	 * pass on the error.
  	 */
  	if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) {
  		xfs_warn(ip->i_mount,
  			"inode 0x%llx background reclaim flush failed with %d",
  			(long long)ip->i_ino, error);
  	}
  out:
  	xfs_iflags_clear(ip, XFS_IRECLAIM);
  	xfs_iunlock(ip, XFS_ILOCK_EXCL);
  	/*
  	 * We could return EAGAIN here to make reclaim rescan the inode tree in
  	 * a short while. However, this just burns CPU time scanning the tree
  	 * waiting for IO to complete and xfssyncd never goes back to the idle
  	 * state. Instead, return 0 to let the next scheduled background reclaim
  	 * attempt to reclaim the inode again.
  	 */
  	return 0;
  reclaim:
  	xfs_ifunlock(ip);
  	xfs_iunlock(ip, XFS_ILOCK_EXCL);
  
  	XFS_STATS_INC(xs_ig_reclaims);
  	/*
  	 * Remove the inode from the per-AG radix tree.
  	 *
  	 * Because radix_tree_delete won't complain even if the item was never
  	 * added to the tree, assert that it's been there before to catch
  	 * problems with the inode lifetime early on.
  	 */
  	spin_lock(&pag->pag_ici_lock);
  	if (!radix_tree_delete(&pag->pag_ici_root,
  				XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino)))
  		ASSERT(0);
  	__xfs_inode_clear_reclaim(pag, ip);
  	spin_unlock(&pag->pag_ici_lock);
  
  	/*
  	 * Here we do an (almost) spurious inode lock in order to coordinate
  	 * with inode cache radix tree lookups.  This is because the lookup
  	 * can reference the inodes in the cache without taking references.
  	 *
  	 * We make that OK here by ensuring that we wait until the inode is
  	 * unlocked after the lookup before we go ahead and free it.  We get
  	 * both the ilock and the iolock because the code may need to drop the
  	 * ilock but will still hold the iolock.
  	 */
  	xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
  	xfs_qm_dqdetach(ip);
  	xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
  
  	xfs_inode_free(ip);
  	return error;
  }
  /*
   * Walk the AGs and reclaim the inodes in them. Even if the filesystem is
   * corrupted, we still want to try to reclaim all the inodes. If we don't,
   * then a shutdown during the filesystem unmount reclaim walk will leak all
   * the unreclaimed inodes.
   */
  int
  xfs_reclaim_inodes_ag(
  	struct xfs_mount	*mp,
  	int			flags,
  	int			*nr_to_scan)
  {
  	struct xfs_perag	*pag;
  	int			error = 0;
  	int			last_error = 0;
  	xfs_agnumber_t		ag;
  	int			trylock = flags & SYNC_TRYLOCK;
  	int			skipped;

  restart:
  	ag = 0;
  	skipped = 0;
  	while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
  		unsigned long	first_index = 0;
  		int		done = 0;
  		int		nr_found = 0;

  		ag = pag->pag_agno + 1;

  		if (trylock) {
  			if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) {
  				skipped++;
  				xfs_perag_put(pag);
  				continue;
  			}
  			first_index = pag->pag_ici_reclaim_cursor;
  		} else
  			mutex_lock(&pag->pag_ici_reclaim_lock);
  		do {
  			struct xfs_inode *batch[XFS_LOOKUP_BATCH];
  			int	i;

  			rcu_read_lock();
  			nr_found = radix_tree_gang_lookup_tag(
  					&pag->pag_ici_root,
  					(void **)batch, first_index,
  					XFS_LOOKUP_BATCH,
  					XFS_ICI_RECLAIM_TAG);
  			if (!nr_found) {
  				done = 1;
  				rcu_read_unlock();
  				break;
  			}

  			/*
  			 * Grab the inodes before we drop the lock. If we found
  			 * nothing, nr == 0 and the loop will be skipped.
  			 */
  			for (i = 0; i < nr_found; i++) {
  				struct xfs_inode *ip = batch[i];
  
  				if (done || xfs_reclaim_inode_grab(ip, flags))
  					batch[i] = NULL;
  
  				/*
  				 * Update the index for the next lookup. Catch
  				 * overflows into the next AG range which can
  				 * occur if we have inodes in the last block of
  				 * the AG and we are currently pointing to the
  				 * last inode.
  				 *
  				 * Because we may see inodes that are from the
  				 * wrong AG due to RCU freeing and
  				 * reallocation, only update the index if it
  				 * lies in this AG. It was a race that led us
  				 * to see this inode, so another lookup from
  				 * the same index will not find it again.
  				 */
  				if (XFS_INO_TO_AGNO(mp, ip->i_ino) !=
  								pag->pag_agno)
  					continue;
  				first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
  				if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
  					done = 1;
  			}

  			/* unlock now we've grabbed the inodes. */
  			rcu_read_unlock();

  			for (i = 0; i < nr_found; i++) {
  				if (!batch[i])
  					continue;
  				error = xfs_reclaim_inode(batch[i], pag, flags);
  				if (error && last_error != EFSCORRUPTED)
  					last_error = error;
  			}

  			*nr_to_scan -= XFS_LOOKUP_BATCH;

  			cond_resched();
  		} while (nr_found && !done && *nr_to_scan > 0);

  		if (trylock && !done)
  			pag->pag_ici_reclaim_cursor = first_index;
  		else
  			pag->pag_ici_reclaim_cursor = 0;
  		mutex_unlock(&pag->pag_ici_reclaim_lock);
  		xfs_perag_put(pag);
  	}

  	/*
  	 * If we skipped any AG, and we still have scan count remaining, do
  	 * another pass this time using blocking reclaim semantics (i.e.
  	 * waiting on the reclaim locks and ignoring the reclaim cursors). This
  	 * ensures that when we get more reclaimers than AGs we block rather
  	 * than spin trying to execute reclaim.
  	 */
  	if (skipped && (flags & SYNC_WAIT) && *nr_to_scan > 0) {
  		trylock = 0;
  		goto restart;
  	}
  	return XFS_ERROR(last_error);
  }
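
  /*
   * Reclaim every reclaimable inode in the filesystem.  @mode carries the
   * SYNC_* flags that control whether we block on locked or dirty inodes.
   */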
  int
  xfs_reclaim_inodes(
  	xfs_mount_t	*mp,
  	int		mode)
  {
  	int		nr_to_scan = INT_MAX;

  	return xfs_reclaim_inodes_ag(mp, mode, &nr_to_scan);
  }

  /*
   * Scan a certain number of inodes for reclaim.
   *
   * When called we make sure that there is a background (fast) inode reclaim in
   * progress, while we will throttle the speed of reclaim via doing synchronous
   * reclaim of inodes. That means if we come across dirty inodes, we wait for
   * them to be cleaned, which we hope will not be very long due to the
   * background walker having already kicked the IO off on those dirty inodes.
   */
  void
  xfs_reclaim_inodes_nr(
  	struct xfs_mount	*mp,
  	int			nr_to_scan)
  {
  	/* kick background reclaimer and push the AIL */
  	xfs_syncd_queue_reclaim(mp);
  	xfs_ail_push_all(mp->m_ail);

  	xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, &nr_to_scan);
  }

  /*
   * Return the number of reclaimable inodes in the filesystem for
   * the shrinker to determine how much to reclaim.
   */
  int
  xfs_reclaim_inodes_count(
  	struct xfs_mount	*mp)
  {
  	struct xfs_perag	*pag;
  	xfs_agnumber_t		ag = 0;
  	int			reclaimable = 0;

  	while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
  		ag = pag->pag_agno + 1;
  		reclaimable += pag->pag_ici_reclaimable;
  		xfs_perag_put(pag);
  	}
  	return reclaimable;
  }