Blame view

fs/xfs/xfs_rw.c 10.9 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
  /*
1e69dd0eb   Nathan Scott   [XFS] Push some c...
2
   * Copyright (c) 2000-2006 Silicon Graphics, Inc.
7b7187698   Nathan Scott   [XFS] Update lice...
3
   * All Rights Reserved.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4
   *
7b7187698   Nathan Scott   [XFS] Update lice...
5
6
   * This program is free software; you can redistribute it and/or
   * modify it under the terms of the GNU General Public License as
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
7
8
   * published by the Free Software Foundation.
   *
7b7187698   Nathan Scott   [XFS] Update lice...
9
10
11
12
   * This program is distributed in the hope that it would be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   * GNU General Public License for more details.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
13
   *
7b7187698   Nathan Scott   [XFS] Update lice...
14
15
16
   * You should have received a copy of the GNU General Public License
   * along with this program; if not, write the Free Software Foundation,
   * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
17
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
18
  #include "xfs.h"
a844f4510   Nathan Scott   [XFS] Remove xfs_...
19
  #include "xfs_fs.h"
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
20
  #include "xfs_types.h"
a844f4510   Nathan Scott   [XFS] Remove xfs_...
21
  #include "xfs_bit.h"
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
22
  #include "xfs_log.h"
a844f4510   Nathan Scott   [XFS] Remove xfs_...
23
  #include "xfs_inum.h"
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
24
25
26
  #include "xfs_trans.h"
  #include "xfs_sb.h"
  #include "xfs_ag.h"
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
27
28
29
  #include "xfs_dir2.h"
  #include "xfs_dmapi.h"
  #include "xfs_mount.h"
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
30
  #include "xfs_bmap_btree.h"
a844f4510   Nathan Scott   [XFS] Remove xfs_...
31
  #include "xfs_alloc_btree.h"
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
32
  #include "xfs_ialloc_btree.h"
a844f4510   Nathan Scott   [XFS] Remove xfs_...
33
34
35
36
37
  #include "xfs_dir2_sf.h"
  #include "xfs_attr_sf.h"
  #include "xfs_dinode.h"
  #include "xfs_inode.h"
  #include "xfs_inode_item.h"
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
38
39
40
41
42
  #include "xfs_itable.h"
  #include "xfs_btree.h"
  #include "xfs_alloc.h"
  #include "xfs_ialloc.h"
  #include "xfs_attr.h"
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
43
44
  #include "xfs_bmap.h"
  #include "xfs_acl.h"
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
  #include "xfs_error.h"
  #include "xfs_buf_item.h"
  #include "xfs_rw.h"
  
  /*
   * This is a subroutine for xfs_write() and other writers (xfs_ioctl)
   * which clears the setuid and setgid bits when a file is written.
   *
   * The mode change is logged in an XFS_TRANS_WRITEID transaction that
   * is marked with xfs_trans_set_sync() before it is committed.
   *
   * Returns 0 on success, otherwise the error from reserving log space
   * or from committing the transaction.
   */
  int
  xfs_write_clear_setuid(
  	xfs_inode_t	*ip)
  {
  	xfs_mount_t	*mp;
  	xfs_trans_t	*tp;
  	int		error;

  	mp = ip->i_mount;
  	tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID);
  	error = xfs_trans_reserve(tp, 0, XFS_WRITEID_LOG_RES(mp), 0, 0, 0);
  	if (error) {
  		xfs_trans_cancel(tp, 0);
  		return error;
  	}
  	xfs_ilock(ip, XFS_ILOCK_EXCL);
  	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
  	xfs_trans_ihold(tp, ip);
  	ip->i_d.di_mode &= ~S_ISUID;

  	/*
  	 * Note that we don't have to worry about mandatory
  	 * file locking being disabled here because we only
  	 * clear the S_ISGID bit if the Group execute bit is
  	 * on, but if it was on then mandatory locking wouldn't
  	 * have been enabled.
  	 */
  	if (ip->i_d.di_mode & S_IXGRP) {
  		ip->i_d.di_mode &= ~S_ISGID;
  	}
  	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
  	xfs_trans_set_sync(tp);
  	error = xfs_trans_commit(tp, 0);
  	xfs_iunlock(ip, XFS_ILOCK_EXCL);

  	/*
  	 * Report a failed commit to the caller instead of silently
  	 * claiming success.
  	 */
  	return error;
  }
  
  /*
   * Handle logging requirements of various synchronous types of write.
   *
   * Returns 0, or the error from reserving or committing the
   * synchronous inode-logging transaction.
   */
  int
  xfs_write_sync_logforce(
  	xfs_mount_t	*mp,
  	xfs_inode_t	*ip)
  {
  	xfs_inode_log_item_t	*iip;
  	xfs_trans_t		*tp;
  	int			error;

  	if ((mp->m_flags & XFS_MOUNT_OSYNCISOSYNC) || ip->i_update_size) {
  		/*
  		 * O_SYNC, or O_DSYNC _with_ a size update: both are
  		 * handled the same way.
  		 *
  		 * For a synchronous write the inode modification time
  		 * has to be made permanent.  The timestamp has already
  		 * been updated by the time we get here, so log the
  		 * inode in a synchronous transaction.  It's not fast,
  		 * but it's necessary.
  		 *
  		 * For a dsync write whose size was changed
  		 * non-transactionally, the same synchronous transaction
  		 * gets the size change logged.
  		 */
  		tp = xfs_trans_alloc(mp, XFS_TRANS_WRITE_SYNC);
  		error = xfs_trans_reserve(tp, 0, XFS_SWRITE_LOG_RES(mp),
  					  0, 0, 0);
  		if (error) {
  			/* Could not reserve log space: give up. */
  			xfs_trans_cancel(tp, 0);
  			return error;
  		}

  		xfs_ilock(ip, XFS_ILOCK_EXCL);
  		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
  		xfs_trans_ihold(tp, ip);
  		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
  		xfs_trans_set_sync(tp);
  		error = xfs_trans_commit(tp, 0);
  		xfs_iunlock(ip, XFS_ILOCK_EXCL);
  		return error;
  	}

  	/*
  	 * We're treating this as O_DSYNC and the size was not updated,
  	 * so forcing the log is enough.
  	 *
  	 * If an allocation transaction occurred without extending the
  	 * size, the log has to be forced up to the proper point to
  	 * ensure that the allocation is permanent.  We can't count on
  	 * buffered writes locking out direct I/O writes - a direct I/O
  	 * write could have extended the size nontransactionally and
  	 * finished before we started.  xfs_write_file will think that
  	 * the file didn't grow, but the update isn't safe unless the
  	 * size change is logged.
  	 *
  	 * So force the log if we've committed a transaction against
  	 * the inode, or if someone else has and the commit record
  	 * hasn't gone to disk (e.g. the inode is pinned).  This
  	 * guarantees that all changes affecting the inode are
  	 * permanent when we return.
  	 */
  	iip = ip->i_itemp;
  	if (iip && iip->ili_last_lsn) {
  		xfs_log_force(mp, iip->ili_last_lsn,
  				XFS_LOG_FORCE | XFS_LOG_SYNC);
  	} else if (xfs_ipincount(ip) > 0) {
  		xfs_log_force(mp, (xfs_lsn_t)0,
  				XFS_LOG_FORCE | XFS_LOG_SYNC);
  	}

  	return 0;
  }
  
  /*
   * Force a shutdown of the filesystem instantly while keeping
   * the filesystem consistent. We don't do an unmount here; just shutdown
   * the shop, make sure that absolutely nothing persistent happens to
   * this filesystem after this point.
   *
   * fname and lnnum identify the call site and are only used in the
   * console message.  All console output except the in-memory
   * corruption alert is suppressed when SHUTDOWN_FORCE_UMOUNT is set
   * in flags.
   */
  void
  xfs_do_force_shutdown(
  	bhv_desc_t	*bdp,
  	int		flags,
  	char		*fname,
  	int		lnnum)
  {
  	xfs_mount_t	*mp = XFS_BHVTOM(bdp);
  	int		logerror = flags & SHUTDOWN_LOG_IO_ERROR;
  	int		verbose = !(flags & SHUTDOWN_FORCE_UMOUNT);

  	if (verbose) {
  		cmn_err(CE_NOTE, "xfs_force_shutdown(%s,0x%x) called from "
  				 "line %d of file %s.  Return address = 0x%p",
  			mp->m_fsname, flags, lnnum, fname, __return_address);
  	}

  	/*
  	 * Already shut down: nothing more to do, unless this call is
  	 * reporting a log I/O error.
  	 */
  	if (XFS_FORCED_SHUTDOWN(mp) && !logerror)
  		return;

  	/*
  	 * This flags XFS_MOUNT_FS_SHUTDOWN, makes sure that we don't
  	 * queue up anybody new on the log reservations, and wakes up
  	 * everybody who's sleeping on log reservations to tell them
  	 * the bad news.
  	 */
  	if (xfs_log_force_umount(mp, logerror))
  		return;

  	/* Tell the administrator why the filesystem went down. */
  	if (flags & SHUTDOWN_CORRUPT_INCORE) {
  		xfs_cmn_err(XFS_PTAG_SHUTDOWN_CORRUPT, CE_ALERT, mp,
      "Corruption of in-memory data detected.  Shutting down filesystem: %s",
  			mp->m_fsname);
  		if (xfs_error_level >= XFS_ERRLEVEL_HIGH) {
  			xfs_stack_trace();
  		}
  	} else if (verbose) {
  		if (logerror) {
  			xfs_cmn_err(XFS_PTAG_SHUTDOWN_LOGERROR, CE_ALERT, mp,
  		"Log I/O Error Detected.  Shutting down filesystem: %s",
  				mp->m_fsname);
  		} else if (flags & SHUTDOWN_DEVICE_REQ) {
  			xfs_cmn_err(XFS_PTAG_SHUTDOWN_IOERROR, CE_ALERT, mp,
  		"All device paths lost.  Shutting down filesystem: %s",
  				mp->m_fsname);
  		} else if (!(flags & SHUTDOWN_REMOTE_REQ)) {
  			xfs_cmn_err(XFS_PTAG_SHUTDOWN_IOERROR, CE_ALERT, mp,
  		"I/O Error Detected.  Shutting down filesystem: %s",
  				mp->m_fsname);
  		}
  	}

  	if (verbose) {
  		cmn_err(CE_ALERT, "Please umount the filesystem, "
  				  "and rectify the problem(s)");
  	}
  }
  
  
  /*
   * Called when we want to stop a buffer from getting written or read.
   * We attach the EIO error, muck with its flags, and call biodone
   * so that the proper iodone callbacks get called.
   *
   * Always returns EIO.
   */
  int
  xfs_bioerror(
  	xfs_buf_t *bp)
  {
  
  #ifdef XFSERRORDEBUG
  	/* Debug check: the buffer is a read, or has an iodone callback. */
  	ASSERT(XFS_BUF_ISREAD(bp) || bp->b_iodone);
  #endif
  
  	/*
  	 * No need to wait until the buffer is unpinned.
  	 * We aren't flushing it.
  	 */
  	xfs_buftrace("XFS IOERROR", bp);
  	XFS_BUF_ERROR(bp, EIO);
  	/*
  	 * We're calling biodone, so delete B_DONE flag. Either way
  	 * we have to call the iodone callback, and calling biodone
  	 * probably is the best way since it takes care of
  	 * GRIO as well.
  	 */
  	XFS_BUF_UNREAD(bp);
  	XFS_BUF_UNDELAYWRITE(bp);
  	XFS_BUF_UNDONE(bp);
  	XFS_BUF_STALE(bp);
  
  	/* Drop the strategy function, then complete the buffer. */
  	XFS_BUF_CLR_BDSTRAT_FUNC(bp);
  	xfs_biodone(bp);
  
  	return (EIO);
  }
  
  /*
   * Same as xfs_bioerror, except that we are releasing the buffer
   * here ourselves, and avoiding the biodone call.
   * This is meant for userdata errors; metadata bufs come with
   * iodone functions attached, so that we can track down errors.
   *
   * Always returns EIO.
   */
  int
  xfs_bioerror_relse(
  	xfs_buf_t *bp)
  {
  	int64_t orig_flags;

  	ASSERT(XFS_BUF_IODONE_FUNC(bp) != xfs_buf_iodone_callbacks);
  	ASSERT(XFS_BUF_IODONE_FUNC(bp) != xlog_iodone);

  	xfs_buftrace("XFS IOERRELSE", bp);

  	/* Remember the flags as they were before we rewrite them below. */
  	orig_flags = XFS_BUF_BFLAGS(bp);

  	/*
  	 * No need to wait until the buffer is unpinned.
  	 * We aren't flushing it.
  	 *
  	 * chunkhold expects B_DONE to be set, whether
  	 * we actually finish the I/O or not. We don't want to
  	 * change that interface.
  	 */
  	XFS_BUF_UNREAD(bp);
  	XFS_BUF_UNDELAYWRITE(bp);
  	XFS_BUF_DONE(bp);
  	XFS_BUF_STALE(bp);
  	XFS_BUF_CLR_IODONE_FUNC(bp);
  	XFS_BUF_CLR_BDSTRAT_FUNC(bp);

  	/*
  	 * There's no reason to mark an error on ASYNC buffers;
  	 * just release them.
  	 */
  	if (orig_flags & XFS_B_ASYNC) {
  		xfs_buf_relse(bp);
  		return EIO;
  	}

  	/*
  	 * Mark b_error and B_ERROR _both_.
  	 * Lots of chunkcache code assumes that.
  	 */
  	XFS_BUF_ERROR(bp, EIO);
  	XFS_BUF_V_IODONESEMA(bp);
  	return EIO;
  }
ce8e922c0   Nathan Scott   [XFS] Complete th...
325

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
326
327
328
329
330
331
332
333
334
335
336
337
  /*
   * Prints out an ALERT message about I/O error.
   *
   * func is the caller-supplied name printed in the message and blkno
   * the block number to report.  mp may be NULL, in which case a
   * placeholder is printed instead of the filesystem name.
   */
  void
  xfs_ioerror_alert(
  	char			*func,
  	struct xfs_mount	*mp,
  	xfs_buf_t		*bp,
  	xfs_daddr_t		blkno)
  {
  	const char	*fsname;

  	/* Fall back to a placeholder when no filesystem name is available. */
  	if (!mp || !mp->m_fsname)
  		fsname = "(fs name not set)";
  	else
  		fsname = mp->m_fsname;

  	cmn_err(CE_ALERT,
   "I/O error in filesystem (\"%s\") meta-data dev %s block 0x%llx"
   "       (\"%s\") error %d buf count %zd",
  		fsname,
  		XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)),
  		(__uint64_t)blkno, func,
  		XFS_BUF_GETERROR(bp), XFS_BUF_COUNT(bp));
  }
  
  /*
   * This isn't an absolute requirement, but it is
   * just a good idea to call xfs_read_buf instead of
   * directly doing a read_buf call. For one, we shouldn't
   * be doing this disk read if we are in SHUTDOWN state anyway,
   * so this stops that from happening. Secondly, this does all
   * the error checking stuff and the brelse if appropriate for
   * the caller, so the code can be a little leaner.
   *
   * On success returns 0 and stores the buffer in *bpp.  On any
   * failure *bpp is set to NULL, the buffer (if one was obtained) is
   * released here, and an error is returned: the buffer's own error
   * if it has one, otherwise EIO.
   */

  int
  xfs_read_buf(
  	struct xfs_mount *mp,
  	xfs_buftarg_t	 *target,
  	xfs_daddr_t	 blkno,
  	int              len,
  	uint             flags,
  	xfs_buf_t	 **bpp)
  {
  	xfs_buf_t	 *bp;
  	int		 error;

  	if (flags) {
  		bp = xfs_buf_read_flags(target, blkno, len, flags);
  	} else {
  		bp = xfs_buf_read(target, blkno, len, flags);
  	}
  	if (!bp) {
  		/* Never leave the caller's pointer untouched on failure. */
  		*bpp = NULL;
  		return XFS_ERROR(EIO);
  	}

  	error = XFS_BUF_GETERROR(bp);
  	if (!error && !XFS_FORCED_SHUTDOWN(mp)) {
  		*bpp = bp;
  		return 0;
  	}

  	/*
  	 * Either the buffer carries an error or the filesystem has
  	 * been shut down; in both cases the caller gets no buffer.
  	 */
  	*bpp = NULL;
  	if (error) {
  		xfs_ioerror_alert("xfs_read_buf", mp, bp, XFS_BUF_ADDR(bp));
  	} else {
  		error = XFS_ERROR(EIO);
  	}

  	XFS_BUF_UNDONE(bp);
  	XFS_BUF_UNDELAYWRITE(bp);
  	XFS_BUF_STALE(bp);
  	/*
  	 * brelse clears B_ERROR and b_error
  	 */
  	xfs_buf_relse(bp);

  	return error;
  }
  
  /*
   * Wrapper around bwrite() so that we can trap
   * write errors, and act accordingly.
   */
  int
  xfs_bwrite(
  	struct xfs_mount *mp,
  	struct xfs_buf	 *bp)
  {
  	int	error;
  
  	/*
  	 * XXXsup how does this work for quotas.
  	 */
  	XFS_BUF_SET_BDSTRAT_FUNC(bp, xfs_bdstrat_cb);
  	XFS_BUF_SET_FSPRIVATE3(bp, mp);
  	XFS_BUF_WRITE(bp);
  
  	if ((error = XFS_bwrite(bp))) {
  		ASSERT(mp);
  		/*
  		 * Cannot put a buftrace here since if the buffer is not
  		 * B_HOLD then we will brelse() the buffer before returning
  		 * from bwrite and we could be tracing a buffer that has
  		 * been reused.
  		 */
7d04a335b   Nathan Scott   [XFS] Shutdown th...
422
  		xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
423
424
425
  	}
  	return (error);
  }