Blame view

fs/sync.c 9.91 KB
f79e2abb9   Andrew Morton   [PATCH] sys_sync_...
1
2
3
4
5
6
7
  /*
   * High-level sync()-related operations
   */
  
  #include <linux/kernel.h>
  #include <linux/file.h>
  #include <linux/fs.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
8
  #include <linux/slab.h>
630d9c472   Paul Gortmaker   fs: reduce the us...
9
  #include <linux/export.h>
b7ed78f56   Sage Weil   introduce sys_syn...
10
  #include <linux/namei.h>
914e26379   Al Viro   [PATCH] severing ...
11
  #include <linux/sched.h>
f79e2abb9   Andrew Morton   [PATCH] sys_sync_...
12
13
14
15
  #include <linux/writeback.h>
  #include <linux/syscalls.h>
  #include <linux/linkage.h>
  #include <linux/pagemap.h>
cf9a2ae8d   David Howells   [PATCH] BLOCK: Mo...
16
  #include <linux/quotaops.h>
5129a469a   Jörn Engel   Catch filesystems...
17
  #include <linux/backing-dev.h>
5a3e5cb8e   Jan Kara   vfs: Fix sys_sync...
18
  #include "internal.h"
f79e2abb9   Andrew Morton   [PATCH] sys_sync_...
19
20
21
  
  #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \
  			SYNC_FILE_RANGE_WAIT_AFTER)
c15c54f5f   Jan Kara   vfs: Move syncing...
22
  /*
d8a8559cd   Jens Axboe   writeback: get ri...
23
24
25
26
27
   * Do the filesystem syncing work. For simple filesystems
   * writeback_inodes_sb(sb) just dirties buffers with inodes so we have to
   * submit IO for these buffers via __sync_blockdev(). This also speeds up the
   * wait == 1 case since in that case write_inode() functions do
   * sync_dirty_buffer() and thus effectively write one block at a time.
c15c54f5f   Jan Kara   vfs: Move syncing...
28
   */
0dc83bd30   Jan Kara   Revert "writeback...
29
  static int __sync_filesystem(struct super_block *sb, int wait)
c15c54f5f   Jan Kara   vfs: Move syncing...
30
  {
5fb324ad2   Christoph Hellwig   quota: move code ...
31
  	if (wait)
0dc83bd30   Jan Kara   Revert "writeback...
32
  		sync_inodes_sb(sb);
5fb324ad2   Christoph Hellwig   quota: move code ...
33
  	else
0e175a183   Curt Wohlgemuth   writeback: Add a ...
34
  		writeback_inodes_sb(sb, WB_REASON_SYNC);
5fb324ad2   Christoph Hellwig   quota: move code ...
35

c15c54f5f   Jan Kara   vfs: Move syncing...
36
37
38
39
40
41
42
43
44
45
  	if (sb->s_op->sync_fs)
  		sb->s_op->sync_fs(sb, wait);
  	return __sync_blockdev(sb->s_bdev, wait);
  }
  
  /*
   * Write out and wait upon all dirty data associated with this
   * superblock.  Filesystem data as well as the underlying block
   * device.  The caller must hold sb->s_umount.
   */
60b0680fa   Jan Kara   vfs: Rename fsync...
46
  int sync_filesystem(struct super_block *sb)
c15c54f5f   Jan Kara   vfs: Move syncing...
47
48
  {
  	int ret;
5af7926ff   Christoph Hellwig   enforce ->sync_fs...
49
50
51
52
53
54
55
56
57
58
59
  	/*
  	 * We need to be protected against the filesystem going from
  	 * r/o to r/w or vice versa.
  	 */
  	WARN_ON(!rwsem_is_locked(&sb->s_umount));
  
  	/*
  	 * No point in syncing out anything if the filesystem is read-only.
  	 */
  	if (sb->s_flags & MS_RDONLY)
  		return 0;
0dc83bd30   Jan Kara   Revert "writeback...
60
  	ret = __sync_filesystem(sb, 0);
c15c54f5f   Jan Kara   vfs: Move syncing...
61
62
  	if (ret < 0)
  		return ret;
0dc83bd30   Jan Kara   Revert "writeback...
63
  	return __sync_filesystem(sb, 1);
c15c54f5f   Jan Kara   vfs: Move syncing...
64
  }
10096fb10   Anton Altaparmakov   Export sync_files...
65
  EXPORT_SYMBOL(sync_filesystem);
c15c54f5f   Jan Kara   vfs: Move syncing...
66

b3de65310   Jan Kara   vfs: Reorder oper...
67
  static void sync_inodes_one_sb(struct super_block *sb, void *arg)
01a05b337   Al Viro   new helper: itera...
68
  {
95f28604a   Jens Axboe   fs: assign sb->s_...
69
  	if (!(sb->s_flags & MS_RDONLY))
0dc83bd30   Jan Kara   Revert "writeback...
70
  		sync_inodes_sb(sb);
01a05b337   Al Viro   new helper: itera...
71
  }
b3de65310   Jan Kara   vfs: Reorder oper...
72

b3de65310   Jan Kara   vfs: Reorder oper...
73
74
75
76
77
  /*
   * iterate_supers() callback: invoke ->sync_fs() on a superblock that is not
   * read-only and implements the operation.  @arg points to the int "wait"
   * value handed on to ->sync_fs().
   */
  static void sync_fs_one_sb(struct super_block *sb, void *arg)
  {
  	int *wait = arg;

  	if (sb->s_flags & MS_RDONLY)
  		return;
  	if (!sb->s_op->sync_fs)
  		return;

  	sb->s_op->sync_fs(sb, *wait);
  }
d0e91b13e   Jan Kara   vfs: Remove unnec...
78
  static void fdatawrite_one_bdev(struct block_device *bdev, void *arg)
b3de65310   Jan Kara   vfs: Reorder oper...
79
  {
d0e91b13e   Jan Kara   vfs: Remove unnec...
80
  	filemap_fdatawrite(bdev->bd_inode->i_mapping);
a8c7176b6   Jan Kara   vfs: Make sys_syn...
81
  }
d0e91b13e   Jan Kara   vfs: Remove unnec...
82
  static void fdatawait_one_bdev(struct block_device *bdev, void *arg)
a8c7176b6   Jan Kara   vfs: Make sys_syn...
83
  {
aa750fd71   Junichi Nomura   mm/filemap.c: mak...
84
85
86
87
88
89
  	/*
  	 * We keep the error status of individual mapping so that
  	 * applications can catch the writeback error using fsync(2).
  	 * See filemap_fdatawait_keep_errors() for details.
  	 */
  	filemap_fdatawait_keep_errors(bdev->bd_inode->i_mapping);
c15c54f5f   Jan Kara   vfs: Move syncing...
90
  }
3beab0b42   Zhang, Yanmin   sys_sync(): fix 1...
91
  /*
4ea425b63   Jan Kara   vfs: Avoid unnece...
92
93
94
95
96
97
98
99
   * Sync everything. We start by waking flusher threads so that most of
   * writeback runs on all devices in parallel. Then we sync all inodes reliably
   * which effectively also waits for all flusher threads to finish doing
   * writeback. At this point all data is on disk so metadata should be stable
   * and we tell filesystems to sync their metadata via ->sync_fs() calls.
   * Finally, we writeout all block devices because some filesystems (e.g. ext2)
   * just write metadata (such as inodes or bitmaps) to block device page cache
   * and do not sync it on their own in ->sync_fs().
3beab0b42   Zhang, Yanmin   sys_sync(): fix 1...
100
   */
5cee5815d   Jan Kara   vfs: Make sys_syn...
101
  SYSCALL_DEFINE0(sync)
cf9a2ae8d   David Howells   [PATCH] BLOCK: Mo...
102
  {
b3de65310   Jan Kara   vfs: Reorder oper...
103
  	int nowait = 0, wait = 1;
0e175a183   Curt Wohlgemuth   writeback: Add a ...
104
  	wakeup_flusher_threads(0, WB_REASON_SYNC);
0dc83bd30   Jan Kara   Revert "writeback...
105
  	iterate_supers(sync_inodes_one_sb, NULL);
4ea425b63   Jan Kara   vfs: Avoid unnece...
106
  	iterate_supers(sync_fs_one_sb, &nowait);
b3de65310   Jan Kara   vfs: Reorder oper...
107
  	iterate_supers(sync_fs_one_sb, &wait);
d0e91b13e   Jan Kara   vfs: Remove unnec...
108
109
  	iterate_bdevs(fdatawrite_one_bdev, NULL);
  	iterate_bdevs(fdatawait_one_bdev, NULL);
cf9a2ae8d   David Howells   [PATCH] BLOCK: Mo...
110
111
  	if (unlikely(laptop_mode))
  		laptop_sync_completion();
cf9a2ae8d   David Howells   [PATCH] BLOCK: Mo...
112
113
  	return 0;
  }
a2a9537ac   Jens Axboe   Get rid of pdflus...
114
115
  static void do_sync_work(struct work_struct *work)
  {
b3de65310   Jan Kara   vfs: Reorder oper...
116
  	int nowait = 0;
5cee5815d   Jan Kara   vfs: Make sys_syn...
117
118
119
120
  	/*
  	 * Sync twice to reduce the possibility we skipped some inodes / pages
  	 * because they were temporarily locked
  	 */
b3de65310   Jan Kara   vfs: Reorder oper...
121
122
  	iterate_supers(sync_inodes_one_sb, &nowait);
  	iterate_supers(sync_fs_one_sb, &nowait);
d0e91b13e   Jan Kara   vfs: Remove unnec...
123
  	iterate_bdevs(fdatawrite_one_bdev, NULL);
b3de65310   Jan Kara   vfs: Reorder oper...
124
125
  	iterate_supers(sync_inodes_one_sb, &nowait);
  	iterate_supers(sync_fs_one_sb, &nowait);
d0e91b13e   Jan Kara   vfs: Remove unnec...
126
  	iterate_bdevs(fdatawrite_one_bdev, NULL);
5cee5815d   Jan Kara   vfs: Make sys_syn...
127
128
  	printk("Emergency Sync complete
  ");
a2a9537ac   Jens Axboe   Get rid of pdflus...
129
130
  	kfree(work);
  }
cf9a2ae8d   David Howells   [PATCH] BLOCK: Mo...
131
132
  void emergency_sync(void)
  {
a2a9537ac   Jens Axboe   Get rid of pdflus...
133
134
135
136
137
138
139
  	struct work_struct *work;
  
  	work = kmalloc(sizeof(*work), GFP_ATOMIC);
  	if (work) {
  		INIT_WORK(work, do_sync_work);
  		schedule_work(work);
  	}
cf9a2ae8d   David Howells   [PATCH] BLOCK: Mo...
140
  }
b7ed78f56   Sage Weil   introduce sys_syn...
141
142
143
144
145
  /*
   * sync a single super
   */
  SYSCALL_DEFINE1(syncfs, int, fd)
  {
  	struct fd f = fdget(fd);
  	int ret = -EBADF;

  	if (f.file) {
  		/* The superblock is the one the file's dentry belongs to. */
  		struct super_block *sb = f.file->f_path.dentry->d_sb;

  		/* sync_filesystem() expects s_umount to be held. */
  		down_read(&sb->s_umount);
  		ret = sync_filesystem(sb);
  		up_read(&sb->s_umount);

  		fdput(f);
  	}

  	return ret;
  }
4c728ef58   Christoph Hellwig   add a vfs_fsync h...
160
  /**
148f948ba   Jan Kara   vfs: Introduce ne...
161
   * vfs_fsync_range - helper to sync a range of data & metadata to disk
4c728ef58   Christoph Hellwig   add a vfs_fsync h...
162
   * @file:		file to sync
148f948ba   Jan Kara   vfs: Introduce ne...
163
164
165
   * @start:		offset in bytes of the beginning of data range to sync
   * @end:		offset in bytes of the end of data range (inclusive)
   * @datasync:		perform only datasync
4c728ef58   Christoph Hellwig   add a vfs_fsync h...
166
   *
148f948ba   Jan Kara   vfs: Introduce ne...
167
168
169
   * Write back data in range @start..@end and metadata for @file to disk.  If
   * @datasync is set only metadata needed to access modified file data is
   * written.
4c728ef58   Christoph Hellwig   add a vfs_fsync h...
170
   */
8018ab057   Christoph Hellwig   sanitize vfs_fsyn...
171
  int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync)
  {
  	struct inode *inode = file->f_mapping->host;
  	const struct file_operations *fops = file->f_op;

  	/* Files without an ->fsync() method cannot be synced. */
  	if (!fops->fsync)
  		return -EINVAL;

  	/*
  	 * Full (non-data) sync with I_DIRTY_TIME set: clear the flag under
  	 * i_lock and re-dirty the inode through mark_inode_dirty_sync()
  	 * before handing over to ->fsync().
  	 */
  	if (!datasync && (inode->i_state & I_DIRTY_TIME)) {
  		spin_lock(&inode->i_lock);
  		inode->i_state &= ~I_DIRTY_TIME;
  		spin_unlock(&inode->i_lock);
  		mark_inode_dirty_sync(inode);
  	}

  	return fops->fsync(file, start, end, datasync);
  }
148f948ba   Jan Kara   vfs: Introduce ne...
184
185
186
187
188
  EXPORT_SYMBOL(vfs_fsync_range);
  
  /**
   * vfs_fsync - perform a fsync or fdatasync on a file
   * @file:		file to sync
148f948ba   Jan Kara   vfs: Introduce ne...
189
190
191
192
   * @datasync:		only perform a fdatasync operation
   *
   * Write back data and metadata for @file to disk.  If @datasync is
   * set only metadata needed to access modified file data is written.
148f948ba   Jan Kara   vfs: Introduce ne...
193
   */
8018ab057   Christoph Hellwig   sanitize vfs_fsyn...
194
  int vfs_fsync(struct file *file, int datasync)
148f948ba   Jan Kara   vfs: Introduce ne...
195
  {
8018ab057   Christoph Hellwig   sanitize vfs_fsyn...
196
  	return vfs_fsync_range(file, 0, LLONG_MAX, datasync);
148f948ba   Jan Kara   vfs: Introduce ne...
197
  }
4c728ef58   Christoph Hellwig   add a vfs_fsync h...
198
  EXPORT_SYMBOL(vfs_fsync);
cf9a2ae8d   David Howells   [PATCH] BLOCK: Mo...
199

4c728ef58   Christoph Hellwig   add a vfs_fsync h...
200
  static int do_fsync(unsigned int fd, int datasync)
cf9a2ae8d   David Howells   [PATCH] BLOCK: Mo...
201
  {
2903ff019   Al Viro   switch simple cas...
202
  	struct fd f = fdget(fd);
cf9a2ae8d   David Howells   [PATCH] BLOCK: Mo...
203
  	int ret = -EBADF;
2903ff019   Al Viro   switch simple cas...
204
205
206
  	if (f.file) {
  		ret = vfs_fsync(f.file, datasync);
  		fdput(f);
cf9a2ae8d   David Howells   [PATCH] BLOCK: Mo...
207
208
209
  	}
  	return ret;
  }
a5f8fa9e9   Heiko Carstens   [CVE-2009-0029] S...
210
  /* fsync(2): sync the file behind @fd with datasync cleared. */
  SYSCALL_DEFINE1(fsync, unsigned int, fd)
  {
  	const int datasync = 0;

  	return do_fsync(fd, datasync);
  }
a5f8fa9e9   Heiko Carstens   [CVE-2009-0029] S...
214
  /* fdatasync(2): sync the file behind @fd with datasync set. */
  SYSCALL_DEFINE1(fdatasync, unsigned int, fd)
  {
  	const int datasync = 1;

  	return do_fsync(fd, datasync);
  }
  
  /*
f79e2abb9   Andrew Morton   [PATCH] sys_sync_...
220
221
222
223
224
225
226
227
228
229
   * sys_sync_file_range() permits finely controlled syncing over a segment of
   * a file in the range offset .. (offset+nbytes-1) inclusive.  If nbytes is
   * zero then sys_sync_file_range() will operate from offset out to EOF.
   *
   * The flag bits are:
   *
   * SYNC_FILE_RANGE_WAIT_BEFORE: wait upon writeout of all pages in the range
   * before performing the write.
   *
   * SYNC_FILE_RANGE_WRITE: initiate writeout of all those dirty pages in the
cce770815   Pavel Machek   SYNC_FILE_RANGE_W...
230
231
   * range which are not presently under writeback. Note that this may block for
   * significant periods due to exhaustion of disk request structures.
f79e2abb9   Andrew Morton   [PATCH] sys_sync_...
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
   *
   * SYNC_FILE_RANGE_WAIT_AFTER: wait upon writeout of all pages in the range
   * after performing the write.
   *
   * Useful combinations of the flag bits are:
   *
   * SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE: ensures that all pages
   * in the range which were dirty on entry to sys_sync_file_range() are placed
   * under writeout.  This is a start-write-for-data-integrity operation.
   *
   * SYNC_FILE_RANGE_WRITE: start writeout of all dirty pages in the range which
   * are not presently under writeout.  This is an asynchronous flush-to-disk
   * operation.  Not suitable for data integrity operations.
   *
   * SYNC_FILE_RANGE_WAIT_BEFORE (or SYNC_FILE_RANGE_WAIT_AFTER): wait for
   * completion of writeout of all pages in the range.  This will be used after an
   * earlier SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE operation to wait
   * for that operation to complete and to return the result.
   *
   * SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE|SYNC_FILE_RANGE_WAIT_AFTER:
   * a traditional sync() operation.  This is a write-for-data-integrity operation
   * which will ensure that all pages in the range which were dirty on entry to
   * sys_sync_file_range() are committed to disk.
   *
   *
   * SYNC_FILE_RANGE_WAIT_BEFORE and SYNC_FILE_RANGE_WAIT_AFTER will detect any
   * I/O errors or ENOSPC conditions and will return those to the caller, after
   * clearing the EIO and ENOSPC flags in the address_space.
   *
   * It should be noted that none of these operations write out the file's
   * metadata.  So unless the application is strictly performing overwrites of
   * already-instantiated disk blocks, there are no guarantees here that the data
   * will be available after a crash.
   */
4a0fd5bf0   Al Viro   teach SYSCALL_DEF...
266
267
  SYSCALL_DEFINE4(sync_file_range, int, fd, loff_t, offset, loff_t, nbytes,
  				unsigned int, flags)
  {
  	/* First byte offset that a 32-bit pgoff_t can no longer index. */
  	const unsigned long long pgoff32_limit = 0x100000000ULL << PAGE_SHIFT;
  	struct address_space *mapping;
  	struct fd f;
  	umode_t mode;
  	loff_t last;			/* last byte of the range, inclusive */
  	int ret;

  	/* Only the three SYNC_FILE_RANGE_* bits are accepted. */
  	if (flags & ~VALID_FLAGS)
  		return -EINVAL;

  	/* Reject negative offsets and ranges whose end is negative or wraps. */
  	last = offset + nbytes;
  	if ((s64)offset < 0 || (s64)last < 0 || last < offset)
  		return -EINVAL;

  	if (sizeof(pgoff_t) == 4) {
  		/*
  		 * The range starts outside a 32 bit machine's
  		 * pagecache addressing capabilities.  Let it "succeed"
  		 */
  		if (offset >= pgoff32_limit)
  			return 0;

  		/* The range ends beyond that limit: treat it as out to EOF. */
  		if (last >= pgoff32_limit)
  			nbytes = 0;
  	}

  	/* nbytes == 0 means "out to EOF"; otherwise make the end inclusive. */
  	last = nbytes ? last - 1 : LLONG_MAX;

  	f = fdget(fd);
  	if (!f.file)
  		return -EBADF;

  	/* Only regular files, block devices, directories and symlinks qualify. */
  	mode = file_inode(f.file)->i_mode;
  	ret = -ESPIPE;
  	if (!(S_ISREG(mode) || S_ISBLK(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
  		goto out_put;

  	mapping = f.file->f_mapping;
  	ret = -EINVAL;
  	if (!mapping)
  		goto out_put;

  	/* Run the requested phases in order; the first failure ends the call. */
  	ret = 0;
  	if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) {
  		ret = filemap_fdatawait_range(mapping, offset, last);
  		if (ret < 0)
  			goto out_put;
  	}

  	if (flags & SYNC_FILE_RANGE_WRITE) {
  		ret = __filemap_fdatawrite_range(mapping, offset, last,
  						 WB_SYNC_NONE);
  		if (ret < 0)
  			goto out_put;
  	}

  	if (flags & SYNC_FILE_RANGE_WAIT_AFTER)
  		ret = filemap_fdatawait_range(mapping, offset, last);

  out_put:
  	fdput(f);
  	return ret;
  }
edd5cd4a9   David Woodhouse   Introduce fixed s...
346
347
  /* It would be nice if people remember that not all the world's an i386
     when they introduce new system calls */
4a0fd5bf0   Al Viro   teach SYSCALL_DEF...
348
349
  SYSCALL_DEFINE4(sync_file_range2, int, fd, unsigned int, flags,
  				 loff_t, offset, loff_t, nbytes)
  {
  	/* Same operation as sync_file_range(); only the argument order differs. */
  	long ret = sys_sync_file_range(fd, offset, nbytes, flags);

  	return ret;
  }