Blame view

fs/sync.c 9.89 KB
f79e2abb9   Andrew Morton   [PATCH] sys_sync_...
1
2
3
4
5
6
7
  /*
   * High-level sync()-related operations
   */
  
  #include <linux/kernel.h>
  #include <linux/file.h>
  #include <linux/fs.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
8
  #include <linux/slab.h>
f79e2abb9   Andrew Morton   [PATCH] sys_sync_...
9
  #include <linux/module.h>
b7ed78f56   Sage Weil   introduce sys_syn...
10
  #include <linux/namei.h>
914e26379   Al Viro   [PATCH] severing ...
11
  #include <linux/sched.h>
f79e2abb9   Andrew Morton   [PATCH] sys_sync_...
12
13
14
15
  #include <linux/writeback.h>
  #include <linux/syscalls.h>
  #include <linux/linkage.h>
  #include <linux/pagemap.h>
cf9a2ae8d   David Howells   [PATCH] BLOCK: Mo...
16
  #include <linux/quotaops.h>
5129a469a   Jörn Engel   Catch filesystems...
17
  #include <linux/backing-dev.h>
5a3e5cb8e   Jan Kara   vfs: Fix sys_sync...
18
  #include "internal.h"
f79e2abb9   Andrew Morton   [PATCH] sys_sync_...
19
20
21
  
  #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \
  			SYNC_FILE_RANGE_WAIT_AFTER)
c15c54f5f   Jan Kara   vfs: Move syncing...
22
  /*
d8a8559cd   Jens Axboe   writeback: get ri...
23
24
25
26
27
   * Do the filesystem syncing work. For simple filesystems
   * writeback_inodes_sb(sb) just dirties buffers with inodes so we have to
   * submit IO for these buffers via __sync_blockdev(). This also speeds up the
   * wait == 1 case since in that case write_inode() functions do
   * sync_dirty_buffer() and thus effectively write one block at a time.
c15c54f5f   Jan Kara   vfs: Move syncing...
28
   */
60b0680fa   Jan Kara   vfs: Rename fsync...
29
  static int __sync_filesystem(struct super_block *sb, int wait)
c15c54f5f   Jan Kara   vfs: Move syncing...
30
  {
32a88aa1b   Jens Axboe   fs: Assign bdi in...
31
32
33
34
  	/*
  	 * This should be safe, as we require bdi backing to actually
  	 * write out data in the first place
  	 */
95f28604a   Jens Axboe   fs: assign sb->s_...
35
  	if (sb->s_bdi == &noop_backing_dev_info)
32a88aa1b   Jens Axboe   fs: Assign bdi in...
36
  		return 0;
5fb324ad2   Christoph Hellwig   quota: move code ...
37
38
39
40
  	if (sb->s_qcop && sb->s_qcop->quota_sync)
  		sb->s_qcop->quota_sync(sb, -1, wait);
  
  	if (wait)
d8a8559cd   Jens Axboe   writeback: get ri...
41
  		sync_inodes_sb(sb);
5fb324ad2   Christoph Hellwig   quota: move code ...
42
  	else
0e175a183   Curt Wohlgemuth   writeback: Add a ...
43
  		writeback_inodes_sb(sb, WB_REASON_SYNC);
5fb324ad2   Christoph Hellwig   quota: move code ...
44

c15c54f5f   Jan Kara   vfs: Move syncing...
45
46
47
48
49
50
51
52
53
54
  	if (sb->s_op->sync_fs)
  		sb->s_op->sync_fs(sb, wait);
  	return __sync_blockdev(sb->s_bdev, wait);
  }
  
  /*
   * Write out and wait upon all dirty data associated with this
   * superblock.  Filesystem data as well as the underlying block
   * device.  Takes the superblock lock.
   */
60b0680fa   Jan Kara   vfs: Rename fsync...
55
  int sync_filesystem(struct super_block *sb)
c15c54f5f   Jan Kara   vfs: Move syncing...
56
57
  {
  	int ret;
5af7926ff   Christoph Hellwig   enforce ->sync_fs...
58
59
60
61
62
63
64
65
66
67
68
  	/*
  	 * We need to be protected against the filesystem going from
  	 * r/o to r/w or vice versa.
  	 */
  	WARN_ON(!rwsem_is_locked(&sb->s_umount));
  
  	/*
  	 * No point in syncing out anything if the filesystem is read-only.
  	 */
  	if (sb->s_flags & MS_RDONLY)
  		return 0;
60b0680fa   Jan Kara   vfs: Rename fsync...
69
  	ret = __sync_filesystem(sb, 0);
c15c54f5f   Jan Kara   vfs: Move syncing...
70
71
  	if (ret < 0)
  		return ret;
60b0680fa   Jan Kara   vfs: Rename fsync...
72
  	return __sync_filesystem(sb, 1);
c15c54f5f   Jan Kara   vfs: Move syncing...
73
  }
60b0680fa   Jan Kara   vfs: Rename fsync...
74
  EXPORT_SYMBOL_GPL(sync_filesystem);
c15c54f5f   Jan Kara   vfs: Move syncing...
75

01a05b337   Al Viro   new helper: itera...
76
77
  static void sync_one_sb(struct super_block *sb, void *arg)
  {
95f28604a   Jens Axboe   fs: assign sb->s_...
78
  	if (!(sb->s_flags & MS_RDONLY))
01a05b337   Al Viro   new helper: itera...
79
80
  		__sync_filesystem(sb, *(int *)arg);
  }
c15c54f5f   Jan Kara   vfs: Move syncing...
81
82
83
  /*
   * Sync all the data for all the filesystems (called by sys_sync() and
   * emergency sync)
c15c54f5f   Jan Kara   vfs: Move syncing...
84
85
86
   */
  static void sync_filesystems(int wait)
  {
01a05b337   Al Viro   new helper: itera...
87
  	iterate_supers(sync_one_sb, &wait);
c15c54f5f   Jan Kara   vfs: Move syncing...
88
  }
3beab0b42   Zhang, Yanmin   sys_sync(): fix 1...
89
90
91
92
  /*
   * sync everything.  Start out by waking pdflush, because that writes back
   * all queues in parallel.
   */
5cee5815d   Jan Kara   vfs: Make sys_syn...
93
  SYSCALL_DEFINE0(sync)
cf9a2ae8d   David Howells   [PATCH] BLOCK: Mo...
94
  {
0e175a183   Curt Wohlgemuth   writeback: Add a ...
95
  	wakeup_flusher_threads(0, WB_REASON_SYNC);
5cee5815d   Jan Kara   vfs: Make sys_syn...
96
97
  	sync_filesystems(0);
  	sync_filesystems(1);
cf9a2ae8d   David Howells   [PATCH] BLOCK: Mo...
98
99
  	if (unlikely(laptop_mode))
  		laptop_sync_completion();
cf9a2ae8d   David Howells   [PATCH] BLOCK: Mo...
100
101
  	return 0;
  }
a2a9537ac   Jens Axboe   Get rid of pdflus...
102
103
  /*
   * Work handler behind emergency_sync(): run the non-waiting sync over all
   * filesystems, log completion, and free the work item that
   * emergency_sync() allocated.
   */
  static void do_sync_work(struct work_struct *work)
  {
  	/*
  	 * Sync twice to reduce the possibility we skipped some inodes / pages
  	 * because they were temporarily locked
  	 */
  	sync_filesystems(0);
  	sync_filesystems(0);
  	printk("Emergency Sync complete\n");
  	kfree(work);
  }
cf9a2ae8d   David Howells   [PATCH] BLOCK: Mo...
114
115
  void emergency_sync(void)
  {
a2a9537ac   Jens Axboe   Get rid of pdflus...
116
117
118
119
120
121
122
  	struct work_struct *work;
  
  	work = kmalloc(sizeof(*work), GFP_ATOMIC);
  	if (work) {
  		INIT_WORK(work, do_sync_work);
  		schedule_work(work);
  	}
cf9a2ae8d   David Howells   [PATCH] BLOCK: Mo...
123
  }
b7ed78f56   Sage Weil   introduce sys_syn...
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
  /*
   * sync a single super: the superblock of the filesystem that the file
   * behind @fd lives on.  Returns -EBADF if @fd does not name an open file,
   * otherwise the result of sync_filesystem().
   */
  SYSCALL_DEFINE1(syncfs, int, fd)
  {
  	struct super_block *sb;
  	struct file *filp;
  	int fput_needed;
  	int err;

  	filp = fget_light(fd, &fput_needed);
  	if (!filp)
  		return -EBADF;

  	/* sync_filesystem() expects s_umount to be held by its caller. */
  	sb = filp->f_dentry->d_sb;
  	down_read(&sb->s_umount);
  	err = sync_filesystem(sb);
  	up_read(&sb->s_umount);

  	fput_light(filp, fput_needed);

  	return err;
  }
4c728ef58   Christoph Hellwig   add a vfs_fsync h...
146
  /**
148f948ba   Jan Kara   vfs: Introduce ne...
147
   * vfs_fsync_range - helper to sync a range of data & metadata to disk
4c728ef58   Christoph Hellwig   add a vfs_fsync h...
148
   * @file:		file to sync
148f948ba   Jan Kara   vfs: Introduce ne...
149
150
151
   * @start:		offset in bytes of the beginning of data range to sync
   * @end:		offset in bytes of the end of data range (inclusive)
   * @datasync:		perform only datasync
4c728ef58   Christoph Hellwig   add a vfs_fsync h...
152
   *
148f948ba   Jan Kara   vfs: Introduce ne...
153
154
155
   * Write back data in range @start..@end and metadata for @file to disk.  If
   * @datasync is set only metadata needed to access modified file data is
   * written.
4c728ef58   Christoph Hellwig   add a vfs_fsync h...
156
   */
8018ab057   Christoph Hellwig   sanitize vfs_fsyn...
157
  int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync)
cf9a2ae8d   David Howells   [PATCH] BLOCK: Mo...
158
  {
02c24a821   Josef Bacik   fs: push i_mutex ...
159
160
161
  	if (!file->f_op || !file->f_op->fsync)
  		return -EINVAL;
  	return file->f_op->fsync(file, start, end, datasync);
cf9a2ae8d   David Howells   [PATCH] BLOCK: Mo...
162
  }
148f948ba   Jan Kara   vfs: Introduce ne...
163
164
165
166
167
  EXPORT_SYMBOL(vfs_fsync_range);
  
  /**
   * vfs_fsync - perform a fsync or fdatasync on a file
   * @file:		file to sync
148f948ba   Jan Kara   vfs: Introduce ne...
168
169
170
171
   * @datasync:		only perform a fdatasync operation
   *
   * Write back data and metadata for @file to disk.  If @datasync is
   * set only metadata needed to access modified file data is written.
148f948ba   Jan Kara   vfs: Introduce ne...
172
   */
8018ab057   Christoph Hellwig   sanitize vfs_fsyn...
173
  int vfs_fsync(struct file *file, int datasync)
148f948ba   Jan Kara   vfs: Introduce ne...
174
  {
8018ab057   Christoph Hellwig   sanitize vfs_fsyn...
175
  	return vfs_fsync_range(file, 0, LLONG_MAX, datasync);
148f948ba   Jan Kara   vfs: Introduce ne...
176
  }
4c728ef58   Christoph Hellwig   add a vfs_fsync h...
177
  EXPORT_SYMBOL(vfs_fsync);
cf9a2ae8d   David Howells   [PATCH] BLOCK: Mo...
178

4c728ef58   Christoph Hellwig   add a vfs_fsync h...
179
  static int do_fsync(unsigned int fd, int datasync)
cf9a2ae8d   David Howells   [PATCH] BLOCK: Mo...
180
181
182
183
184
185
  {
  	struct file *file;
  	int ret = -EBADF;
  
  	file = fget(fd);
  	if (file) {
8018ab057   Christoph Hellwig   sanitize vfs_fsyn...
186
  		ret = vfs_fsync(file, datasync);
cf9a2ae8d   David Howells   [PATCH] BLOCK: Mo...
187
188
189
190
  		fput(file);
  	}
  	return ret;
  }
a5f8fa9e9   Heiko Carstens   [CVE-2009-0029] S...
191
  /* fsync(): do_fsync() on @fd with the datasync flag cleared. */
  SYSCALL_DEFINE1(fsync, unsigned int, fd)
  {
  	const int datasync = 0;

  	return do_fsync(fd, datasync);
  }
a5f8fa9e9   Heiko Carstens   [CVE-2009-0029] S...
195
  /* fdatasync(): do_fsync() on @fd with the datasync flag set. */
  SYSCALL_DEFINE1(fdatasync, unsigned int, fd)
  {
  	const int datasync = 1;

  	return do_fsync(fd, datasync);
  }
148f948ba   Jan Kara   vfs: Introduce ne...
199
200
201
202
203
204
205
206
207
208
  /**
   * generic_write_sync - perform syncing after a write if file / inode is sync
   * @file:	file to which the write happened
   * @pos:	offset where the write started
   * @count:	length of the write
   *
   * This is just a simple wrapper around our general syncing function.  It
   * does nothing and returns 0 unless @file was opened with O_DSYNC or its
   * inode is marked sync.  Otherwise the written range @pos..@pos+@count-1 is
   * passed to vfs_fsync_range(), as a datasync unless __O_SYNC is set in the
   * file flags.
   */
  int generic_write_sync(struct file *file, loff_t pos, loff_t count)
  {
  	int datasync;

  	if (!(file->f_flags & O_DSYNC) && !IS_SYNC(file->f_mapping->host))
  		return 0;

  	/* Only __O_SYNC asks for more than a datasync. */
  	if (file->f_flags & __O_SYNC)
  		datasync = 0;
  	else
  		datasync = 1;

  	return vfs_fsync_range(file, pos, pos + count - 1, datasync);
  }
  EXPORT_SYMBOL(generic_write_sync);
cf9a2ae8d   David Howells   [PATCH] BLOCK: Mo...
215
  /*
f79e2abb9   Andrew Morton   [PATCH] sys_sync_...
216
217
218
219
220
221
222
223
224
225
   * sys_sync_file_range() permits finely controlled syncing over a segment of
   * a file in the range offset .. (offset+nbytes-1) inclusive.  If nbytes is
   * zero then sys_sync_file_range() will operate from offset out to EOF.
   *
   * The flag bits are:
   *
   * SYNC_FILE_RANGE_WAIT_BEFORE: wait upon writeout of all pages in the range
   * before performing the write.
   *
   * SYNC_FILE_RANGE_WRITE: initiate writeout of all those dirty pages in the
cce770815   Pavel Machek   SYNC_FILE_RANGE_W...
226
227
   * range which are not presently under writeback. Note that this may block for
   * significant periods due to exhaustion of disk request structures.
f79e2abb9   Andrew Morton   [PATCH] sys_sync_...
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
   *
   * SYNC_FILE_RANGE_WAIT_AFTER: wait upon writeout of all pages in the range
   * after performing the write.
   *
   * Useful combinations of the flag bits are:
   *
   * SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE: ensures that all pages
   * in the range which were dirty on entry to sys_sync_file_range() are placed
   * under writeout.  This is a start-write-for-data-integrity operation.
   *
   * SYNC_FILE_RANGE_WRITE: start writeout of all dirty pages in the range which
   * are not presently under writeout.  This is an asynchronous flush-to-disk
   * operation.  Not suitable for data integrity operations.
   *
   * SYNC_FILE_RANGE_WAIT_BEFORE (or SYNC_FILE_RANGE_WAIT_AFTER): wait for
   * completion of writeout of all pages in the range.  This will be used after an
   * earlier SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE operation to wait
   * for that operation to complete and to return the result.
   *
   * SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE|SYNC_FILE_RANGE_WAIT_AFTER:
   * a traditional sync() operation.  This is a write-for-data-integrity operation
   * which will ensure that all pages in the range which were dirty on entry to
   * sys_sync_file_range() are committed to disk.
   *
   *
   * SYNC_FILE_RANGE_WAIT_BEFORE and SYNC_FILE_RANGE_WAIT_AFTER will detect any
   * I/O errors or ENOSPC conditions and will return those to the caller, after
   * clearing the EIO and ENOSPC flags in the address_space.
   *
   * It should be noted that none of these operations write out the file's
   * metadata.  So unless the application is strictly performing overwrites of
   * already-instantiated disk blocks, there are no guarantees here that the data
   * will be available after a crash.
   */
6673e0c3f   Heiko Carstens   [CVE-2009-0029] S...
262
263
  SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes,
  				unsigned int flags)
  {
  	struct address_space *mapping;
  	struct file *filp;
  	loff_t last;			/* end of the range, see below */
  	umode_t mode;
  	int fput_needed;
  	int err = -EINVAL;

  	/* Reject any flag bit outside VALID_FLAGS. */
  	if (flags & ~VALID_FLAGS)
  		goto out;

  	/* Exclusive end for now; turned into an inclusive one further down. */
  	last = offset + nbytes;

  	if ((s64)offset < 0 || (s64)last < 0 || last < offset)
  		goto out;

  	if (sizeof(pgoff_t) == 4) {
  		const unsigned long long pgcache_limit =
  			0x100000000ULL << PAGE_CACHE_SHIFT;

  		if (offset >= pgcache_limit) {
  			/*
  			 * The range starts outside a 32 bit machine's
  			 * pagecache addressing capabilities.  Let it "succeed"
  			 */
  			err = 0;
  			goto out;
  		}
  		/* Range runs past the limit: treat it as "out to EOF". */
  		if (last >= pgcache_limit)
  			nbytes = 0;
  	}

  	/* nbytes == 0 means "to EOF"; otherwise make the end inclusive. */
  	if (nbytes == 0)
  		last = LLONG_MAX;
  	else
  		last--;

  	filp = fget_light(fd, &fput_needed);
  	if (!filp) {
  		err = -EBADF;
  		goto out;
  	}

  	/* Only regular files, block devices, directories and symlinks. */
  	mode = filp->f_path.dentry->d_inode->i_mode;
  	if (!(S_ISREG(mode) || S_ISBLK(mode) || S_ISDIR(mode) ||
  	      S_ISLNK(mode))) {
  		err = -ESPIPE;
  		goto out_put;
  	}

  	mapping = filp->f_mapping;
  	if (!mapping) {
  		err = -EINVAL;
  		goto out_put;
  	}

  	/* Run the requested phases in order, stopping at the first failure. */
  	err = 0;
  	if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) {
  		err = filemap_fdatawait_range(mapping, offset, last);
  		if (err < 0)
  			goto out_put;
  	}

  	if (flags & SYNC_FILE_RANGE_WRITE) {
  		err = filemap_fdatawrite_range(mapping, offset, last);
  		if (err < 0)
  			goto out_put;
  	}

  	if (flags & SYNC_FILE_RANGE_WAIT_AFTER)
  		err = filemap_fdatawait_range(mapping, offset, last);

  out_put:
  	fput_light(filp, fput_needed);
  out:
  	return err;
  }
6673e0c3f   Heiko Carstens   [CVE-2009-0029] S...
342
343
344
345
346
347
348
349
350
  #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
  /*
   * Wrapper that receives fd and flags as longs, narrows them to the types
   * SYSC_sync_file_range() takes and forwards the call.
   */
  asmlinkage long SyS_sync_file_range(long fd, loff_t offset, loff_t nbytes,
  				    long flags)
  {
  	int narrow_fd = (int) fd;
  	unsigned int narrow_flags = (unsigned int) flags;

  	return SYSC_sync_file_range(narrow_fd, offset, nbytes, narrow_flags);
  }
  SYSCALL_ALIAS(sys_sync_file_range, SyS_sync_file_range);
  #endif
f79e2abb9   Andrew Morton   [PATCH] sys_sync_...
351

edd5cd4a9   David Woodhouse   Introduce fixed s...
352
353
  /* It would be nice if people remember that not all the world's an i386
     when they introduce new system calls */
6673e0c3f   Heiko Carstens   [CVE-2009-0029] S...
354
355
  SYSCALL_DEFINE(sync_file_range2)(int fd, unsigned int flags,
  				 loff_t offset, loff_t nbytes)
edd5cd4a9   David Woodhouse   Introduce fixed s...
356
357
358
  {
  	return sys_sync_file_range(fd, offset, nbytes, flags);
  }
6673e0c3f   Heiko Carstens   [CVE-2009-0029] S...
359
360
361
362
363
364
365
366
367
  #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
  /*
   * Wrapper that receives fd and flags as longs, narrows them to the types
   * SYSC_sync_file_range2() takes and forwards the call.
   */
  asmlinkage long SyS_sync_file_range2(long fd, long flags,
  				     loff_t offset, loff_t nbytes)
  {
  	int narrow_fd = (int) fd;
  	unsigned int narrow_flags = (unsigned int) flags;

  	return SYSC_sync_file_range2(narrow_fd, narrow_flags, offset, nbytes);
  }
  SYSCALL_ALIAS(sys_sync_file_range2, SyS_sync_file_range2);
  #endif