Blame view

fs/sync.c 9.89 KB
f79e2abb9   Andrew Morton   [PATCH] sys_sync_...
1
2
3
4
5
6
7
  /*
   * High-level sync()-related operations
   */
  
  #include <linux/kernel.h>
  #include <linux/file.h>
  #include <linux/fs.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
8
  #include <linux/slab.h>
f79e2abb9   Andrew Morton   [PATCH] sys_sync_...
9
  #include <linux/module.h>
914e26379   Al Viro   [PATCH] severing ...
10
  #include <linux/sched.h>
f79e2abb9   Andrew Morton   [PATCH] sys_sync_...
11
12
13
14
  #include <linux/writeback.h>
  #include <linux/syscalls.h>
  #include <linux/linkage.h>
  #include <linux/pagemap.h>
cf9a2ae8d   David Howells   [PATCH] BLOCK: Mo...
15
16
  #include <linux/quotaops.h>
  #include <linux/buffer_head.h>
5129a469a   Jörn Engel   Catch filesystems...
17
  #include <linux/backing-dev.h>
5a3e5cb8e   Jan Kara   vfs: Fix sys_sync...
18
  #include "internal.h"
f79e2abb9   Andrew Morton   [PATCH] sys_sync_...
19
20
21
  
  /*
   * Every flag bit that sys_sync_file_range() accepts.  A request with any
   * other bit set in its flags argument is rejected with -EINVAL.
   */
  #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \
  			SYNC_FILE_RANGE_WAIT_AFTER)
c15c54f5f   Jan Kara   vfs: Move syncing...
22
  /*
d8a8559cd   Jens Axboe   writeback: get ri...
23
24
25
26
27
   * Do the filesystem syncing work. For simple filesystems
   * writeback_inodes_sb(sb) just dirties buffers with inodes so we have to
   * submit IO for these buffers via __sync_blockdev(). This also speeds up the
   * wait == 1 case since in that case write_inode() functions do
   * sync_dirty_buffer() and thus effectively write one block at a time.
c15c54f5f   Jan Kara   vfs: Move syncing...
28
   */
60b0680fa   Jan Kara   vfs: Rename fsync...
29
  static int __sync_filesystem(struct super_block *sb, int wait)
c15c54f5f   Jan Kara   vfs: Move syncing...
30
  {
32a88aa1b   Jens Axboe   fs: Assign bdi in...
31
32
33
34
  	/*
  	 * This should be safe, as we require bdi backing to actually
  	 * write out data in the first place
  	 */
5129a469a   Jörn Engel   Catch filesystems...
35
  	if (!sb->s_bdi || sb->s_bdi == &noop_backing_dev_info)
32a88aa1b   Jens Axboe   fs: Assign bdi in...
36
  		return 0;
5fb324ad2   Christoph Hellwig   quota: move code ...
37
38
39
40
  	if (sb->s_qcop && sb->s_qcop->quota_sync)
  		sb->s_qcop->quota_sync(sb, -1, wait);
  
  	if (wait)
d8a8559cd   Jens Axboe   writeback: get ri...
41
  		sync_inodes_sb(sb);
5fb324ad2   Christoph Hellwig   quota: move code ...
42
  	else
0e3c9a228   Jens Axboe   Revert "writeback...
43
  		writeback_inodes_sb(sb);
5fb324ad2   Christoph Hellwig   quota: move code ...
44

c15c54f5f   Jan Kara   vfs: Move syncing...
45
46
47
48
49
50
51
52
53
54
  	if (sb->s_op->sync_fs)
  		sb->s_op->sync_fs(sb, wait);
  	return __sync_blockdev(sb->s_bdev, wait);
  }
  
  /*
   * Write out and wait upon all dirty data associated with this
   * superblock.  Filesystem data as well as the underlying block
   * device.  Takes the superblock lock.
   */
60b0680fa   Jan Kara   vfs: Rename fsync...
55
  int sync_filesystem(struct super_block *sb)
c15c54f5f   Jan Kara   vfs: Move syncing...
56
57
  {
  	int ret;
5af7926ff   Christoph Hellwig   enforce ->sync_fs...
58
59
60
61
62
63
64
65
66
67
68
  	/*
  	 * We need to be protected against the filesystem going from
  	 * r/o to r/w or vice versa.
  	 */
  	WARN_ON(!rwsem_is_locked(&sb->s_umount));
  
  	/*
  	 * No point in syncing out anything if the filesystem is read-only.
  	 */
  	if (sb->s_flags & MS_RDONLY)
  		return 0;
60b0680fa   Jan Kara   vfs: Rename fsync...
69
  	ret = __sync_filesystem(sb, 0);
c15c54f5f   Jan Kara   vfs: Move syncing...
70
71
  	if (ret < 0)
  		return ret;
60b0680fa   Jan Kara   vfs: Rename fsync...
72
  	return __sync_filesystem(sb, 1);
c15c54f5f   Jan Kara   vfs: Move syncing...
73
  }
60b0680fa   Jan Kara   vfs: Rename fsync...
74
  EXPORT_SYMBOL_GPL(sync_filesystem);
c15c54f5f   Jan Kara   vfs: Move syncing...
75

01a05b337   Al Viro   new helper: itera...
76
77
78
79
80
  /*
   * iterate_supers() callback: sync a single superblock.
   * @arg points to the int "wait" flag handed to __sync_filesystem().
   * Read-only superblocks and superblocks without a bdi are skipped.
   */
  static void sync_one_sb(struct super_block *sb, void *arg)
  {
  	int *wait = arg;

  	if (sb->s_flags & MS_RDONLY)
  		return;
  	if (!sb->s_bdi)
  		return;

  	__sync_filesystem(sb, *wait);
  }
c15c54f5f   Jan Kara   vfs: Move syncing...
81
82
83
  /*
   * Sync all the data for all the filesystems (called by sys_sync() and
   * emergency sync)
c15c54f5f   Jan Kara   vfs: Move syncing...
84
85
86
   */
  static void sync_filesystems(int wait)
  {
01a05b337   Al Viro   new helper: itera...
87
  	iterate_supers(sync_one_sb, &wait);
c15c54f5f   Jan Kara   vfs: Move syncing...
88
  }
3beab0b42   Zhang, Yanmin   sys_sync(): fix 1...
89
90
91
92
  /*
   * sync everything.  Start out by waking pdflush, because that writes back
   * all queues in parallel.
   */
5cee5815d   Jan Kara   vfs: Make sys_syn...
93
  SYSCALL_DEFINE0(sync)
cf9a2ae8d   David Howells   [PATCH] BLOCK: Mo...
94
  {
03ba3782e   Jens Axboe   writeback: switch...
95
  	wakeup_flusher_threads(0);
5cee5815d   Jan Kara   vfs: Make sys_syn...
96
97
  	sync_filesystems(0);
  	sync_filesystems(1);
cf9a2ae8d   David Howells   [PATCH] BLOCK: Mo...
98
99
  	if (unlikely(laptop_mode))
  		laptop_sync_completion();
cf9a2ae8d   David Howells   [PATCH] BLOCK: Mo...
100
101
  	return 0;
  }
a2a9537ac   Jens Axboe   Get rid of pdflus...
102
103
  /*
   * Work function behind emergency_sync().
   * @work: the work item queued by emergency_sync(); it was allocated there
   *        with kmalloc() and is freed here once the sync passes are done.
   */
  static void do_sync_work(struct work_struct *work)
  {
  	/*
  	 * Sync twice to reduce the possibility we skipped some inodes / pages
  	 * because they were temporarily locked
  	 */
  	sync_filesystems(0);
  	sync_filesystems(0);
  	printk("Emergency Sync complete\n");
  	kfree(work);
  }
cf9a2ae8d   David Howells   [PATCH] BLOCK: Mo...
114
115
  void emergency_sync(void)
  {
a2a9537ac   Jens Axboe   Get rid of pdflus...
116
117
118
119
120
121
122
  	struct work_struct *work;
  
  	work = kmalloc(sizeof(*work), GFP_ATOMIC);
  	if (work) {
  		INIT_WORK(work, do_sync_work);
  		schedule_work(work);
  	}
cf9a2ae8d   David Howells   [PATCH] BLOCK: Mo...
123
  }
4c728ef58   Christoph Hellwig   add a vfs_fsync h...
124
  /**
148f948ba   Jan Kara   vfs: Introduce ne...
125
   * vfs_fsync_range - helper to sync a range of data & metadata to disk
4c728ef58   Christoph Hellwig   add a vfs_fsync h...
126
   * @file:		file to sync
148f948ba   Jan Kara   vfs: Introduce ne...
127
128
129
   * @start:		offset in bytes of the beginning of data range to sync
   * @end:		offset in bytes of the end of data range (inclusive)
   * @datasync:		perform only datasync
4c728ef58   Christoph Hellwig   add a vfs_fsync h...
130
   *
148f948ba   Jan Kara   vfs: Introduce ne...
131
132
133
   * Write back data in range @start..@end and metadata for @file to disk.  If
   * @datasync is set only metadata needed to access modified file data is
   * written.
4c728ef58   Christoph Hellwig   add a vfs_fsync h...
134
   */
8018ab057   Christoph Hellwig   sanitize vfs_fsyn...
135
  int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync)
cf9a2ae8d   David Howells   [PATCH] BLOCK: Mo...
136
  {
8018ab057   Christoph Hellwig   sanitize vfs_fsyn...
137
  	struct address_space *mapping = file->f_mapping;
4c728ef58   Christoph Hellwig   add a vfs_fsync h...
138
  	int err, ret;
8018ab057   Christoph Hellwig   sanitize vfs_fsyn...
139
  	if (!file->f_op || !file->f_op->fsync) {
cf9a2ae8d   David Howells   [PATCH] BLOCK: Mo...
140
141
142
  		ret = -EINVAL;
  		goto out;
  	}
2daea67e9   Christoph Hellwig   fsync: wait for d...
143
  	ret = filemap_write_and_wait_range(mapping, start, end);
cf9a2ae8d   David Howells   [PATCH] BLOCK: Mo...
144
145
146
147
148
149
  
  	/*
  	 * We need to protect against concurrent writers, which could cause
  	 * livelocks in fsync_buffers_list().
  	 */
  	mutex_lock(&mapping->host->i_mutex);
7ea808591   Christoph Hellwig   drop unused dentr...
150
  	err = file->f_op->fsync(file, datasync);
cf9a2ae8d   David Howells   [PATCH] BLOCK: Mo...
151
152
153
  	if (!ret)
  		ret = err;
  	mutex_unlock(&mapping->host->i_mutex);
148f948ba   Jan Kara   vfs: Introduce ne...
154

cf9a2ae8d   David Howells   [PATCH] BLOCK: Mo...
155
156
157
  out:
  	return ret;
  }
148f948ba   Jan Kara   vfs: Introduce ne...
158
159
160
161
162
  EXPORT_SYMBOL(vfs_fsync_range);
  
  /**
   * vfs_fsync - perform a fsync or fdatasync on a file
   * @file:		file to sync
148f948ba   Jan Kara   vfs: Introduce ne...
163
164
165
166
   * @datasync:		only perform a fdatasync operation
   *
   * Write back data and metadata for @file to disk.  If @datasync is
   * set only metadata needed to access modified file data is written.
148f948ba   Jan Kara   vfs: Introduce ne...
167
   */
8018ab057   Christoph Hellwig   sanitize vfs_fsyn...
168
  int vfs_fsync(struct file *file, int datasync)
148f948ba   Jan Kara   vfs: Introduce ne...
169
  {
8018ab057   Christoph Hellwig   sanitize vfs_fsyn...
170
  	return vfs_fsync_range(file, 0, LLONG_MAX, datasync);
148f948ba   Jan Kara   vfs: Introduce ne...
171
  }
4c728ef58   Christoph Hellwig   add a vfs_fsync h...
172
  EXPORT_SYMBOL(vfs_fsync);
cf9a2ae8d   David Howells   [PATCH] BLOCK: Mo...
173

4c728ef58   Christoph Hellwig   add a vfs_fsync h...
174
  static int do_fsync(unsigned int fd, int datasync)
cf9a2ae8d   David Howells   [PATCH] BLOCK: Mo...
175
176
177
178
179
180
  {
  	struct file *file;
  	int ret = -EBADF;
  
  	file = fget(fd);
  	if (file) {
8018ab057   Christoph Hellwig   sanitize vfs_fsyn...
181
  		ret = vfs_fsync(file, datasync);
cf9a2ae8d   David Howells   [PATCH] BLOCK: Mo...
182
183
184
185
  		fput(file);
  	}
  	return ret;
  }
a5f8fa9e9   Heiko Carstens   [CVE-2009-0029] S...
186
  /* fsync(2): sync @fd with datasync cleared, i.e. not limited to fdatasync. */
  SYSCALL_DEFINE1(fsync, unsigned int, fd)
  {
  	const int datasync = 0;

  	return do_fsync(fd, datasync);
  }
a5f8fa9e9   Heiko Carstens   [CVE-2009-0029] S...
190
  /* fdatasync(2): sync @fd with datasync set. */
  SYSCALL_DEFINE1(fdatasync, unsigned int, fd)
  {
  	const int datasync = 1;

  	return do_fsync(fd, datasync);
  }
148f948ba   Jan Kara   vfs: Introduce ne...
194
195
196
197
198
199
200
201
202
203
  /**
   * generic_write_sync - perform syncing after a write if file / inode is sync
   * @file:	file to which the write happened
   * @pos:	offset where the write started
   * @count:	length of the write
   *
   * A thin wrapper around vfs_fsync_range().  Nothing is done, and 0 is
   * returned, unless @file was opened with O_DSYNC or its inode is IS_SYNC().
   * Otherwise the byte range @pos..@pos + @count - 1 is synced, as a
   * datasync unless __O_SYNC is set in the file flags.
   */
  int generic_write_sync(struct file *file, loff_t pos, loff_t count)
  {
  	int datasync;

  	if (!((file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host)))
  		return 0;

  	/* Only __O_SYNC asks for more than the datasync level. */
  	datasync = (file->f_flags & __O_SYNC) ? 0 : 1;

  	return vfs_fsync_range(file, pos, pos + count - 1, datasync);
  }
  EXPORT_SYMBOL(generic_write_sync);
cf9a2ae8d   David Howells   [PATCH] BLOCK: Mo...
210
  /*
f79e2abb9   Andrew Morton   [PATCH] sys_sync_...
211
212
213
214
215
216
217
218
219
220
   * sys_sync_file_range() permits finely controlled syncing over a segment of
   * a file in the range offset .. (offset+nbytes-1) inclusive.  If nbytes is
   * zero then sys_sync_file_range() will operate from offset out to EOF.
   *
   * The flag bits are:
   *
   * SYNC_FILE_RANGE_WAIT_BEFORE: wait upon writeout of all pages in the range
   * before performing the write.
   *
   * SYNC_FILE_RANGE_WRITE: initiate writeout of all those dirty pages in the
cce770815   Pavel Machek   SYNC_FILE_RANGE_W...
221
222
   * range which are not presently under writeback. Note that this may block for
   * significant periods due to exhaustion of disk request structures.
f79e2abb9   Andrew Morton   [PATCH] sys_sync_...
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
   *
   * SYNC_FILE_RANGE_WAIT_AFTER: wait upon writeout of all pages in the range
   * after performing the write.
   *
   * Useful combinations of the flag bits are:
   *
   * SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE: ensures that all pages
   * in the range which were dirty on entry to sys_sync_file_range() are placed
   * under writeout.  This is a start-write-for-data-integrity operation.
   *
   * SYNC_FILE_RANGE_WRITE: start writeout of all dirty pages in the range which
   * are not presently under writeout.  This is an asynchronous flush-to-disk
   * operation.  Not suitable for data integrity operations.
   *
   * SYNC_FILE_RANGE_WAIT_BEFORE (or SYNC_FILE_RANGE_WAIT_AFTER): wait for
   * completion of writeout of all pages in the range.  This will be used after an
   * earlier SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE operation to wait
   * for that operation to complete and to return the result.
   *
   * SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE|SYNC_FILE_RANGE_WAIT_AFTER:
   * a traditional sync() operation.  This is a write-for-data-integrity operation
   * which will ensure that all pages in the range which were dirty on entry to
   * sys_sync_file_range() are committed to disk.
   *
   *
   * SYNC_FILE_RANGE_WAIT_BEFORE and SYNC_FILE_RANGE_WAIT_AFTER will detect any
   * I/O errors or ENOSPC conditions and will return those to the caller, after
   * clearing the EIO and ENOSPC flags in the address_space.
   *
   * It should be noted that none of these operations write out the file's
   * metadata.  So unless the application is strictly performing overwrites of
   * already-instantiated disk blocks, there are no guarantees here that the data
   * will be available after a crash.
   */
6673e0c3f   Heiko Carstens   [CVE-2009-0029] S...
257
258
  SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes,
  				unsigned int flags)
f79e2abb9   Andrew Morton   [PATCH] sys_sync_...
259
260
261
  {
  	int ret;
  	struct file *file;
7a0ad10c3   Christoph Hellwig   fold do_sync_file...
262
  	struct address_space *mapping;
f79e2abb9   Andrew Morton   [PATCH] sys_sync_...
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
  	loff_t endbyte;			/* inclusive */
  	int fput_needed;
  	umode_t i_mode;
  
  	ret = -EINVAL;
  	if (flags & ~VALID_FLAGS)
  		goto out;
  
  	endbyte = offset + nbytes;
  
  	if ((s64)offset < 0)
  		goto out;
  	if ((s64)endbyte < 0)
  		goto out;
  	if (endbyte < offset)
  		goto out;
  
  	if (sizeof(pgoff_t) == 4) {
  		if (offset >= (0x100000000ULL << PAGE_CACHE_SHIFT)) {
  			/*
  			 * The range starts outside a 32 bit machine's
  			 * pagecache addressing capabilities.  Let it "succeed"
  			 */
  			ret = 0;
  			goto out;
  		}
  		if (endbyte >= (0x100000000ULL << PAGE_CACHE_SHIFT)) {
  			/*
  			 * Out to EOF
  			 */
  			nbytes = 0;
  		}
  	}
  
  	if (nbytes == 0)
111ebb6e6   OGAWA Hirofumi   [PATCH] writeback...
298
  		endbyte = LLONG_MAX;
f79e2abb9   Andrew Morton   [PATCH] sys_sync_...
299
300
301
302
303
304
305
  	else
  		endbyte--;		/* inclusive */
  
  	ret = -EBADF;
  	file = fget_light(fd, &fput_needed);
  	if (!file)
  		goto out;
0f7fc9e4d   Josef "Jeff" Sipek   [PATCH] VFS: chan...
306
  	i_mode = file->f_path.dentry->d_inode->i_mode;
f79e2abb9   Andrew Morton   [PATCH] sys_sync_...
307
308
309
310
  	ret = -ESPIPE;
  	if (!S_ISREG(i_mode) && !S_ISBLK(i_mode) && !S_ISDIR(i_mode) &&
  			!S_ISLNK(i_mode))
  		goto out_put;
7a0ad10c3   Christoph Hellwig   fold do_sync_file...
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
  	mapping = file->f_mapping;
  	if (!mapping) {
  		ret = -EINVAL;
  		goto out_put;
  	}
  
  	ret = 0;
  	if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) {
  		ret = filemap_fdatawait_range(mapping, offset, endbyte);
  		if (ret < 0)
  			goto out_put;
  	}
  
  	if (flags & SYNC_FILE_RANGE_WRITE) {
  		ret = filemap_fdatawrite_range(mapping, offset, endbyte);
  		if (ret < 0)
  			goto out_put;
  	}
  
  	if (flags & SYNC_FILE_RANGE_WAIT_AFTER)
  		ret = filemap_fdatawait_range(mapping, offset, endbyte);
f79e2abb9   Andrew Morton   [PATCH] sys_sync_...
332
333
334
335
336
  out_put:
  	fput_light(file, fput_needed);
  out:
  	return ret;
  }
6673e0c3f   Heiko Carstens   [CVE-2009-0029] S...
337
338
339
340
341
342
343
344
345
  #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
  /*
   * Entry point used when syscall wrappers are configured: @fd and @flags
   * arrive as longs and are narrowed to the types SYSC_sync_file_range()
   * declares before the call is forwarded.
   */
  asmlinkage long SyS_sync_file_range(long fd, loff_t offset, loff_t nbytes,
  				    long flags)
  {
  	int real_fd = (int) fd;
  	unsigned int real_flags = (unsigned int) flags;

  	return SYSC_sync_file_range(real_fd, offset, nbytes, real_flags);
  }
  SYSCALL_ALIAS(sys_sync_file_range, SyS_sync_file_range);
  #endif
f79e2abb9   Andrew Morton   [PATCH] sys_sync_...
346

edd5cd4a9   David Woodhouse   Introduce fixed s...
347
348
  /* It would be nice if people remember that not all the world's an i386
     when they introduce new system calls */
6673e0c3f   Heiko Carstens   [CVE-2009-0029] S...
349
350
  SYSCALL_DEFINE(sync_file_range2)(int fd, unsigned int flags,
  				 loff_t offset, loff_t nbytes)
edd5cd4a9   David Woodhouse   Introduce fixed s...
351
352
353
  {
  	return sys_sync_file_range(fd, offset, nbytes, flags);
  }
6673e0c3f   Heiko Carstens   [CVE-2009-0029] S...
354
355
356
357
358
359
360
361
362
  #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
  /*
   * Entry point used when syscall wrappers are configured: @fd and @flags
   * arrive as longs and are narrowed to the types SYSC_sync_file_range2()
   * declares before the call is forwarded.
   */
  asmlinkage long SyS_sync_file_range2(long fd, long flags,
  				     loff_t offset, loff_t nbytes)
  {
  	int real_fd = (int) fd;
  	unsigned int real_flags = (unsigned int) flags;

  	return SYSC_sync_file_range2(real_fd, real_flags, offset, nbytes);
  }
  SYSCALL_ALIAS(sys_sync_file_range2, SyS_sync_file_range2);
  #endif