Blame view

fs/ext4/fsync.c 7.86 KB
ac27a0ec1   Dave Kleikamp   [PATCH] ext4: ini...
1
  /*
617ba13b3   Mingming Cao   [PATCH] ext4: ren...
2
   *  linux/fs/ext4/fsync.c
ac27a0ec1   Dave Kleikamp   [PATCH] ext4: ini...
3
4
5
6
7
8
9
10
11
   *
   *  Copyright (C) 1993  Stephen Tweedie (sct@redhat.com)
   *  from
   *  Copyright (C) 1992  Remy Card (card@masi.ibp.fr)
   *                      Laboratoire MASI - Institut Blaise Pascal
   *                      Universite Pierre et Marie Curie (Paris VI)
   *  from
   *  linux/fs/minix/truncate.c   Copyright (C) 1991, 1992  Linus Torvalds
   *
617ba13b3   Mingming Cao   [PATCH] ext4: ren...
12
   *  ext4fs fsync primitive
ac27a0ec1   Dave Kleikamp   [PATCH] ext4: ini...
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
   *
   *  Big-endian to little-endian byte-swapping/bitmaps by
   *        David S. Miller (davem@caip.rutgers.edu), 1995
   *
   *  Removed unnecessary code duplication for little endian machines
   *  and excessive __inline__s.
   *        Andi Kleen, 1997
   *
   * Major simplifications and cleanup - we only need to do the metadata, because
   * we can depend on generic_block_fdatasync() to sync the data blocks.
   */
  
  #include <linux/time.h>
  #include <linux/fs.h>
  #include <linux/sched.h>
  #include <linux/writeback.h>
dab291af8   Mingming Cao   [PATCH] jbd2: ena...
29
  #include <linux/jbd2.h>
d755fb384   Eric Sandeen   ext4: call blkdev...
30
  #include <linux/blkdev.h>
9bffad1ed   Theodore Ts'o   ext4: convert ins...
31

3dcf54515   Christoph Hellwig   ext4: move header...
32
33
  #include "ext4.h"
  #include "ext4_jbd2.h"
ac27a0ec1   Dave Kleikamp   [PATCH] ext4: ini...
34

9bffad1ed   Theodore Ts'o   ext4: convert ins...
35
  #include <trace/events/ext4.h>
4a873a472   Theodore Ts'o   ext4: move flush_...
36
37
  /*
   * Debugging aid: print every entry on the inode's i_completed_io_list
   * together with the entries linked before and after it.
   *
   * The body is compiled only when EXT4FS_DEBUG is defined; otherwise this
   * is an empty function.
   */
  static void dump_completed_IO(struct inode *inode)
  {
  #ifdef	EXT4FS_DEBUG
  	struct list_head *cur, *before, *after;
  	ext4_io_end_t *io, *io0, *io1;
  	unsigned long flags;

  	if (list_empty(&EXT4_I(inode)->i_completed_io_list)) {
  		ext4_debug("inode %lu completed_io list is empty\n",
  			   inode->i_ino);
  		return;
  	}

  	ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino);
  	spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
  	list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list) {
  		/*
  		 * io0/io1 are only printed, never dereferenced.  For the
  		 * first and last entry the neighbour is the list head
  		 * itself, so the printed pointer is not a real io_end.
  		 */
  		cur = &io->list;
  		before = cur->prev;
  		io0 = container_of(before, ext4_io_end_t, list);
  		after = cur->next;
  		io1 = container_of(after, ext4_io_end_t, list);

  		ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
  			    io, inode->i_ino, io0, io1);
  	}
  	spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
  #endif
  }
  
  /*
   * This function is called from ext4_sync_file().
   *
   * When IO is completed, the work to convert unwritten extents to
   * written is queued on a workqueue but may not get scheduled
   * immediately. When fsync is called, we need to ensure the
   * conversion is complete before fsync returns.
   * The inode keeps track of a list of pending/completed IO that
   * might need the conversion. This function walks through the list
   * and converts the related unwritten extents for completed IO
   * to written.
   * The function returns 0 on success, or the negative error code of the
   * last conversion that failed.
   */
e0cbee3e1   H Hartley Sweeten   ext4: functions s...
80
  int ext4_flush_completed_IO(struct inode *inode)
4a873a472   Theodore Ts'o   ext4: move flush_...
81
82
83
84
85
86
  {
  	ext4_io_end_t *io;
  	struct ext4_inode_info *ei = EXT4_I(inode);
  	unsigned long flags;
  	int ret = 0;
  	int ret2 = 0;
4a873a472   Theodore Ts'o   ext4: move flush_...
87
88
89
90
91
  	dump_completed_IO(inode);
  	spin_lock_irqsave(&ei->i_completed_io_lock, flags);
  	while (!list_empty(&ei->i_completed_io_list)){
  		io = list_entry(ei->i_completed_io_list.next,
  				ext4_io_end_t, list);
b82e384c7   Theodore Ts'o   ext4: optimize lo...
92
  		list_del_init(&io->list);
4a873a472   Theodore Ts'o   ext4: move flush_...
93
94
95
96
97
98
99
100
101
102
  		/*
  		 * Calling ext4_end_io_nolock() to convert completed
  		 * IO to written.
  		 *
  		 * When ext4_sync_file() is called, run_queue() may already
  		 * about to flush the work corresponding to this io structure.
  		 * It will be upset if it founds the io structure related
  		 * to the work-to-be schedule is freed.
  		 *
  		 * Thus we need to keep the io structure still valid here after
25985edce   Lucas De Marchi   Fix common misspe...
103
  		 * conversion finished. The io structure has a flag to
4a873a472   Theodore Ts'o   ext4: move flush_...
104
105
106
107
108
  		 * avoid double converting from both fsync and background work
  		 * queue work.
  		 */
  		spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
  		ret = ext4_end_io_nolock(io);
4a873a472   Theodore Ts'o   ext4: move flush_...
109
110
  		if (ret < 0)
  			ret2 = ret;
b82e384c7   Theodore Ts'o   ext4: optimize lo...
111
  		spin_lock_irqsave(&ei->i_completed_io_lock, flags);
4a873a472   Theodore Ts'o   ext4: move flush_...
112
113
114
115
  	}
  	spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
  	return (ret2 < 0) ? ret2 : 0;
  }
ac27a0ec1   Dave Kleikamp   [PATCH] ext4: ini...
116
  /*
14ece1028   Frank Mayhar   ext4: Make fsync ...
117
118
119
120
121
122
123
   * If we're not journaling and this is a just-created file, we have to
   * sync our parent directory (if it was freshly created) since
   * otherwise it will only be written by writeback, leaving a huge
   * window during which a crash may lose the file.  This may apply for
   * the parent directory's parent as well, and so on recursively, if
   * they are also freshly created.
   */
0893ed458   Curt Wohlgemuth   ext4: sync the di...
124
  static int ext4_sync_parent(struct inode *inode)
14ece1028   Frank Mayhar   ext4: Make fsync ...
125
  {
0893ed458   Curt Wohlgemuth   ext4: sync the di...
126
  	struct writeback_control wbc;
14ece1028   Frank Mayhar   ext4: Make fsync ...
127
  	struct dentry *dentry = NULL;
d59729f4e   Theodore Ts'o   ext4: fix races i...
128
  	struct inode *next;
0893ed458   Curt Wohlgemuth   ext4: sync the di...
129
  	int ret = 0;
14ece1028   Frank Mayhar   ext4: Make fsync ...
130

d59729f4e   Theodore Ts'o   ext4: fix races i...
131
132
133
134
  	if (!ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY))
  		return 0;
  	inode = igrab(inode);
  	while (ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) {
14ece1028   Frank Mayhar   ext4: Make fsync ...
135
  		ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY);
d59729f4e   Theodore Ts'o   ext4: fix races i...
136
137
138
139
140
141
142
143
144
  		dentry = NULL;
  		spin_lock(&inode->i_lock);
  		if (!list_empty(&inode->i_dentry)) {
  			dentry = list_first_entry(&inode->i_dentry,
  						  struct dentry, d_alias);
  			dget(dentry);
  		}
  		spin_unlock(&inode->i_lock);
  		if (!dentry)
14ece1028   Frank Mayhar   ext4: Make fsync ...
145
  			break;
d59729f4e   Theodore Ts'o   ext4: fix races i...
146
147
148
149
150
151
  		next = igrab(dentry->d_parent->d_inode);
  		dput(dentry);
  		if (!next)
  			break;
  		iput(inode);
  		inode = next;
0893ed458   Curt Wohlgemuth   ext4: sync the di...
152
153
154
155
156
157
158
159
160
  		ret = sync_mapping_buffers(inode->i_mapping);
  		if (ret)
  			break;
  		memset(&wbc, 0, sizeof(wbc));
  		wbc.sync_mode = WB_SYNC_ALL;
  		wbc.nr_to_write = 0;         /* only write out the inode */
  		ret = sync_inode(inode, &wbc);
  		if (ret)
  			break;
14ece1028   Frank Mayhar   ext4: Make fsync ...
161
  	}
d59729f4e   Theodore Ts'o   ext4: fix races i...
162
  	iput(inode);
0893ed458   Curt Wohlgemuth   ext4: sync the di...
163
  	return ret;
14ece1028   Frank Mayhar   ext4: Make fsync ...
164
  }
02c24a821   Josef Bacik   fs: push i_mutex ...
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
  /**
   * __sync_inode - generic_file_fsync without the locking and filemap_write
   * @inode:	inode to sync
   * @datasync:	only sync essential metadata if true
   *
   * This is just generic_file_fsync without the locking.  This is needed for
   * nojournal mode to make sure this inode's data/metadata makes it to disk
   * properly.  The i_mutex should be held already.
   */
  static int __sync_inode(struct inode *inode, int datasync)
  {
  	/* The mapping's buffers are always written out first. */
  	int buf_err = sync_mapping_buffers(inode->i_mapping);
  	int meta_err;

  	/*
  	 * Skip the inode write when the inode is not dirty at all, or when
  	 * this is a datasync and I_DIRTY_DATASYNC is not set.
  	 */
  	if (!(inode->i_state & I_DIRTY) ||
  	    (datasync && !(inode->i_state & I_DIRTY_DATASYNC)))
  		return buf_err;

  	meta_err = sync_inode_metadata(inode, 1);

  	/* A buffer-sync error takes precedence over a metadata error. */
  	return buf_err ? buf_err : meta_err;
  }
14ece1028   Frank Mayhar   ext4: Make fsync ...
190
  /*
617ba13b3   Mingming Cao   [PATCH] ext4: ren...
191
   * akpm: A new design for ext4_sync_file().
ac27a0ec1   Dave Kleikamp   [PATCH] ext4: ini...
192
193
194
195
196
197
198
199
   *
   * This is only called from sys_fsync(), sys_fdatasync() and sys_msync().
   * There cannot be a transaction open by this task.
   * Another task could have dirtied this inode.  Its data can be in any
   * state in the journalling system.
   *
   * What we do is just kick off a commit and wait on it.  This will snapshot the
   * inode to disk.
8d5d02e6b   Mingming Cao   ext4: async direc...
200
201
   *
   * i_mutex is acquired and released inside this function.
ac27a0ec1   Dave Kleikamp   [PATCH] ext4: ini...
202
   */
02c24a821   Josef Bacik   fs: push i_mutex ...
203
  int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
  {
  	struct inode *inode = file->f_mapping->host;
  	struct ext4_inode_info *ei = EXT4_I(inode);
  	journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
  	int ret, err;
  	tid_t commit_tid;
  	bool needs_barrier = false;

  	J_ASSERT(ext4_journal_current_handle() == NULL);

  	trace_ext4_sync_file_enter(file, datasync);

  	/* Write back and wait on the requested data range, without i_mutex. */
  	ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
  	if (ret)
  		goto out_trace;	/* keep the enter/exit trace events paired */
  	mutex_lock(&inode->i_mutex);
  	if (inode->i_sb->s_flags & MS_RDONLY)
  		goto out;

  	/* Finish any pending unwritten-extent conversions first. */
  	ret = ext4_flush_completed_IO(inode);
  	if (ret < 0)
  		goto out;

  	if (!journal) {
  		/*
  		 * No journal: write the inode directly, then sync its
  		 * parent directory if the inode has a dentry.
  		 */
  		ret = __sync_inode(inode, datasync);
  		if (!ret && !list_empty(&inode->i_dentry))
  			ret = ext4_sync_parent(inode);
  		goto out;
  	}

  	/*
  	 * data=writeback,ordered:
  	 *  The filemap_write_and_wait_range() call above has synced the data.
  	 *  Metadata is in the journal, we wait for proper transaction to
  	 *  commit here.
  	 *
  	 * data=journal:
  	 *  filemap_fdatawrite won't do anything (the buffers are clean).
  	 *  ext4_force_commit will write the file data into the journal and
  	 *  will wait on that.
  	 *  filemap_fdatawait() will encounter a ton of newly-dirtied pages
  	 *  (they were dirtied by commit).  But that's OK - the blocks are
  	 *  safe in-journal, which is all fsync() needs to ensure.
  	 */
  	if (ext4_should_journal_data(inode)) {
  		ret = ext4_force_commit(inode->i_sb);
  		goto out;
  	}

  	commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
  	/*
  	 * Decide before starting the commit whether a flush must be issued
  	 * here because the commit will not send a data barrier itself.
  	 */
  	if (journal->j_flags & JBD2_BARRIER &&
  	    !jbd2_trans_will_send_data_barrier(journal, commit_tid))
  		needs_barrier = true;
  	jbd2_log_start_commit(journal, commit_tid);
  	ret = jbd2_log_wait_commit(journal, commit_tid);
  	if (needs_barrier) {
  		/*
  		 * Report a failed cache flush to the caller, but do not
  		 * overwrite an earlier error from the commit wait.
  		 */
  		err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
  		if (!ret)
  			ret = err;
  	}
   out:
  	mutex_unlock(&inode->i_mutex);
   out_trace:
  	trace_ext4_sync_file_exit(inode, ret);
  	return ret;
  }