Blame view

fs/ext4/move_extent.c 21.2 KB
748de6736   Akira Fujita   ext4: online defr...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
  /*
   * Copyright (c) 2008,2009 NEC Software Tohoku, Ltd.
   * Written by Takashi Sato <t-sato@yk.jp.nec.com>
   *            Akira Fujita <a-fujita@rs.jp.nec.com>
   *
   * This program is free software; you can redistribute it and/or modify it
   * under the terms of version 2.1 of the GNU Lesser General Public License
   * as published by the Free Software Foundation.
   *
   * This program is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   * GNU General Public License for more details.
   */
  
  #include <linux/fs.h>
  #include <linux/quotaops.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
18
  #include <linux/slab.h>
748de6736   Akira Fujita   ext4: online defr...
19
  #include "ext4_jbd2.h"
748de6736   Akira Fujita   ext4: online defr...
20
  #include "ext4.h"
4a092d737   Theodore Ts'o   ext4: rationalize...
21
  #include "ext4_extents.h"
748de6736   Akira Fujita   ext4: online defr...
22

e8505970a   Akira Fujita   ext4: Replace get...
23
24
25
26
27
28
29
  /**
   * get_ext_path - Look up the extent path covering a logical block.
   *
   * @inode:	inode whose extent tree is searched
   * @lblock:	logical block number to look up
   * @ppath:	in/out pointer to the extent path; passed through to
   *		ext4_find_extent() and updated with the outcome
   *
   * Thin wrapper around ext4_find_extent().  Returns 0 with *@ppath pointing
   * at the path on success.  Returns the error encoded in the pointer handed
   * back by ext4_find_extent() if the lookup fails, or -ENODATA (after the
   * path has been released and *@ppath cleared) when the leaf level of the
   * path carries no extent.
   */
  static inline int
  get_ext_path(struct inode *inode, ext4_lblk_t lblock,
  		struct ext4_ext_path **ppath)
  {
  	struct ext4_ext_path *found =
  		ext4_find_extent(inode, lblock, ppath, EXT4_EX_NOCACHE);
  
  	if (IS_ERR(found))
  		return PTR_ERR(found);
  
  	if (found[ext_depth(inode)].p_ext) {
  		*ppath = found;
  		return 0;
  	}
  
  	/* The leaf has no extent: release the path and report "no data". */
  	ext4_ext_drop_refs(found);
  	kfree(found);
  	*ppath = NULL;
  	return -ENODATA;
  }
748de6736   Akira Fujita   ext4: online defr...
51
52
  
  /**
393d1d1d7   Dr. Tilmann Bubeck   ext4: implementat...
53
54
   * ext4_double_down_write_data_sem - Acquire two inodes' write lock
   *                                   of i_data_sem
748de6736   Akira Fujita   ext4: online defr...
55
   *
03bd8b9b8   Dmitry Monakhov   ext4: move_extent...
56
   * Acquire write lock of i_data_sem of the two inodes
748de6736   Akira Fujita   ext4: online defr...
57
   */
393d1d1d7   Dr. Tilmann Bubeck   ext4: implementat...
58
59
  void
  ext4_double_down_write_data_sem(struct inode *first, struct inode *second)
748de6736   Akira Fujita   ext4: online defr...
60
  {
03bd8b9b8   Dmitry Monakhov   ext4: move_extent...
61
62
  	if (first < second) {
  		down_write(&EXT4_I(first)->i_data_sem);
daf647d2d   Theodore Ts'o   ext4: add lockdep...
63
  		down_write_nested(&EXT4_I(second)->i_data_sem, I_DATA_SEM_OTHER);
03bd8b9b8   Dmitry Monakhov   ext4: move_extent...
64
65
  	} else {
  		down_write(&EXT4_I(second)->i_data_sem);
daf647d2d   Theodore Ts'o   ext4: add lockdep...
66
  		down_write_nested(&EXT4_I(first)->i_data_sem, I_DATA_SEM_OTHER);
748de6736   Akira Fujita   ext4: online defr...
67

748de6736   Akira Fujita   ext4: online defr...
68
  	}
748de6736   Akira Fujita   ext4: online defr...
69
70
71
  }
  
  /**
393d1d1d7   Dr. Tilmann Bubeck   ext4: implementat...
72
   * ext4_double_up_write_data_sem - Release two inodes' write lock of i_data_sem
748de6736   Akira Fujita   ext4: online defr...
73
74
75
   *
   * @orig_inode:		original inode structure to be released its lock first
   * @donor_inode:	donor inode structure to be released its lock second
fc04cb49a   Akira Fujita   ext4: fix lock or...
76
   * Release write lock of i_data_sem of two inodes (orig and donor).
748de6736   Akira Fujita   ext4: online defr...
77
   */
393d1d1d7   Dr. Tilmann Bubeck   ext4: implementat...
78
79
80
  void
  ext4_double_up_write_data_sem(struct inode *orig_inode,
  			      struct inode *donor_inode)
748de6736   Akira Fujita   ext4: online defr...
81
  {
748de6736   Akira Fujita   ext4: online defr...
82
83
84
85
86
  	up_write(&EXT4_I(orig_inode)->i_data_sem);
  	up_write(&EXT4_I(donor_inode)->i_data_sem);
  }
  
  /**
   * mext_check_coverage - Check that every extent in a range has one type
   *
   * @inode:		inode in question
   * @from:		first logical block of the range
   * @count:		number of blocks in the range
   * @unwritten:		expected ext4_ext_is_unwritten() result for each extent
   * @err:		receives the result of each get_ext_path() lookup
   *
   * Walks the extents starting at @from until @from + @count is reached.
   * Returns 1 if every extent visited has the expected type.  Returns 0 as
   * soon as a lookup fails (with *@err holding the error) or an extent of the
   * other type is found.
   */
  static int
  mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count,
  		    int unwritten, int *err)
  {
  	struct ext4_ext_path *path = NULL;
  	const ext4_lblk_t end = from + count;
  	int covered = 0;
  
  	for (;;) {
  		struct ext4_extent *ex;
  
  		if (from >= end) {
  			/* Walked the whole range without a mismatch. */
  			covered = 1;
  			break;
  		}
  
  		*err = get_ext_path(inode, from, &path);
  		if (*err)
  			break;
  
  		ex = path[ext_depth(inode)].p_ext;
  		if (ext4_ext_is_unwritten(ex) != unwritten)
  			break;
  
  		/* Step past this extent and release the refs before the next lookup. */
  		from += ext4_ext_get_actual_len(ex);
  		ext4_ext_drop_refs(path);
  	}
  
  	ext4_ext_drop_refs(path);
  	kfree(path);
  	return covered;
  }
  
  /**
   * mext_page_double_lock - Grab and lock one page in each of two inodes
   *
   * @inode1:	first inode
   * @inode2:	second inode
   * @index1:	page index in @inode1
   * @index2:	page index in @inode2
   * @page:	result vector
   *
   * The two pages are grabbed in ascending order of inode address.  For two
   * distinct inodes, @page[0] ends up holding the page of @inode1 and
   * @page[1] the page of @inode2.  Returns 0 on success, or -ENOMEM if either
   * page could not be grabbed; in the latter case a first page that was
   * already grabbed is unlocked and released again.
   */
  static int
  mext_page_double_lock(struct inode *inode1, struct inode *inode2,
  		      pgoff_t index1, pgoff_t index2, struct page *page[2])
  {
  	struct address_space *first_map, *second_map;
  	pgoff_t first_idx, second_idx;
  	unsigned fl = AOP_FLAG_NOFS;
  
  	BUG_ON(!inode1 || !inode2);
  
  	/* Decide which (mapping, index) pair is locked first. */
  	if (inode1 < inode2) {
  		first_map = inode1->i_mapping;
  		first_idx = index1;
  		second_map = inode2->i_mapping;
  		second_idx = index2;
  	} else {
  		first_map = inode2->i_mapping;
  		first_idx = index2;
  		second_map = inode1->i_mapping;
  		second_idx = index1;
  	}
  
  	page[0] = grab_cache_page_write_begin(first_map, first_idx, fl);
  	if (!page[0])
  		return -ENOMEM;
  
  	page[1] = grab_cache_page_write_begin(second_map, second_idx, fl);
  	if (!page[1]) {
  		unlock_page(page[0]);
  		put_page(page[0]);
  		return -ENOMEM;
  	}
  
  	/*
  	 * grab_cache_page_write_begin() may skip waiting for writeback when
  	 * the BDI does not require it.  Be conservative and wait explicitly
  	 * on both pages.
  	 */
  	wait_on_page_writeback(page[0]);
  	wait_on_page_writeback(page[1]);
  
  	/* Hand the pages back in (inode1, inode2) order. */
  	if (inode1 > inode2)
  		swap(page[0], page[1]);
  
  	return 0;
  }
  
  /* Force page buffers uptodate w/o dropping page's lock */
  static int
  mext_page_mkuptodate(struct page *page, unsigned from, unsigned to)
  {
  	struct inode *inode = page->mapping->host;
  	sector_t block;
  	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
  	unsigned int blocksize, block_start, block_end;
  	int i, err,  nr = 0, partial = 0;
  	BUG_ON(!PageLocked(page));
  	BUG_ON(PageWriteback(page));
  
  	if (PageUptodate(page))
  		return 0;
  
  	blocksize = 1 << inode->i_blkbits;
  	if (!page_has_buffers(page))
  		create_empty_buffers(page, blocksize, 0);
  
  	head = page_buffers(page);
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
192
  	block = (sector_t)page->index << (PAGE_SHIFT - inode->i_blkbits);
bb5574880   Dmitry Monakhov   ext4: clean up on...
193
194
195
196
197
198
199
200
201
202
203
  	for (bh = head, block_start = 0; bh != head || !block_start;
  	     block++, block_start = block_end, bh = bh->b_this_page) {
  		block_end = block_start + blocksize;
  		if (block_end <= from || block_start >= to) {
  			if (!buffer_uptodate(bh))
  				partial = 1;
  			continue;
  		}
  		if (buffer_uptodate(bh))
  			continue;
  		if (!buffer_mapped(bh)) {
bb5574880   Dmitry Monakhov   ext4: clean up on...
204
205
206
207
208
209
210
  			err = ext4_get_block(inode, block, bh, 0);
  			if (err) {
  				SetPageError(page);
  				return err;
  			}
  			if (!buffer_mapped(bh)) {
  				zero_user(page, block_start, blocksize);
df3a98b08   Dan Carpenter   ext4: remove an u...
211
  				set_buffer_uptodate(bh);
bb5574880   Dmitry Monakhov   ext4: clean up on...
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
  				continue;
  			}
  		}
  		BUG_ON(nr >= MAX_BUF_PER_PAGE);
  		arr[nr++] = bh;
  	}
  	/* No io required */
  	if (!nr)
  		goto out;
  
  	for (i = 0; i < nr; i++) {
  		bh = arr[i];
  		if (!bh_uptodate_or_lock(bh)) {
  			err = bh_submit_read(bh);
  			if (err)
  				return err;
  		}
  	}
  out:
  	if (!partial)
  		SetPageUptodate(page);
  	return 0;
  }
  
  /**
748de6736   Akira Fujita   ext4: online defr...
237
238
239
240
241
   * move_extent_per_page - Move extent data per page
   *
   * @o_filp:			file structure of original file
   * @donor_inode:		donor inode
   * @orig_page_offset:		page index on original file
65dd8327e   Xiaoguang Wang   ext4: delete usel...
242
   * @donor_page_offset:		page index on donor file
748de6736   Akira Fujita   ext4: online defr...
243
244
   * @data_offset_in_page:	block index where data swapping starts
   * @block_len_in_page:		the number of blocks to be swapped
556615dcb   Lukas Czerner   ext4: rename unin...
245
   * @unwritten:			orig extent is unwritten or not
f868a48d0   Akira Fujita   ext4: fix the ret...
246
   * @err:			pointer to save return value
748de6736   Akira Fujita   ext4: online defr...
247
248
   *
   * Save the data in original inode blocks and replace original inode extents
65dd8327e   Xiaoguang Wang   ext4: delete usel...
249
   * with donor inode extents by calling ext4_swap_extents().
f868a48d0   Akira Fujita   ext4: fix the ret...
250
251
   * Finally, write out the saved data in new original inode blocks. Return
   * replaced block count.
748de6736   Akira Fujita   ext4: online defr...
252
253
   */
  static int
44fc48f70   Akira Fujita   ext4: Fix small t...
254
  move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
fcf6b1b72   Dmitry Monakhov   ext4: refactor ex...
255
256
257
  		     pgoff_t orig_page_offset, pgoff_t donor_page_offset,
  		     int data_offset_in_page,
  		     int block_len_in_page, int unwritten, int *err)
748de6736   Akira Fujita   ext4: online defr...
258
  {
496ad9aa8   Al Viro   new helper: file_...
259
  	struct inode *orig_inode = file_inode(o_filp);
bb5574880   Dmitry Monakhov   ext4: clean up on...
260
  	struct page *pagep[2] = {NULL, NULL};
748de6736   Akira Fujita   ext4: online defr...
261
  	handle_t *handle;
fcf6b1b72   Dmitry Monakhov   ext4: refactor ex...
262
  	ext4_lblk_t orig_blk_offset, donor_blk_offset;
748de6736   Akira Fujita   ext4: online defr...
263
  	unsigned long blocksize = orig_inode->i_sb->s_blocksize;
f868a48d0   Akira Fujita   ext4: fix the ret...
264
  	unsigned int tmp_data_size, data_size, replaced_size;
bcff24887   Eryu Guan   ext4: don't read ...
265
  	int i, err2, jblocks, retries = 0;
f868a48d0   Akira Fujita   ext4: fix the ret...
266
  	int replaced_count = 0;
bb5574880   Dmitry Monakhov   ext4: clean up on...
267
  	int from = data_offset_in_page << orig_inode->i_blkbits;
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
268
  	int blocks_per_page = PAGE_SIZE >> orig_inode->i_blkbits;
88c6b61ff   Dmitry Monakhov   ext4: move_extent...
269
  	struct super_block *sb = orig_inode->i_sb;
bcff24887   Eryu Guan   ext4: don't read ...
270
  	struct buffer_head *bh = NULL;
748de6736   Akira Fujita   ext4: online defr...
271
272
273
274
275
  
  	/*
  	 * It needs twice the amount of ordinary journal buffers because
  	 * inode and donor_inode may change each different metadata blocks.
  	 */
bb5574880   Dmitry Monakhov   ext4: clean up on...
276
277
  again:
  	*err = 0;
748de6736   Akira Fujita   ext4: online defr...
278
  	jblocks = ext4_writepage_trans_blocks(orig_inode) * 2;
9924a92a8   Theodore Ts'o   ext4: pass contex...
279
  	handle = ext4_journal_start(orig_inode, EXT4_HT_MOVE_EXTENTS, jblocks);
748de6736   Akira Fujita   ext4: online defr...
280
  	if (IS_ERR(handle)) {
f868a48d0   Akira Fujita   ext4: fix the ret...
281
282
  		*err = PTR_ERR(handle);
  		return 0;
748de6736   Akira Fujita   ext4: online defr...
283
  	}
748de6736   Akira Fujita   ext4: online defr...
284
285
  	orig_blk_offset = orig_page_offset * blocks_per_page +
  		data_offset_in_page;
fcf6b1b72   Dmitry Monakhov   ext4: refactor ex...
286
287
  	donor_blk_offset = donor_page_offset * blocks_per_page +
  		data_offset_in_page;
f868a48d0   Akira Fujita   ext4: fix the ret...
288
  	/* Calculate data_size */
748de6736   Akira Fujita   ext4: online defr...
289
290
291
  	if ((orig_blk_offset + block_len_in_page - 1) ==
  	    ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) {
  		/* Replace the last block */
f868a48d0   Akira Fujita   ext4: fix the ret...
292
  		tmp_data_size = orig_inode->i_size & (blocksize - 1);
748de6736   Akira Fujita   ext4: online defr...
293
  		/*
f868a48d0   Akira Fujita   ext4: fix the ret...
294
  		 * If data_size equal zero, it shows data_size is multiples of
748de6736   Akira Fujita   ext4: online defr...
295
296
  		 * blocksize. So we set appropriate value.
  		 */
f868a48d0   Akira Fujita   ext4: fix the ret...
297
298
  		if (tmp_data_size == 0)
  			tmp_data_size = blocksize;
748de6736   Akira Fujita   ext4: online defr...
299

f868a48d0   Akira Fujita   ext4: fix the ret...
300
  		data_size = tmp_data_size +
748de6736   Akira Fujita   ext4: online defr...
301
  			((block_len_in_page - 1) << orig_inode->i_blkbits);
f868a48d0   Akira Fujita   ext4: fix the ret...
302
303
304
305
  	} else
  		data_size = block_len_in_page << orig_inode->i_blkbits;
  
  	replaced_size = data_size;
748de6736   Akira Fujita   ext4: online defr...
306

bb5574880   Dmitry Monakhov   ext4: clean up on...
307
  	*err = mext_page_double_lock(orig_inode, donor_inode, orig_page_offset,
fcf6b1b72   Dmitry Monakhov   ext4: refactor ex...
308
  				     donor_page_offset, pagep);
f868a48d0   Akira Fujita   ext4: fix the ret...
309
  	if (unlikely(*err < 0))
bb5574880   Dmitry Monakhov   ext4: clean up on...
310
  		goto stop_journal;
8c8544739   Dmitry Monakhov   ext4: reimplement...
311
  	/*
556615dcb   Lukas Czerner   ext4: rename unin...
312
  	 * If orig extent was unwritten it can become initialized
8c8544739   Dmitry Monakhov   ext4: reimplement...
313
314
315
316
317
  	 * at any time after i_data_sem was dropped, in order to
  	 * serialize with delalloc we have recheck extent while we
  	 * hold page's lock, if it is still the case data copy is not
  	 * necessary, just swap data blocks between orig and donor.
  	 */
556615dcb   Lukas Czerner   ext4: rename unin...
318
  	if (unwritten) {
393d1d1d7   Dr. Tilmann Bubeck   ext4: implementat...
319
  		ext4_double_down_write_data_sem(orig_inode, donor_inode);
8c8544739   Dmitry Monakhov   ext4: reimplement...
320
321
  		/* If any of extents in range became initialized we have to
  		 * fallback to data copying */
556615dcb   Lukas Czerner   ext4: rename unin...
322
323
  		unwritten = mext_check_coverage(orig_inode, orig_blk_offset,
  						block_len_in_page, 1, err);
8c8544739   Dmitry Monakhov   ext4: reimplement...
324
325
  		if (*err)
  			goto drop_data_sem;
748de6736   Akira Fujita   ext4: online defr...
326

fcf6b1b72   Dmitry Monakhov   ext4: refactor ex...
327
  		unwritten &= mext_check_coverage(donor_inode, donor_blk_offset,
556615dcb   Lukas Czerner   ext4: rename unin...
328
  						 block_len_in_page, 1, err);
8c8544739   Dmitry Monakhov   ext4: reimplement...
329
330
  		if (*err)
  			goto drop_data_sem;
556615dcb   Lukas Czerner   ext4: rename unin...
331
  		if (!unwritten) {
393d1d1d7   Dr. Tilmann Bubeck   ext4: implementat...
332
  			ext4_double_up_write_data_sem(orig_inode, donor_inode);
8c8544739   Dmitry Monakhov   ext4: reimplement...
333
334
335
336
337
338
339
340
341
  			goto data_copy;
  		}
  		if ((page_has_private(pagep[0]) &&
  		     !try_to_release_page(pagep[0], 0)) ||
  		    (page_has_private(pagep[1]) &&
  		     !try_to_release_page(pagep[1], 0))) {
  			*err = -EBUSY;
  			goto drop_data_sem;
  		}
fcf6b1b72   Dmitry Monakhov   ext4: refactor ex...
342
343
344
345
  		replaced_count = ext4_swap_extents(handle, orig_inode,
  						   donor_inode, orig_blk_offset,
  						   donor_blk_offset,
  						   block_len_in_page, 1, err);
8c8544739   Dmitry Monakhov   ext4: reimplement...
346
  	drop_data_sem:
393d1d1d7   Dr. Tilmann Bubeck   ext4: implementat...
347
  		ext4_double_up_write_data_sem(orig_inode, donor_inode);
8c8544739   Dmitry Monakhov   ext4: reimplement...
348
349
350
  		goto unlock_pages;
  	}
  data_copy:
bb5574880   Dmitry Monakhov   ext4: clean up on...
351
352
353
354
355
356
357
358
359
360
  	*err = mext_page_mkuptodate(pagep[0], from, from + replaced_size);
  	if (*err)
  		goto unlock_pages;
  
  	/* At this point all buffers in range are uptodate, old mapping layout
  	 * is no longer required, try to drop it now. */
  	if ((page_has_private(pagep[0]) && !try_to_release_page(pagep[0], 0)) ||
  	    (page_has_private(pagep[1]) && !try_to_release_page(pagep[1], 0))) {
  		*err = -EBUSY;
  		goto unlock_pages;
748de6736   Akira Fujita   ext4: online defr...
361
  	}
6e2631463   Dmitry Monakhov   ext4: fix incorre...
362
  	ext4_double_down_write_data_sem(orig_inode, donor_inode);
fcf6b1b72   Dmitry Monakhov   ext4: refactor ex...
363
364
365
  	replaced_count = ext4_swap_extents(handle, orig_inode, donor_inode,
  					       orig_blk_offset, donor_blk_offset,
  					   block_len_in_page, 1, err);
6e2631463   Dmitry Monakhov   ext4: fix incorre...
366
  	ext4_double_up_write_data_sem(orig_inode, donor_inode);
bb5574880   Dmitry Monakhov   ext4: clean up on...
367
  	if (*err) {
f868a48d0   Akira Fujita   ext4: fix the ret...
368
369
370
371
  		if (replaced_count) {
  			block_len_in_page = replaced_count;
  			replaced_size =
  				block_len_in_page << orig_inode->i_blkbits;
ac48b0a1d   Akira Fujita   ext4: move_extent...
372
  		} else
bb5574880   Dmitry Monakhov   ext4: clean up on...
373
  			goto unlock_pages;
748de6736   Akira Fujita   ext4: online defr...
374
  	}
bb5574880   Dmitry Monakhov   ext4: clean up on...
375
376
  	/* Perform all necessary steps similar write_begin()/write_end()
  	 * but keeping in mind that i_size will not change */
bcff24887   Eryu Guan   ext4: don't read ...
377
378
379
380
381
382
383
384
385
  	if (!page_has_buffers(pagep[0]))
  		create_empty_buffers(pagep[0], 1 << orig_inode->i_blkbits, 0);
  	bh = page_buffers(pagep[0]);
  	for (i = 0; i < data_offset_in_page; i++)
  		bh = bh->b_this_page;
  	for (i = 0; i < block_len_in_page; i++) {
  		*err = ext4_get_block(orig_inode, orig_blk_offset + i, bh, 0);
  		if (*err < 0)
  			break;
6ffe77bad   Eryu Guan   ext4: iterate ove...
386
  		bh = bh->b_this_page;
bcff24887   Eryu Guan   ext4: don't read ...
387
  	}
bb5574880   Dmitry Monakhov   ext4: clean up on...
388
389
  	if (!*err)
  		*err = block_commit_write(pagep[0], from, from + replaced_size);
748de6736   Akira Fujita   ext4: online defr...
390

bb5574880   Dmitry Monakhov   ext4: clean up on...
391
392
393
394
395
  	if (unlikely(*err < 0))
  		goto repair_branches;
  
  	/* Even in case of data=writeback it is reasonable to pin
  	 * inode to transaction, to prevent unexpected data loss */
ee0876bc6   Jan Kara   ext4: do not ask ...
396
  	*err = ext4_jbd2_inode_add_write(handle, orig_inode);
bb5574880   Dmitry Monakhov   ext4: clean up on...
397
398
399
  
  unlock_pages:
  	unlock_page(pagep[0]);
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
400
  	put_page(pagep[0]);
bb5574880   Dmitry Monakhov   ext4: clean up on...
401
  	unlock_page(pagep[1]);
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
402
  	put_page(pagep[1]);
bb5574880   Dmitry Monakhov   ext4: clean up on...
403
  stop_journal:
748de6736   Akira Fujita   ext4: online defr...
404
  	ext4_journal_stop(handle);
88c6b61ff   Dmitry Monakhov   ext4: move_extent...
405
406
407
  	if (*err == -ENOSPC &&
  	    ext4_should_retry_alloc(sb, &retries))
  		goto again;
bb5574880   Dmitry Monakhov   ext4: clean up on...
408
409
  	/* Buffer was busy because probably is pinned to journal transaction,
  	 * force transaction commit may help to free it. */
88c6b61ff   Dmitry Monakhov   ext4: move_extent...
410
411
  	if (*err == -EBUSY && retries++ < 4 && EXT4_SB(sb)->s_journal &&
  	    jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal))
bb5574880   Dmitry Monakhov   ext4: clean up on...
412
  		goto again;
f868a48d0   Akira Fujita   ext4: fix the ret...
413
  	return replaced_count;
bb5574880   Dmitry Monakhov   ext4: clean up on...
414
415
416
417
418
419
420
  
  repair_branches:
  	/*
  	 * This should never ever happen!
  	 * Extents are swapped already, but we are not able to copy data.
  	 * Try to swap extents to it's original places
  	 */
393d1d1d7   Dr. Tilmann Bubeck   ext4: implementat...
421
  	ext4_double_down_write_data_sem(orig_inode, donor_inode);
fcf6b1b72   Dmitry Monakhov   ext4: refactor ex...
422
423
424
  	replaced_count = ext4_swap_extents(handle, donor_inode, orig_inode,
  					       orig_blk_offset, donor_blk_offset,
  					   block_len_in_page, 0, &err2);
393d1d1d7   Dr. Tilmann Bubeck   ext4: implementat...
425
  	ext4_double_up_write_data_sem(orig_inode, donor_inode);
bb5574880   Dmitry Monakhov   ext4: clean up on...
426
427
428
429
430
431
432
433
  	if (replaced_count != block_len_in_page) {
  		EXT4_ERROR_INODE_BLOCK(orig_inode, (sector_t)(orig_blk_offset),
  				       "Unable to copy data block,"
  				       " data will be lost.");
  		*err = -EIO;
  	}
  	replaced_count = 0;
  	goto unlock_pages;
748de6736   Akira Fujita   ext4: online defr...
434
435
436
  }
  
  /**
   * mext_check_arguments - Check whether move extent can be done
   *
   * @orig_inode:		original inode
   * @donor_inode:	donor inode
   * @orig_start:		logical start offset in block for orig
   * @donor_start:	logical start offset in block for donor
   * @len:		in/out: the number of blocks to be moved; trimmed so
   *			that neither range extends past the end of its file
   *
   * Check the arguments of ext4_move_extents() whether the files can be
   * exchanged with each other.
   *
   * Return 0 on success, or a negative error value on failure:
   * -EINVAL for a setuid/setgid donor, an empty file, start offsets that are
   * not aligned with each other, a range beyond EXT_MAX_BLOCKS, or a length
   * that is (or is trimmed to) zero; -EPERM for an immutable or append-only
   * donor; -EBUSY for swap or quota files; -EOPNOTSUPP when either file is
   * not extent based.
   */
  static int
  mext_check_arguments(struct inode *orig_inode,
  		     struct inode *donor_inode, __u64 orig_start,
  		     __u64 donor_start, __u64 *len)
  {
  	__u64 orig_eof, donor_eof;
  	unsigned int blkbits = orig_inode->i_blkbits;
  	unsigned int blocksize = 1 << blkbits;
  
  	/* File sizes rounded up to whole blocks. */
  	orig_eof = (i_size_read(orig_inode) + blocksize - 1) >> blkbits;
  	donor_eof = (i_size_read(donor_inode) + blocksize - 1) >> blkbits;
  
  	if (donor_inode->i_mode & (S_ISUID|S_ISGID)) {
  		ext4_debug("ext4 move extent: suid or sgid is set"
  			   " to donor file [ino:orig %lu, donor %lu]\n",
  			   orig_inode->i_ino, donor_inode->i_ino);
  		return -EINVAL;
  	}
  
  	if (IS_IMMUTABLE(donor_inode) || IS_APPEND(donor_inode))
  		return -EPERM;
  
  	/* Ext4 move extent does not support swapfile */
  	if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) {
  		ext4_debug("ext4 move extent: The argument files should "
  			"not be swapfile [ino:orig %lu, donor %lu]\n",
  			orig_inode->i_ino, donor_inode->i_ino);
  		return -EBUSY;
  	}
  
  	if (IS_NOQUOTA(orig_inode) || IS_NOQUOTA(donor_inode)) {
  		ext4_debug("ext4 move extent: The argument files should "
  			"not be quota files [ino:orig %lu, donor %lu]\n",
  			orig_inode->i_ino, donor_inode->i_ino);
  		return -EBUSY;
  	}
  
  	/* Ext4 move extent supports only extent based file */
  	if (!(ext4_test_inode_flag(orig_inode, EXT4_INODE_EXTENTS))) {
  		ext4_debug("ext4 move extent: orig file is not extents "
  			"based file [ino:orig %lu]\n", orig_inode->i_ino);
  		return -EOPNOTSUPP;
  	} else if (!(ext4_test_inode_flag(donor_inode, EXT4_INODE_EXTENTS))) {
  		ext4_debug("ext4 move extent: donor file is not extents "
  			"based file [ino:donor %lu]\n", donor_inode->i_ino);
  		return -EOPNOTSUPP;
  	}
  
  	if ((!orig_inode->i_size) || (!donor_inode->i_size)) {
  		ext4_debug("ext4 move extent: File size is 0 byte\n");
  		return -EINVAL;
  	}
  
  	/* Start offset should be same */
  	if ((orig_start & ~(PAGE_MASK >> blkbits)) !=
  	    (donor_start & ~(PAGE_MASK >> blkbits))) {
  		ext4_debug("ext4 move extent: orig and donor's start "
  			"offset are not alligned [ino:orig %lu, donor %lu]\n",
  			orig_inode->i_ino, donor_inode->i_ino);
  		return -EINVAL;
  	}
  
  	if ((orig_start >= EXT_MAX_BLOCKS) ||
  	    (donor_start >= EXT_MAX_BLOCKS) ||
  	    (*len > EXT_MAX_BLOCKS) ||
  	    (donor_start + *len >= EXT_MAX_BLOCKS) ||
  	    (orig_start + *len >= EXT_MAX_BLOCKS))  {
  		ext4_debug("ext4 move extent: Can't handle over [%u] blocks "
  			"[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCKS,
  			orig_inode->i_ino, donor_inode->i_ino);
  		return -EINVAL;
  	}
  
  	/*
  	 * Trim *len so that neither range runs past the end of its file.
  	 * A start at or beyond EOF leaves nothing to move; testing for that
  	 * first keeps the unsigned "eof - start" below from wrapping around
  	 * to a huge length.  The "*len &&" guards likewise keep
  	 * "start + *len - 1" from wrapping when the caller passed a zero
  	 * length, so that such a request is rejected below instead of being
  	 * silently widened.
  	 */
  	if (orig_eof <= orig_start || donor_eof <= donor_start) {
  		*len = 0;
  	} else {
  		if (*len && orig_eof < orig_start + *len - 1)
  			*len = orig_eof - orig_start;
  		if (*len && donor_eof < donor_start + *len - 1)
  			*len = donor_eof - donor_start;
  	}
  	if (!*len) {
  		ext4_debug("ext4 move extent: len should not be 0 "
  			"[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
  			donor_inode->i_ino);
  		return -EINVAL;
  	}
  
  	return 0;
  }
  
  /**
748de6736   Akira Fujita   ext4: online defr...
538
539
540
541
   * ext4_move_extents - Exchange the specified range of a file
   *
   * @o_filp:		file structure of the original file
   * @d_filp:		file structure of the donor file
65dd8327e   Xiaoguang Wang   ext4: delete usel...
542
543
   * @orig_blk:		start offset in block for orig
   * @donor_blk:		start offset in block for donor
748de6736   Akira Fujita   ext4: online defr...
544
545
546
547
548
549
   * @len:		the number of blocks to be moved
   * @moved_len:		moved block length
   *
   * This function returns 0 and moved block length is set in moved_len
   * if succeed, otherwise returns error value.
   *
748de6736   Akira Fujita   ext4: online defr...
550
551
   */
  int
fcf6b1b72   Dmitry Monakhov   ext4: refactor ex...
552
553
  ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
  		  __u64 donor_blk, __u64 len, __u64 *moved_len)
748de6736   Akira Fujita   ext4: online defr...
554
  {
496ad9aa8   Al Viro   new helper: file_...
555
556
  	struct inode *orig_inode = file_inode(o_filp);
  	struct inode *donor_inode = file_inode(d_filp);
fcf6b1b72   Dmitry Monakhov   ext4: refactor ex...
557
  	struct ext4_ext_path *path = NULL;
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
558
  	int blocks_per_page = PAGE_SIZE >> orig_inode->i_blkbits;
fcf6b1b72   Dmitry Monakhov   ext4: refactor ex...
559
560
561
  	ext4_lblk_t o_end, o_start = orig_blk;
  	ext4_lblk_t d_start = donor_blk;
  	int ret;
748de6736   Akira Fujita   ext4: online defr...
562

03bd8b9b8   Dmitry Monakhov   ext4: move_extent...
563
564
565
566
567
568
569
570
571
572
  	if (orig_inode->i_sb != donor_inode->i_sb) {
  		ext4_debug("ext4 move extent: The argument files "
  			"should be in same FS [ino:orig %lu, donor %lu]
  ",
  			orig_inode->i_ino, donor_inode->i_ino);
  		return -EINVAL;
  	}
  
  	/* orig and donor should be different inodes */
  	if (orig_inode == donor_inode) {
f3ce8064b   Theodore Ts'o   ext4: EXT4_IOC_MO...
573
  		ext4_debug("ext4 move extent: The argument files should not "
03bd8b9b8   Dmitry Monakhov   ext4: move_extent...
574
575
  			"be same inode [ino:orig %lu, donor %lu]
  ",
f3ce8064b   Theodore Ts'o   ext4: EXT4_IOC_MO...
576
577
578
  			orig_inode->i_ino, donor_inode->i_ino);
  		return -EINVAL;
  	}
7247c0caa   Akira Fujita   ext4: Fix the NUL...
579
580
581
582
583
584
585
586
  	/* Regular file check */
  	if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) {
  		ext4_debug("ext4 move extent: The argument files should be "
  			"regular file [ino:orig %lu, donor %lu]
  ",
  			orig_inode->i_ino, donor_inode->i_ino);
  		return -EINVAL;
  	}
04e22412f   Eric Whitney   ext4: make online...
587
588
589
  
  	/* TODO: it's not obvious how to swap blocks for inodes with full
  	   journaling enabled */
f066055a3   Dmitry Monakhov   ext4: online defr...
590
591
  	if (ext4_should_journal_data(orig_inode) ||
  	    ext4_should_journal_data(donor_inode)) {
04e22412f   Eric Whitney   ext4: make online...
592
593
594
  		ext4_msg(orig_inode->i_sb, KERN_ERR,
  			 "Online defrag not supported with data journaling");
  		return -EOPNOTSUPP;
f066055a3   Dmitry Monakhov   ext4: online defr...
595
  	}
04e22412f   Eric Whitney   ext4: make online...
596

14fbd4aa6   Eric Whitney   ext4: enforce onl...
597
598
599
600
601
602
  	if (ext4_encrypted_inode(orig_inode) ||
  	    ext4_encrypted_inode(donor_inode)) {
  		ext4_msg(orig_inode->i_sb, KERN_ERR,
  			 "Online defrag not supported for encrypted files");
  		return -EOPNOTSUPP;
  	}
fc04cb49a   Akira Fujita   ext4: fix lock or...
603
  	/* Protect orig and donor inodes against a truncate */
375e289ea   J. Bruce Fields   vfs: pull ext4's ...
604
  	lock_two_nondirectories(orig_inode, donor_inode);
748de6736   Akira Fujita   ext4: online defr...
605

17335dcc4   Dmitry Monakhov   ext4: serialize d...
606
607
608
609
610
  	/* Wait for all existing dio workers */
  	ext4_inode_block_unlocked_dio(orig_inode);
  	ext4_inode_block_unlocked_dio(donor_inode);
  	inode_dio_wait(orig_inode);
  	inode_dio_wait(donor_inode);
fc04cb49a   Akira Fujita   ext4: fix lock or...
611
  	/* Protect extent tree against block allocations via delalloc */
393d1d1d7   Dr. Tilmann Bubeck   ext4: implementat...
612
  	ext4_double_down_write_data_sem(orig_inode, donor_inode);
748de6736   Akira Fujita   ext4: online defr...
613
  	/* Check the filesystem environment whether move_extent can be done */
fcf6b1b72   Dmitry Monakhov   ext4: refactor ex...
614
615
  	ret = mext_check_arguments(orig_inode, donor_inode, orig_blk,
  				    donor_blk, &len);
03bd8b9b8   Dmitry Monakhov   ext4: move_extent...
616
  	if (ret)
347fa6f1c   Akira Fujita   ext4: Add null ex...
617
  		goto out;
fcf6b1b72   Dmitry Monakhov   ext4: refactor ex...
618
  	o_end = o_start + len;
748de6736   Akira Fujita   ext4: online defr...
619

fcf6b1b72   Dmitry Monakhov   ext4: refactor ex...
620
621
622
623
624
625
  	while (o_start < o_end) {
  		struct ext4_extent *ex;
  		ext4_lblk_t cur_blk, next_blk;
  		pgoff_t orig_page_index, donor_page_index;
  		int offset_in_page;
  		int unwritten, cur_len;
748de6736   Akira Fujita   ext4: online defr...
626

fcf6b1b72   Dmitry Monakhov   ext4: refactor ex...
627
628
  		ret = get_ext_path(orig_inode, o_start, &path);
  		if (ret)
748de6736   Akira Fujita   ext4: online defr...
629
  			goto out;
fcf6b1b72   Dmitry Monakhov   ext4: refactor ex...
630
631
632
633
634
635
636
637
638
639
640
641
642
  		ex = path[path->p_depth].p_ext;
  		next_blk = ext4_ext_next_allocated_block(path);
  		cur_blk = le32_to_cpu(ex->ee_block);
  		cur_len = ext4_ext_get_actual_len(ex);
  		/* Check hole before the start pos */
  		if (cur_blk + cur_len - 1 < o_start) {
  			if (next_blk == EXT_MAX_BLOCKS) {
  				o_start = o_end;
  				ret = -ENODATA;
  				goto out;
  			}
  			d_start += next_blk - o_start;
  			o_start = next_blk;
3bdf14b4d   Theodore Ts'o   ext4: reuse path ...
643
  			continue;
fcf6b1b72   Dmitry Monakhov   ext4: refactor ex...
644
645
646
647
648
649
650
651
652
653
  		/* Check hole after the start pos */
  		} else if (cur_blk > o_start) {
  			/* Skip hole */
  			d_start += cur_blk - o_start;
  			o_start = cur_blk;
  			/* Extent inside requested range ?*/
  			if (cur_blk >= o_end)
  				goto out;
  		} else { /* in_range(o_start, o_blk, o_len) */
  			cur_len += cur_blk - o_start;
748de6736   Akira Fujita   ext4: online defr...
654
  		}
fcf6b1b72   Dmitry Monakhov   ext4: refactor ex...
655
656
657
  		unwritten = ext4_ext_is_unwritten(ex);
  		if (o_end - o_start < cur_len)
  			cur_len = o_end - o_start;
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
658
  		orig_page_index = o_start >> (PAGE_SHIFT -
fcf6b1b72   Dmitry Monakhov   ext4: refactor ex...
659
  					       orig_inode->i_blkbits);
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
660
  		donor_page_index = d_start >> (PAGE_SHIFT -
fcf6b1b72   Dmitry Monakhov   ext4: refactor ex...
661
662
663
664
  					       donor_inode->i_blkbits);
  		offset_in_page = o_start % blocks_per_page;
  		if (cur_len > blocks_per_page- offset_in_page)
  			cur_len = blocks_per_page - offset_in_page;
fc04cb49a   Akira Fujita   ext4: fix lock or...
665
666
667
668
669
670
671
  		/*
  		 * Up semaphore to avoid following problems:
  		 * a. transaction deadlock among ext4_journal_start,
  		 *    ->write_begin via pagefault, and jbd2_journal_commit
  		 * b. racing with ->readpage, ->write_begin, and ext4_get_block
  		 *    in move_extent_per_page
  		 */
393d1d1d7   Dr. Tilmann Bubeck   ext4: implementat...
672
  		ext4_double_up_write_data_sem(orig_inode, donor_inode);
fcf6b1b72   Dmitry Monakhov   ext4: refactor ex...
673
674
675
676
677
  		/* Swap original branches with new branches */
  		move_extent_per_page(o_filp, donor_inode,
  				     orig_page_index, donor_page_index,
  				     offset_in_page, cur_len,
  				     unwritten, &ret);
393d1d1d7   Dr. Tilmann Bubeck   ext4: implementat...
678
  		ext4_double_down_write_data_sem(orig_inode, donor_inode);
03bd8b9b8   Dmitry Monakhov   ext4: move_extent...
679
  		if (ret < 0)
fc04cb49a   Akira Fujita   ext4: fix lock or...
680
  			break;
fcf6b1b72   Dmitry Monakhov   ext4: refactor ex...
681
682
  		o_start += cur_len;
  		d_start += cur_len;
748de6736   Akira Fujita   ext4: online defr...
683
  	}
fcf6b1b72   Dmitry Monakhov   ext4: refactor ex...
684
685
686
  	*moved_len = o_start - orig_blk;
  	if (*moved_len > len)
  		*moved_len = len;
748de6736   Akira Fujita   ext4: online defr...
687
  out:
94d7c16cb   Akira Fujita   ext4: Fix double-...
688
689
690
691
  	if (*moved_len) {
  		ext4_discard_preallocations(orig_inode);
  		ext4_discard_preallocations(donor_inode);
  	}
b7ea89ad0   Theodore Ts'o   ext4: allow a NUL...
692
693
  	ext4_ext_drop_refs(path);
  	kfree(path);
393d1d1d7   Dr. Tilmann Bubeck   ext4: implementat...
694
  	ext4_double_up_write_data_sem(orig_inode, donor_inode);
17335dcc4   Dmitry Monakhov   ext4: serialize d...
695
696
  	ext4_inode_resume_unlocked_dio(orig_inode);
  	ext4_inode_resume_unlocked_dio(donor_inode);
375e289ea   J. Bruce Fields   vfs: pull ext4's ...
697
  	unlock_two_nondirectories(orig_inode, donor_inode);
748de6736   Akira Fujita   ext4: online defr...
698

03bd8b9b8   Dmitry Monakhov   ext4: move_extent...
699
  	return ret;
748de6736   Akira Fujita   ext4: online defr...
700
  }