Blame view

fs/ext4/move_extent.c 20.5 KB
748de6736   Akira Fujita   ext4: online defr...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
  /*
   * Copyright (c) 2008,2009 NEC Software Tohoku, Ltd.
   * Written by Takashi Sato <t-sato@yk.jp.nec.com>
   *            Akira Fujita <a-fujita@rs.jp.nec.com>
   *
   * This program is free software; you can redistribute it and/or modify it
   * under the terms of version 2.1 of the GNU Lesser General Public License
   * as published by the Free Software Foundation.
   *
   * This program is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   * GNU General Public License for more details.
   */
  
  #include <linux/fs.h>
  #include <linux/quotaops.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
18
  #include <linux/slab.h>
748de6736   Akira Fujita   ext4: online defr...
19
  #include "ext4_jbd2.h"
748de6736   Akira Fujita   ext4: online defr...
20
  #include "ext4.h"
4a092d737   Theodore Ts'o   ext4: rationalize...
21
  #include "ext4_extents.h"
748de6736   Akira Fujita   ext4: online defr...
22

e8505970a   Akira Fujita   ext4: Replace get...
23
24
25
26
27
28
29
  /**
   * get_ext_path - Look up the extent path for a logical block number.
   *
   * @inode:	inode whose extent tree is searched
   * @lblock:	logical block number to look up
   * @ppath:	in/out pointer to an extent path pointer; handed to
   *		ext4_find_extent() and updated with the result
   *
   * Thin wrapper around ext4_find_extent().  Returns 0 and stores the path
   * in @ppath on success.  If ext4_find_extent() fails, its error code is
   * returned.  If the path entry at ext_depth(@inode) has no extent, the path
   * is released, @ppath is set to NULL and -ENODATA is returned.
   */
  static inline int
  get_ext_path(struct inode *inode, ext4_lblk_t lblock,
  		struct ext4_ext_path **ppath)
  {
  	struct ext4_ext_path *found =
  		ext4_find_extent(inode, lblock, ppath, EXT4_EX_NOCACHE);

  	if (IS_ERR(found))
  		return PTR_ERR(found);

  	if (found[ext_depth(inode)].p_ext) {
  		*ppath = found;
  		return 0;
  	}

  	/* No extent at the bottom of the path: release it and report */
  	ext4_ext_drop_refs(found);
  	kfree(found);
  	*ppath = NULL;
  	return -ENODATA;
  }
748de6736   Akira Fujita   ext4: online defr...
51
52
  
  /**
393d1d1d7   Dr. Tilmann Bubeck   ext4: implementat...
53
54
   * ext4_double_down_write_data_sem - Acquire two inodes' write lock
   *                                   of i_data_sem
748de6736   Akira Fujita   ext4: online defr...
55
   *
03bd8b9b8   Dmitry Monakhov   ext4: move_extent...
56
   * Acquire write lock of i_data_sem of the two inodes
748de6736   Akira Fujita   ext4: online defr...
57
   */
393d1d1d7   Dr. Tilmann Bubeck   ext4: implementat...
58
59
  void
  ext4_double_down_write_data_sem(struct inode *first, struct inode *second)
748de6736   Akira Fujita   ext4: online defr...
60
  {
03bd8b9b8   Dmitry Monakhov   ext4: move_extent...
61
62
63
64
65
66
  	if (first < second) {
  		down_write(&EXT4_I(first)->i_data_sem);
  		down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING);
  	} else {
  		down_write(&EXT4_I(second)->i_data_sem);
  		down_write_nested(&EXT4_I(first)->i_data_sem, SINGLE_DEPTH_NESTING);
748de6736   Akira Fujita   ext4: online defr...
67

748de6736   Akira Fujita   ext4: online defr...
68
  	}
748de6736   Akira Fujita   ext4: online defr...
69
70
71
  }
  
  /**
393d1d1d7   Dr. Tilmann Bubeck   ext4: implementat...
72
   * ext4_double_up_write_data_sem - Release two inodes' write lock of i_data_sem
748de6736   Akira Fujita   ext4: online defr...
73
74
75
   *
   * @orig_inode:		original inode structure to be released its lock first
   * @donor_inode:	donor inode structure to be released its lock second
fc04cb49a   Akira Fujita   ext4: fix lock or...
76
   * Release write lock of i_data_sem of two inodes (orig and donor).
748de6736   Akira Fujita   ext4: online defr...
77
   */
393d1d1d7   Dr. Tilmann Bubeck   ext4: implementat...
78
79
80
  void
  ext4_double_up_write_data_sem(struct inode *orig_inode,
  			      struct inode *donor_inode)
748de6736   Akira Fujita   ext4: online defr...
81
  {
748de6736   Akira Fujita   ext4: online defr...
82
83
84
85
86
  	up_write(&EXT4_I(orig_inode)->i_data_sem);
  	up_write(&EXT4_I(donor_inode)->i_data_sem);
  }
  
  /**
   * mext_check_coverage - Check that every extent in a range has one type
   *
   * @inode:		inode in question
   * @from:		first block offset of the range
   * @count:		number of blocks to check
   * @unwritten:		expected result of ext4_ext_is_unwritten() for each extent
   * @err:		where the result of each extent lookup is stored
   *
   * Walks the range extent by extent, advancing by the actual length of each
   * extent found.  Returns 1 if all extents in the range have the expected
   * type and 0 otherwise, including when a lookup fails (in which case @err
   * holds the error).
   */
  static int
  mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count,
  		    int unwritten, int *err)
  {
  	struct ext4_ext_path *path = NULL;
  	ext4_lblk_t last = from + count;
  	int covered = 0;

  	for (;;) {
  		struct ext4_extent *leaf_ext;

  		if (from >= last) {
  			/* Walked the whole range without a mismatch */
  			covered = 1;
  			break;
  		}

  		*err = get_ext_path(inode, from, &path);
  		if (*err)
  			break;

  		leaf_ext = path[ext_depth(inode)].p_ext;
  		if (ext4_ext_is_unwritten(leaf_ext) != unwritten)
  			break;

  		from += ext4_ext_get_actual_len(leaf_ext);
  		ext4_ext_drop_refs(path);
  	}

  	/* Common exit: drop whatever the path still references and free it */
  	ext4_ext_drop_refs(path);
  	kfree(path);
  	return covered;
  }
  
  /**
   * mext_page_double_lock - Grab and lock one page from each of two inodes
   *
   * @inode1:	the first inode
   * @inode2:	the second inode
   * @index1:	page index within @inode1
   * @index2:	page index within @inode2
   * @page:	result vector of two pages
   *
   * The pages are grabbed in inode address order, and writeback is waited for
   * on both of them.  Returns 0 on success, or -ENOMEM if either page could
   * not be grabbed (a page grabbed before the failure is unlocked and
   * released again).
   */
  static int
  mext_page_double_lock(struct inode *inode1, struct inode *inode2,
  		      pgoff_t index1, pgoff_t index2, struct page *page[2])
  {
  	struct address_space *first_map, *second_map;
  	pgoff_t first_idx, second_idx;
  	unsigned flags = AOP_FLAG_NOFS;

  	BUG_ON(!inode1 || !inode2);

  	/* Decide which mapping/index pair is grabbed first */
  	if (inode1 < inode2) {
  		first_map = inode1->i_mapping;
  		first_idx = index1;
  		second_map = inode2->i_mapping;
  		second_idx = index2;
  	} else {
  		first_map = inode2->i_mapping;
  		first_idx = index2;
  		second_map = inode1->i_mapping;
  		second_idx = index1;
  	}

  	page[0] = grab_cache_page_write_begin(first_map, first_idx, flags);
  	if (!page[0])
  		return -ENOMEM;

  	page[1] = grab_cache_page_write_begin(second_map, second_idx, flags);
  	if (!page[1]) {
  		unlock_page(page[0]);
  		page_cache_release(page[0]);
  		return -ENOMEM;
  	}

  	/*
  	 * grab_cache_page_write_begin() may not wait on page's writeback if
  	 * the BDI does not demand it.  Be conservative and wait explicitly.
  	 */
  	wait_on_page_writeback(page[0]);
  	wait_on_page_writeback(page[1]);

  	/* Undo the ordering so that page[0] belongs to inode1 again */
  	if (inode1 > inode2) {
  		struct page *swap = page[1];

  		page[1] = page[0];
  		page[0] = swap;
  	}
  	return 0;
  }
  
  /*
   * Force the buffers of @page covering byte range [@from, @to) uptodate
   * without dropping the page lock.
   *
   * The page must be locked and must not be under writeback.  Buffers in the
   * range that are not uptodate are mapped with ext4_get_block(); those that
   * stay unmapped are zero-filled, the rest are read in.  The page itself is
   * marked uptodate only if no buffer outside the range was found stale.
   *
   * Returns 0 on success, or the error from ext4_get_block() (the page error
   * flag is set in that case) or from bh_submit_read().
   */
  static int
  mext_page_mkuptodate(struct page *page, unsigned from, unsigned to)
  {
  	struct inode *inode = page->mapping->host;
  	struct buffer_head *pending[MAX_BUF_PER_PAGE];
  	struct buffer_head *bh, *head;
  	unsigned int blocksize, block_start, block_end;
  	sector_t block;
  	int idx, err, npending = 0, partial = 0;

  	BUG_ON(!PageLocked(page));
  	BUG_ON(PageWriteback(page));

  	if (PageUptodate(page))
  		return 0;

  	blocksize = 1 << inode->i_blkbits;
  	if (!page_has_buffers(page))
  		create_empty_buffers(page, blocksize, 0);

  	head = page_buffers(page);
  	block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);

  	/* One pass over the circular buffer list, starting at offset 0 */
  	for (bh = head, block_start = 0; bh != head || !block_start;
  	     block++, block_start = block_end, bh = bh->b_this_page) {
  		block_end = block_start + blocksize;

  		if (block_end <= from || block_start >= to) {
  			/* Outside the range: only note whether it is stale */
  			if (!buffer_uptodate(bh))
  				partial = 1;
  		} else if (!buffer_uptodate(bh)) {
  			if (!buffer_mapped(bh)) {
  				err = ext4_get_block(inode, block, bh, 0);
  				if (err) {
  					SetPageError(page);
  					return err;
  				}
  				if (!buffer_mapped(bh)) {
  					/* Still unmapped: zero-fill instead of reading */
  					zero_user(page, block_start, blocksize);
  					set_buffer_uptodate(bh);
  					continue;
  				}
  			}
  			BUG_ON(npending >= MAX_BUF_PER_PAGE);
  			pending[npending++] = bh;
  		}
  	}

  	/* Read in whatever was queued; nothing to do when no io is required */
  	for (idx = 0; idx < npending; idx++) {
  		bh = pending[idx];
  		if (bh_uptodate_or_lock(bh))
  			continue;
  		err = bh_submit_read(bh);
  		if (err)
  			return err;
  	}

  	if (!partial)
  		SetPageUptodate(page);
  	return 0;
  }
  
  /**
748de6736   Akira Fujita   ext4: online defr...
241
242
243
244
245
   * move_extent_per_page - Move extent data per page
   *
   * @o_filp:			file structure of original file
   * @donor_inode:		donor inode
   * @orig_page_offset:		page index on original file
65dd8327e   Xiaoguang Wang   ext4: delete usel...
246
   * @donor_page_offset:		page index on donor file
748de6736   Akira Fujita   ext4: online defr...
247
248
   * @data_offset_in_page:	block index where data swapping starts
   * @block_len_in_page:		the number of blocks to be swapped
556615dcb   Lukas Czerner   ext4: rename unin...
249
   * @unwritten:			orig extent is unwritten or not
f868a48d0   Akira Fujita   ext4: fix the ret...
250
   * @err:			pointer to save return value
748de6736   Akira Fujita   ext4: online defr...
251
252
   *
   * Save the data in original inode blocks and replace original inode extents
65dd8327e   Xiaoguang Wang   ext4: delete usel...
253
   * with donor inode extents by calling ext4_swap_extents().
f868a48d0   Akira Fujita   ext4: fix the ret...
254
255
   * Finally, write out the saved data in new original inode blocks. Return
   * replaced block count.
748de6736   Akira Fujita   ext4: online defr...
256
257
   */
  static int
44fc48f70   Akira Fujita   ext4: Fix small t...
258
  move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
fcf6b1b72   Dmitry Monakhov   ext4: refactor ex...
259
260
261
  		     pgoff_t orig_page_offset, pgoff_t donor_page_offset,
  		     int data_offset_in_page,
  		     int block_len_in_page, int unwritten, int *err)
748de6736   Akira Fujita   ext4: online defr...
262
  {
496ad9aa8   Al Viro   new helper: file_...
263
  	struct inode *orig_inode = file_inode(o_filp);
bb5574880   Dmitry Monakhov   ext4: clean up on...
264
  	struct page *pagep[2] = {NULL, NULL};
748de6736   Akira Fujita   ext4: online defr...
265
  	handle_t *handle;
fcf6b1b72   Dmitry Monakhov   ext4: refactor ex...
266
  	ext4_lblk_t orig_blk_offset, donor_blk_offset;
748de6736   Akira Fujita   ext4: online defr...
267
  	unsigned long blocksize = orig_inode->i_sb->s_blocksize;
f868a48d0   Akira Fujita   ext4: fix the ret...
268
  	unsigned int tmp_data_size, data_size, replaced_size;
bb5574880   Dmitry Monakhov   ext4: clean up on...
269
  	int err2, jblocks, retries = 0;
f868a48d0   Akira Fujita   ext4: fix the ret...
270
  	int replaced_count = 0;
bb5574880   Dmitry Monakhov   ext4: clean up on...
271
  	int from = data_offset_in_page << orig_inode->i_blkbits;
748de6736   Akira Fujita   ext4: online defr...
272
  	int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
88c6b61ff   Dmitry Monakhov   ext4: move_extent...
273
  	struct super_block *sb = orig_inode->i_sb;
748de6736   Akira Fujita   ext4: online defr...
274
275
276
277
278
  
  	/*
  	 * It needs twice the amount of ordinary journal buffers because
  	 * inode and donor_inode may change each different metadata blocks.
  	 */
bb5574880   Dmitry Monakhov   ext4: clean up on...
279
280
  again:
  	*err = 0;
748de6736   Akira Fujita   ext4: online defr...
281
  	jblocks = ext4_writepage_trans_blocks(orig_inode) * 2;
9924a92a8   Theodore Ts'o   ext4: pass contex...
282
  	handle = ext4_journal_start(orig_inode, EXT4_HT_MOVE_EXTENTS, jblocks);
748de6736   Akira Fujita   ext4: online defr...
283
  	if (IS_ERR(handle)) {
f868a48d0   Akira Fujita   ext4: fix the ret...
284
285
  		*err = PTR_ERR(handle);
  		return 0;
748de6736   Akira Fujita   ext4: online defr...
286
  	}
748de6736   Akira Fujita   ext4: online defr...
287
288
  	orig_blk_offset = orig_page_offset * blocks_per_page +
  		data_offset_in_page;
fcf6b1b72   Dmitry Monakhov   ext4: refactor ex...
289
290
  	donor_blk_offset = donor_page_offset * blocks_per_page +
  		data_offset_in_page;
f868a48d0   Akira Fujita   ext4: fix the ret...
291
  	/* Calculate data_size */
748de6736   Akira Fujita   ext4: online defr...
292
293
294
  	if ((orig_blk_offset + block_len_in_page - 1) ==
  	    ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) {
  		/* Replace the last block */
f868a48d0   Akira Fujita   ext4: fix the ret...
295
  		tmp_data_size = orig_inode->i_size & (blocksize - 1);
748de6736   Akira Fujita   ext4: online defr...
296
  		/*
f868a48d0   Akira Fujita   ext4: fix the ret...
297
  		 * If data_size equal zero, it shows data_size is multiples of
748de6736   Akira Fujita   ext4: online defr...
298
299
  		 * blocksize. So we set appropriate value.
  		 */
f868a48d0   Akira Fujita   ext4: fix the ret...
300
301
  		if (tmp_data_size == 0)
  			tmp_data_size = blocksize;
748de6736   Akira Fujita   ext4: online defr...
302

f868a48d0   Akira Fujita   ext4: fix the ret...
303
  		data_size = tmp_data_size +
748de6736   Akira Fujita   ext4: online defr...
304
  			((block_len_in_page - 1) << orig_inode->i_blkbits);
f868a48d0   Akira Fujita   ext4: fix the ret...
305
306
307
308
  	} else
  		data_size = block_len_in_page << orig_inode->i_blkbits;
  
  	replaced_size = data_size;
748de6736   Akira Fujita   ext4: online defr...
309

bb5574880   Dmitry Monakhov   ext4: clean up on...
310
  	*err = mext_page_double_lock(orig_inode, donor_inode, orig_page_offset,
fcf6b1b72   Dmitry Monakhov   ext4: refactor ex...
311
  				     donor_page_offset, pagep);
f868a48d0   Akira Fujita   ext4: fix the ret...
312
  	if (unlikely(*err < 0))
bb5574880   Dmitry Monakhov   ext4: clean up on...
313
  		goto stop_journal;
8c8544739   Dmitry Monakhov   ext4: reimplement...
314
  	/*
556615dcb   Lukas Czerner   ext4: rename unin...
315
  	 * If orig extent was unwritten it can become initialized
8c8544739   Dmitry Monakhov   ext4: reimplement...
316
317
318
319
320
  	 * at any time after i_data_sem was dropped, in order to
  	 * serialize with delalloc we have recheck extent while we
  	 * hold page's lock, if it is still the case data copy is not
  	 * necessary, just swap data blocks between orig and donor.
  	 */
556615dcb   Lukas Czerner   ext4: rename unin...
321
  	if (unwritten) {
393d1d1d7   Dr. Tilmann Bubeck   ext4: implementat...
322
  		ext4_double_down_write_data_sem(orig_inode, donor_inode);
8c8544739   Dmitry Monakhov   ext4: reimplement...
323
324
  		/* If any of extents in range became initialized we have to
  		 * fallback to data copying */
556615dcb   Lukas Czerner   ext4: rename unin...
325
326
  		unwritten = mext_check_coverage(orig_inode, orig_blk_offset,
  						block_len_in_page, 1, err);
8c8544739   Dmitry Monakhov   ext4: reimplement...
327
328
  		if (*err)
  			goto drop_data_sem;
748de6736   Akira Fujita   ext4: online defr...
329

fcf6b1b72   Dmitry Monakhov   ext4: refactor ex...
330
  		unwritten &= mext_check_coverage(donor_inode, donor_blk_offset,
556615dcb   Lukas Czerner   ext4: rename unin...
331
  						 block_len_in_page, 1, err);
8c8544739   Dmitry Monakhov   ext4: reimplement...
332
333
  		if (*err)
  			goto drop_data_sem;
556615dcb   Lukas Czerner   ext4: rename unin...
334
  		if (!unwritten) {
393d1d1d7   Dr. Tilmann Bubeck   ext4: implementat...
335
  			ext4_double_up_write_data_sem(orig_inode, donor_inode);
8c8544739   Dmitry Monakhov   ext4: reimplement...
336
337
338
339
340
341
342
343
344
  			goto data_copy;
  		}
  		if ((page_has_private(pagep[0]) &&
  		     !try_to_release_page(pagep[0], 0)) ||
  		    (page_has_private(pagep[1]) &&
  		     !try_to_release_page(pagep[1], 0))) {
  			*err = -EBUSY;
  			goto drop_data_sem;
  		}
fcf6b1b72   Dmitry Monakhov   ext4: refactor ex...
345
346
347
348
  		replaced_count = ext4_swap_extents(handle, orig_inode,
  						   donor_inode, orig_blk_offset,
  						   donor_blk_offset,
  						   block_len_in_page, 1, err);
8c8544739   Dmitry Monakhov   ext4: reimplement...
349
  	drop_data_sem:
393d1d1d7   Dr. Tilmann Bubeck   ext4: implementat...
350
  		ext4_double_up_write_data_sem(orig_inode, donor_inode);
8c8544739   Dmitry Monakhov   ext4: reimplement...
351
352
353
  		goto unlock_pages;
  	}
  data_copy:
bb5574880   Dmitry Monakhov   ext4: clean up on...
354
355
356
357
358
359
360
361
362
363
  	*err = mext_page_mkuptodate(pagep[0], from, from + replaced_size);
  	if (*err)
  		goto unlock_pages;
  
  	/* At this point all buffers in range are uptodate, old mapping layout
  	 * is no longer required, try to drop it now. */
  	if ((page_has_private(pagep[0]) && !try_to_release_page(pagep[0], 0)) ||
  	    (page_has_private(pagep[1]) && !try_to_release_page(pagep[1], 0))) {
  		*err = -EBUSY;
  		goto unlock_pages;
748de6736   Akira Fujita   ext4: online defr...
364
  	}
6e2631463   Dmitry Monakhov   ext4: fix incorre...
365
  	ext4_double_down_write_data_sem(orig_inode, donor_inode);
fcf6b1b72   Dmitry Monakhov   ext4: refactor ex...
366
367
368
  	replaced_count = ext4_swap_extents(handle, orig_inode, donor_inode,
  					       orig_blk_offset, donor_blk_offset,
  					   block_len_in_page, 1, err);
6e2631463   Dmitry Monakhov   ext4: fix incorre...
369
  	ext4_double_up_write_data_sem(orig_inode, donor_inode);
bb5574880   Dmitry Monakhov   ext4: clean up on...
370
  	if (*err) {
f868a48d0   Akira Fujita   ext4: fix the ret...
371
372
373
374
  		if (replaced_count) {
  			block_len_in_page = replaced_count;
  			replaced_size =
  				block_len_in_page << orig_inode->i_blkbits;
ac48b0a1d   Akira Fujita   ext4: move_extent...
375
  		} else
bb5574880   Dmitry Monakhov   ext4: clean up on...
376
  			goto unlock_pages;
748de6736   Akira Fujita   ext4: online defr...
377
  	}
bb5574880   Dmitry Monakhov   ext4: clean up on...
378
379
  	/* Perform all necessary steps similar write_begin()/write_end()
  	 * but keeping in mind that i_size will not change */
7e8b12c60   Dmitry Monakhov   ext4: defragmenta...
380
  	*err = __block_write_begin(pagep[0], from, replaced_size,
bb5574880   Dmitry Monakhov   ext4: clean up on...
381
382
383
  				   ext4_get_block);
  	if (!*err)
  		*err = block_commit_write(pagep[0], from, from + replaced_size);
748de6736   Akira Fujita   ext4: online defr...
384

bb5574880   Dmitry Monakhov   ext4: clean up on...
385
386
387
388
389
390
391
392
393
394
395
396
397
  	if (unlikely(*err < 0))
  		goto repair_branches;
  
  	/* Even in case of data=writeback it is reasonable to pin
  	 * inode to transaction, to prevent unexpected data loss */
  	*err = ext4_jbd2_file_inode(handle, orig_inode);
  
  unlock_pages:
  	unlock_page(pagep[0]);
  	page_cache_release(pagep[0]);
  	unlock_page(pagep[1]);
  	page_cache_release(pagep[1]);
  stop_journal:
748de6736   Akira Fujita   ext4: online defr...
398
  	ext4_journal_stop(handle);
88c6b61ff   Dmitry Monakhov   ext4: move_extent...
399
400
401
  	if (*err == -ENOSPC &&
  	    ext4_should_retry_alloc(sb, &retries))
  		goto again;
bb5574880   Dmitry Monakhov   ext4: clean up on...
402
403
  	/* Buffer was busy because probably is pinned to journal transaction,
  	 * force transaction commit may help to free it. */
88c6b61ff   Dmitry Monakhov   ext4: move_extent...
404
405
  	if (*err == -EBUSY && retries++ < 4 && EXT4_SB(sb)->s_journal &&
  	    jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal))
bb5574880   Dmitry Monakhov   ext4: clean up on...
406
  		goto again;
f868a48d0   Akira Fujita   ext4: fix the ret...
407
  	return replaced_count;
bb5574880   Dmitry Monakhov   ext4: clean up on...
408
409
410
411
412
413
414
  
  repair_branches:
  	/*
  	 * This should never ever happen!
  	 * Extents are swapped already, but we are not able to copy data.
  	 * Try to swap extents to it's original places
  	 */
393d1d1d7   Dr. Tilmann Bubeck   ext4: implementat...
415
  	ext4_double_down_write_data_sem(orig_inode, donor_inode);
fcf6b1b72   Dmitry Monakhov   ext4: refactor ex...
416
417
418
  	replaced_count = ext4_swap_extents(handle, donor_inode, orig_inode,
  					       orig_blk_offset, donor_blk_offset,
  					   block_len_in_page, 0, &err2);
393d1d1d7   Dr. Tilmann Bubeck   ext4: implementat...
419
  	ext4_double_up_write_data_sem(orig_inode, donor_inode);
bb5574880   Dmitry Monakhov   ext4: clean up on...
420
421
422
423
424
425
426
427
  	if (replaced_count != block_len_in_page) {
  		EXT4_ERROR_INODE_BLOCK(orig_inode, (sector_t)(orig_blk_offset),
  				       "Unable to copy data block,"
  				       " data will be lost.");
  		*err = -EIO;
  	}
  	replaced_count = 0;
  	goto unlock_pages;
748de6736   Akira Fujita   ext4: online defr...
428
429
430
  }
  
  /**
   * mext_check_arguments - Check whether move extent can be done
   *
   * @orig_inode:		original inode
   * @donor_inode:	donor inode
   * @orig_start:		logical start offset in blocks for orig
   * @donor_start:	logical start offset in blocks for donor
   * @len:		in/out: number of blocks to be moved; may be reduced so
   *			that the range does not run past the end of either file
   *
   * Check the arguments of ext4_move_extents() to decide whether the two
   * files can exchange extents.
   *
   * Return 0 on success, or a negative error value on failure:
   *   -EINVAL      donor is suid/sgid, a file is empty, the start offsets
   *                differ in their in-page position, the range exceeds
   *                EXT_MAX_BLOCKS, or the resulting length is 0
   *   -EPERM       donor is immutable or append-only
   *   -EBUSY       either file is a swapfile
   *   -EOPNOTSUPP  either file is not extent based
   */
  static int
  mext_check_arguments(struct inode *orig_inode,
  		     struct inode *donor_inode, __u64 orig_start,
  		     __u64 donor_start, __u64 *len)
  {
  	__u64 orig_eof, donor_eof;
  	unsigned int blkbits = orig_inode->i_blkbits;
  	unsigned int blocksize = 1 << blkbits;

  	/* File sizes in blocks, rounded up */
  	orig_eof = (i_size_read(orig_inode) + blocksize - 1) >> blkbits;
  	donor_eof = (i_size_read(donor_inode) + blocksize - 1) >> blkbits;

  	if (donor_inode->i_mode & (S_ISUID|S_ISGID)) {
  		ext4_debug("ext4 move extent: suid or sgid is set"
  			   " to donor file [ino:orig %lu, donor %lu]\n",
  			   orig_inode->i_ino, donor_inode->i_ino);
  		return -EINVAL;
  	}

  	if (IS_IMMUTABLE(donor_inode) || IS_APPEND(donor_inode))
  		return -EPERM;

  	/* Ext4 move extent does not support swapfile */
  	if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) {
  		ext4_debug("ext4 move extent: The argument files should "
  			"not be swapfile [ino:orig %lu, donor %lu]\n",
  			orig_inode->i_ino, donor_inode->i_ino);
  		return -EBUSY;
  	}

  	/* Ext4 move extent supports only extent based file */
  	if (!(ext4_test_inode_flag(orig_inode, EXT4_INODE_EXTENTS))) {
  		ext4_debug("ext4 move extent: orig file is not extents "
  			"based file [ino:orig %lu]\n", orig_inode->i_ino);
  		return -EOPNOTSUPP;
  	} else if (!(ext4_test_inode_flag(donor_inode, EXT4_INODE_EXTENTS))) {
  		ext4_debug("ext4 move extent: donor file is not extents "
  			"based file [ino:donor %lu]\n", donor_inode->i_ino);
  		return -EOPNOTSUPP;
  	}

  	if ((!orig_inode->i_size) || (!donor_inode->i_size)) {
  		ext4_debug("ext4 move extent: File size is 0 byte\n");
  		return -EINVAL;
  	}

  	/* Start offset should be same */
  	if ((orig_start & ~(PAGE_MASK >> orig_inode->i_blkbits)) !=
  	    (donor_start & ~(PAGE_MASK >> orig_inode->i_blkbits))) {
  		ext4_debug("ext4 move extent: orig and donor's start "
  			"offset are not alligned [ino:orig %lu, donor %lu]\n",
  			orig_inode->i_ino, donor_inode->i_ino);
  		return -EINVAL;
  	}

  	if ((orig_start >= EXT_MAX_BLOCKS) ||
  	    (donor_start >= EXT_MAX_BLOCKS) ||
  	    (*len > EXT_MAX_BLOCKS) ||
  	    (donor_start + *len >= EXT_MAX_BLOCKS) ||
  	    (orig_start + *len >= EXT_MAX_BLOCKS))  {
  		ext4_debug("ext4 move extent: Can't handle over [%u] blocks "
  			"[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCKS,
  			orig_inode->i_ino, donor_inode->i_ino);
  		return -EINVAL;
  	}

  	/*
  	 * Clamp the length to the end of both files.  All values are
  	 * unsigned, so a start at or beyond EOF must be handled before
  	 * computing "eof - start" (which would wrap to a huge length), and a
  	 * zero length before computing "start + len - 1" (which would wrap
  	 * for start == 0 and turn the zero length into a non-zero one).
  	 */
  	if (!*len || orig_eof <= orig_start || donor_eof <= donor_start) {
  		*len = 0;
  	} else {
  		if (orig_eof < orig_start + *len - 1)
  			*len = orig_eof - orig_start;
  		if (donor_eof < donor_start + *len - 1)
  			*len = donor_eof - donor_start;
  	}

  	if (!*len) {
  		ext4_debug("ext4 move extent: len should not be 0 "
  			"[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
  			donor_inode->i_ino);
  		return -EINVAL;
  	}

  	return 0;
  }
  
  /**
748de6736   Akira Fujita   ext4: online defr...
525
526
527
528
   * ext4_move_extents - Exchange the specified range of a file
   *
   * @o_filp:		file structure of the original file
   * @d_filp:		file structure of the donor file
65dd8327e   Xiaoguang Wang   ext4: delete usel...
529
530
   * @orig_blk:		start offset in block for orig
   * @donor_blk:		start offset in block for donor
748de6736   Akira Fujita   ext4: online defr...
531
532
533
534
535
536
   * @len:		the number of blocks to be moved
   * @moved_len:		moved block length
   *
   * On success this function returns 0 and the moved block length is stored
   * in @moved_len; otherwise it returns an error value.
   *
748de6736   Akira Fujita   ext4: online defr...
537
538
   */
  int
fcf6b1b72   Dmitry Monakhov   ext4: refactor ex...
539
540
  ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
  		  __u64 donor_blk, __u64 len, __u64 *moved_len)
748de6736   Akira Fujita   ext4: online defr...
541
  {
496ad9aa8   Al Viro   new helper: file_...
542
543
  	struct inode *orig_inode = file_inode(o_filp);
  	struct inode *donor_inode = file_inode(d_filp);
fcf6b1b72   Dmitry Monakhov   ext4: refactor ex...
544
  	struct ext4_ext_path *path = NULL;
748de6736   Akira Fujita   ext4: online defr...
545
  	int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
fcf6b1b72   Dmitry Monakhov   ext4: refactor ex...
546
547
548
  	ext4_lblk_t o_end, o_start = orig_blk;
  	ext4_lblk_t d_start = donor_blk;
  	int ret;
748de6736   Akira Fujita   ext4: online defr...
549

03bd8b9b8   Dmitry Monakhov   ext4: move_extent...
550
551
552
553
554
555
556
557
558
559
  	if (orig_inode->i_sb != donor_inode->i_sb) {
  		ext4_debug("ext4 move extent: The argument files "
  			"should be in same FS [ino:orig %lu, donor %lu]
  ",
  			orig_inode->i_ino, donor_inode->i_ino);
  		return -EINVAL;
  	}
  
  	/* orig and donor should be different inodes */
  	if (orig_inode == donor_inode) {
f3ce8064b   Theodore Ts'o   ext4: EXT4_IOC_MO...
560
  		ext4_debug("ext4 move extent: The argument files should not "
03bd8b9b8   Dmitry Monakhov   ext4: move_extent...
561
562
  			"be same inode [ino:orig %lu, donor %lu]
  ",
f3ce8064b   Theodore Ts'o   ext4: EXT4_IOC_MO...
563
564
565
  			orig_inode->i_ino, donor_inode->i_ino);
  		return -EINVAL;
  	}
7247c0caa   Akira Fujita   ext4: Fix the NUL...
566
567
568
569
570
571
572
573
  	/* Regular file check */
  	if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) {
  		ext4_debug("ext4 move extent: The argument files should be "
  			"regular file [ino:orig %lu, donor %lu]
  ",
  			orig_inode->i_ino, donor_inode->i_ino);
  		return -EINVAL;
  	}
f066055a3   Dmitry Monakhov   ext4: online defr...
574
575
576
577
578
579
  	/* TODO: This is non obvious task to swap blocks for inodes with full
  	   jornaling enabled */
  	if (ext4_should_journal_data(orig_inode) ||
  	    ext4_should_journal_data(donor_inode)) {
  		return -EINVAL;
  	}
fc04cb49a   Akira Fujita   ext4: fix lock or...
580
  	/* Protect orig and donor inodes against a truncate */
375e289ea   J. Bruce Fields   vfs: pull ext4's ...
581
  	lock_two_nondirectories(orig_inode, donor_inode);
748de6736   Akira Fujita   ext4: online defr...
582

17335dcc4   Dmitry Monakhov   ext4: serialize d...
583
584
585
586
587
  	/* Wait for all existing dio workers */
  	ext4_inode_block_unlocked_dio(orig_inode);
  	ext4_inode_block_unlocked_dio(donor_inode);
  	inode_dio_wait(orig_inode);
  	inode_dio_wait(donor_inode);
fc04cb49a   Akira Fujita   ext4: fix lock or...
588
  	/* Protect extent tree against block allocations via delalloc */
393d1d1d7   Dr. Tilmann Bubeck   ext4: implementat...
589
  	ext4_double_down_write_data_sem(orig_inode, donor_inode);
748de6736   Akira Fujita   ext4: online defr...
590
  	/* Check the filesystem environment whether move_extent can be done */
fcf6b1b72   Dmitry Monakhov   ext4: refactor ex...
591
592
  	ret = mext_check_arguments(orig_inode, donor_inode, orig_blk,
  				    donor_blk, &len);
03bd8b9b8   Dmitry Monakhov   ext4: move_extent...
593
  	if (ret)
347fa6f1c   Akira Fujita   ext4: Add null ex...
594
  		goto out;
fcf6b1b72   Dmitry Monakhov   ext4: refactor ex...
595
  	o_end = o_start + len;
748de6736   Akira Fujita   ext4: online defr...
596

fcf6b1b72   Dmitry Monakhov   ext4: refactor ex...
597
598
599
600
601
602
  	while (o_start < o_end) {
  		struct ext4_extent *ex;
  		ext4_lblk_t cur_blk, next_blk;
  		pgoff_t orig_page_index, donor_page_index;
  		int offset_in_page;
  		int unwritten, cur_len;
748de6736   Akira Fujita   ext4: online defr...
603

fcf6b1b72   Dmitry Monakhov   ext4: refactor ex...
604
605
  		ret = get_ext_path(orig_inode, o_start, &path);
  		if (ret)
748de6736   Akira Fujita   ext4: online defr...
606
  			goto out;
fcf6b1b72   Dmitry Monakhov   ext4: refactor ex...
607
608
609
610
611
612
613
614
615
616
617
618
619
  		ex = path[path->p_depth].p_ext;
  		next_blk = ext4_ext_next_allocated_block(path);
  		cur_blk = le32_to_cpu(ex->ee_block);
  		cur_len = ext4_ext_get_actual_len(ex);
  		/* Check hole before the start pos */
  		if (cur_blk + cur_len - 1 < o_start) {
  			if (next_blk == EXT_MAX_BLOCKS) {
  				o_start = o_end;
  				ret = -ENODATA;
  				goto out;
  			}
  			d_start += next_blk - o_start;
  			o_start = next_blk;
3bdf14b4d   Theodore Ts'o   ext4: reuse path ...
620
  			continue;
fcf6b1b72   Dmitry Monakhov   ext4: refactor ex...
621
622
623
624
625
626
627
628
629
630
  		/* Check hole after the start pos */
  		} else if (cur_blk > o_start) {
  			/* Skip hole */
  			d_start += cur_blk - o_start;
  			o_start = cur_blk;
  			/* Extent inside requested range ?*/
  			if (cur_blk >= o_end)
  				goto out;
  		} else { /* in_range(o_start, o_blk, o_len) */
  			cur_len += cur_blk - o_start;
748de6736   Akira Fujita   ext4: online defr...
631
  		}
fcf6b1b72   Dmitry Monakhov   ext4: refactor ex...
632
633
634
635
636
637
638
639
640
641
642
  		unwritten = ext4_ext_is_unwritten(ex);
  		if (o_end - o_start < cur_len)
  			cur_len = o_end - o_start;
  
  		orig_page_index = o_start >> (PAGE_CACHE_SHIFT -
  					       orig_inode->i_blkbits);
  		donor_page_index = d_start >> (PAGE_CACHE_SHIFT -
  					       donor_inode->i_blkbits);
  		offset_in_page = o_start % blocks_per_page;
  		if (cur_len > blocks_per_page- offset_in_page)
  			cur_len = blocks_per_page - offset_in_page;
fc04cb49a   Akira Fujita   ext4: fix lock or...
643
644
645
646
647
648
649
  		/*
  		 * Release the data semaphores to avoid the following problems:
  		 * a. transaction deadlock among ext4_journal_start,
  		 *    ->write_begin via pagefault, and jbd2_journal_commit
  		 * b. racing with ->readpage, ->write_begin, and ext4_get_block
  		 *    in move_extent_per_page
  		 */
393d1d1d7   Dr. Tilmann Bubeck   ext4: implementat...
650
  		ext4_double_up_write_data_sem(orig_inode, donor_inode);
fcf6b1b72   Dmitry Monakhov   ext4: refactor ex...
651
652
653
654
655
  		/* Swap original branches with new branches */
  		move_extent_per_page(o_filp, donor_inode,
  				     orig_page_index, donor_page_index,
  				     offset_in_page, cur_len,
  				     unwritten, &ret);
393d1d1d7   Dr. Tilmann Bubeck   ext4: implementat...
656
  		ext4_double_down_write_data_sem(orig_inode, donor_inode);
03bd8b9b8   Dmitry Monakhov   ext4: move_extent...
657
  		if (ret < 0)
fc04cb49a   Akira Fujita   ext4: fix lock or...
658
  			break;
fcf6b1b72   Dmitry Monakhov   ext4: refactor ex...
659
660
  		o_start += cur_len;
  		d_start += cur_len;
748de6736   Akira Fujita   ext4: online defr...
661
  	}
fcf6b1b72   Dmitry Monakhov   ext4: refactor ex...
662
663
664
  	*moved_len = o_start - orig_blk;
  	if (*moved_len > len)
  		*moved_len = len;
748de6736   Akira Fujita   ext4: online defr...
665
  out:
94d7c16cb   Akira Fujita   ext4: Fix double-...
666
667
668
669
  	if (*moved_len) {
  		ext4_discard_preallocations(orig_inode);
  		ext4_discard_preallocations(donor_inode);
  	}
b7ea89ad0   Theodore Ts'o   ext4: allow a NUL...
670
671
  	ext4_ext_drop_refs(path);
  	kfree(path);
393d1d1d7   Dr. Tilmann Bubeck   ext4: implementat...
672
  	ext4_double_up_write_data_sem(orig_inode, donor_inode);
17335dcc4   Dmitry Monakhov   ext4: serialize d...
673
674
  	ext4_inode_resume_unlocked_dio(orig_inode);
  	ext4_inode_resume_unlocked_dio(donor_inode);
375e289ea   J. Bruce Fields   vfs: pull ext4's ...
675
  	unlock_two_nondirectories(orig_inode, donor_inode);
748de6736   Akira Fujita   ext4: online defr...
676

03bd8b9b8   Dmitry Monakhov   ext4: move_extent...
677
  	return ret;
748de6736   Akira Fujita   ext4: online defr...
678
  }