Blame view

fs/btrfs/file.c 47.8 KB
6cbd55707   Chris Mason   Btrfs: add GPLv2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
  /*
   * Copyright (C) 2007 Oracle.  All rights reserved.
   *
   * This program is free software; you can redistribute it and/or
   * modify it under the terms of the GNU General Public
   * License v2 as published by the Free Software Foundation.
   *
   * This program is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   * General Public License for more details.
   *
   * You should have received a copy of the GNU General Public
   * License along with this program; if not, write to the
   * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   * Boston, MA 02111-1307, USA.
   */
39279cc3d   Chris Mason   Btrfs: split up s...
18
19
20
21
22
23
  #include <linux/fs.h>
  #include <linux/pagemap.h>
  #include <linux/highmem.h>
  #include <linux/time.h>
  #include <linux/init.h>
  #include <linux/string.h>
39279cc3d   Chris Mason   Btrfs: split up s...
24
25
  #include <linux/backing-dev.h>
  #include <linux/mpage.h>
2fe17c107   Christoph Hellwig   fallocate should ...
26
  #include <linux/falloc.h>
39279cc3d   Chris Mason   Btrfs: split up s...
27
28
29
30
  #include <linux/swap.h>
  #include <linux/writeback.h>
  #include <linux/statfs.h>
  #include <linux/compat.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
31
  #include <linux/slab.h>
39279cc3d   Chris Mason   Btrfs: split up s...
32
33
34
35
36
37
  #include "ctree.h"
  #include "disk-io.h"
  #include "transaction.h"
  #include "btrfs_inode.h"
  #include "ioctl.h"
  #include "print-tree.h"
e02119d5a   Chris Mason   Btrfs: Add a writ...
38
39
  #include "tree-log.h"
  #include "locking.h"
12fa8ec64   Jeff Mahoney   Btrfs: Add workar...
40
  #include "compat.h"
39279cc3d   Chris Mason   Btrfs: split up s...
41

4cb5300bc   Chris Mason   Btrfs: add mount ...
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
  /*
   * when auto defrag is enabled we
   * queue up these defrag structs to remember which
   * inodes need defragging passes.
   *
   * Records are linked into the fs_info->defrag_inodes rbtree, which
   * is ordered by the ino field.
   */
  struct inode_defrag {
  	/* linkage into the fs_info->defrag_inodes rbtree */
  	struct rb_node rb_node;
  	/* objectid of the inode; this is the rbtree sort key */
  	u64 ino;
  	/*
  	 * transid where the defrag was added, we search for
  	 * extents newer than this
  	 */
  	u64 transid;
  
  	/*
  	 * objectid of the btrfs root the inode lives in
  	 * (taken from root->root_key.objectid when the record is created)
  	 */
  	u64 root;
  
  	/* last offset we were able to defrag; the next pass resumes here */
  	u64 last_offset;
  
  	/* non-zero if we've wrapped around back to offset zero once already */
  	int cycled;
  };
  
  /* pop a record for an inode into the defrag tree.  The lock
   * must be held already
   *
   * If you're inserting a record for an older transid than an
   * existing record, the transid already in the tree is lowered
   *
   * If an existing record is found the defrag item you
   * pass in is freed
   */
a0f98dde1   Wanlong Gao   Btrfs:don't check...
76
  static void __btrfs_add_inode_defrag(struct inode *inode,
4cb5300bc   Chris Mason   Btrfs: add mount ...
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
  				    struct inode_defrag *defrag)
  {
  	struct btrfs_root *root = BTRFS_I(inode)->root;
  	struct inode_defrag *entry;
  	struct rb_node **p;
  	struct rb_node *parent = NULL;
  
  	p = &root->fs_info->defrag_inodes.rb_node;
  	while (*p) {
  		parent = *p;
  		entry = rb_entry(parent, struct inode_defrag, rb_node);
  
  		if (defrag->ino < entry->ino)
  			p = &parent->rb_left;
  		else if (defrag->ino > entry->ino)
  			p = &parent->rb_right;
  		else {
  			/* if we're reinserting an entry for
  			 * an old defrag run, make sure to
  			 * lower the transid of our existing record
  			 */
  			if (defrag->transid < entry->transid)
  				entry->transid = defrag->transid;
  			if (defrag->last_offset > entry->last_offset)
  				entry->last_offset = defrag->last_offset;
  			goto exists;
  		}
  	}
  	BTRFS_I(inode)->in_defrag = 1;
  	rb_link_node(&defrag->rb_node, parent, p);
  	rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes);
a0f98dde1   Wanlong Gao   Btrfs:don't check...
108
  	return;
4cb5300bc   Chris Mason   Btrfs: add mount ...
109
110
111
  
  exists:
  	kfree(defrag);
a0f98dde1   Wanlong Gao   Btrfs:don't check...
112
  	return;
4cb5300bc   Chris Mason   Btrfs: add mount ...
113
114
115
116
117
118
119
120
121
122
123
124
  
  }
  
  /*
   * insert a defrag record for this inode if auto defrag is
   * enabled
   */
  int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
  			   struct inode *inode)
  {
  	struct btrfs_root *root = BTRFS_I(inode)->root;
  	struct inode_defrag *defrag;
4cb5300bc   Chris Mason   Btrfs: add mount ...
125
126
127
128
  	u64 transid;
  
  	if (!btrfs_test_opt(root, AUTO_DEFRAG))
  		return 0;
7841cb289   David Sterba   btrfs: add helper...
129
  	if (btrfs_fs_closing(root->fs_info))
4cb5300bc   Chris Mason   Btrfs: add mount ...
130
131
132
133
134
135
136
137
138
139
140
141
142
  		return 0;
  
  	if (BTRFS_I(inode)->in_defrag)
  		return 0;
  
  	if (trans)
  		transid = trans->transid;
  	else
  		transid = BTRFS_I(inode)->root->last_trans;
  
  	defrag = kzalloc(sizeof(*defrag), GFP_NOFS);
  	if (!defrag)
  		return -ENOMEM;
a4689d2bd   David Sterba   btrfs: use btrfs_...
143
  	defrag->ino = btrfs_ino(inode);
4cb5300bc   Chris Mason   Btrfs: add mount ...
144
145
146
147
148
  	defrag->transid = transid;
  	defrag->root = root->root_key.objectid;
  
  	spin_lock(&root->fs_info->defrag_inodes_lock);
  	if (!BTRFS_I(inode)->in_defrag)
a0f98dde1   Wanlong Gao   Btrfs:don't check...
149
  		__btrfs_add_inode_defrag(inode, defrag);
f4ac904c4   Dan Carpenter   btrfs: memory lea...
150
151
  	else
  		kfree(defrag);
4cb5300bc   Chris Mason   Btrfs: add mount ...
152
  	spin_unlock(&root->fs_info->defrag_inodes_lock);
a0f98dde1   Wanlong Gao   Btrfs:don't check...
153
  	return 0;
4cb5300bc   Chris Mason   Btrfs: add mount ...
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
  }
  
  /*
   * Look up the defrag record for @ino.  Must be called with the
   * defrag_inodes lock held.
   *
   * Returns the record on an exact match; *next is left untouched in that
   * case.  Otherwise returns NULL and, if @next is non-NULL, stores in
   * *next the rbtree node of the first record whose ino is not smaller
   * than @ino (or NULL when there is no such record).
   */
  struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info, u64 ino,
  					     struct rb_node **next)
  {
  	struct rb_node *node = info->defrag_inodes.rb_node;
  	struct rb_node *last = NULL;
  	struct inode_defrag *rec;
  
  	/* plain binary search, remembering the last node we visited */
  	while (node) {
  		last = node;
  		rec = rb_entry(node, struct inode_defrag, rb_node);
  
  		if (ino == rec->ino)
  			return rec;
  		node = (ino < rec->ino) ? node->rb_left : node->rb_right;
  	}
  
  	if (!next)
  		return NULL;
  
  	/*
  	 * The search fell off the tree at 'last'.  Step forward in key
  	 * order until we reach a record that is not below the wanted ino.
  	 */
  	for (; last; last = rb_next(last)) {
  		rec = rb_entry(last, struct inode_defrag, rb_node);
  		if (ino <= rec->ino)
  			break;
  	}
  	*next = last;
  	return NULL;
  }
  
  /*
   * run through the list of inodes in the FS that need
   * defragging
   */
  int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
  {
  	struct inode_defrag *defrag;
  	struct btrfs_root *inode_root;
  	struct inode *inode;
  	struct rb_node *n;
  	struct btrfs_key key;
  	struct btrfs_ioctl_defrag_range_args range;
  	u64 first_ino = 0;
  	int num_defrag;
  	int defrag_batch = 1024;
  
  	memset(&range, 0, sizeof(range));
  	range.len = (u64)-1;
  
  	atomic_inc(&fs_info->defrag_running);
  	spin_lock(&fs_info->defrag_inodes_lock);
  	while(1) {
  		n = NULL;
  
  		/* find an inode to defrag */
  		defrag = btrfs_find_defrag_inode(fs_info, first_ino, &n);
  		if (!defrag) {
  			if (n)
  				defrag = rb_entry(n, struct inode_defrag, rb_node);
  			else if (first_ino) {
  				first_ino = 0;
  				continue;
  			} else {
  				break;
  			}
  		}
  
  		/* remove it from the rbtree */
  		first_ino = defrag->ino + 1;
  		rb_erase(&defrag->rb_node, &fs_info->defrag_inodes);
7841cb289   David Sterba   btrfs: add helper...
229
  		if (btrfs_fs_closing(fs_info))
4cb5300bc   Chris Mason   Btrfs: add mount ...
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
  			goto next_free;
  
  		spin_unlock(&fs_info->defrag_inodes_lock);
  
  		/* get the inode */
  		key.objectid = defrag->root;
  		btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
  		key.offset = (u64)-1;
  		inode_root = btrfs_read_fs_root_no_name(fs_info, &key);
  		if (IS_ERR(inode_root))
  			goto next;
  
  		key.objectid = defrag->ino;
  		btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
  		key.offset = 0;
  
  		inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL);
  		if (IS_ERR(inode))
  			goto next;
  
  		/* do a chunk of defrag */
  		BTRFS_I(inode)->in_defrag = 0;
  		range.start = defrag->last_offset;
  		num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid,
  					       defrag_batch);
  		/*
  		 * if we filled the whole defrag batch, there
  		 * must be more work to do.  Queue this defrag
  		 * again
  		 */
  		if (num_defrag == defrag_batch) {
  			defrag->last_offset = range.start;
  			__btrfs_add_inode_defrag(inode, defrag);
  			/*
  			 * we don't want to kfree defrag, we added it back to
  			 * the rbtree
  			 */
  			defrag = NULL;
  		} else if (defrag->last_offset && !defrag->cycled) {
  			/*
  			 * we didn't fill our defrag batch, but
  			 * we didn't start at zero.  Make sure we loop
  			 * around to the start of the file.
  			 */
  			defrag->last_offset = 0;
  			defrag->cycled = 1;
  			__btrfs_add_inode_defrag(inode, defrag);
  			defrag = NULL;
  		}
  
  		iput(inode);
  next:
  		spin_lock(&fs_info->defrag_inodes_lock);
  next_free:
  		kfree(defrag);
  	}
  	spin_unlock(&fs_info->defrag_inodes_lock);
  
  	atomic_dec(&fs_info->defrag_running);
  
  	/*
  	 * during unmount, we use the transaction_wait queue to
  	 * wait for the defragger to stop
  	 */
  	wake_up(&fs_info->transaction_wait);
  	return 0;
  }
39279cc3d   Chris Mason   Btrfs: split up s...
297

d352ac681   Chris Mason   Btrfs: add and im...
298
299
300
  /* simple helper to fault in pages and copy.  This should go away
   * and be replaced with calls into generic code.
   */
d397712bc   Chris Mason   Btrfs: Fix checkp...
301
  static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
d0215f3e5   Josef Bacik   Btrfs: simplify o...
302
  					 size_t write_bytes,
a1b32a593   Chris Mason   Btrfs: Add debugg...
303
  					 struct page **prepared_pages,
11c65dccf   Josef Bacik   Btrfs: do aio_wri...
304
  					 struct iov_iter *i)
39279cc3d   Chris Mason   Btrfs: split up s...
305
  {
914ee295a   Xin Zhong   Btrfs: pwrite blo...
306
  	size_t copied = 0;
d0215f3e5   Josef Bacik   Btrfs: simplify o...
307
  	size_t total_copied = 0;
11c65dccf   Josef Bacik   Btrfs: do aio_wri...
308
  	int pg = 0;
39279cc3d   Chris Mason   Btrfs: split up s...
309
  	int offset = pos & (PAGE_CACHE_SIZE - 1);
11c65dccf   Josef Bacik   Btrfs: do aio_wri...
310
  	while (write_bytes > 0) {
39279cc3d   Chris Mason   Btrfs: split up s...
311
312
  		size_t count = min_t(size_t,
  				     PAGE_CACHE_SIZE - offset, write_bytes);
11c65dccf   Josef Bacik   Btrfs: do aio_wri...
313
  		struct page *page = prepared_pages[pg];
914ee295a   Xin Zhong   Btrfs: pwrite blo...
314
315
316
317
318
319
320
321
322
  		/*
  		 * Copy data from userspace to the current page
  		 *
  		 * Disable pagefault to avoid recursive lock since
  		 * the pages are already locked
  		 */
  		pagefault_disable();
  		copied = iov_iter_copy_from_user_atomic(page, i, offset, count);
  		pagefault_enable();
11c65dccf   Josef Bacik   Btrfs: do aio_wri...
323

39279cc3d   Chris Mason   Btrfs: split up s...
324
325
  		/* Flush processor's dcache for this page */
  		flush_dcache_page(page);
31339acd0   Chris Mason   Btrfs: deal with ...
326
327
328
329
330
331
332
333
334
335
336
337
  
  		/*
  		 * if we get a partial write, we can end up with
  		 * partially up to date pages.  These add
  		 * a lot of complexity, so make sure they don't
  		 * happen by forcing this copy to be retried.
  		 *
  		 * The rest of the btrfs_file_write code will fall
  		 * back to page at a time copies after we return 0.
  		 */
  		if (!PageUptodate(page) && copied < count)
  			copied = 0;
11c65dccf   Josef Bacik   Btrfs: do aio_wri...
338
339
  		iov_iter_advance(i, copied);
  		write_bytes -= copied;
914ee295a   Xin Zhong   Btrfs: pwrite blo...
340
  		total_copied += copied;
39279cc3d   Chris Mason   Btrfs: split up s...
341

914ee295a   Xin Zhong   Btrfs: pwrite blo...
342
  		/* Return to btrfs_file_aio_write to fault page */
9f570b8d4   Josef Bacik   Btrfs: fix format...
343
  		if (unlikely(copied == 0))
914ee295a   Xin Zhong   Btrfs: pwrite blo...
344
  			break;
11c65dccf   Josef Bacik   Btrfs: do aio_wri...
345
346
347
348
349
350
351
  
  		if (unlikely(copied < PAGE_CACHE_SIZE - offset)) {
  			offset += copied;
  		} else {
  			pg++;
  			offset = 0;
  		}
39279cc3d   Chris Mason   Btrfs: split up s...
352
  	}
914ee295a   Xin Zhong   Btrfs: pwrite blo...
353
  	return total_copied;
39279cc3d   Chris Mason   Btrfs: split up s...
354
  }
d352ac681   Chris Mason   Btrfs: add and im...
355
356
357
  /*
   * unlocks pages after btrfs_file_write is done with them
   */
be1a12a0d   Josef Bacik   Btrfs: deal with ...
358
  void btrfs_drop_pages(struct page **pages, size_t num_pages)
39279cc3d   Chris Mason   Btrfs: split up s...
359
360
361
  {
  	size_t i;
  	for (i = 0; i < num_pages; i++) {
d352ac681   Chris Mason   Btrfs: add and im...
362
363
364
365
  		/* page checked is some magic around finding pages that
  		 * have been modified without going through btrfs_set_page_dirty
  		 * clear it here
  		 */
4a0967527   Chris Mason   Btrfs: Data order...
366
  		ClearPageChecked(pages[i]);
39279cc3d   Chris Mason   Btrfs: split up s...
367
368
369
370
371
  		unlock_page(pages[i]);
  		mark_page_accessed(pages[i]);
  		page_cache_release(pages[i]);
  	}
  }
d352ac681   Chris Mason   Btrfs: add and im...
372
373
374
375
376
377
378
379
  /*
   * after copy_from_user, pages need to be dirtied and we need to make
   * sure holes are created between the current EOF and the start of
   * any next extents (if required).
   *
   * this also makes the decision about creating an inline extent vs
   * doing real data extents, marking pages dirty and delalloc as required.
   */
be1a12a0d   Josef Bacik   Btrfs: deal with ...
380
381
382
383
  int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
  		      struct page **pages, size_t num_pages,
  		      loff_t pos, size_t write_bytes,
  		      struct extent_state **cached)
39279cc3d   Chris Mason   Btrfs: split up s...
384
  {
39279cc3d   Chris Mason   Btrfs: split up s...
385
  	int err = 0;
a52d9a803   Chris Mason   Btrfs: Extent bas...
386
  	int i;
db94535db   Chris Mason   Btrfs: Allow tree...
387
  	u64 num_bytes;
a52d9a803   Chris Mason   Btrfs: Extent bas...
388
389
390
391
  	u64 start_pos;
  	u64 end_of_last_block;
  	u64 end_pos = pos + write_bytes;
  	loff_t isize = i_size_read(inode);
39279cc3d   Chris Mason   Btrfs: split up s...
392

5f39d397d   Chris Mason   Btrfs: Create ext...
393
  	start_pos = pos & ~((u64)root->sectorsize - 1);
db94535db   Chris Mason   Btrfs: Allow tree...
394
395
  	num_bytes = (write_bytes + pos - start_pos +
  		    root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
39279cc3d   Chris Mason   Btrfs: split up s...
396

db94535db   Chris Mason   Btrfs: Allow tree...
397
  	end_of_last_block = start_pos + num_bytes - 1;
2ac55d41b   Josef Bacik   Btrfs: cache the ...
398
  	err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
be1a12a0d   Josef Bacik   Btrfs: deal with ...
399
  					cached);
d0215f3e5   Josef Bacik   Btrfs: simplify o...
400
401
  	if (err)
  		return err;
9ed74f2db   Josef Bacik   Btrfs: proper -EN...
402

c8b978188   Chris Mason   Btrfs: Add zlib c...
403
404
405
406
407
  	for (i = 0; i < num_pages; i++) {
  		struct page *p = pages[i];
  		SetPageUptodate(p);
  		ClearPageChecked(p);
  		set_page_dirty(p);
a52d9a803   Chris Mason   Btrfs: Extent bas...
408
  	}
9f570b8d4   Josef Bacik   Btrfs: fix format...
409
410
411
412
413
414
415
  
  	/*
  	 * we've only changed i_size in ram, and we haven't updated
  	 * the disk i_size.  There is no need to log the inode
  	 * at this time.
  	 */
  	if (end_pos > isize)
a52d9a803   Chris Mason   Btrfs: Extent bas...
416
  		i_size_write(inode, end_pos);
a22285a6a   Yan, Zheng   Btrfs: Integrate ...
417
  	return 0;
39279cc3d   Chris Mason   Btrfs: split up s...
418
  }
d352ac681   Chris Mason   Btrfs: add and im...
419
420
421
422
  /*
   * this drops all the extents in the cache that intersect the range
   * [start, end].  Existing extents are split as required.
   */
5b21f2ed3   Zheng Yan   Btrfs: extent_map...
423
424
  int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
  			    int skip_pinned)
a52d9a803   Chris Mason   Btrfs: Extent bas...
425
426
  {
  	struct extent_map *em;
3b951516e   Chris Mason   Btrfs: Use the ex...
427
428
  	struct extent_map *split = NULL;
  	struct extent_map *split2 = NULL;
a52d9a803   Chris Mason   Btrfs: Extent bas...
429
  	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
39b5637f6   Yan   Btrfs: Fix "no cs...
430
  	u64 len = end - start + 1;
3b951516e   Chris Mason   Btrfs: Use the ex...
431
432
  	int ret;
  	int testend = 1;
5b21f2ed3   Zheng Yan   Btrfs: extent_map...
433
  	unsigned long flags;
c8b978188   Chris Mason   Btrfs: Add zlib c...
434
  	int compressed = 0;
a52d9a803   Chris Mason   Btrfs: Extent bas...
435

e6dcd2dc9   Chris Mason   Btrfs: New data=o...
436
  	WARN_ON(end < start);
3b951516e   Chris Mason   Btrfs: Use the ex...
437
  	if (end == (u64)-1) {
39b5637f6   Yan   Btrfs: Fix "no cs...
438
  		len = (u64)-1;
3b951516e   Chris Mason   Btrfs: Use the ex...
439
440
  		testend = 0;
  	}
d397712bc   Chris Mason   Btrfs: Fix checkp...
441
  	while (1) {
3b951516e   Chris Mason   Btrfs: Use the ex...
442
  		if (!split)
172ddd60a   David Sterba   btrfs: drop gfp p...
443
  			split = alloc_extent_map();
3b951516e   Chris Mason   Btrfs: Use the ex...
444
  		if (!split2)
172ddd60a   David Sterba   btrfs: drop gfp p...
445
  			split2 = alloc_extent_map();
c26a92037   Tsutomu Itoh   Btrfs: check retu...
446
  		BUG_ON(!split || !split2);
3b951516e   Chris Mason   Btrfs: Use the ex...
447

890871be8   Chris Mason   Btrfs: switch ext...
448
  		write_lock(&em_tree->lock);
39b5637f6   Yan   Btrfs: Fix "no cs...
449
  		em = lookup_extent_mapping(em_tree, start, len);
d1310b2e0   Chris Mason   Btrfs: Split the ...
450
  		if (!em) {
890871be8   Chris Mason   Btrfs: switch ext...
451
  			write_unlock(&em_tree->lock);
a52d9a803   Chris Mason   Btrfs: Extent bas...
452
  			break;
d1310b2e0   Chris Mason   Btrfs: Split the ...
453
  		}
5b21f2ed3   Zheng Yan   Btrfs: extent_map...
454
455
  		flags = em->flags;
  		if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
55ef68990   Yan, Zheng   Btrfs: Fix btrfs_...
456
  			if (testend && em->start + em->len >= start + len) {
5b21f2ed3   Zheng Yan   Btrfs: extent_map...
457
  				free_extent_map(em);
a1ed835e1   Chris Mason   Btrfs: Fix extent...
458
  				write_unlock(&em_tree->lock);
5b21f2ed3   Zheng Yan   Btrfs: extent_map...
459
460
  				break;
  			}
55ef68990   Yan, Zheng   Btrfs: Fix btrfs_...
461
462
  			start = em->start + em->len;
  			if (testend)
5b21f2ed3   Zheng Yan   Btrfs: extent_map...
463
  				len = start + len - (em->start + em->len);
5b21f2ed3   Zheng Yan   Btrfs: extent_map...
464
  			free_extent_map(em);
a1ed835e1   Chris Mason   Btrfs: Fix extent...
465
  			write_unlock(&em_tree->lock);
5b21f2ed3   Zheng Yan   Btrfs: extent_map...
466
467
  			continue;
  		}
c8b978188   Chris Mason   Btrfs: Add zlib c...
468
  		compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
3ce7e67a0   Chris Mason   Btrfs: Drop some ...
469
  		clear_bit(EXTENT_FLAG_PINNED, &em->flags);
a52d9a803   Chris Mason   Btrfs: Extent bas...
470
  		remove_extent_mapping(em_tree, em);
3b951516e   Chris Mason   Btrfs: Use the ex...
471
472
473
474
475
  
  		if (em->block_start < EXTENT_MAP_LAST_BYTE &&
  		    em->start < start) {
  			split->start = em->start;
  			split->len = start - em->start;
ff5b7ee33   Yan Zheng   Btrfs: Fix csum e...
476
  			split->orig_start = em->orig_start;
3b951516e   Chris Mason   Btrfs: Use the ex...
477
  			split->block_start = em->block_start;
c8b978188   Chris Mason   Btrfs: Add zlib c...
478
479
480
481
482
  
  			if (compressed)
  				split->block_len = em->block_len;
  			else
  				split->block_len = split->len;
3b951516e   Chris Mason   Btrfs: Use the ex...
483
  			split->bdev = em->bdev;
5b21f2ed3   Zheng Yan   Btrfs: extent_map...
484
  			split->flags = flags;
261507a02   Li Zefan   btrfs: Allow to a...
485
  			split->compress_type = em->compress_type;
3b951516e   Chris Mason   Btrfs: Use the ex...
486
487
488
489
490
491
492
493
494
495
496
497
498
  			ret = add_extent_mapping(em_tree, split);
  			BUG_ON(ret);
  			free_extent_map(split);
  			split = split2;
  			split2 = NULL;
  		}
  		if (em->block_start < EXTENT_MAP_LAST_BYTE &&
  		    testend && em->start + em->len > start + len) {
  			u64 diff = start + len - em->start;
  
  			split->start = start + len;
  			split->len = em->start + em->len - (start + len);
  			split->bdev = em->bdev;
5b21f2ed3   Zheng Yan   Btrfs: extent_map...
499
  			split->flags = flags;
261507a02   Li Zefan   btrfs: Allow to a...
500
  			split->compress_type = em->compress_type;
3b951516e   Chris Mason   Btrfs: Use the ex...
501

c8b978188   Chris Mason   Btrfs: Add zlib c...
502
503
504
  			if (compressed) {
  				split->block_len = em->block_len;
  				split->block_start = em->block_start;
445a69449   Chris Mason   Btrfs: Fix usage ...
505
  				split->orig_start = em->orig_start;
c8b978188   Chris Mason   Btrfs: Add zlib c...
506
507
508
  			} else {
  				split->block_len = split->len;
  				split->block_start = em->block_start + diff;
445a69449   Chris Mason   Btrfs: Fix usage ...
509
  				split->orig_start = split->start;
c8b978188   Chris Mason   Btrfs: Add zlib c...
510
  			}
3b951516e   Chris Mason   Btrfs: Use the ex...
511
512
513
514
515
516
  
  			ret = add_extent_mapping(em_tree, split);
  			BUG_ON(ret);
  			free_extent_map(split);
  			split = NULL;
  		}
890871be8   Chris Mason   Btrfs: switch ext...
517
  		write_unlock(&em_tree->lock);
d1310b2e0   Chris Mason   Btrfs: Split the ...
518

a52d9a803   Chris Mason   Btrfs: Extent bas...
519
520
521
522
523
  		/* once for us */
  		free_extent_map(em);
  		/* once for the tree*/
  		free_extent_map(em);
  	}
3b951516e   Chris Mason   Btrfs: Use the ex...
524
525
526
527
  	if (split)
  		free_extent_map(split);
  	if (split2)
  		free_extent_map(split2);
a52d9a803   Chris Mason   Btrfs: Extent bas...
528
529
  	return 0;
  }
39279cc3d   Chris Mason   Btrfs: split up s...
530
531
532
533
534
535
536
537
538
  /*
   * this is very complex, but the basic idea is to drop all extents
   * in the range start - end.  hint_block is filled in with a block number
   * that would be a good hint to the block allocator for this file.
   *
   * If an extent intersects the range but is not entirely inside the range
   * it is either truncated or split.  Anything entirely inside the range
   * is deleted from the tree.
   */
920bbbfb0   Yan, Zheng   Btrfs: Rewrite bt...
539
540
  int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
  		       u64 start, u64 end, u64 *hint_byte, int drop_cache)
39279cc3d   Chris Mason   Btrfs: split up s...
541
  {
920bbbfb0   Yan, Zheng   Btrfs: Rewrite bt...
542
  	struct btrfs_root *root = BTRFS_I(inode)->root;
5f39d397d   Chris Mason   Btrfs: Create ext...
543
  	struct extent_buffer *leaf;
920bbbfb0   Yan, Zheng   Btrfs: Rewrite bt...
544
  	struct btrfs_file_extent_item *fi;
39279cc3d   Chris Mason   Btrfs: split up s...
545
  	struct btrfs_path *path;
00f5c795f   Chris Mason   btrfs_drop_extent...
546
  	struct btrfs_key key;
920bbbfb0   Yan, Zheng   Btrfs: Rewrite bt...
547
  	struct btrfs_key new_key;
33345d015   Li Zefan   Btrfs: Always use...
548
  	u64 ino = btrfs_ino(inode);
920bbbfb0   Yan, Zheng   Btrfs: Rewrite bt...
549
550
551
552
553
554
555
556
  	u64 search_start = start;
  	u64 disk_bytenr = 0;
  	u64 num_bytes = 0;
  	u64 extent_offset = 0;
  	u64 extent_end = 0;
  	int del_nr = 0;
  	int del_slot = 0;
  	int extent_type;
ccd467d60   Chris Mason   Btrfs: crash reco...
557
  	int recow;
00f5c795f   Chris Mason   btrfs_drop_extent...
558
  	int ret;
39279cc3d   Chris Mason   Btrfs: split up s...
559

a1ed835e1   Chris Mason   Btrfs: Fix extent...
560
561
  	if (drop_cache)
  		btrfs_drop_extent_cache(inode, start, end - 1, 0);
a52d9a803   Chris Mason   Btrfs: Extent bas...
562

39279cc3d   Chris Mason   Btrfs: split up s...
563
564
565
  	path = btrfs_alloc_path();
  	if (!path)
  		return -ENOMEM;
920bbbfb0   Yan, Zheng   Btrfs: Rewrite bt...
566

d397712bc   Chris Mason   Btrfs: Fix checkp...
567
  	while (1) {
ccd467d60   Chris Mason   Btrfs: crash reco...
568
  		recow = 0;
33345d015   Li Zefan   Btrfs: Always use...
569
  		ret = btrfs_lookup_file_extent(trans, root, path, ino,
39279cc3d   Chris Mason   Btrfs: split up s...
570
571
  					       search_start, -1);
  		if (ret < 0)
920bbbfb0   Yan, Zheng   Btrfs: Rewrite bt...
572
573
574
575
  			break;
  		if (ret > 0 && path->slots[0] > 0 && search_start == start) {
  			leaf = path->nodes[0];
  			btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);
33345d015   Li Zefan   Btrfs: Always use...
576
  			if (key.objectid == ino &&
920bbbfb0   Yan, Zheng   Btrfs: Rewrite bt...
577
578
  			    key.type == BTRFS_EXTENT_DATA_KEY)
  				path->slots[0]--;
39279cc3d   Chris Mason   Btrfs: split up s...
579
  		}
920bbbfb0   Yan, Zheng   Btrfs: Rewrite bt...
580
  		ret = 0;
8c2383c3d   Chris Mason   Subject: Rework b...
581
  next_slot:
5f39d397d   Chris Mason   Btrfs: Create ext...
582
  		leaf = path->nodes[0];
920bbbfb0   Yan, Zheng   Btrfs: Rewrite bt...
583
584
585
586
587
588
589
590
  		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
  			BUG_ON(del_nr > 0);
  			ret = btrfs_next_leaf(root, path);
  			if (ret < 0)
  				break;
  			if (ret > 0) {
  				ret = 0;
  				break;
8c2383c3d   Chris Mason   Subject: Rework b...
591
  			}
920bbbfb0   Yan, Zheng   Btrfs: Rewrite bt...
592
593
594
595
596
  			leaf = path->nodes[0];
  			recow = 1;
  		}
  
  		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
33345d015   Li Zefan   Btrfs: Always use...
597
  		if (key.objectid > ino ||
920bbbfb0   Yan, Zheng   Btrfs: Rewrite bt...
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
  		    key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end)
  			break;
  
  		fi = btrfs_item_ptr(leaf, path->slots[0],
  				    struct btrfs_file_extent_item);
  		extent_type = btrfs_file_extent_type(leaf, fi);
  
  		if (extent_type == BTRFS_FILE_EXTENT_REG ||
  		    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
  			disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
  			num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
  			extent_offset = btrfs_file_extent_offset(leaf, fi);
  			extent_end = key.offset +
  				btrfs_file_extent_num_bytes(leaf, fi);
  		} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
  			extent_end = key.offset +
  				btrfs_file_extent_inline_len(leaf, fi);
8c2383c3d   Chris Mason   Subject: Rework b...
615
  		} else {
920bbbfb0   Yan, Zheng   Btrfs: Rewrite bt...
616
  			WARN_ON(1);
8c2383c3d   Chris Mason   Subject: Rework b...
617
  			extent_end = search_start;
39279cc3d   Chris Mason   Btrfs: split up s...
618
  		}
920bbbfb0   Yan, Zheng   Btrfs: Rewrite bt...
619
620
  		if (extent_end <= search_start) {
  			path->slots[0]++;
8c2383c3d   Chris Mason   Subject: Rework b...
621
  			goto next_slot;
39279cc3d   Chris Mason   Btrfs: split up s...
622
  		}
920bbbfb0   Yan, Zheng   Btrfs: Rewrite bt...
623
624
  		search_start = max(key.offset, start);
  		if (recow) {
b3b4aa74b   David Sterba   btrfs: drop unuse...
625
  			btrfs_release_path(path);
920bbbfb0   Yan, Zheng   Btrfs: Rewrite bt...
626
  			continue;
39279cc3d   Chris Mason   Btrfs: split up s...
627
  		}
6643558db   Yan Zheng   Btrfs: Fix booken...
628

920bbbfb0   Yan, Zheng   Btrfs: Rewrite bt...
629
630
631
632
633
634
635
636
637
638
639
640
641
  		/*
  		 *     | - range to drop - |
  		 *  | -------- extent -------- |
  		 */
  		if (start > key.offset && end < extent_end) {
  			BUG_ON(del_nr > 0);
  			BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
  
  			memcpy(&new_key, &key, sizeof(new_key));
  			new_key.offset = start;
  			ret = btrfs_duplicate_item(trans, root, path,
  						   &new_key);
  			if (ret == -EAGAIN) {
b3b4aa74b   David Sterba   btrfs: drop unuse...
642
  				btrfs_release_path(path);
920bbbfb0   Yan, Zheng   Btrfs: Rewrite bt...
643
  				continue;
6643558db   Yan Zheng   Btrfs: Fix booken...
644
  			}
920bbbfb0   Yan, Zheng   Btrfs: Rewrite bt...
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
  			if (ret < 0)
  				break;
  
  			leaf = path->nodes[0];
  			fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
  					    struct btrfs_file_extent_item);
  			btrfs_set_file_extent_num_bytes(leaf, fi,
  							start - key.offset);
  
  			fi = btrfs_item_ptr(leaf, path->slots[0],
  					    struct btrfs_file_extent_item);
  
  			extent_offset += start - key.offset;
  			btrfs_set_file_extent_offset(leaf, fi, extent_offset);
  			btrfs_set_file_extent_num_bytes(leaf, fi,
  							extent_end - start);
  			btrfs_mark_buffer_dirty(leaf);
  
  			if (disk_bytenr > 0) {
771ed689d   Chris Mason   Btrfs: Optimize c...
664
  				ret = btrfs_inc_extent_ref(trans, root,
920bbbfb0   Yan, Zheng   Btrfs: Rewrite bt...
665
666
667
668
  						disk_bytenr, num_bytes, 0,
  						root->root_key.objectid,
  						new_key.objectid,
  						start - extent_offset);
771ed689d   Chris Mason   Btrfs: Optimize c...
669
  				BUG_ON(ret);
920bbbfb0   Yan, Zheng   Btrfs: Rewrite bt...
670
  				*hint_byte = disk_bytenr;
771ed689d   Chris Mason   Btrfs: Optimize c...
671
  			}
920bbbfb0   Yan, Zheng   Btrfs: Rewrite bt...
672
  			key.offset = start;
6643558db   Yan Zheng   Btrfs: Fix booken...
673
  		}
920bbbfb0   Yan, Zheng   Btrfs: Rewrite bt...
674
675
676
677
678
679
  		/*
  		 *  | ---- range to drop ----- |
  		 *      | -------- extent -------- |
  		 */
  		if (start <= key.offset && end < extent_end) {
  			BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
6643558db   Yan Zheng   Btrfs: Fix booken...
680

920bbbfb0   Yan, Zheng   Btrfs: Rewrite bt...
681
682
683
  			memcpy(&new_key, &key, sizeof(new_key));
  			new_key.offset = end;
  			btrfs_set_item_key_safe(trans, root, path, &new_key);
6643558db   Yan Zheng   Btrfs: Fix booken...
684

920bbbfb0   Yan, Zheng   Btrfs: Rewrite bt...
685
686
687
688
689
690
691
692
  			extent_offset += end - key.offset;
  			btrfs_set_file_extent_offset(leaf, fi, extent_offset);
  			btrfs_set_file_extent_num_bytes(leaf, fi,
  							extent_end - end);
  			btrfs_mark_buffer_dirty(leaf);
  			if (disk_bytenr > 0) {
  				inode_sub_bytes(inode, end - key.offset);
  				*hint_byte = disk_bytenr;
39279cc3d   Chris Mason   Btrfs: split up s...
693
  			}
920bbbfb0   Yan, Zheng   Btrfs: Rewrite bt...
694
  			break;
39279cc3d   Chris Mason   Btrfs: split up s...
695
  		}
771ed689d   Chris Mason   Btrfs: Optimize c...
696

920bbbfb0   Yan, Zheng   Btrfs: Rewrite bt...
697
698
699
700
701
702
703
704
  		search_start = extent_end;
  		/*
  		 *       | ---- range to drop ----- |
  		 *  | -------- extent -------- |
  		 */
  		if (start > key.offset && end >= extent_end) {
  			BUG_ON(del_nr > 0);
  			BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
8c2383c3d   Chris Mason   Subject: Rework b...
705

920bbbfb0   Yan, Zheng   Btrfs: Rewrite bt...
706
707
708
709
710
711
712
713
714
  			btrfs_set_file_extent_num_bytes(leaf, fi,
  							start - key.offset);
  			btrfs_mark_buffer_dirty(leaf);
  			if (disk_bytenr > 0) {
  				inode_sub_bytes(inode, extent_end - start);
  				*hint_byte = disk_bytenr;
  			}
  			if (end == extent_end)
  				break;
c8b978188   Chris Mason   Btrfs: Add zlib c...
715

920bbbfb0   Yan, Zheng   Btrfs: Rewrite bt...
716
717
  			path->slots[0]++;
  			goto next_slot;
31840ae1a   Zheng Yan   Btrfs: Full back ...
718
  		}
920bbbfb0   Yan, Zheng   Btrfs: Rewrite bt...
719
720
721
722
723
724
725
726
727
728
729
730
  		/*
  		 *  | ---- range to drop ----- |
  		 *    | ------ extent ------ |
  		 */
  		if (start <= key.offset && end >= extent_end) {
  			if (del_nr == 0) {
  				del_slot = path->slots[0];
  				del_nr = 1;
  			} else {
  				BUG_ON(del_slot + del_nr != path->slots[0]);
  				del_nr++;
  			}
31840ae1a   Zheng Yan   Btrfs: Full back ...
731

920bbbfb0   Yan, Zheng   Btrfs: Rewrite bt...
732
  			if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
a76a3cd40   Yan Zheng   Btrfs: Count spac...
733
  				inode_sub_bytes(inode,
920bbbfb0   Yan, Zheng   Btrfs: Rewrite bt...
734
735
736
737
  						extent_end - key.offset);
  				extent_end = ALIGN(extent_end,
  						   root->sectorsize);
  			} else if (disk_bytenr > 0) {
31840ae1a   Zheng Yan   Btrfs: Full back ...
738
  				ret = btrfs_free_extent(trans, root,
920bbbfb0   Yan, Zheng   Btrfs: Rewrite bt...
739
740
  						disk_bytenr, num_bytes, 0,
  						root->root_key.objectid,
5d4f98a28   Yan Zheng   Btrfs: Mixed back...
741
  						key.objectid, key.offset -
920bbbfb0   Yan, Zheng   Btrfs: Rewrite bt...
742
  						extent_offset);
31840ae1a   Zheng Yan   Btrfs: Full back ...
743
  				BUG_ON(ret);
920bbbfb0   Yan, Zheng   Btrfs: Rewrite bt...
744
745
746
  				inode_sub_bytes(inode,
  						extent_end - key.offset);
  				*hint_byte = disk_bytenr;
31840ae1a   Zheng Yan   Btrfs: Full back ...
747
  			}
31840ae1a   Zheng Yan   Btrfs: Full back ...
748

920bbbfb0   Yan, Zheng   Btrfs: Rewrite bt...
749
750
751
752
753
754
755
756
757
758
759
760
761
762
  			if (end == extent_end)
  				break;
  
  			if (path->slots[0] + 1 < btrfs_header_nritems(leaf)) {
  				path->slots[0]++;
  				goto next_slot;
  			}
  
  			ret = btrfs_del_items(trans, root, path, del_slot,
  					      del_nr);
  			BUG_ON(ret);
  
  			del_nr = 0;
  			del_slot = 0;
b3b4aa74b   David Sterba   btrfs: drop unuse...
763
  			btrfs_release_path(path);
920bbbfb0   Yan, Zheng   Btrfs: Rewrite bt...
764
  			continue;
39279cc3d   Chris Mason   Btrfs: split up s...
765
  		}
920bbbfb0   Yan, Zheng   Btrfs: Rewrite bt...
766
767
  
  		BUG_ON(1);
39279cc3d   Chris Mason   Btrfs: split up s...
768
  	}
920bbbfb0   Yan, Zheng   Btrfs: Rewrite bt...
769
770
771
772
  
  	if (del_nr > 0) {
  		ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
  		BUG_ON(ret);
6643558db   Yan Zheng   Btrfs: Fix booken...
773
  	}
920bbbfb0   Yan, Zheng   Btrfs: Rewrite bt...
774
775
  
  	btrfs_free_path(path);
39279cc3d   Chris Mason   Btrfs: split up s...
776
777
  	return ret;
  }
d899e0521   Yan Zheng   Btrfs: Add falloc...
778
  /*
   * Decide whether the item at @slot in @leaf is a file extent that can be
   * merged with a neighbouring range of the same on-disk extent.
   *
   * The item qualifies only if it is an EXTENT_DATA item of @objectid, of
   * type BTRFS_FILE_EXTENT_REG, backed by disk extent @bytenr, whose extent
   * offset equals (key offset - @orig_offset), and which has no compression,
   * encryption or other encoding set.
   *
   * @start and @end are in/out: a non-zero value on entry is a constraint the
   * item's file range must match exactly; zero means "don't care".  On
   * success both are overwritten with the item's actual file range.
   *
   * Returns 1 when the item is mergeable, 0 otherwise (including when @slot
   * is outside the leaf).
   */
  static int extent_mergeable(struct extent_buffer *leaf, int slot,
  			    u64 objectid, u64 bytenr, u64 orig_offset,
  			    u64 *start, u64 *end)
  {
  	struct btrfs_file_extent_item *item;
  	struct btrfs_key found;
  	u64 item_end;

  	/* Neighbour slot may fall off either end of the leaf. */
  	if (slot < 0)
  		return 0;
  	if (slot >= btrfs_header_nritems(leaf))
  		return 0;

  	btrfs_item_key_to_cpu(leaf, &found, slot);
  	if (found.objectid != objectid)
  		return 0;
  	if (found.type != BTRFS_EXTENT_DATA_KEY)
  		return 0;

  	item = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);

  	if (btrfs_file_extent_type(leaf, item) != BTRFS_FILE_EXTENT_REG)
  		return 0;
  	if (btrfs_file_extent_disk_bytenr(leaf, item) != bytenr)
  		return 0;
  	if (btrfs_file_extent_offset(leaf, item) !=
  	    found.offset - orig_offset)
  		return 0;

  	/* Any encoding at all disqualifies the item. */
  	if (btrfs_file_extent_compression(leaf, item))
  		return 0;
  	if (btrfs_file_extent_encryption(leaf, item))
  		return 0;
  	if (btrfs_file_extent_other_encoding(leaf, item))
  		return 0;

  	item_end = found.offset + btrfs_file_extent_num_bytes(leaf, item);

  	/* A non-zero bound supplied by the caller must match exactly. */
  	if (*start && *start != found.offset)
  		return 0;
  	if (*end && *end != item_end)
  		return 0;

  	*start = found.offset;
  	*end = item_end;
  	return 1;
  }
  
  /*
   * Mark extent in the range start - end as written.
   *
   * This changes extent type from 'pre-allocated' to 'regular'. If only
   * part of extent is marked as written, the extent will be split into
   * two or three.
   *
   * The file extent item covering @start must be a PREALLOC extent of this
   * inode that fully contains [@start, @end); anything else trips a BUG_ON.
   * Where an adjacent item is mergeable (see extent_mergeable()), the range
   * is folded into that neighbour instead of leaving separate items behind.
   *
   * Returns 0 on success, -ENOMEM if no path could be allocated, or the
   * negative error returned by btrfs_search_slot().  Failures of the other
   * tree operations are still fatal (BUG_ON).
   */
  int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
  			      struct inode *inode, u64 start, u64 end)
  {
  	struct btrfs_root *root = BTRFS_I(inode)->root;
  	struct extent_buffer *leaf;
  	struct btrfs_path *path;
  	struct btrfs_file_extent_item *fi;
  	struct btrfs_key key;
  	struct btrfs_key new_key;
  	u64 bytenr;
  	u64 num_bytes;
  	u64 extent_end;
  	u64 orig_offset;
  	u64 other_start;
  	u64 other_end;
  	u64 split;
  	int del_nr = 0;
  	int del_slot = 0;
  	int recow;
  	int ret;
  	u64 ino = btrfs_ino(inode);

  	btrfs_drop_extent_cache(inode, start, end - 1, 0);

  	path = btrfs_alloc_path();
  	if (!path)
  		return -ENOMEM;
  again:
  	recow = 0;
  	split = start;
  	key.objectid = ino;
  	key.type = BTRFS_EXTENT_DATA_KEY;
  	key.offset = split;

  	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
  	if (ret < 0)
  		goto out;
  	/* No exact match: step back to the item that precedes the key. */
  	if (ret > 0 && path->slots[0] > 0)
  		path->slots[0]--;

  	leaf = path->nodes[0];
  	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
  	BUG_ON(key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY);
  	fi = btrfs_item_ptr(leaf, path->slots[0],
  			    struct btrfs_file_extent_item);
  	BUG_ON(btrfs_file_extent_type(leaf, fi) !=
  	       BTRFS_FILE_EXTENT_PREALLOC);
  	extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
  	BUG_ON(key.offset > start || extent_end < end);

  	bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
  	num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
  	/* File offset that corresponds to byte 0 of the on-disk extent. */
  	orig_offset = key.offset - btrfs_file_extent_offset(leaf, fi);
  	memcpy(&new_key, &key, sizeof(new_key));

  	/*
  	 * Written range is the head of the prealloc extent: if the previous
  	 * item is mergeable, grow it up to 'end' and shrink this item so it
  	 * starts at 'end'.  No new item is needed.
  	 */
  	if (start == key.offset && end < extent_end) {
  		other_start = 0;
  		other_end = start;
  		if (extent_mergeable(leaf, path->slots[0] - 1,
  				     ino, bytenr, orig_offset,
  				     &other_start, &other_end)) {
  			new_key.offset = end;
  			btrfs_set_item_key_safe(trans, root, path, &new_key);
  			fi = btrfs_item_ptr(leaf, path->slots[0],
  					    struct btrfs_file_extent_item);
  			btrfs_set_file_extent_num_bytes(leaf, fi,
  							extent_end - end);
  			btrfs_set_file_extent_offset(leaf, fi,
  						     end - orig_offset);
  			fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
  					    struct btrfs_file_extent_item);
  			btrfs_set_file_extent_num_bytes(leaf, fi,
  							end - other_start);
  			btrfs_mark_buffer_dirty(leaf);
  			goto out;
  		}
  	}

  	/*
  	 * Written range is the tail of the prealloc extent: if the next item
  	 * is mergeable, shrink this item to end at 'start' and pull the next
  	 * item's start back to 'start'.
  	 */
  	if (start > key.offset && end == extent_end) {
  		other_start = end;
  		other_end = 0;
  		if (extent_mergeable(leaf, path->slots[0] + 1,
  				     ino, bytenr, orig_offset,
  				     &other_start, &other_end)) {
  			fi = btrfs_item_ptr(leaf, path->slots[0],
  					    struct btrfs_file_extent_item);
  			btrfs_set_file_extent_num_bytes(leaf, fi,
  							start - key.offset);
  			path->slots[0]++;
  			new_key.offset = start;
  			btrfs_set_item_key_safe(trans, root, path, &new_key);

  			fi = btrfs_item_ptr(leaf, path->slots[0],
  					    struct btrfs_file_extent_item);
  			btrfs_set_file_extent_num_bytes(leaf, fi,
  							other_end - start);
  			btrfs_set_file_extent_offset(leaf, fi,
  						     start - orig_offset);
  			btrfs_mark_buffer_dirty(leaf);
  			goto out;
  		}
  	}

  	/*
  	 * Split the prealloc item until one item covers exactly
  	 * [start, end).  Each pass duplicates the item at 'split' (first at
  	 * 'start', then at 'end') and takes an extra reference on the disk
  	 * extent for the new item.
  	 */
  	while (start > key.offset || end < extent_end) {
  		if (key.offset == start)
  			split = end;

  		new_key.offset = split;
  		ret = btrfs_duplicate_item(trans, root, path, &new_key);
  		if (ret == -EAGAIN) {
  			btrfs_release_path(path);
  			goto again;
  		}
  		BUG_ON(ret < 0);

  		leaf = path->nodes[0];
  		fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
  				    struct btrfs_file_extent_item);
  		btrfs_set_file_extent_num_bytes(leaf, fi,
  						split - key.offset);

  		fi = btrfs_item_ptr(leaf, path->slots[0],
  				    struct btrfs_file_extent_item);

  		btrfs_set_file_extent_offset(leaf, fi, split - orig_offset);
  		btrfs_set_file_extent_num_bytes(leaf, fi,
  						extent_end - split);
  		btrfs_mark_buffer_dirty(leaf);

  		ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
  					   root->root_key.objectid,
  					   ino, orig_offset);
  		BUG_ON(ret);

  		if (split == start) {
  			key.offset = start;
  		} else {
  			BUG_ON(start != key.offset);
  			path->slots[0]--;
  			extent_end = end;
  		}
  		/* The leaf was modified; merging below needs a fresh search. */
  		recow = 1;
  	}

  	/* Try to absorb the following item into the written range. */
  	other_start = end;
  	other_end = 0;
  	if (extent_mergeable(leaf, path->slots[0] + 1,
  			     ino, bytenr, orig_offset,
  			     &other_start, &other_end)) {
  		if (recow) {
  			btrfs_release_path(path);
  			goto again;
  		}
  		extent_end = other_end;
  		del_slot = path->slots[0] + 1;
  		del_nr++;
  		ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
  					0, root->root_key.objectid,
  					ino, orig_offset);
  		BUG_ON(ret);
  	}
  	/* Try to fold the written range into the preceding item. */
  	other_start = 0;
  	other_end = start;
  	if (extent_mergeable(leaf, path->slots[0] - 1,
  			     ino, bytenr, orig_offset,
  			     &other_start, &other_end)) {
  		if (recow) {
  			btrfs_release_path(path);
  			goto again;
  		}
  		key.offset = other_start;
  		del_slot = path->slots[0];
  		del_nr++;
  		ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
  					0, root->root_key.objectid,
  					ino, orig_offset);
  		BUG_ON(ret);
  	}
  	if (del_nr == 0) {
  		/* Nothing merged: just flip the type of the current item. */
  		fi = btrfs_item_ptr(leaf, path->slots[0],
  			   struct btrfs_file_extent_item);
  		btrfs_set_file_extent_type(leaf, fi,
  					   BTRFS_FILE_EXTENT_REG);
  		btrfs_mark_buffer_dirty(leaf);
  	} else {
  		/*
  		 * The item just before the deleted run survives; make it a
  		 * regular extent spanning the whole merged range.
  		 */
  		fi = btrfs_item_ptr(leaf, del_slot - 1,
  			   struct btrfs_file_extent_item);
  		btrfs_set_file_extent_type(leaf, fi,
  					   BTRFS_FILE_EXTENT_REG);
  		btrfs_set_file_extent_num_bytes(leaf, fi,
  						extent_end - key.offset);
  		btrfs_mark_buffer_dirty(leaf);

  		ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
  		BUG_ON(ret);
  	}
  out:
  	btrfs_free_path(path);
  	/*
  	 * Report a failed tree search to the caller instead of claiming
  	 * success.  ret may legitimately be > 0 here (inexact match from
  	 * btrfs_search_slot() on the early-merge paths), which is still
  	 * success for our callers.
  	 */
  	return ret < 0 ? ret : 0;
  }
39279cc3d   Chris Mason   Btrfs: split up s...
1015
  /*
b1bf862e9   Chris Mason   Btrfs: fix regres...
1016
1017
1018
   * on error we return an unlocked page and the error value
   * on success we return a locked page and 0
   */
b6316429a   Josef Bacik   Btrfs: force a pa...
1019
1020
  static int prepare_uptodate_page(struct page *page, u64 pos,
  				 bool force_uptodate)
b1bf862e9   Chris Mason   Btrfs: fix regres...
1021
1022
  {
  	int ret = 0;
b6316429a   Josef Bacik   Btrfs: force a pa...
1023
1024
  	if (((pos & (PAGE_CACHE_SIZE - 1)) || force_uptodate) &&
  	    !PageUptodate(page)) {
b1bf862e9   Chris Mason   Btrfs: fix regres...
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
  		ret = btrfs_readpage(NULL, page);
  		if (ret)
  			return ret;
  		lock_page(page);
  		if (!PageUptodate(page)) {
  			unlock_page(page);
  			return -EIO;
  		}
  	}
  	return 0;
  }
  
  /*
d352ac681   Chris Mason   Btrfs: add and im...
1038
1039
1040
   * this gets pages into the page cache and locks them down, it also properly
   * waits for data=ordered extents to finish before allowing the pages to be
   * modified.
39279cc3d   Chris Mason   Btrfs: split up s...
1041
   */
d397712bc   Chris Mason   Btrfs: Fix checkp...
1042
  static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
98ed51747   Chris Mason   Btrfs: Force inli...
1043
1044
  			 struct page **pages, size_t num_pages,
  			 loff_t pos, unsigned long first_index,
b6316429a   Josef Bacik   Btrfs: force a pa...
1045
  			 size_t write_bytes, bool force_uptodate)
39279cc3d   Chris Mason   Btrfs: split up s...
1046
  {
2ac55d41b   Josef Bacik   Btrfs: cache the ...
1047
  	struct extent_state *cached_state = NULL;
39279cc3d   Chris Mason   Btrfs: split up s...
1048
1049
  	int i;
  	unsigned long index = pos >> PAGE_CACHE_SHIFT;
6da6abae0   Chris Mason   Btrfs: Back port ...
1050
  	struct inode *inode = fdentry(file)->d_inode;
3b16a4e3c   Josef Bacik   Btrfs: use the in...
1051
  	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
39279cc3d   Chris Mason   Btrfs: split up s...
1052
  	int err = 0;
b1bf862e9   Chris Mason   Btrfs: fix regres...
1053
  	int faili = 0;
8c2383c3d   Chris Mason   Subject: Rework b...
1054
  	u64 start_pos;
e6dcd2dc9   Chris Mason   Btrfs: New data=o...
1055
  	u64 last_pos;
8c2383c3d   Chris Mason   Subject: Rework b...
1056

5f39d397d   Chris Mason   Btrfs: Create ext...
1057
  	start_pos = pos & ~((u64)root->sectorsize - 1);
e6dcd2dc9   Chris Mason   Btrfs: New data=o...
1058
  	last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT;
39279cc3d   Chris Mason   Btrfs: split up s...
1059

e6dcd2dc9   Chris Mason   Btrfs: New data=o...
1060
  again:
39279cc3d   Chris Mason   Btrfs: split up s...
1061
  	for (i = 0; i < num_pages; i++) {
a94733d0b   Josef Bacik   Btrfs: use find_o...
1062
  		pages[i] = find_or_create_page(inode->i_mapping, index + i,
e3a41a5ba   Johannes Weiner   btrfs: pass __GFP...
1063
  					       mask | __GFP_WRITE);
39279cc3d   Chris Mason   Btrfs: split up s...
1064
  		if (!pages[i]) {
b1bf862e9   Chris Mason   Btrfs: fix regres...
1065
1066
1067
1068
1069
1070
  			faili = i - 1;
  			err = -ENOMEM;
  			goto fail;
  		}
  
  		if (i == 0)
b6316429a   Josef Bacik   Btrfs: force a pa...
1071
1072
  			err = prepare_uptodate_page(pages[i], pos,
  						    force_uptodate);
b1bf862e9   Chris Mason   Btrfs: fix regres...
1073
1074
  		if (i == num_pages - 1)
  			err = prepare_uptodate_page(pages[i],
b6316429a   Josef Bacik   Btrfs: force a pa...
1075
  						    pos + write_bytes, false);
b1bf862e9   Chris Mason   Btrfs: fix regres...
1076
1077
1078
1079
  		if (err) {
  			page_cache_release(pages[i]);
  			faili = i - 1;
  			goto fail;
39279cc3d   Chris Mason   Btrfs: split up s...
1080
  		}
ccd467d60   Chris Mason   Btrfs: crash reco...
1081
  		wait_on_page_writeback(pages[i]);
39279cc3d   Chris Mason   Btrfs: split up s...
1082
  	}
b1bf862e9   Chris Mason   Btrfs: fix regres...
1083
  	err = 0;
0762704b1   Chris Mason   Btrfs: Properly c...
1084
  	if (start_pos < inode->i_size) {
e6dcd2dc9   Chris Mason   Btrfs: New data=o...
1085
  		struct btrfs_ordered_extent *ordered;
2ac55d41b   Josef Bacik   Btrfs: cache the ...
1086
1087
1088
  		lock_extent_bits(&BTRFS_I(inode)->io_tree,
  				 start_pos, last_pos - 1, 0, &cached_state,
  				 GFP_NOFS);
d397712bc   Chris Mason   Btrfs: Fix checkp...
1089
1090
  		ordered = btrfs_lookup_first_ordered_extent(inode,
  							    last_pos - 1);
e6dcd2dc9   Chris Mason   Btrfs: New data=o...
1091
1092
1093
1094
  		if (ordered &&
  		    ordered->file_offset + ordered->len > start_pos &&
  		    ordered->file_offset < last_pos) {
  			btrfs_put_ordered_extent(ordered);
2ac55d41b   Josef Bacik   Btrfs: cache the ...
1095
1096
1097
  			unlock_extent_cached(&BTRFS_I(inode)->io_tree,
  					     start_pos, last_pos - 1,
  					     &cached_state, GFP_NOFS);
e6dcd2dc9   Chris Mason   Btrfs: New data=o...
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
  			for (i = 0; i < num_pages; i++) {
  				unlock_page(pages[i]);
  				page_cache_release(pages[i]);
  			}
  			btrfs_wait_ordered_range(inode, start_pos,
  						 last_pos - start_pos);
  			goto again;
  		}
  		if (ordered)
  			btrfs_put_ordered_extent(ordered);
2ac55d41b   Josef Bacik   Btrfs: cache the ...
1108
  		clear_extent_bit(&BTRFS_I(inode)->io_tree, start_pos,
32c00aff7   Josef Bacik   Btrfs: release de...
1109
  				  last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC |
2ac55d41b   Josef Bacik   Btrfs: cache the ...
1110
  				  EXTENT_DO_ACCOUNTING, 0, 0, &cached_state,
0762704b1   Chris Mason   Btrfs: Properly c...
1111
  				  GFP_NOFS);
2ac55d41b   Josef Bacik   Btrfs: cache the ...
1112
1113
1114
  		unlock_extent_cached(&BTRFS_I(inode)->io_tree,
  				     start_pos, last_pos - 1, &cached_state,
  				     GFP_NOFS);
0762704b1   Chris Mason   Btrfs: Properly c...
1115
  	}
e6dcd2dc9   Chris Mason   Btrfs: New data=o...
1116
  	for (i = 0; i < num_pages; i++) {
32c7f202a   Wu Fengguang   btrfs: fix dirtie...
1117
1118
  		if (clear_page_dirty_for_io(pages[i]))
  			account_page_redirty(pages[i]);
e6dcd2dc9   Chris Mason   Btrfs: New data=o...
1119
1120
1121
  		set_page_extent_mapped(pages[i]);
  		WARN_ON(!PageLocked(pages[i]));
  	}
39279cc3d   Chris Mason   Btrfs: split up s...
1122
  	return 0;
b1bf862e9   Chris Mason   Btrfs: fix regres...
1123
1124
1125
1126
1127
1128
1129
  fail:
  	while (faili >= 0) {
  		unlock_page(pages[faili]);
  		page_cache_release(pages[faili]);
  		faili--;
  	}
  	return err;
39279cc3d   Chris Mason   Btrfs: split up s...
1130
  }
d0215f3e5   Josef Bacik   Btrfs: simplify o...
1131
1132
1133
  /*
   * Copy the data described by @i into the page cache of @file starting at
   * @pos, one batch of pages at a time.
   *
   * For every batch: fault in the user buffer, reserve delalloc space, lock
   * the pages (prepare_pages()), copy, give back the reservation for pages
   * that were not dirtied, mark the rest dirty, drop the pages and throttle.
   *
   * Returns the number of bytes written if any were, otherwise the error
   * from the first failing step (or 0 when there was nothing to write).
   */
  static noinline ssize_t __btrfs_buffered_write(struct file *file,
  					       struct iov_iter *i,
  					       loff_t pos)
  {
  	struct inode *inode = fdentry(file)->d_inode;
  	struct btrfs_root *root = BTRFS_I(inode)->root;
  	struct page **pages;
  	unsigned long first_index;
  	size_t done = 0;
  	int nrptrs;
  	int ret = 0;
  	bool need_uptodate = false;

  	/*
  	 * Size the page pointer array: enough for the whole request, capped
  	 * at one page worth of pointers and at the task's remaining dirty
  	 * budget, but never fewer than 8 entries.
  	 */
  	nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) /
  		     PAGE_CACHE_SIZE, PAGE_CACHE_SIZE /
  		     (sizeof(struct page *)));
  	nrptrs = min(nrptrs, current->nr_dirtied_pause - current->nr_dirtied);
  	nrptrs = max(nrptrs, 8);

  	pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
  	if (!pages)
  		return -ENOMEM;

  	first_index = pos >> PAGE_CACHE_SHIFT;

  	while (iov_iter_count(i)) {
  		size_t page_off = pos & (PAGE_CACHE_SIZE - 1);
  		size_t write_bytes = min(iov_iter_count(i),
  					 nrptrs * (size_t)PAGE_CACHE_SIZE -
  					 page_off);
  		size_t num_pages = (write_bytes + page_off +
  				    PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
  		size_t reserved = num_pages << PAGE_CACHE_SHIFT;
  		size_t nr_dirty;
  		size_t copied;

  		WARN_ON(num_pages > nrptrs);

  		/*
  		 * Touch the user buffer before any page is locked in
  		 * prepare_pages, so a fault cannot recurse on a page lock.
  		 */
  		if (unlikely(iov_iter_fault_in_readable(i, write_bytes))) {
  			ret = -EFAULT;
  			break;
  		}

  		ret = btrfs_delalloc_reserve_space(inode, reserved);
  		if (ret)
  			break;

  		/*
  		 * prepare_pages fills the first num_pages entries of the
  		 * array itself, so stale entries from a previous pass do not
  		 * matter.
  		 */
  		ret = prepare_pages(root, file, pages, num_pages,
  				    pos, first_index, write_bytes,
  				    need_uptodate);
  		if (ret) {
  			btrfs_delalloc_release_space(inode, reserved);
  			break;
  		}

  		copied = btrfs_copy_from_user(pos, num_pages,
  					      write_bytes, pages, i);

  		/* Short copy: retry with a single page per pass from now on. */
  		if (copied < write_bytes)
  			nrptrs = 1;

  		/* Nothing copied at all: force the page uptodate next time. */
  		need_uptodate = (copied == 0);
  		if (need_uptodate)
  			nr_dirty = 0;
  		else
  			nr_dirty = (copied + page_off +
  				    PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;

  		/*
  		 * Hand back the reservation for pages we did not dirty.
  		 * btrfs_delalloc_release_space will decrement
  		 * outstanding_extents, but the part we did copy still
  		 * accounts for one outstanding extent, so bump the counter
  		 * first in that case.
  		 */
  		if (num_pages > nr_dirty) {
  			if (copied > 0) {
  				spin_lock(&BTRFS_I(inode)->lock);
  				BTRFS_I(inode)->outstanding_extents++;
  				spin_unlock(&BTRFS_I(inode)->lock);
  			}
  			btrfs_delalloc_release_space(inode,
  					(num_pages - nr_dirty) <<
  					PAGE_CACHE_SHIFT);
  		}

  		if (copied > 0) {
  			ret = btrfs_dirty_pages(root, inode, pages,
  						nr_dirty, pos, copied,
  						NULL);
  			if (ret) {
  				btrfs_delalloc_release_space(inode,
  					nr_dirty << PAGE_CACHE_SHIFT);
  				btrfs_drop_pages(pages, num_pages);
  				break;
  			}
  		}

  		btrfs_drop_pages(pages, num_pages);

  		cond_resched();

  		balance_dirty_pages_ratelimited_nr(inode->i_mapping,
  						   nr_dirty);
  		if (nr_dirty < (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
  			btrfs_btree_balance_dirty(root, 1);
  		btrfs_throttle(root);

  		pos += copied;
  		done += copied;
  	}

  	kfree(pages);

  	if (done)
  		return done;
  	return ret;
  }
  
  /*
   * O_DIRECT write path: try a direct write first and push whatever it did
   * not write through the buffered path, then write that part back and drop
   * it from the page cache.
   *
   * Returns the total number of bytes written if non-zero, otherwise the
   * error (or 0) of the buffered fallback.  A negative result from the
   * direct write itself is returned as is.
   */
  static ssize_t __btrfs_direct_write(struct kiocb *iocb,
  				    const struct iovec *iov,
  				    unsigned long nr_segs, loff_t pos,
  				    loff_t *ppos, size_t count, size_t ocount)
  {
  	struct file *file = iocb->ki_filp;
  	struct inode *inode = fdentry(file)->d_inode;
  	struct iov_iter iter;
  	ssize_t direct;
  	ssize_t buffered;
  	loff_t last;
  	int err;

  	direct = generic_file_direct_write(iocb, iov, &nr_segs, pos, ppos,
  					   count, ocount);

  	/*
  	 * The generic O_DIRECT code updates the in-memory i_size once the
  	 * DIOs are done, but our endio handlers never move the on-disk
  	 * i_size past the in-memory one.  Do one more update here so any
  	 * growth of the file is recorded.
  	 */
  	if (inode->i_size != BTRFS_I(inode)->disk_i_size) {
  		btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
  		mark_inode_dirty(inode);
  	}

  	/* Error, or everything went out directly: nothing left to do. */
  	if (direct < 0 || direct == count)
  		return direct;

  	/* Fall back to buffered IO for the remainder. */
  	pos += direct;
  	count -= direct;
  	iov_iter_init(&iter, iov, nr_segs, count, direct);

  	buffered = __btrfs_buffered_write(file, &iter, pos);
  	if (buffered < 0) {
  		err = buffered;
  	} else {
  		last = pos + buffered - 1;
  		err = filemap_write_and_wait_range(file->f_mapping, pos, last);
  		if (!err) {
  			direct += buffered;
  			*ppos = pos + buffered;
  			invalidate_mapping_pages(file->f_mapping,
  						 pos >> PAGE_CACHE_SHIFT,
  						 last >> PAGE_CACHE_SHIFT);
  		}
  	}

  	return direct ? direct : err;
  }
5b92ee720   Chris Mason   Btrfs: Fix lock o...
1312

d0215f3e5   Josef Bacik   Btrfs: simplify o...
1313
1314
1315
1316
1317
1318
1319
1320
  /*
   * aio_write entry point for btrfs files.
   *
   * Runs the generic write checks under i_mutex, expands the file when the
   * write starts beyond i_size, then dispatches to the direct or buffered
   * write path.  Returns the number of bytes written when non-zero,
   * otherwise the error code (or 0).
   */
  static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
  				    const struct iovec *iov,
  				    unsigned long nr_segs, loff_t pos)
  {
  	struct file *file = iocb->ki_filp;
  	struct inode *inode = fdentry(file)->d_inode;
  	struct btrfs_root *root = BTRFS_I(inode)->root;
  	loff_t *ppos = &iocb->ki_pos;
  	u64 aligned_pos;
  	ssize_t num_written = 0;
  	ssize_t err = 0;
  	size_t count, ocount;
  
  	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
  
  	mutex_lock(&inode->i_mutex);
  
  	err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
  	if (err)
  		goto out_unlock;
  	count = ocount;
  
  	current->backing_dev_info = inode->i_mapping->backing_dev_info;
  	err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
  	/* Either the checks failed or there is nothing to write. */
  	if (err || count == 0)
  		goto out_unlock;
  
  	err = file_remove_suid(file);
  	if (err)
  		goto out_unlock;
  
  	/*
  	 * If BTRFS flipped read-only because of some impossible error
  	 * (fs_info->fs_state now has BTRFS_SUPER_FLAG_ERROR), the write
  	 * must be refused even though the file was opened writable, to
  	 * keep the FS consistent.
  	 */
  	if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
  		err = -EROFS;
  		goto out_unlock;
  	}
  
  	err = btrfs_update_time(file);
  	if (err)
  		goto out_unlock;
  	BTRFS_I(inode)->sequence++;
  
  	/* Writing past i_size: expand the file up to the aligned start. */
  	aligned_pos = round_down(pos, root->sectorsize);
  	if (aligned_pos > i_size_read(inode)) {
  		err = btrfs_cont_expand(inode, i_size_read(inode),
  					aligned_pos);
  		if (err)
  			goto out_unlock;
  	}
  
  	if (unlikely(file->f_flags & O_DIRECT)) {
  		num_written = __btrfs_direct_write(iocb, iov, nr_segs,
  						   pos, ppos, count, ocount);
  	} else {
  		struct iov_iter iter;
  
  		iov_iter_init(&iter, iov, nr_segs, count, num_written);
  
  		num_written = __btrfs_buffered_write(file, &iter, pos);
  		if (num_written > 0)
  			*ppos = pos + num_written;
  	}
  
  	mutex_unlock(&inode->i_mutex);
  
  	/*
  	 * We want fsync to find this change, but we have not joined the
  	 * transaction that is running right now.  Later on someone is sure
  	 * to update the inode and record the real transid.
  	 *
  	 * Set last_trans to the fs_info generation + 1: that is either one
  	 * more than the running transaction, or the generation the next
  	 * transaction will use if none is running.
  	 */
  	BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
  	if (num_written > 0 || num_written == -EIOCBQUEUED) {
  		err = generic_write_sync(file, pos, num_written);
  		if (err < 0 && num_written > 0)
  			num_written = err;
  	}
  out:
  	current->backing_dev_info = NULL;
  	return num_written ? num_written : err;
  
  out_unlock:
  	/* Shared exit for every failure detected while i_mutex is held. */
  	mutex_unlock(&inode->i_mutex);
  	goto out;
  }
d397712bc   Chris Mason   Btrfs: Fix checkp...
1417
  int btrfs_release_file(struct inode *inode, struct file *filp)
e1b81e676   Mingming   btrfs delete orde...
1418
  {
5a3f23d51   Chris Mason   Btrfs: add extra ...
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
  	/*
  	 * ordered_data_close is set by settattr when we are about to truncate
  	 * a file from a non-zero size to a zero size.  This tries to
  	 * flush down new bytes that may have been written if the
  	 * application were using truncate to replace a file in place.
  	 */
  	if (BTRFS_I(inode)->ordered_data_close) {
  		BTRFS_I(inode)->ordered_data_close = 0;
  		btrfs_add_ordered_operation(NULL, BTRFS_I(inode)->root, inode);
  		if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
  			filemap_flush(inode->i_mapping);
  	}
6bf13c0cc   Sage Weil   Btrfs: transactio...
1431
1432
  	if (filp->private_data)
  		btrfs_ioctl_trans_end(filp);
e1b81e676   Mingming   btrfs delete orde...
1433
1434
  	return 0;
  }
d352ac681   Chris Mason   Btrfs: add and im...
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
  /*
   * fsync call for both files and directories.  This logs the inode into
   * the tree log instead of forcing full commits whenever possible.
   *
   * It needs to call filemap_fdatawait so that all ordered extent updates in
   * the metadata btree are up to date for copying to the log.
   *
   * It drops the inode mutex before doing the tree log commit.  This is an
   * important optimization for directories because holding the mutex prevents
   * new operations on the dir while we write to disk.
   */
02c24a821   Josef Bacik   fs: push i_mutex ...
1446
  int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
  {
  	struct dentry *dentry = file->f_path.dentry;
  	struct inode *inode = dentry->d_inode;
  	struct btrfs_root *root = BTRFS_I(inode)->root;
  	int ret = 0;
  	struct btrfs_trans_handle *trans;
  
  	trace_btrfs_sync_file(file, datasync);
  
  	/* Write back the requested range before taking i_mutex. */
  	ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
  	if (ret)
  		return ret;
  	mutex_lock(&inode->i_mutex);
  
  	/* we wait first, since the writeback may change the inode */
  	root->log_batch++;
  	btrfs_wait_ordered_range(inode, 0, (u64)-1);
  	root->log_batch++;
  
  	/*
  	 * check the transaction that last modified this inode
  	 * and see if its already been committed
  	 */
  	if (!BTRFS_I(inode)->last_trans) {
  		mutex_unlock(&inode->i_mutex);
  		goto out;
  	}
  
  	/*
  	 * if the last transaction that changed this file was before
  	 * the current transaction, we can bail out now without any
  	 * syncing
  	 */
  	smp_mb();
  	if (BTRFS_I(inode)->last_trans <=
  	    root->fs_info->last_trans_committed) {
  		BTRFS_I(inode)->last_trans = 0;
  		mutex_unlock(&inode->i_mutex);
  		goto out;
  	}
  
  	/*
  	 * ok we haven't committed the transaction yet, lets do a commit
  	 */
  	if (file->private_data)
  		btrfs_ioctl_trans_end(file);
  
  	trans = btrfs_start_transaction(root, 0);
  	if (IS_ERR(trans)) {
  		ret = PTR_ERR(trans);
  		mutex_unlock(&inode->i_mutex);
  		goto out;
  	}
  
  	ret = btrfs_log_dentry_safe(trans, root, dentry);
  	if (ret < 0) {
  		/*
  		 * Logging failed.  Bailing out here would leak the
  		 * transaction handle, so fall through and do a full
  		 * commit instead: that releases the handle and still
  		 * gets the data to disk.
  		 */
  		ret = 1;
  	}
  
  	/* we've logged all the items and now have a consistent
  	 * version of the file in the log.  It is possible that
  	 * someone will come in and modify the file, but that's
  	 * fine because the log is consistent on disk, and we
  	 * have references to all of the file's extents
  	 *
  	 * It is possible that someone will come in and log the
  	 * file again, but that will end up using the synchronization
  	 * inside btrfs_sync_log to keep things safe.
  	 */
  	mutex_unlock(&inode->i_mutex);
  
  	if (ret != BTRFS_NO_LOG_SYNC) {
  		if (ret > 0) {
  			/* The log cannot be used: commit the transaction. */
  			ret = btrfs_commit_transaction(trans, root);
  		} else {
  			/* Sync the log; fall back to a commit if that fails. */
  			ret = btrfs_sync_log(trans, root);
  			if (ret == 0)
  				ret = btrfs_end_transaction(trans, root);
  			else
  				ret = btrfs_commit_transaction(trans, root);
  		}
  	} else {
  		/* Nothing to sync: just drop the transaction handle. */
  		ret = btrfs_end_transaction(trans, root);
  	}
  out:
  	/* Any positive value left in ret is reported to the caller as -EIO. */
  	return ret > 0 ? -EIO : ret;
  }
f0f37e2f7   Alexey Dobriyan   const: mark struc...
1531
  static const struct vm_operations_struct btrfs_file_vm_ops = {
92fee66d4   Chris Mason   Btrfs: deal with ...
1532
  	.fault		= filemap_fault,
9ebefb180   Chris Mason   Btrfs: patch queu...
1533
1534
1535
1536
1537
  	.page_mkwrite	= btrfs_page_mkwrite,
  };
  
  static int btrfs_file_mmap(struct file	*filp, struct vm_area_struct *vma)
  {
058a457ef   Miao Xie   Btrfs: fix remap_...
1538
1539
1540
1541
  	struct address_space *mapping = filp->f_mapping;
  
  	if (!mapping->a_ops->readpage)
  		return -ENOEXEC;
9ebefb180   Chris Mason   Btrfs: patch queu...
1542
  	file_accessed(filp);
058a457ef   Miao Xie   Btrfs: fix remap_...
1543
1544
  	vma->vm_ops = &btrfs_file_vm_ops;
  	vma->vm_flags |= VM_CAN_NONLINEAR;
9ebefb180   Chris Mason   Btrfs: patch queu...
1545
1546
  	return 0;
  }
2fe17c107   Christoph Hellwig   fallocate should ...
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
  /*
   * ->fallocate handler: preallocate extents for [offset, offset + len).
   *
   * The range is rounded outwards to the root's sectorsize.  Only
   * FALLOC_FL_KEEP_SIZE is accepted in @mode; any other bit yields
   * -EOPNOTSUPP.  Returns 0 on success or a negative errno.
   */
  static long btrfs_fallocate(struct file *file, int mode,
  			    loff_t offset, loff_t len)
  {
  	struct inode *inode = file->f_path.dentry->d_inode;
  	struct extent_state *cached_state = NULL;
  	u64 cur_offset;
  	u64 last_byte;
  	u64 alloc_start;
  	u64 alloc_end;
  	u64 alloc_hint = 0;
  	u64 locked_end;
  	u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
  	struct extent_map *em;
  	int ret;
  
  	/* Round the requested range outwards to sector boundaries. */
  	alloc_start = offset & ~mask;
  	alloc_end =  (offset + len + mask) & ~mask;
  
  	/* We only support the FALLOC_FL_KEEP_SIZE mode */
  	if (mode & ~FALLOC_FL_KEEP_SIZE)
  		return -EOPNOTSUPP;
  
  	/*
  	 * wait for ordered IO before we have any locks.  We'll loop again
  	 * below with the locks held.
  	 */
  	btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start);
  
  	mutex_lock(&inode->i_mutex);
  	ret = inode_newsize_ok(inode, alloc_end);
  	if (ret)
  		goto out;
  
  	/* The range starts beyond i_size: expand the file up to it first. */
  	if (alloc_start > inode->i_size) {
  		ret = btrfs_cont_expand(inode, i_size_read(inode),
  					alloc_start);
  		if (ret)
  			goto out;
  	}
  
  	/*
  	 * Take the extent lock, and retry until no ordered extent overlaps
  	 * the range while the lock is held.
  	 */
  	locked_end = alloc_end - 1;
  	while (1) {
  		struct btrfs_ordered_extent *ordered;
  
  		/* the extent lock is ordered inside the running
  		 * transaction
  		 */
  		lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start,
  				 locked_end, 0, &cached_state, GFP_NOFS);
  		ordered = btrfs_lookup_first_ordered_extent(inode,
  							    alloc_end - 1);
  		if (ordered &&
  		    ordered->file_offset + ordered->len > alloc_start &&
  		    ordered->file_offset < alloc_end) {
  			btrfs_put_ordered_extent(ordered);
  			unlock_extent_cached(&BTRFS_I(inode)->io_tree,
  					     alloc_start, locked_end,
  					     &cached_state, GFP_NOFS);
  			/*
  			 * we can't wait on the range with the transaction
  			 * running or with the extent lock held
  			 */
  			btrfs_wait_ordered_range(inode, alloc_start,
  						 alloc_end - alloc_start);
  		} else {
  			if (ordered)
  				btrfs_put_ordered_extent(ordered);
  			break;
  		}
  	}
  
  	/* Walk the range one extent map at a time. */
  	cur_offset = alloc_start;
  	while (1) {
  		u64 actual_end;
  
  		em = btrfs_get_extent(inode, NULL, 0, cur_offset,
  				      alloc_end - cur_offset, 0);
  		if (IS_ERR_OR_NULL(em)) {
  			/*
  			 * Report the lookup failure to the caller instead
  			 * of crashing; the break leads to the unlock code
  			 * below.
  			 */
  			if (!em)
  				ret = -ENOMEM;
  			else
  				ret = PTR_ERR(em);
  			break;
  		}
  		last_byte = min(extent_map_end(em), alloc_end);
  		actual_end = min_t(u64, extent_map_end(em), offset + len);
  		last_byte = (last_byte + mask) & ~mask;
  
  		if (em->block_start == EXTENT_MAP_HOLE ||
  		    (cur_offset >= inode->i_size &&
  		     !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
  
  			/*
  			 * Make sure we have enough space before we do the
  			 * allocation.
  			 */
  			ret = btrfs_check_data_free_space(inode, last_byte -
  							  cur_offset);
  			if (ret) {
  				free_extent_map(em);
  				break;
  			}
  
  			ret = btrfs_prealloc_file_range(inode, mode, cur_offset,
  							last_byte - cur_offset,
  							1 << inode->i_blkbits,
  							offset + len,
  							&alloc_hint);
  
  			/* Let go of our reservation. */
  			btrfs_free_reserved_data_space(inode, last_byte -
  						       cur_offset);
  			if (ret < 0) {
  				free_extent_map(em);
  				break;
  			}
  		} else if (actual_end > inode->i_size &&
  			   !(mode & FALLOC_FL_KEEP_SIZE)) {
  			/*
  			 * We didn't need to allocate any more space, but we
  			 * still extended the size of the file so we need to
  			 * update i_size.
  			 */
  			inode->i_ctime = CURRENT_TIME;
  			i_size_write(inode, actual_end);
  			btrfs_ordered_update_i_size(inode, actual_end, NULL);
  		}
  		free_extent_map(em);
  
  		cur_offset = last_byte;
  		if (cur_offset >= alloc_end) {
  			ret = 0;
  			break;
  		}
  	}
  	unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
  			     &cached_state, GFP_NOFS);
  out:
  	mutex_unlock(&inode->i_mutex);
  	return ret;
  }
b26751575   Josef Bacik   Btrfs: implement ...
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
  /*
   * Locate the next data extent (origin == SEEK_DATA) or hole
   * (origin == SEEK_HOLE) at or after *offset.
   *
   * On success the position found is stored in *offset, clamped to the
   * inode's i_size, and 0 is returned.  -ENXIO is returned when the file is
   * empty, when an extent lookup fails, or when the search ends without a
   * match.  The range [*offset, lockend] of the inode's io_tree is locked for
   * the duration of the search.
   */
  static int find_desired_extent(struct inode *inode, loff_t *offset, int origin)
  {
  	struct btrfs_root *root = BTRFS_I(inode)->root;
  	struct extent_map *em;
  	struct extent_state *cached_state = NULL;
  	u64 lockstart = *offset;
  	u64 lockend = i_size_read(inode);
  	u64 start = *offset;
  	u64 orig_start = *offset;
  	u64 len = i_size_read(inode);	/* overwritten below before first use */
  	u64 last_end = 0;
  	int ret = 0;
  
  	/*
  	 * Lock at least one sector, and make sure the end of the locked
  	 * range lies beyond its start.
  	 */
  	lockend = max_t(u64, root->sectorsize, lockend);
  	if (lockend <= lockstart)
  		lockend = lockstart + root->sectorsize;
  
  	len = lockend - lockstart + 1;
  
  	len = max_t(u64, len, root->sectorsize);
  	/* An empty file has neither data nor a hole to report. */
  	if (inode->i_size == 0)
  		return -ENXIO;
  
  	lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, 0,
  			 &cached_state, GFP_NOFS);
  
  	/*
  	 * Delalloc is such a pain.  If we have a hole and we have pending
  	 * delalloc for a portion of the hole we will get back a hole that
  	 * exists for the entire range since it hasn't been actually written
  	 * yet.  So to take care of this case we need to look for an extent just
  	 * before the position we want in case there is outstanding delalloc
  	 * going on here.
  	 */
  	if (origin == SEEK_HOLE && start != 0) {
  		/* Look up the sector just before start (or the first sector). */
  		if (start <= root->sectorsize)
  			em = btrfs_get_extent_fiemap(inode, NULL, 0, 0,
  						     root->sectorsize, 0);
  		else
  			em = btrfs_get_extent_fiemap(inode, NULL, 0,
  						     start - root->sectorsize,
  						     root->sectorsize, 0);
  		if (IS_ERR(em)) {
  			ret = -ENXIO;
  			goto out;
  		}
  		/* Remember where that extent ends; delalloc is capped at i_size. */
  		last_end = em->start + em->len;
  		if (em->block_start == EXTENT_MAP_DELALLOC)
  			last_end = min_t(u64, last_end, inode->i_size);
  		free_extent_map(em);
  	}
  
  	/* Walk forward one extent map at a time until a match is found. */
  	while (1) {
  		em = btrfs_get_extent_fiemap(inode, NULL, 0, start, len, 0);
  		if (IS_ERR(em)) {
  			ret = -ENXIO;
  			break;
  		}
  
  		if (em->block_start == EXTENT_MAP_HOLE) {
  			/*
  			 * A hole flagged EXTENT_FLAG_VACANCY only counts if
  			 * the previous extent ended after the position the
  			 * search started from.
  			 * NOTE(review): VACANCY presumably marks the map
  			 * past the last real extent - confirm.
  			 */
  			if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) {
  				if (last_end <= orig_start) {
  					free_extent_map(em);
  					ret = -ENXIO;
  					break;
  				}
  			}
  
  			if (origin == SEEK_HOLE) {
  				*offset = start;
  				free_extent_map(em);
  				break;
  			}
  		} else {
  			if (origin == SEEK_DATA) {
  				/* Delalloc at or beyond i_size is not data. */
  				if (em->block_start == EXTENT_MAP_DELALLOC) {
  					if (start >= inode->i_size) {
  						free_extent_map(em);
  						ret = -ENXIO;
  						break;
  					}
  				}
  
  				*offset = start;
  				free_extent_map(em);
  				break;
  			}
  		}
  
  		/* No match: continue from the end of this extent map. */
  		start = em->start + em->len;
  		last_end = em->start + em->len;
  
  		if (em->block_start == EXTENT_MAP_DELALLOC)
  			last_end = min_t(u64, last_end, inode->i_size);
  
  		/* A VACANCY map that did not match ends the search. */
  		if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) {
  			free_extent_map(em);
  			ret = -ENXIO;
  			break;
  		}
  		free_extent_map(em);
  		cond_resched();
  	}
  	/* Never report a position beyond the end of the file. */
  	if (!ret)
  		*offset = min(*offset, inode->i_size);
  out:
  	unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
  			     &cached_state, GFP_NOFS);
  	return ret;
  }
  
  /*
   * ->llseek handler.  SEEK_END and SEEK_CUR are passed to
   * generic_file_llseek(); SEEK_DATA and SEEK_HOLE are resolved with
   * find_desired_extent().  Returns the resulting file position, or a
   * negative errno.
   */
  static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int origin)
  {
  	struct inode *inode = file->f_mapping->host;
  	int err;
  
  	mutex_lock(&inode->i_mutex);
  
  	if (origin == SEEK_END || origin == SEEK_CUR) {
  		offset = generic_file_llseek(file, offset, origin);
  		goto unlock;
  	}
  
  	if (origin == SEEK_DATA || origin == SEEK_HOLE) {
  		/* Nothing to find at or beyond the end of the file. */
  		if (offset >= i_size_read(inode)) {
  			offset = -ENXIO;
  			goto unlock;
  		}
  
  		err = find_desired_extent(inode, &offset, origin);
  		if (err) {
  			offset = err;
  			goto unlock;
  		}
  	}
  
  	/* Reject positions that are negative or beyond s_maxbytes. */
  	if ((offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) ||
  	    offset > inode->i_sb->s_maxbytes) {
  		offset = -EINVAL;
  		goto unlock;
  	}
  
  	/* Special lock needed here? */
  	if (file->f_pos != offset) {
  		file->f_pos = offset;
  		file->f_version = 0;
  	}
  unlock:
  	mutex_unlock(&inode->i_mutex);
  	return offset;
  }
828c09509   Alexey Dobriyan   const: constify r...
1831
  const struct file_operations btrfs_file_operations = {
b26751575   Josef Bacik   Btrfs: implement ...
1832
  	.llseek		= btrfs_file_llseek,
39279cc3d   Chris Mason   Btrfs: split up s...
1833
  	.read		= do_sync_read,
4a001071d   Miao Xie   Btrfs: fix loop d...
1834
  	.write		= do_sync_write,
9ebefb180   Chris Mason   Btrfs: patch queu...
1835
  	.aio_read       = generic_file_aio_read,
e9906a984   Chris Mason   Fixes for loopbac...
1836
  	.splice_read	= generic_file_splice_read,
11c65dccf   Josef Bacik   Btrfs: do aio_wri...
1837
  	.aio_write	= btrfs_file_aio_write,
9ebefb180   Chris Mason   Btrfs: patch queu...
1838
  	.mmap		= btrfs_file_mmap,
39279cc3d   Chris Mason   Btrfs: split up s...
1839
  	.open		= generic_file_open,
e1b81e676   Mingming   btrfs delete orde...
1840
  	.release	= btrfs_release_file,
39279cc3d   Chris Mason   Btrfs: split up s...
1841
  	.fsync		= btrfs_sync_file,
2fe17c107   Christoph Hellwig   fallocate should ...
1842
  	.fallocate	= btrfs_fallocate,
34287aa36   Christoph Hellwig   Btrfs: use unlock...
1843
  	.unlocked_ioctl	= btrfs_ioctl,
39279cc3d   Chris Mason   Btrfs: split up s...
1844
  #ifdef CONFIG_COMPAT
34287aa36   Christoph Hellwig   Btrfs: use unlock...
1845
  	.compat_ioctl	= btrfs_ioctl,
39279cc3d   Chris Mason   Btrfs: split up s...
1846
1847
  #endif
  };