Commit 2501c4a066e633524791e8ce8dbfe615aca071cf
Committed by
Greg Kroah-Hartman
1 parent
bddf0faccf
Btrfs: read inode size after acquiring the mutex when punching a hole
commit a1a50f60a6bf4f861eb94793420274bc1ccd409a upstream. In a previous change, commit 12870f1c9b2de7d475d22e73fd7db1b418599725, I accidentally moved the roundup of inode->i_size to outside of the critical section delimited by the inode mutex, which is not atomic and not correct since the size can be changed by other task before we acquire the mutex. Therefore fix it. Signed-off-by: Filipe David Borba Manana <fdmanana@gmail.com> Signed-off-by: Chris Mason <clm@fb.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Showing 1 changed file with 2 additions and 1 deletion Inline Diff
fs/btrfs/file.c
1 | /* | 1 | /* |
2 | * Copyright (C) 2007 Oracle. All rights reserved. | 2 | * Copyright (C) 2007 Oracle. All rights reserved. |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or | 4 | * This program is free software; you can redistribute it and/or |
5 | * modify it under the terms of the GNU General Public | 5 | * modify it under the terms of the GNU General Public |
6 | * License v2 as published by the Free Software Foundation. | 6 | * License v2 as published by the Free Software Foundation. |
7 | * | 7 | * |
8 | * This program is distributed in the hope that it will be useful, | 8 | * This program is distributed in the hope that it will be useful, |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
11 | * General Public License for more details. | 11 | * General Public License for more details. |
12 | * | 12 | * |
13 | * You should have received a copy of the GNU General Public | 13 | * You should have received a copy of the GNU General Public |
14 | * License along with this program; if not, write to the | 14 | * License along with this program; if not, write to the |
15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
16 | * Boston, MA 021110-1307, USA. | 16 | * Boston, MA 021110-1307, USA. |
17 | */ | 17 | */ |
18 | 18 | ||
19 | #include <linux/fs.h> | 19 | #include <linux/fs.h> |
20 | #include <linux/pagemap.h> | 20 | #include <linux/pagemap.h> |
21 | #include <linux/highmem.h> | 21 | #include <linux/highmem.h> |
22 | #include <linux/time.h> | 22 | #include <linux/time.h> |
23 | #include <linux/init.h> | 23 | #include <linux/init.h> |
24 | #include <linux/string.h> | 24 | #include <linux/string.h> |
25 | #include <linux/backing-dev.h> | 25 | #include <linux/backing-dev.h> |
26 | #include <linux/mpage.h> | 26 | #include <linux/mpage.h> |
27 | #include <linux/aio.h> | 27 | #include <linux/aio.h> |
28 | #include <linux/falloc.h> | 28 | #include <linux/falloc.h> |
29 | #include <linux/swap.h> | 29 | #include <linux/swap.h> |
30 | #include <linux/writeback.h> | 30 | #include <linux/writeback.h> |
31 | #include <linux/statfs.h> | 31 | #include <linux/statfs.h> |
32 | #include <linux/compat.h> | 32 | #include <linux/compat.h> |
33 | #include <linux/slab.h> | 33 | #include <linux/slab.h> |
34 | #include <linux/btrfs.h> | 34 | #include <linux/btrfs.h> |
35 | #include "ctree.h" | 35 | #include "ctree.h" |
36 | #include "disk-io.h" | 36 | #include "disk-io.h" |
37 | #include "transaction.h" | 37 | #include "transaction.h" |
38 | #include "btrfs_inode.h" | 38 | #include "btrfs_inode.h" |
39 | #include "print-tree.h" | 39 | #include "print-tree.h" |
40 | #include "tree-log.h" | 40 | #include "tree-log.h" |
41 | #include "locking.h" | 41 | #include "locking.h" |
42 | #include "volumes.h" | 42 | #include "volumes.h" |
43 | 43 | ||
/* slab cache for struct inode_defrag allocations */
static struct kmem_cache *btrfs_inode_defrag_cachep;
/*
 * when auto defrag is enabled we
 * queue up these defrag structs to remember which
 * inodes need defragging passes
 */
struct inode_defrag {
	/* node in fs_info->defrag_inodes, ordered by (root, ino) */
	struct rb_node rb_node;
	/* objectid */
	u64 ino;
	/*
	 * transid where the defrag was added, we search for
	 * extents newer than this
	 */
	u64 transid;

	/* root objectid */
	u64 root;

	/* last offset we were able to defrag */
	u64 last_offset;

	/* if we've wrapped around back to zero once already */
	int cycled;
};
69 | 69 | ||
70 | static int __compare_inode_defrag(struct inode_defrag *defrag1, | 70 | static int __compare_inode_defrag(struct inode_defrag *defrag1, |
71 | struct inode_defrag *defrag2) | 71 | struct inode_defrag *defrag2) |
72 | { | 72 | { |
73 | if (defrag1->root > defrag2->root) | 73 | if (defrag1->root > defrag2->root) |
74 | return 1; | 74 | return 1; |
75 | else if (defrag1->root < defrag2->root) | 75 | else if (defrag1->root < defrag2->root) |
76 | return -1; | 76 | return -1; |
77 | else if (defrag1->ino > defrag2->ino) | 77 | else if (defrag1->ino > defrag2->ino) |
78 | return 1; | 78 | return 1; |
79 | else if (defrag1->ino < defrag2->ino) | 79 | else if (defrag1->ino < defrag2->ino) |
80 | return -1; | 80 | return -1; |
81 | else | 81 | else |
82 | return 0; | 82 | return 0; |
83 | } | 83 | } |
84 | 84 | ||
/* pop a record for an inode into the defrag tree. The lock
 * must be held already
 *
 * If you're inserting a record for an older transid than an
 * existing record, the transid already in the tree is lowered
 *
 * If an existing record is found the defrag item you
 * pass in is freed
 */
static int __btrfs_add_inode_defrag(struct inode *inode,
				    struct inode_defrag *defrag)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct inode_defrag *entry;
	struct rb_node **p;
	struct rb_node *parent = NULL;
	int ret;

	/* standard rbtree insertion walk, ordered by (root, ino) */
	p = &root->fs_info->defrag_inodes.rb_node;
	while (*p) {
		parent = *p;
		entry = rb_entry(parent, struct inode_defrag, rb_node);

		ret = __compare_inode_defrag(defrag, entry);
		if (ret < 0)
			p = &parent->rb_left;
		else if (ret > 0)
			p = &parent->rb_right;
		else {
			/* if we're reinserting an entry for
			 * an old defrag run, make sure to
			 * lower the transid of our existing record
			 */
			if (defrag->transid < entry->transid)
				entry->transid = defrag->transid;
			if (defrag->last_offset > entry->last_offset)
				entry->last_offset = defrag->last_offset;
			/* caller frees @defrag when it sees this code */
			return -EEXIST;
		}
	}
	set_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
	rb_link_node(&defrag->rb_node, parent, p);
	rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes);
	return 0;
}
130 | 130 | ||
131 | static inline int __need_auto_defrag(struct btrfs_root *root) | 131 | static inline int __need_auto_defrag(struct btrfs_root *root) |
132 | { | 132 | { |
133 | if (!btrfs_test_opt(root, AUTO_DEFRAG)) | 133 | if (!btrfs_test_opt(root, AUTO_DEFRAG)) |
134 | return 0; | 134 | return 0; |
135 | 135 | ||
136 | if (btrfs_fs_closing(root->fs_info)) | 136 | if (btrfs_fs_closing(root->fs_info)) |
137 | return 0; | 137 | return 0; |
138 | 138 | ||
139 | return 1; | 139 | return 1; |
140 | } | 140 | } |
141 | 141 | ||
/*
 * insert a defrag record for this inode if auto defrag is
 * enabled
 */
int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
			   struct inode *inode)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct inode_defrag *defrag;
	u64 transid;
	int ret;

	if (!__need_auto_defrag(root))
		return 0;

	/* cheap unlocked check; re-tested under the lock below */
	if (test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags))
		return 0;

	if (trans)
		transid = trans->transid;
	else
		transid = BTRFS_I(inode)->root->last_trans;

	defrag = kmem_cache_zalloc(btrfs_inode_defrag_cachep, GFP_NOFS);
	if (!defrag)
		return -ENOMEM;

	defrag->ino = btrfs_ino(inode);
	defrag->transid = transid;
	defrag->root = root->root_key.objectid;

	spin_lock(&root->fs_info->defrag_inodes_lock);
	if (!test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags)) {
		/*
		 * If we set IN_DEFRAG flag and evict the inode from memory,
		 * and then re-read this inode, this new inode doesn't have
		 * IN_DEFRAG flag. At the case, we may find the existed defrag.
		 */
		ret = __btrfs_add_inode_defrag(inode, defrag);
		if (ret)
			kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
	} else {
		kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
	}
	spin_unlock(&root->fs_info->defrag_inodes_lock);
	return 0;
}
189 | 189 | ||
/*
 * Requeue the defrag object. If there is a defrag object that points to
 * the same inode in the tree, we will merge them together (by
 * __btrfs_add_inode_defrag()) and free the one that we want to requeue.
 */
static void btrfs_requeue_inode_defrag(struct inode *inode,
				       struct inode_defrag *defrag)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	int ret;

	if (!__need_auto_defrag(root))
		goto out;

	/*
	 * Here we don't check the IN_DEFRAG flag, because we need merge
	 * them together.
	 */
	spin_lock(&root->fs_info->defrag_inodes_lock);
	ret = __btrfs_add_inode_defrag(inode, defrag);
	spin_unlock(&root->fs_info->defrag_inodes_lock);
	if (ret)
		goto out;
	return;
out:
	/* not inserted: auto defrag disabled, or merged into existing record */
	kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
}
217 | 217 | ||
/*
 * pick the defragable inode that we want, if it doesn't exist, we will get
 * the next one.
 */
static struct inode_defrag *
btrfs_pick_defrag_inode(struct btrfs_fs_info *fs_info, u64 root, u64 ino)
{
	struct inode_defrag *entry = NULL;
	struct inode_defrag tmp;
	struct rb_node *p;
	struct rb_node *parent = NULL;
	int ret;

	tmp.ino = ino;
	tmp.root = root;

	spin_lock(&fs_info->defrag_inodes_lock);
	p = fs_info->defrag_inodes.rb_node;
	while (p) {
		parent = p;
		entry = rb_entry(parent, struct inode_defrag, rb_node);

		ret = __compare_inode_defrag(&tmp, entry);
		if (ret < 0)
			p = parent->rb_left;
		else if (ret > 0)
			p = parent->rb_right;
		else
			goto out;
	}

	/* no exact match: step to the in-order successor if we stopped short */
	if (parent && __compare_inode_defrag(&tmp, entry) > 0) {
		parent = rb_next(parent);
		if (parent)
			entry = rb_entry(parent, struct inode_defrag, rb_node);
		else
			entry = NULL;
	}
out:
	/* detach the record from the tree; caller takes ownership */
	if (entry)
		rb_erase(parent, &fs_info->defrag_inodes);
	spin_unlock(&fs_info->defrag_inodes_lock);
	return entry;
}
262 | 262 | ||
/* Drain and free every queued defrag record. */
void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info)
{
	struct inode_defrag *defrag;
	struct rb_node *node;

	spin_lock(&fs_info->defrag_inodes_lock);
	node = rb_first(&fs_info->defrag_inodes);
	while (node) {
		rb_erase(node, &fs_info->defrag_inodes);
		defrag = rb_entry(node, struct inode_defrag, rb_node);
		kmem_cache_free(btrfs_inode_defrag_cachep, defrag);

		/* drop the spinlock periodically so we don't hog the CPU */
		if (need_resched()) {
			spin_unlock(&fs_info->defrag_inodes_lock);
			cond_resched();
			spin_lock(&fs_info->defrag_inodes_lock);
		}

		node = rb_first(&fs_info->defrag_inodes);
	}
	spin_unlock(&fs_info->defrag_inodes_lock);
}
285 | 285 | ||
/* max number of extents defragged in one pass below */
#define BTRFS_DEFRAG_BATCH 1024

/*
 * Run one defrag pass for the inode described by @defrag.  Consumes
 * @defrag on every path: it is either requeued for more work or freed.
 */
static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
				    struct inode_defrag *defrag)
{
	struct btrfs_root *inode_root;
	struct inode *inode;
	struct btrfs_key key;
	struct btrfs_ioctl_defrag_range_args range;
	int num_defrag;
	int index;
	int ret;

	/* get the inode */
	key.objectid = defrag->root;
	btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
	key.offset = (u64)-1;

	/* hold SRCU so the subvolume root can't go away during the lookup */
	index = srcu_read_lock(&fs_info->subvol_srcu);

	inode_root = btrfs_read_fs_root_no_name(fs_info, &key);
	if (IS_ERR(inode_root)) {
		ret = PTR_ERR(inode_root);
		goto cleanup;
	}

	key.objectid = defrag->ino;
	btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
	key.offset = 0;
	inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL);
	if (IS_ERR(inode)) {
		ret = PTR_ERR(inode);
		goto cleanup;
	}
	srcu_read_unlock(&fs_info->subvol_srcu, index);

	/* do a chunk of defrag */
	clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
	memset(&range, 0, sizeof(range));
	range.len = (u64)-1;
	range.start = defrag->last_offset;

	sb_start_write(fs_info->sb);
	num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid,
				       BTRFS_DEFRAG_BATCH);
	sb_end_write(fs_info->sb);
	/*
	 * if we filled the whole defrag batch, there
	 * must be more work to do. Queue this defrag
	 * again
	 */
	if (num_defrag == BTRFS_DEFRAG_BATCH) {
		defrag->last_offset = range.start;
		btrfs_requeue_inode_defrag(inode, defrag);
	} else if (defrag->last_offset && !defrag->cycled) {
		/*
		 * we didn't fill our defrag batch, but
		 * we didn't start at zero. Make sure we loop
		 * around to the start of the file.
		 */
		defrag->last_offset = 0;
		defrag->cycled = 1;
		btrfs_requeue_inode_defrag(inode, defrag);
	} else {
		kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
	}

	iput(inode);
	return 0;
cleanup:
	srcu_read_unlock(&fs_info->subvol_srcu, index);
	kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
	return ret;
}
360 | 360 | ||
/*
 * run through the list of inodes in the FS that need
 * defragging
 */
int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
{
	struct inode_defrag *defrag;
	u64 first_ino = 0;
	u64 root_objectid = 0;

	atomic_inc(&fs_info->defrag_running);
	while (1) {
		/* Pause the auto defragger. */
		if (test_bit(BTRFS_FS_STATE_REMOUNTING,
			     &fs_info->fs_state))
			break;

		if (!__need_auto_defrag(fs_info->tree_root))
			break;

		/* find an inode to defrag */
		defrag = btrfs_pick_defrag_inode(fs_info, root_objectid,
						 first_ino);
		if (!defrag) {
			if (root_objectid || first_ino) {
				/* reached the end of the tree mid-scan:
				 * wrap around once to pick up earlier
				 * records, then stop if still empty
				 */
				root_objectid = 0;
				first_ino = 0;
				continue;
			} else {
				break;
			}
		}

		first_ino = defrag->ino + 1;
		root_objectid = defrag->root;

		__btrfs_run_defrag_inode(fs_info, defrag);
	}
	atomic_dec(&fs_info->defrag_running);

	/*
	 * during unmount, we use the transaction_wait queue to
	 * wait for the defragger to stop
	 */
	wake_up(&fs_info->transaction_wait);
	return 0;
}
408 | 408 | ||
/* simple helper to fault in pages and copy. This should go away
 * and be replaced with calls into generic code.
 *
 * Returns the total number of bytes copied into @prepared_pages;
 * may be short of @write_bytes if a userspace fault is needed.
 */
static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
					 size_t write_bytes,
					 struct page **prepared_pages,
					 struct iov_iter *i)
{
	size_t copied = 0;
	size_t total_copied = 0;
	int pg = 0;
	/* byte offset of @pos within the first page */
	int offset = pos & (PAGE_CACHE_SIZE - 1);

	while (write_bytes > 0) {
		size_t count = min_t(size_t,
				     PAGE_CACHE_SIZE - offset, write_bytes);
		struct page *page = prepared_pages[pg];
		/*
		 * Copy data from userspace to the current page
		 */
		copied = iov_iter_copy_from_user_atomic(page, i, offset, count);

		/* Flush processor's dcache for this page */
		flush_dcache_page(page);

		/*
		 * if we get a partial write, we can end up with
		 * partially up to date pages. These add
		 * a lot of complexity, so make sure they don't
		 * happen by forcing this copy to be retried.
		 *
		 * The rest of the btrfs_file_write code will fall
		 * back to page at a time copies after we return 0.
		 */
		if (!PageUptodate(page) && copied < count)
			copied = 0;

		iov_iter_advance(i, copied);
		write_bytes -= copied;
		total_copied += copied;

		/* Return to btrfs_file_aio_write to fault page */
		if (unlikely(copied == 0))
			break;

		if (unlikely(copied < PAGE_CACHE_SIZE - offset)) {
			offset += copied;
		} else {
			pg++;
			offset = 0;
		}
	}
	return total_copied;
}
463 | 463 | ||
464 | /* | 464 | /* |
465 | * unlocks pages after btrfs_file_write is done with them | 465 | * unlocks pages after btrfs_file_write is done with them |
466 | */ | 466 | */ |
467 | static void btrfs_drop_pages(struct page **pages, size_t num_pages) | 467 | static void btrfs_drop_pages(struct page **pages, size_t num_pages) |
468 | { | 468 | { |
469 | size_t i; | 469 | size_t i; |
470 | for (i = 0; i < num_pages; i++) { | 470 | for (i = 0; i < num_pages; i++) { |
471 | /* page checked is some magic around finding pages that | 471 | /* page checked is some magic around finding pages that |
472 | * have been modified without going through btrfs_set_page_dirty | 472 | * have been modified without going through btrfs_set_page_dirty |
473 | * clear it here | 473 | * clear it here |
474 | */ | 474 | */ |
475 | ClearPageChecked(pages[i]); | 475 | ClearPageChecked(pages[i]); |
476 | unlock_page(pages[i]); | 476 | unlock_page(pages[i]); |
477 | mark_page_accessed(pages[i]); | 477 | mark_page_accessed(pages[i]); |
478 | page_cache_release(pages[i]); | 478 | page_cache_release(pages[i]); |
479 | } | 479 | } |
480 | } | 480 | } |
481 | 481 | ||
482 | /* | 482 | /* |
483 | * after copy_from_user, pages need to be dirtied and we need to make | 483 | * after copy_from_user, pages need to be dirtied and we need to make |
484 | * sure holes are created between the current EOF and the start of | 484 | * sure holes are created between the current EOF and the start of |
485 | * any next extents (if required). | 485 | * any next extents (if required). |
486 | * | 486 | * |
487 | * this also makes the decision about creating an inline extent vs | 487 | * this also makes the decision about creating an inline extent vs |
488 | * doing real data extents, marking pages dirty and delalloc as required. | 488 | * doing real data extents, marking pages dirty and delalloc as required. |
489 | */ | 489 | */ |
490 | int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode, | 490 | int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode, |
491 | struct page **pages, size_t num_pages, | 491 | struct page **pages, size_t num_pages, |
492 | loff_t pos, size_t write_bytes, | 492 | loff_t pos, size_t write_bytes, |
493 | struct extent_state **cached) | 493 | struct extent_state **cached) |
494 | { | 494 | { |
495 | int err = 0; | 495 | int err = 0; |
496 | int i; | 496 | int i; |
497 | u64 num_bytes; | 497 | u64 num_bytes; |
498 | u64 start_pos; | 498 | u64 start_pos; |
499 | u64 end_of_last_block; | 499 | u64 end_of_last_block; |
500 | u64 end_pos = pos + write_bytes; | 500 | u64 end_pos = pos + write_bytes; |
501 | loff_t isize = i_size_read(inode); | 501 | loff_t isize = i_size_read(inode); |
502 | 502 | ||
503 | start_pos = pos & ~((u64)root->sectorsize - 1); | 503 | start_pos = pos & ~((u64)root->sectorsize - 1); |
504 | num_bytes = ALIGN(write_bytes + pos - start_pos, root->sectorsize); | 504 | num_bytes = ALIGN(write_bytes + pos - start_pos, root->sectorsize); |
505 | 505 | ||
506 | end_of_last_block = start_pos + num_bytes - 1; | 506 | end_of_last_block = start_pos + num_bytes - 1; |
507 | err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, | 507 | err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, |
508 | cached); | 508 | cached); |
509 | if (err) | 509 | if (err) |
510 | return err; | 510 | return err; |
511 | 511 | ||
512 | for (i = 0; i < num_pages; i++) { | 512 | for (i = 0; i < num_pages; i++) { |
513 | struct page *p = pages[i]; | 513 | struct page *p = pages[i]; |
514 | SetPageUptodate(p); | 514 | SetPageUptodate(p); |
515 | ClearPageChecked(p); | 515 | ClearPageChecked(p); |
516 | set_page_dirty(p); | 516 | set_page_dirty(p); |
517 | } | 517 | } |
518 | 518 | ||
519 | /* | 519 | /* |
520 | * we've only changed i_size in ram, and we haven't updated | 520 | * we've only changed i_size in ram, and we haven't updated |
521 | * the disk i_size. There is no need to log the inode | 521 | * the disk i_size. There is no need to log the inode |
522 | * at this time. | 522 | * at this time. |
523 | */ | 523 | */ |
524 | if (end_pos > isize) | 524 | if (end_pos > isize) |
525 | i_size_write(inode, end_pos); | 525 | i_size_write(inode, end_pos); |
526 | return 0; | 526 | return 0; |
527 | } | 527 | } |
528 | 528 | ||
/*
 * Drop every extent map cache entry that intersects the byte range
 * [start, end] of the given inode.  Entries that only partially overlap
 * the range are split so the portions outside [start, end] stay cached.
 *
 * @inode:       inode whose extent map tree is modified
 * @start:       first byte of the range to drop
 * @end:         last byte of the range (inclusive); (u64)-1 means "to EOF"
 * @skip_pinned: if non-zero, leave EXTENT_FLAG_PINNED extents in the cache
 */
void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
			     int skip_pinned)
{
	struct extent_map *em;
	struct extent_map *split = NULL;
	struct extent_map *split2 = NULL;
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	u64 len = end - start + 1;
	u64 gen;
	int ret;
	int testend = 1;
	unsigned long flags;
	int compressed = 0;
	bool modified;

	WARN_ON(end < start);
	if (end == (u64)-1) {
		/* Open-ended drop: no tail check, search the whole range. */
		len = (u64)-1;
		testend = 0;
	}
	while (1) {
		int no_splits = 0;

		modified = false;
		/*
		 * Pre-allocate the split maps before taking the tree lock.
		 * If either allocation fails, fall back to removing the
		 * overlapping maps without splitting them (no_splits).
		 */
		if (!split)
			split = alloc_extent_map();
		if (!split2)
			split2 = alloc_extent_map();
		if (!split || !split2)
			no_splits = 1;

		write_lock(&em_tree->lock);
		em = lookup_extent_mapping(em_tree, start, len);
		if (!em) {
			/* Nothing left overlapping the range: done. */
			write_unlock(&em_tree->lock);
			break;
		}
		flags = em->flags;
		gen = em->generation;
		if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
			/*
			 * Pinned extent we must keep: either it covers the
			 * rest of the range (stop), or advance the search
			 * window past it and keep going.
			 */
			if (testend && em->start + em->len >= start + len) {
				free_extent_map(em);
				write_unlock(&em_tree->lock);
				break;
			}
			start = em->start + em->len;
			if (testend)
				len = start + len - (em->start + em->len);
			free_extent_map(em);
			write_unlock(&em_tree->lock);
			continue;
		}
		compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
		clear_bit(EXTENT_FLAG_PINNED, &em->flags);
		/* The split copies must not inherit the logging flag. */
		clear_bit(EXTENT_FLAG_LOGGING, &flags);
		modified = !list_empty(&em->list);
		if (no_splits)
			goto next;

		if (em->start < start) {
			/* Extent starts before the range: keep the front. */
			split->start = em->start;
			split->len = start - em->start;

			if (em->block_start < EXTENT_MAP_LAST_BYTE) {
				split->orig_start = em->orig_start;
				split->block_start = em->block_start;

				if (compressed)
					split->block_len = em->block_len;
				else
					split->block_len = split->len;
				split->orig_block_len = max(split->block_len,
						em->orig_block_len);
				split->ram_bytes = em->ram_bytes;
			} else {
				/* Hole/inline style mapping: no disk blocks. */
				split->orig_start = split->start;
				split->block_len = 0;
				split->block_start = em->block_start;
				split->orig_block_len = 0;
				split->ram_bytes = split->len;
			}

			split->generation = gen;
			split->bdev = em->bdev;
			split->flags = flags;
			split->compress_type = em->compress_type;
			replace_extent_mapping(em_tree, em, split, modified);
			free_extent_map(split);
			split = split2;
			split2 = NULL;
		}
		if (testend && em->start + em->len > start + len) {
			/* Extent ends after the range: keep the tail. */
			u64 diff = start + len - em->start;

			split->start = start + len;
			split->len = em->start + em->len - (start + len);
			split->bdev = em->bdev;
			split->flags = flags;
			split->compress_type = em->compress_type;
			split->generation = gen;

			if (em->block_start < EXTENT_MAP_LAST_BYTE) {
				split->orig_block_len = max(em->block_len,
						    em->orig_block_len);

				split->ram_bytes = em->ram_bytes;
				if (compressed) {
					split->block_len = em->block_len;
					split->block_start = em->block_start;
					split->orig_start = em->orig_start;
				} else {
					split->block_len = split->len;
					split->block_start = em->block_start
						+ diff;
					split->orig_start = em->orig_start;
				}
			} else {
				split->ram_bytes = split->len;
				split->orig_start = split->start;
				split->block_len = 0;
				split->block_start = em->block_start;
				split->orig_block_len = 0;
			}

			if (extent_map_in_tree(em)) {
				replace_extent_mapping(em_tree, em, split,
						       modified);
			} else {
				/*
				 * The front split already replaced em in the
				 * tree, so the tail must be added separately.
				 */
				ret = add_extent_mapping(em_tree, split,
							 modified);
				ASSERT(ret == 0); /* Logic error */
			}
			free_extent_map(split);
			split = NULL;
		}
next:
		/* Drop the (now fully covered) original mapping itself. */
		if (extent_map_in_tree(em))
			remove_extent_mapping(em_tree, em);
		write_unlock(&em_tree->lock);

		/* once for us */
		free_extent_map(em);
		/* once for the tree*/
		free_extent_map(em);
	}
	if (split)
		free_extent_map(split);
	if (split2)
		free_extent_map(split2);
}
683 | 683 | ||
/*
 * Drop (delete, truncate or split) all file extent items of @inode that
 * intersect the range [start, end).
 *
 * If an extent item intersects the range but is not entirely inside it,
 * it is either truncated or split.  Anything entirely inside the range
 * is deleted from the tree.
 *
 * @drop_end:       if non-NULL, set to min(@end, end of last extent found)
 *                  or to @end when no extent intersected the range
 * @drop_cache:     also drop the matching extent map cache entries
 * @replace_extent: caller wants to insert a replacement file extent item of
 *                  @extent_item_size bytes; if the leaf has room, the item
 *                  slot is pre-inserted and the path left positioned on it
 * @key_inserted:   set to 1 when that replacement slot was inserted
 *
 * Returns 0 on success or a negative errno (e.g. -EOPNOTSUPP when asked
 * to partially drop an inline extent).
 */
int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root, struct inode *inode,
			 struct btrfs_path *path, u64 start, u64 end,
			 u64 *drop_end, int drop_cache,
			 int replace_extent,
			 u32 extent_item_size,
			 int *key_inserted)
{
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	struct btrfs_key new_key;
	u64 ino = btrfs_ino(inode);
	u64 search_start = start;
	u64 disk_bytenr = 0;
	u64 num_bytes = 0;
	u64 extent_offset = 0;
	u64 extent_end = 0;
	int del_nr = 0;
	int del_slot = 0;
	int extent_type;
	int recow;
	int ret;
	int modify_tree = -1;
	/* Extent refs only need updating for ref-counted roots / tree root. */
	int update_refs = (root->ref_cows || root == root->fs_info->tree_root);
	int found = 0;
	int leafs_visited = 0;

	if (drop_cache)
		btrfs_drop_extent_cache(inode, start, end - 1, 0);

	/*
	 * Past the on-disk i_size there is nothing to modify unless we are
	 * going to insert a replacement extent item.
	 */
	if (start >= BTRFS_I(inode)->disk_i_size && !replace_extent)
		modify_tree = 0;

	while (1) {
		recow = 0;
		ret = btrfs_lookup_file_extent(trans, root, path, ino,
					       search_start, modify_tree);
		if (ret < 0)
			break;
		if (ret > 0 && path->slots[0] > 0 && search_start == start) {
			/*
			 * On the first search, step back one slot in case the
			 * previous extent item extends into our range.
			 */
			leaf = path->nodes[0];
			btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);
			if (key.objectid == ino &&
			    key.type == BTRFS_EXTENT_DATA_KEY)
				path->slots[0]--;
		}
		ret = 0;
		leafs_visited++;
next_slot:
		leaf = path->nodes[0];
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			BUG_ON(del_nr > 0);
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				break;
			if (ret > 0) {
				ret = 0;
				break;
			}
			leafs_visited++;
			leaf = path->nodes[0];
			recow = 1;
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid > ino ||
		    key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end)
			break;

		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		extent_type = btrfs_file_extent_type(leaf, fi);

		if (extent_type == BTRFS_FILE_EXTENT_REG ||
		    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
			disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
			num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
			extent_offset = btrfs_file_extent_offset(leaf, fi);
			extent_end = key.offset +
				btrfs_file_extent_num_bytes(leaf, fi);
		} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
			extent_end = key.offset +
				btrfs_file_extent_inline_len(leaf,
						     path->slots[0], fi);
		} else {
			WARN_ON(1);
			extent_end = search_start;
		}

		/*
		 * Don't skip extent items representing 0 byte lengths. They
		 * used to be created (bug) if while punching holes we hit
		 * -ENOSPC condition. So if we find one here, just ensure we
		 * delete it, otherwise we would insert a new file extent item
		 * with the same key (offset) as that 0 bytes length file
		 * extent item in the call to setup_items_for_insert() later
		 * in this function.
		 */
		if (extent_end == key.offset && extent_end >= search_start)
			goto delete_extent_item;

		if (extent_end <= search_start) {
			path->slots[0]++;
			goto next_slot;
		}

		found = 1;
		search_start = max(key.offset, start);
		if (recow || !modify_tree) {
			/*
			 * We must modify this leaf but the current path may
			 * be stale or read-only: redo the search in full
			 * modify mode.
			 */
			modify_tree = -1;
			btrfs_release_path(path);
			continue;
		}

		/*
		 *     | - range to drop - |
		 *  | -------- extent -------- |
		 */
		if (start > key.offset && end < extent_end) {
			/* Range is strictly inside: split into two pieces. */
			BUG_ON(del_nr > 0);
			if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
				ret = -EOPNOTSUPP;
				break;
			}

			memcpy(&new_key, &key, sizeof(new_key));
			new_key.offset = start;
			ret = btrfs_duplicate_item(trans, root, path,
						   &new_key);
			if (ret == -EAGAIN) {
				btrfs_release_path(path);
				continue;
			}
			if (ret < 0)
				break;

			leaf = path->nodes[0];
			fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
					    struct btrfs_file_extent_item);
			btrfs_set_file_extent_num_bytes(leaf, fi,
							start - key.offset);

			fi = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_file_extent_item);

			extent_offset += start - key.offset;
			btrfs_set_file_extent_offset(leaf, fi, extent_offset);
			btrfs_set_file_extent_num_bytes(leaf, fi,
							extent_end - start);
			btrfs_mark_buffer_dirty(leaf);

			if (update_refs && disk_bytenr > 0) {
				/* Both halves now reference the extent. */
				ret = btrfs_inc_extent_ref(trans, root,
						disk_bytenr, num_bytes, 0,
						root->root_key.objectid,
						new_key.objectid,
						start - extent_offset, 0);
				BUG_ON(ret); /* -ENOMEM */
			}
			key.offset = start;
		}
		/*
		 *  | ---- range to drop ----- |
		 *      | -------- extent -------- |
		 */
		if (start <= key.offset && end < extent_end) {
			/* Range covers the front: truncate from the left. */
			if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
				ret = -EOPNOTSUPP;
				break;
			}

			memcpy(&new_key, &key, sizeof(new_key));
			new_key.offset = end;
			btrfs_set_item_key_safe(root, path, &new_key);

			extent_offset += end - key.offset;
			btrfs_set_file_extent_offset(leaf, fi, extent_offset);
			btrfs_set_file_extent_num_bytes(leaf, fi,
							extent_end - end);
			btrfs_mark_buffer_dirty(leaf);
			if (update_refs && disk_bytenr > 0)
				inode_sub_bytes(inode, end - key.offset);
			break;
		}

		search_start = extent_end;
		/*
		 *       | ---- range to drop ----- |
		 *  | -------- extent -------- |
		 */
		if (start > key.offset && end >= extent_end) {
			/* Range covers the tail: truncate from the right. */
			BUG_ON(del_nr > 0);
			if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
				ret = -EOPNOTSUPP;
				break;
			}

			btrfs_set_file_extent_num_bytes(leaf, fi,
							start - key.offset);
			btrfs_mark_buffer_dirty(leaf);
			if (update_refs && disk_bytenr > 0)
				inode_sub_bytes(inode, extent_end - start);
			if (end == extent_end)
				break;

			path->slots[0]++;
			goto next_slot;
		}

		/*
		 *  | ---- range to drop ----- |
		 *    | ------ extent ------ |
		 */
		if (start <= key.offset && end >= extent_end) {
			/* Extent fully covered: queue the item for deletion. */
delete_extent_item:
			if (del_nr == 0) {
				del_slot = path->slots[0];
				del_nr = 1;
			} else {
				BUG_ON(del_slot + del_nr != path->slots[0]);
				del_nr++;
			}

			if (update_refs &&
			    extent_type == BTRFS_FILE_EXTENT_INLINE) {
				inode_sub_bytes(inode,
						extent_end - key.offset);
				extent_end = ALIGN(extent_end,
						   root->sectorsize);
			} else if (update_refs && disk_bytenr > 0) {
				ret = btrfs_free_extent(trans, root,
						disk_bytenr, num_bytes, 0,
						root->root_key.objectid,
						key.objectid, key.offset -
						extent_offset, 0);
				BUG_ON(ret); /* -ENOMEM */
				inode_sub_bytes(inode,
						extent_end - key.offset);
			}

			if (end == extent_end)
				break;

			if (path->slots[0] + 1 < btrfs_header_nritems(leaf)) {
				path->slots[0]++;
				goto next_slot;
			}

			/* End of leaf: flush the pending batched deletes. */
			ret = btrfs_del_items(trans, root, path, del_slot,
					      del_nr);
			if (ret) {
				btrfs_abort_transaction(trans, root, ret);
				break;
			}

			del_nr = 0;
			del_slot = 0;

			btrfs_release_path(path);
			continue;
		}

		BUG_ON(1);
	}

	if (!ret && del_nr > 0) {
		/*
		 * Set path->slots[0] to first slot, so that after the delete
		 * if items are move off from our leaf to its immediate left or
		 * right neighbor leafs, we end up with a correct and adjusted
		 * path->slots[0] for our insertion (if replace_extent != 0).
		 */
		path->slots[0] = del_slot;
		ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
		if (ret)
			btrfs_abort_transaction(trans, root, ret);
	}

	leaf = path->nodes[0];
	/*
	 * If btrfs_del_items() was called, it might have deleted a leaf, in
	 * which case it unlocked our path, so check path->locks[0] matches a
	 * write lock.
	 */
	if (!ret && replace_extent && leafs_visited == 1 &&
	    (path->locks[0] == BTRFS_WRITE_LOCK_BLOCKING ||
	     path->locks[0] == BTRFS_WRITE_LOCK) &&
	    btrfs_leaf_free_space(root, leaf) >=
	    sizeof(struct btrfs_item) + extent_item_size) {

		key.objectid = ino;
		key.type = BTRFS_EXTENT_DATA_KEY;
		key.offset = start;
		if (!del_nr && path->slots[0] < btrfs_header_nritems(leaf)) {
			struct btrfs_key slot_key;

			btrfs_item_key_to_cpu(leaf, &slot_key, path->slots[0]);
			if (btrfs_comp_cpu_keys(&key, &slot_key) > 0)
				path->slots[0]++;
		}
		setup_items_for_insert(root, path, &key,
				       &extent_item_size,
				       extent_item_size,
				       sizeof(struct btrfs_item) +
				       extent_item_size, 1);
		*key_inserted = 1;
	}

	if (!replace_extent || !(*key_inserted))
		btrfs_release_path(path);
	if (drop_end)
		*drop_end = found ? min(end, extent_end) : end;
	return ret;
}
1008 | 1008 | ||
1009 | int btrfs_drop_extents(struct btrfs_trans_handle *trans, | 1009 | int btrfs_drop_extents(struct btrfs_trans_handle *trans, |
1010 | struct btrfs_root *root, struct inode *inode, u64 start, | 1010 | struct btrfs_root *root, struct inode *inode, u64 start, |
1011 | u64 end, int drop_cache) | 1011 | u64 end, int drop_cache) |
1012 | { | 1012 | { |
1013 | struct btrfs_path *path; | 1013 | struct btrfs_path *path; |
1014 | int ret; | 1014 | int ret; |
1015 | 1015 | ||
1016 | path = btrfs_alloc_path(); | 1016 | path = btrfs_alloc_path(); |
1017 | if (!path) | 1017 | if (!path) |
1018 | return -ENOMEM; | 1018 | return -ENOMEM; |
1019 | ret = __btrfs_drop_extents(trans, root, inode, path, start, end, NULL, | 1019 | ret = __btrfs_drop_extents(trans, root, inode, path, start, end, NULL, |
1020 | drop_cache, 0, 0, NULL); | 1020 | drop_cache, 0, 0, NULL); |
1021 | btrfs_free_path(path); | 1021 | btrfs_free_path(path); |
1022 | return ret; | 1022 | return ret; |
1023 | } | 1023 | } |
1024 | 1024 | ||
1025 | static int extent_mergeable(struct extent_buffer *leaf, int slot, | 1025 | static int extent_mergeable(struct extent_buffer *leaf, int slot, |
1026 | u64 objectid, u64 bytenr, u64 orig_offset, | 1026 | u64 objectid, u64 bytenr, u64 orig_offset, |
1027 | u64 *start, u64 *end) | 1027 | u64 *start, u64 *end) |
1028 | { | 1028 | { |
1029 | struct btrfs_file_extent_item *fi; | 1029 | struct btrfs_file_extent_item *fi; |
1030 | struct btrfs_key key; | 1030 | struct btrfs_key key; |
1031 | u64 extent_end; | 1031 | u64 extent_end; |
1032 | 1032 | ||
1033 | if (slot < 0 || slot >= btrfs_header_nritems(leaf)) | 1033 | if (slot < 0 || slot >= btrfs_header_nritems(leaf)) |
1034 | return 0; | 1034 | return 0; |
1035 | 1035 | ||
1036 | btrfs_item_key_to_cpu(leaf, &key, slot); | 1036 | btrfs_item_key_to_cpu(leaf, &key, slot); |
1037 | if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY) | 1037 | if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY) |
1038 | return 0; | 1038 | return 0; |
1039 | 1039 | ||
1040 | fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); | 1040 | fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); |
1041 | if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG || | 1041 | if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG || |
1042 | btrfs_file_extent_disk_bytenr(leaf, fi) != bytenr || | 1042 | btrfs_file_extent_disk_bytenr(leaf, fi) != bytenr || |
1043 | btrfs_file_extent_offset(leaf, fi) != key.offset - orig_offset || | 1043 | btrfs_file_extent_offset(leaf, fi) != key.offset - orig_offset || |
1044 | btrfs_file_extent_compression(leaf, fi) || | 1044 | btrfs_file_extent_compression(leaf, fi) || |
1045 | btrfs_file_extent_encryption(leaf, fi) || | 1045 | btrfs_file_extent_encryption(leaf, fi) || |
1046 | btrfs_file_extent_other_encoding(leaf, fi)) | 1046 | btrfs_file_extent_other_encoding(leaf, fi)) |
1047 | return 0; | 1047 | return 0; |
1048 | 1048 | ||
1049 | extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); | 1049 | extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); |
1050 | if ((*start && *start != key.offset) || (*end && *end != extent_end)) | 1050 | if ((*start && *start != key.offset) || (*end && *end != extent_end)) |
1051 | return 0; | 1051 | return 0; |
1052 | 1052 | ||
1053 | *start = key.offset; | 1053 | *start = key.offset; |
1054 | *end = extent_end; | 1054 | *end = extent_end; |
1055 | return 1; | 1055 | return 1; |
1056 | } | 1056 | } |
1057 | 1057 | ||
1058 | /* | 1058 | /* |
1059 | * Mark extent in the range start - end as written. | 1059 | * Mark extent in the range start - end as written. |
1060 | * | 1060 | * |
1061 | * This changes extent type from 'pre-allocated' to 'regular'. If only | 1061 | * This changes extent type from 'pre-allocated' to 'regular'. If only |
1062 | * part of extent is marked as written, the extent will be split into | 1062 | * part of extent is marked as written, the extent will be split into |
1063 | * two or three. | 1063 | * two or three. |
1064 | */ | 1064 | */ |
1065 | int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, | 1065 | int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, |
1066 | struct inode *inode, u64 start, u64 end) | 1066 | struct inode *inode, u64 start, u64 end) |
1067 | { | 1067 | { |
1068 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1068 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1069 | struct extent_buffer *leaf; | 1069 | struct extent_buffer *leaf; |
1070 | struct btrfs_path *path; | 1070 | struct btrfs_path *path; |
1071 | struct btrfs_file_extent_item *fi; | 1071 | struct btrfs_file_extent_item *fi; |
1072 | struct btrfs_key key; | 1072 | struct btrfs_key key; |
1073 | struct btrfs_key new_key; | 1073 | struct btrfs_key new_key; |
1074 | u64 bytenr; | 1074 | u64 bytenr; |
1075 | u64 num_bytes; | 1075 | u64 num_bytes; |
1076 | u64 extent_end; | 1076 | u64 extent_end; |
1077 | u64 orig_offset; | 1077 | u64 orig_offset; |
1078 | u64 other_start; | 1078 | u64 other_start; |
1079 | u64 other_end; | 1079 | u64 other_end; |
1080 | u64 split; | 1080 | u64 split; |
1081 | int del_nr = 0; | 1081 | int del_nr = 0; |
1082 | int del_slot = 0; | 1082 | int del_slot = 0; |
1083 | int recow; | 1083 | int recow; |
1084 | int ret; | 1084 | int ret; |
1085 | u64 ino = btrfs_ino(inode); | 1085 | u64 ino = btrfs_ino(inode); |
1086 | 1086 | ||
1087 | path = btrfs_alloc_path(); | 1087 | path = btrfs_alloc_path(); |
1088 | if (!path) | 1088 | if (!path) |
1089 | return -ENOMEM; | 1089 | return -ENOMEM; |
1090 | again: | 1090 | again: |
1091 | recow = 0; | 1091 | recow = 0; |
1092 | split = start; | 1092 | split = start; |
1093 | key.objectid = ino; | 1093 | key.objectid = ino; |
1094 | key.type = BTRFS_EXTENT_DATA_KEY; | 1094 | key.type = BTRFS_EXTENT_DATA_KEY; |
1095 | key.offset = split; | 1095 | key.offset = split; |
1096 | 1096 | ||
1097 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | 1097 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); |
1098 | if (ret < 0) | 1098 | if (ret < 0) |
1099 | goto out; | 1099 | goto out; |
1100 | if (ret > 0 && path->slots[0] > 0) | 1100 | if (ret > 0 && path->slots[0] > 0) |
1101 | path->slots[0]--; | 1101 | path->slots[0]--; |
1102 | 1102 | ||
1103 | leaf = path->nodes[0]; | 1103 | leaf = path->nodes[0]; |
1104 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | 1104 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); |
1105 | BUG_ON(key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY); | 1105 | BUG_ON(key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY); |
1106 | fi = btrfs_item_ptr(leaf, path->slots[0], | 1106 | fi = btrfs_item_ptr(leaf, path->slots[0], |
1107 | struct btrfs_file_extent_item); | 1107 | struct btrfs_file_extent_item); |
1108 | BUG_ON(btrfs_file_extent_type(leaf, fi) != | 1108 | BUG_ON(btrfs_file_extent_type(leaf, fi) != |
1109 | BTRFS_FILE_EXTENT_PREALLOC); | 1109 | BTRFS_FILE_EXTENT_PREALLOC); |
1110 | extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); | 1110 | extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); |
1111 | BUG_ON(key.offset > start || extent_end < end); | 1111 | BUG_ON(key.offset > start || extent_end < end); |
1112 | 1112 | ||
1113 | bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); | 1113 | bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); |
1114 | num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); | 1114 | num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); |
1115 | orig_offset = key.offset - btrfs_file_extent_offset(leaf, fi); | 1115 | orig_offset = key.offset - btrfs_file_extent_offset(leaf, fi); |
1116 | memcpy(&new_key, &key, sizeof(new_key)); | 1116 | memcpy(&new_key, &key, sizeof(new_key)); |
1117 | 1117 | ||
1118 | if (start == key.offset && end < extent_end) { | 1118 | if (start == key.offset && end < extent_end) { |
1119 | other_start = 0; | 1119 | other_start = 0; |
1120 | other_end = start; | 1120 | other_end = start; |
1121 | if (extent_mergeable(leaf, path->slots[0] - 1, | 1121 | if (extent_mergeable(leaf, path->slots[0] - 1, |
1122 | ino, bytenr, orig_offset, | 1122 | ino, bytenr, orig_offset, |
1123 | &other_start, &other_end)) { | 1123 | &other_start, &other_end)) { |
1124 | new_key.offset = end; | 1124 | new_key.offset = end; |
1125 | btrfs_set_item_key_safe(root, path, &new_key); | 1125 | btrfs_set_item_key_safe(root, path, &new_key); |
1126 | fi = btrfs_item_ptr(leaf, path->slots[0], | 1126 | fi = btrfs_item_ptr(leaf, path->slots[0], |
1127 | struct btrfs_file_extent_item); | 1127 | struct btrfs_file_extent_item); |
1128 | btrfs_set_file_extent_generation(leaf, fi, | 1128 | btrfs_set_file_extent_generation(leaf, fi, |
1129 | trans->transid); | 1129 | trans->transid); |
1130 | btrfs_set_file_extent_num_bytes(leaf, fi, | 1130 | btrfs_set_file_extent_num_bytes(leaf, fi, |
1131 | extent_end - end); | 1131 | extent_end - end); |
1132 | btrfs_set_file_extent_offset(leaf, fi, | 1132 | btrfs_set_file_extent_offset(leaf, fi, |
1133 | end - orig_offset); | 1133 | end - orig_offset); |
1134 | fi = btrfs_item_ptr(leaf, path->slots[0] - 1, | 1134 | fi = btrfs_item_ptr(leaf, path->slots[0] - 1, |
1135 | struct btrfs_file_extent_item); | 1135 | struct btrfs_file_extent_item); |
1136 | btrfs_set_file_extent_generation(leaf, fi, | 1136 | btrfs_set_file_extent_generation(leaf, fi, |
1137 | trans->transid); | 1137 | trans->transid); |
1138 | btrfs_set_file_extent_num_bytes(leaf, fi, | 1138 | btrfs_set_file_extent_num_bytes(leaf, fi, |
1139 | end - other_start); | 1139 | end - other_start); |
1140 | btrfs_mark_buffer_dirty(leaf); | 1140 | btrfs_mark_buffer_dirty(leaf); |
1141 | goto out; | 1141 | goto out; |
1142 | } | 1142 | } |
1143 | } | 1143 | } |
1144 | 1144 | ||
1145 | if (start > key.offset && end == extent_end) { | 1145 | if (start > key.offset && end == extent_end) { |
1146 | other_start = end; | 1146 | other_start = end; |
1147 | other_end = 0; | 1147 | other_end = 0; |
1148 | if (extent_mergeable(leaf, path->slots[0] + 1, | 1148 | if (extent_mergeable(leaf, path->slots[0] + 1, |
1149 | ino, bytenr, orig_offset, | 1149 | ino, bytenr, orig_offset, |
1150 | &other_start, &other_end)) { | 1150 | &other_start, &other_end)) { |
1151 | fi = btrfs_item_ptr(leaf, path->slots[0], | 1151 | fi = btrfs_item_ptr(leaf, path->slots[0], |
1152 | struct btrfs_file_extent_item); | 1152 | struct btrfs_file_extent_item); |
1153 | btrfs_set_file_extent_num_bytes(leaf, fi, | 1153 | btrfs_set_file_extent_num_bytes(leaf, fi, |
1154 | start - key.offset); | 1154 | start - key.offset); |
1155 | btrfs_set_file_extent_generation(leaf, fi, | 1155 | btrfs_set_file_extent_generation(leaf, fi, |
1156 | trans->transid); | 1156 | trans->transid); |
1157 | path->slots[0]++; | 1157 | path->slots[0]++; |
1158 | new_key.offset = start; | 1158 | new_key.offset = start; |
1159 | btrfs_set_item_key_safe(root, path, &new_key); | 1159 | btrfs_set_item_key_safe(root, path, &new_key); |
1160 | 1160 | ||
1161 | fi = btrfs_item_ptr(leaf, path->slots[0], | 1161 | fi = btrfs_item_ptr(leaf, path->slots[0], |
1162 | struct btrfs_file_extent_item); | 1162 | struct btrfs_file_extent_item); |
1163 | btrfs_set_file_extent_generation(leaf, fi, | 1163 | btrfs_set_file_extent_generation(leaf, fi, |
1164 | trans->transid); | 1164 | trans->transid); |
1165 | btrfs_set_file_extent_num_bytes(leaf, fi, | 1165 | btrfs_set_file_extent_num_bytes(leaf, fi, |
1166 | other_end - start); | 1166 | other_end - start); |
1167 | btrfs_set_file_extent_offset(leaf, fi, | 1167 | btrfs_set_file_extent_offset(leaf, fi, |
1168 | start - orig_offset); | 1168 | start - orig_offset); |
1169 | btrfs_mark_buffer_dirty(leaf); | 1169 | btrfs_mark_buffer_dirty(leaf); |
1170 | goto out; | 1170 | goto out; |
1171 | } | 1171 | } |
1172 | } | 1172 | } |
1173 | 1173 | ||
1174 | while (start > key.offset || end < extent_end) { | 1174 | while (start > key.offset || end < extent_end) { |
1175 | if (key.offset == start) | 1175 | if (key.offset == start) |
1176 | split = end; | 1176 | split = end; |
1177 | 1177 | ||
1178 | new_key.offset = split; | 1178 | new_key.offset = split; |
1179 | ret = btrfs_duplicate_item(trans, root, path, &new_key); | 1179 | ret = btrfs_duplicate_item(trans, root, path, &new_key); |
1180 | if (ret == -EAGAIN) { | 1180 | if (ret == -EAGAIN) { |
1181 | btrfs_release_path(path); | 1181 | btrfs_release_path(path); |
1182 | goto again; | 1182 | goto again; |
1183 | } | 1183 | } |
1184 | if (ret < 0) { | 1184 | if (ret < 0) { |
1185 | btrfs_abort_transaction(trans, root, ret); | 1185 | btrfs_abort_transaction(trans, root, ret); |
1186 | goto out; | 1186 | goto out; |
1187 | } | 1187 | } |
1188 | 1188 | ||
1189 | leaf = path->nodes[0]; | 1189 | leaf = path->nodes[0]; |
1190 | fi = btrfs_item_ptr(leaf, path->slots[0] - 1, | 1190 | fi = btrfs_item_ptr(leaf, path->slots[0] - 1, |
1191 | struct btrfs_file_extent_item); | 1191 | struct btrfs_file_extent_item); |
1192 | btrfs_set_file_extent_generation(leaf, fi, trans->transid); | 1192 | btrfs_set_file_extent_generation(leaf, fi, trans->transid); |
1193 | btrfs_set_file_extent_num_bytes(leaf, fi, | 1193 | btrfs_set_file_extent_num_bytes(leaf, fi, |
1194 | split - key.offset); | 1194 | split - key.offset); |
1195 | 1195 | ||
1196 | fi = btrfs_item_ptr(leaf, path->slots[0], | 1196 | fi = btrfs_item_ptr(leaf, path->slots[0], |
1197 | struct btrfs_file_extent_item); | 1197 | struct btrfs_file_extent_item); |
1198 | 1198 | ||
1199 | btrfs_set_file_extent_generation(leaf, fi, trans->transid); | 1199 | btrfs_set_file_extent_generation(leaf, fi, trans->transid); |
1200 | btrfs_set_file_extent_offset(leaf, fi, split - orig_offset); | 1200 | btrfs_set_file_extent_offset(leaf, fi, split - orig_offset); |
1201 | btrfs_set_file_extent_num_bytes(leaf, fi, | 1201 | btrfs_set_file_extent_num_bytes(leaf, fi, |
1202 | extent_end - split); | 1202 | extent_end - split); |
1203 | btrfs_mark_buffer_dirty(leaf); | 1203 | btrfs_mark_buffer_dirty(leaf); |
1204 | 1204 | ||
1205 | ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, | 1205 | ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, |
1206 | root->root_key.objectid, | 1206 | root->root_key.objectid, |
1207 | ino, orig_offset, 0); | 1207 | ino, orig_offset, 0); |
1208 | BUG_ON(ret); /* -ENOMEM */ | 1208 | BUG_ON(ret); /* -ENOMEM */ |
1209 | 1209 | ||
1210 | if (split == start) { | 1210 | if (split == start) { |
1211 | key.offset = start; | 1211 | key.offset = start; |
1212 | } else { | 1212 | } else { |
1213 | BUG_ON(start != key.offset); | 1213 | BUG_ON(start != key.offset); |
1214 | path->slots[0]--; | 1214 | path->slots[0]--; |
1215 | extent_end = end; | 1215 | extent_end = end; |
1216 | } | 1216 | } |
1217 | recow = 1; | 1217 | recow = 1; |
1218 | } | 1218 | } |
1219 | 1219 | ||
1220 | other_start = end; | 1220 | other_start = end; |
1221 | other_end = 0; | 1221 | other_end = 0; |
1222 | if (extent_mergeable(leaf, path->slots[0] + 1, | 1222 | if (extent_mergeable(leaf, path->slots[0] + 1, |
1223 | ino, bytenr, orig_offset, | 1223 | ino, bytenr, orig_offset, |
1224 | &other_start, &other_end)) { | 1224 | &other_start, &other_end)) { |
1225 | if (recow) { | 1225 | if (recow) { |
1226 | btrfs_release_path(path); | 1226 | btrfs_release_path(path); |
1227 | goto again; | 1227 | goto again; |
1228 | } | 1228 | } |
1229 | extent_end = other_end; | 1229 | extent_end = other_end; |
1230 | del_slot = path->slots[0] + 1; | 1230 | del_slot = path->slots[0] + 1; |
1231 | del_nr++; | 1231 | del_nr++; |
1232 | ret = btrfs_free_extent(trans, root, bytenr, num_bytes, | 1232 | ret = btrfs_free_extent(trans, root, bytenr, num_bytes, |
1233 | 0, root->root_key.objectid, | 1233 | 0, root->root_key.objectid, |
1234 | ino, orig_offset, 0); | 1234 | ino, orig_offset, 0); |
1235 | BUG_ON(ret); /* -ENOMEM */ | 1235 | BUG_ON(ret); /* -ENOMEM */ |
1236 | } | 1236 | } |
1237 | other_start = 0; | 1237 | other_start = 0; |
1238 | other_end = start; | 1238 | other_end = start; |
1239 | if (extent_mergeable(leaf, path->slots[0] - 1, | 1239 | if (extent_mergeable(leaf, path->slots[0] - 1, |
1240 | ino, bytenr, orig_offset, | 1240 | ino, bytenr, orig_offset, |
1241 | &other_start, &other_end)) { | 1241 | &other_start, &other_end)) { |
1242 | if (recow) { | 1242 | if (recow) { |
1243 | btrfs_release_path(path); | 1243 | btrfs_release_path(path); |
1244 | goto again; | 1244 | goto again; |
1245 | } | 1245 | } |
1246 | key.offset = other_start; | 1246 | key.offset = other_start; |
1247 | del_slot = path->slots[0]; | 1247 | del_slot = path->slots[0]; |
1248 | del_nr++; | 1248 | del_nr++; |
1249 | ret = btrfs_free_extent(trans, root, bytenr, num_bytes, | 1249 | ret = btrfs_free_extent(trans, root, bytenr, num_bytes, |
1250 | 0, root->root_key.objectid, | 1250 | 0, root->root_key.objectid, |
1251 | ino, orig_offset, 0); | 1251 | ino, orig_offset, 0); |
1252 | BUG_ON(ret); /* -ENOMEM */ | 1252 | BUG_ON(ret); /* -ENOMEM */ |
1253 | } | 1253 | } |
1254 | if (del_nr == 0) { | 1254 | if (del_nr == 0) { |
1255 | fi = btrfs_item_ptr(leaf, path->slots[0], | 1255 | fi = btrfs_item_ptr(leaf, path->slots[0], |
1256 | struct btrfs_file_extent_item); | 1256 | struct btrfs_file_extent_item); |
1257 | btrfs_set_file_extent_type(leaf, fi, | 1257 | btrfs_set_file_extent_type(leaf, fi, |
1258 | BTRFS_FILE_EXTENT_REG); | 1258 | BTRFS_FILE_EXTENT_REG); |
1259 | btrfs_set_file_extent_generation(leaf, fi, trans->transid); | 1259 | btrfs_set_file_extent_generation(leaf, fi, trans->transid); |
1260 | btrfs_mark_buffer_dirty(leaf); | 1260 | btrfs_mark_buffer_dirty(leaf); |
1261 | } else { | 1261 | } else { |
1262 | fi = btrfs_item_ptr(leaf, del_slot - 1, | 1262 | fi = btrfs_item_ptr(leaf, del_slot - 1, |
1263 | struct btrfs_file_extent_item); | 1263 | struct btrfs_file_extent_item); |
1264 | btrfs_set_file_extent_type(leaf, fi, | 1264 | btrfs_set_file_extent_type(leaf, fi, |
1265 | BTRFS_FILE_EXTENT_REG); | 1265 | BTRFS_FILE_EXTENT_REG); |
1266 | btrfs_set_file_extent_generation(leaf, fi, trans->transid); | 1266 | btrfs_set_file_extent_generation(leaf, fi, trans->transid); |
1267 | btrfs_set_file_extent_num_bytes(leaf, fi, | 1267 | btrfs_set_file_extent_num_bytes(leaf, fi, |
1268 | extent_end - key.offset); | 1268 | extent_end - key.offset); |
1269 | btrfs_mark_buffer_dirty(leaf); | 1269 | btrfs_mark_buffer_dirty(leaf); |
1270 | 1270 | ||
1271 | ret = btrfs_del_items(trans, root, path, del_slot, del_nr); | 1271 | ret = btrfs_del_items(trans, root, path, del_slot, del_nr); |
1272 | if (ret < 0) { | 1272 | if (ret < 0) { |
1273 | btrfs_abort_transaction(trans, root, ret); | 1273 | btrfs_abort_transaction(trans, root, ret); |
1274 | goto out; | 1274 | goto out; |
1275 | } | 1275 | } |
1276 | } | 1276 | } |
1277 | out: | 1277 | out: |
1278 | btrfs_free_path(path); | 1278 | btrfs_free_path(path); |
1279 | return 0; | 1279 | return 0; |
1280 | } | 1280 | } |
1281 | 1281 | ||
1282 | /* | 1282 | /* |
1283 | * on error we return an unlocked page and the error value | 1283 | * on error we return an unlocked page and the error value |
1284 | * on success we return a locked page and 0 | 1284 | * on success we return a locked page and 0 |
1285 | */ | 1285 | */ |
1286 | static int prepare_uptodate_page(struct page *page, u64 pos, | 1286 | static int prepare_uptodate_page(struct page *page, u64 pos, |
1287 | bool force_uptodate) | 1287 | bool force_uptodate) |
1288 | { | 1288 | { |
1289 | int ret = 0; | 1289 | int ret = 0; |
1290 | 1290 | ||
1291 | if (((pos & (PAGE_CACHE_SIZE - 1)) || force_uptodate) && | 1291 | if (((pos & (PAGE_CACHE_SIZE - 1)) || force_uptodate) && |
1292 | !PageUptodate(page)) { | 1292 | !PageUptodate(page)) { |
1293 | ret = btrfs_readpage(NULL, page); | 1293 | ret = btrfs_readpage(NULL, page); |
1294 | if (ret) | 1294 | if (ret) |
1295 | return ret; | 1295 | return ret; |
1296 | lock_page(page); | 1296 | lock_page(page); |
1297 | if (!PageUptodate(page)) { | 1297 | if (!PageUptodate(page)) { |
1298 | unlock_page(page); | 1298 | unlock_page(page); |
1299 | return -EIO; | 1299 | return -EIO; |
1300 | } | 1300 | } |
1301 | } | 1301 | } |
1302 | return 0; | 1302 | return 0; |
1303 | } | 1303 | } |
1304 | 1304 | ||
1305 | /* | 1305 | /* |
1306 | * this just gets pages into the page cache and locks them down. | 1306 | * this just gets pages into the page cache and locks them down. |
1307 | */ | 1307 | */ |
1308 | static noinline int prepare_pages(struct inode *inode, struct page **pages, | 1308 | static noinline int prepare_pages(struct inode *inode, struct page **pages, |
1309 | size_t num_pages, loff_t pos, | 1309 | size_t num_pages, loff_t pos, |
1310 | size_t write_bytes, bool force_uptodate) | 1310 | size_t write_bytes, bool force_uptodate) |
1311 | { | 1311 | { |
1312 | int i; | 1312 | int i; |
1313 | unsigned long index = pos >> PAGE_CACHE_SHIFT; | 1313 | unsigned long index = pos >> PAGE_CACHE_SHIFT; |
1314 | gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); | 1314 | gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); |
1315 | int err = 0; | 1315 | int err = 0; |
1316 | int faili; | 1316 | int faili; |
1317 | 1317 | ||
1318 | for (i = 0; i < num_pages; i++) { | 1318 | for (i = 0; i < num_pages; i++) { |
1319 | pages[i] = find_or_create_page(inode->i_mapping, index + i, | 1319 | pages[i] = find_or_create_page(inode->i_mapping, index + i, |
1320 | mask | __GFP_WRITE); | 1320 | mask | __GFP_WRITE); |
1321 | if (!pages[i]) { | 1321 | if (!pages[i]) { |
1322 | faili = i - 1; | 1322 | faili = i - 1; |
1323 | err = -ENOMEM; | 1323 | err = -ENOMEM; |
1324 | goto fail; | 1324 | goto fail; |
1325 | } | 1325 | } |
1326 | 1326 | ||
1327 | if (i == 0) | 1327 | if (i == 0) |
1328 | err = prepare_uptodate_page(pages[i], pos, | 1328 | err = prepare_uptodate_page(pages[i], pos, |
1329 | force_uptodate); | 1329 | force_uptodate); |
1330 | if (i == num_pages - 1) | 1330 | if (i == num_pages - 1) |
1331 | err = prepare_uptodate_page(pages[i], | 1331 | err = prepare_uptodate_page(pages[i], |
1332 | pos + write_bytes, false); | 1332 | pos + write_bytes, false); |
1333 | if (err) { | 1333 | if (err) { |
1334 | page_cache_release(pages[i]); | 1334 | page_cache_release(pages[i]); |
1335 | faili = i - 1; | 1335 | faili = i - 1; |
1336 | goto fail; | 1336 | goto fail; |
1337 | } | 1337 | } |
1338 | wait_on_page_writeback(pages[i]); | 1338 | wait_on_page_writeback(pages[i]); |
1339 | } | 1339 | } |
1340 | 1340 | ||
1341 | return 0; | 1341 | return 0; |
1342 | fail: | 1342 | fail: |
1343 | while (faili >= 0) { | 1343 | while (faili >= 0) { |
1344 | unlock_page(pages[faili]); | 1344 | unlock_page(pages[faili]); |
1345 | page_cache_release(pages[faili]); | 1345 | page_cache_release(pages[faili]); |
1346 | faili--; | 1346 | faili--; |
1347 | } | 1347 | } |
1348 | return err; | 1348 | return err; |
1349 | 1349 | ||
1350 | } | 1350 | } |
1351 | 1351 | ||
1352 | /* | 1352 | /* |
1353 | * This function locks the extent and properly waits for data=ordered extents | 1353 | * This function locks the extent and properly waits for data=ordered extents |
1354 | * to finish before allowing the pages to be modified if need. | 1354 | * to finish before allowing the pages to be modified if need. |
1355 | * | 1355 | * |
1356 | * The return value: | 1356 | * The return value: |
1357 | * 1 - the extent is locked | 1357 | * 1 - the extent is locked |
1358 | * 0 - the extent is not locked, and everything is OK | 1358 | * 0 - the extent is not locked, and everything is OK |
1359 | * -EAGAIN - need re-prepare the pages | 1359 | * -EAGAIN - need re-prepare the pages |
1360 | * the other < 0 number - Something wrong happens | 1360 | * the other < 0 number - Something wrong happens |
1361 | */ | 1361 | */ |
static noinline int
lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages,
				size_t num_pages, loff_t pos,
				u64 *lockstart, u64 *lockend,
				struct extent_state **cached_state)
{
	u64 start_pos;
	u64 last_pos;
	int i;
	int ret = 0;

	/* Page-align the write range covered by the already-locked pages. */
	start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1);
	last_pos = start_pos + ((u64)num_pages << PAGE_CACHE_SHIFT) - 1;

	/*
	 * Only ranges below i_size can have previously written data with
	 * pending ordered extents; beyond i_size no extent lock is needed.
	 */
	if (start_pos < inode->i_size) {
		struct btrfs_ordered_extent *ordered;
		lock_extent_bits(&BTRFS_I(inode)->io_tree,
				 start_pos, last_pos, 0, cached_state);
		ordered = btrfs_lookup_ordered_range(inode, start_pos,
						     last_pos - start_pos + 1);
		if (ordered &&
		    ordered->file_offset + ordered->len > start_pos &&
		    ordered->file_offset <= last_pos) {
			/*
			 * An ordered extent overlaps the range: drop the
			 * extent lock and all page locks/references before
			 * waiting for it, then ask the caller to re-prepare
			 * the pages from scratch.
			 */
			unlock_extent_cached(&BTRFS_I(inode)->io_tree,
					     start_pos, last_pos,
					     cached_state, GFP_NOFS);
			for (i = 0; i < num_pages; i++) {
				unlock_page(pages[i]);
				page_cache_release(pages[i]);
			}
			btrfs_start_ordered_extent(inode, ordered, 1);
			btrfs_put_ordered_extent(ordered);
			return -EAGAIN;
		}
		if (ordered)
			btrfs_put_ordered_extent(ordered);

		/*
		 * Clear stale delalloc/accounting state for the range we are
		 * about to dirty again; the extent stays locked (ret = 1)
		 * and the caller must unlock [*lockstart, *lockend] later.
		 */
		clear_extent_bit(&BTRFS_I(inode)->io_tree, start_pos,
				 last_pos, EXTENT_DIRTY | EXTENT_DELALLOC |
				 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
				 0, 0, cached_state, GFP_NOFS);
		*lockstart = start_pos;
		*lockend = last_pos;
		ret = 1;
	}

	/* Redirty accounting and mapping setup for each prepared page. */
	for (i = 0; i < num_pages; i++) {
		if (clear_page_dirty_for_io(pages[i]))
			account_page_redirty(pages[i]);
		set_page_extent_mapped(pages[i]);
		WARN_ON(!PageLocked(pages[i]));
	}

	return ret;
}
1417 | 1417 | ||
1418 | static noinline int check_can_nocow(struct inode *inode, loff_t pos, | 1418 | static noinline int check_can_nocow(struct inode *inode, loff_t pos, |
1419 | size_t *write_bytes) | 1419 | size_t *write_bytes) |
1420 | { | 1420 | { |
1421 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1421 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1422 | struct btrfs_ordered_extent *ordered; | 1422 | struct btrfs_ordered_extent *ordered; |
1423 | u64 lockstart, lockend; | 1423 | u64 lockstart, lockend; |
1424 | u64 num_bytes; | 1424 | u64 num_bytes; |
1425 | int ret; | 1425 | int ret; |
1426 | 1426 | ||
1427 | ret = btrfs_start_nocow_write(root); | 1427 | ret = btrfs_start_nocow_write(root); |
1428 | if (!ret) | 1428 | if (!ret) |
1429 | return -ENOSPC; | 1429 | return -ENOSPC; |
1430 | 1430 | ||
1431 | lockstart = round_down(pos, root->sectorsize); | 1431 | lockstart = round_down(pos, root->sectorsize); |
1432 | lockend = round_up(pos + *write_bytes, root->sectorsize) - 1; | 1432 | lockend = round_up(pos + *write_bytes, root->sectorsize) - 1; |
1433 | 1433 | ||
1434 | while (1) { | 1434 | while (1) { |
1435 | lock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); | 1435 | lock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); |
1436 | ordered = btrfs_lookup_ordered_range(inode, lockstart, | 1436 | ordered = btrfs_lookup_ordered_range(inode, lockstart, |
1437 | lockend - lockstart + 1); | 1437 | lockend - lockstart + 1); |
1438 | if (!ordered) { | 1438 | if (!ordered) { |
1439 | break; | 1439 | break; |
1440 | } | 1440 | } |
1441 | unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); | 1441 | unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); |
1442 | btrfs_start_ordered_extent(inode, ordered, 1); | 1442 | btrfs_start_ordered_extent(inode, ordered, 1); |
1443 | btrfs_put_ordered_extent(ordered); | 1443 | btrfs_put_ordered_extent(ordered); |
1444 | } | 1444 | } |
1445 | 1445 | ||
1446 | num_bytes = lockend - lockstart + 1; | 1446 | num_bytes = lockend - lockstart + 1; |
1447 | ret = can_nocow_extent(inode, lockstart, &num_bytes, NULL, NULL, NULL); | 1447 | ret = can_nocow_extent(inode, lockstart, &num_bytes, NULL, NULL, NULL); |
1448 | if (ret <= 0) { | 1448 | if (ret <= 0) { |
1449 | ret = 0; | 1449 | ret = 0; |
1450 | btrfs_end_nocow_write(root); | 1450 | btrfs_end_nocow_write(root); |
1451 | } else { | 1451 | } else { |
1452 | *write_bytes = min_t(size_t, *write_bytes , | 1452 | *write_bytes = min_t(size_t, *write_bytes , |
1453 | num_bytes - pos + lockstart); | 1453 | num_bytes - pos + lockstart); |
1454 | } | 1454 | } |
1455 | 1455 | ||
1456 | unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); | 1456 | unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); |
1457 | 1457 | ||
1458 | return ret; | 1458 | return ret; |
1459 | } | 1459 | } |
1460 | 1460 | ||
/*
 * Buffered write loop: copy data from the user iovec into page-cache
 * pages in batches, reserving data and metadata space up front for each
 * batch and releasing whatever was over-reserved on short copies or
 * errors.
 *
 * When the data-space reservation fails with -ENOSPC on a NODATACOW or
 * PREALLOC inode, check_can_nocow() may allow the write to proceed with
 * a metadata-only reservation (only_release_metadata); every such grant
 * must later be balanced by btrfs_end_nocow_write().
 *
 * Returns the number of bytes written, or a negative errno if nothing
 * was written.
 */
static noinline ssize_t __btrfs_buffered_write(struct file *file,
					       struct iov_iter *i,
					       loff_t pos)
{
	struct inode *inode = file_inode(file);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct page **pages = NULL;
	struct extent_state *cached_state = NULL;
	u64 release_bytes = 0;		/* reserved-but-unconsumed bytes */
	u64 lockstart;
	u64 lockend;
	unsigned long first_index;
	size_t num_written = 0;
	int nrptrs;
	int ret = 0;
	bool only_release_metadata = false;
	bool force_page_uptodate = false;
	bool need_unlock;

	/*
	 * Size the page-pointer array to cover the whole write, capped so
	 * the array itself fits in one page and by the task's dirty-page
	 * throttling headroom; always allow at least 8 pages per batch.
	 */
	nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) /
		     PAGE_CACHE_SIZE, PAGE_CACHE_SIZE /
		     (sizeof(struct page *)));
	nrptrs = min(nrptrs, current->nr_dirtied_pause - current->nr_dirtied);
	nrptrs = max(nrptrs, 8);
	pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	first_index = pos >> PAGE_CACHE_SHIFT;

	while (iov_iter_count(i) > 0) {
		size_t offset = pos & (PAGE_CACHE_SIZE - 1);
		size_t write_bytes = min(iov_iter_count(i),
					 nrptrs * (size_t)PAGE_CACHE_SIZE -
					 offset);
		size_t num_pages = (write_bytes + offset +
				    PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
		size_t reserve_bytes;
		size_t dirty_pages;
		size_t copied;

		WARN_ON(num_pages > nrptrs);

		/*
		 * Fault pages before locking them in prepare_pages
		 * to avoid recursive lock
		 */
		if (unlikely(iov_iter_fault_in_readable(i, write_bytes))) {
			ret = -EFAULT;
			break;
		}

		reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
		ret = btrfs_check_data_free_space(inode, reserve_bytes);
		if (ret == -ENOSPC &&
		    (BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
					      BTRFS_INODE_PREALLOC))) {
			/* no data space, but maybe we can overwrite in place */
			ret = check_can_nocow(inode, pos, &write_bytes);
			if (ret > 0) {
				only_release_metadata = true;
				/*
				 * our prealloc extent may be smaller than
				 * write_bytes, so scale down.
				 */
				num_pages = (write_bytes + offset +
					     PAGE_CACHE_SIZE - 1) >>
					     PAGE_CACHE_SHIFT;
				reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
				ret = 0;
			} else {
				ret = -ENOSPC;
			}
		}

		if (ret)
			break;

		ret = btrfs_delalloc_reserve_metadata(inode, reserve_bytes);
		if (ret) {
			/* undo whichever reservation path we took above */
			if (!only_release_metadata)
				btrfs_free_reserved_data_space(inode,
							       reserve_bytes);
			else
				btrfs_end_nocow_write(root);
			break;
		}

		release_bytes = reserve_bytes;
		need_unlock = false;
again:
		/*
		 * This is going to setup the pages array with the number of
		 * pages we want, so we don't really need to worry about the
		 * contents of pages from loop to loop
		 */
		ret = prepare_pages(inode, pages, num_pages,
				    pos, write_bytes,
				    force_page_uptodate);
		if (ret)
			break;

		ret = lock_and_cleanup_extent_if_need(inode, pages, num_pages,
						      pos, &lockstart, &lockend,
						      &cached_state);
		if (ret < 0) {
			/* -EAGAIN: ordered extent got in the way, retry */
			if (ret == -EAGAIN)
				goto again;
			break;
		} else if (ret > 0) {
			need_unlock = true;
			ret = 0;
		}

		copied = btrfs_copy_from_user(pos, num_pages,
					   write_bytes, pages, i);

		/*
		 * if we have trouble faulting in the pages, fall
		 * back to one page at a time
		 */
		if (copied < write_bytes)
			nrptrs = 1;

		if (copied == 0) {
			/* force a re-read next time so a partial page is valid */
			force_page_uptodate = true;
			dirty_pages = 0;
		} else {
			force_page_uptodate = false;
			dirty_pages = (copied + offset +
				       PAGE_CACHE_SIZE - 1) >>
				       PAGE_CACHE_SHIFT;
		}

		/*
		 * If we had a short copy we need to release the excess delalloc
		 * bytes we reserved.  We need to increment outstanding_extents
		 * because btrfs_delalloc_release_space will decrement it, but
		 * we still have an outstanding extent for the chunk we actually
		 * managed to copy.
		 */
		if (num_pages > dirty_pages) {
			release_bytes = (num_pages - dirty_pages) <<
				PAGE_CACHE_SHIFT;
			if (copied > 0) {
				spin_lock(&BTRFS_I(inode)->lock);
				BTRFS_I(inode)->outstanding_extents++;
				spin_unlock(&BTRFS_I(inode)->lock);
			}
			if (only_release_metadata)
				btrfs_delalloc_release_metadata(inode,
								release_bytes);
			else
				btrfs_delalloc_release_space(inode,
							     release_bytes);
		}

		/* from here on, release_bytes covers only the dirtied pages */
		release_bytes = dirty_pages << PAGE_CACHE_SHIFT;

		if (copied > 0)
			ret = btrfs_dirty_pages(root, inode, pages,
						dirty_pages, pos, copied,
						NULL);
		if (need_unlock)
			unlock_extent_cached(&BTRFS_I(inode)->io_tree,
					     lockstart, lockend, &cached_state,
					     GFP_NOFS);
		if (ret) {
			btrfs_drop_pages(pages, num_pages);
			break;
		}

		release_bytes = 0;
		if (only_release_metadata)
			btrfs_end_nocow_write(root);

		if (only_release_metadata && copied > 0) {
			/* mark the nocow range so nothing tries to release
			 * data space for it later */
			u64 lockstart = round_down(pos, root->sectorsize);
			u64 lockend = lockstart +
				(dirty_pages << PAGE_CACHE_SHIFT) - 1;

			set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
				       lockend, EXTENT_NORESERVE, NULL,
				       NULL, GFP_NOFS);
			only_release_metadata = false;
		}

		btrfs_drop_pages(pages, num_pages);

		cond_resched();

		balance_dirty_pages_ratelimited(inode->i_mapping);
		if (dirty_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
			btrfs_btree_balance_dirty(root);

		pos += copied;
		num_written += copied;
	}

	kfree(pages);

	/* release anything still reserved after an early break */
	if (release_bytes) {
		if (only_release_metadata) {
			btrfs_end_nocow_write(root);
			btrfs_delalloc_release_metadata(inode, release_bytes);
		} else {
			btrfs_delalloc_release_space(inode, release_bytes);
		}
	}

	return num_written ? num_written : ret;
}
1673 | static ssize_t __btrfs_direct_write(struct kiocb *iocb, | 1673 | static ssize_t __btrfs_direct_write(struct kiocb *iocb, |
1674 | const struct iovec *iov, | 1674 | const struct iovec *iov, |
1675 | unsigned long nr_segs, loff_t pos, | 1675 | unsigned long nr_segs, loff_t pos, |
1676 | size_t count, size_t ocount) | 1676 | size_t count, size_t ocount) |
1677 | { | 1677 | { |
1678 | struct file *file = iocb->ki_filp; | 1678 | struct file *file = iocb->ki_filp; |
1679 | struct iov_iter i; | 1679 | struct iov_iter i; |
1680 | ssize_t written; | 1680 | ssize_t written; |
1681 | ssize_t written_buffered; | 1681 | ssize_t written_buffered; |
1682 | loff_t endbyte; | 1682 | loff_t endbyte; |
1683 | int err; | 1683 | int err; |
1684 | 1684 | ||
1685 | written = generic_file_direct_write(iocb, iov, &nr_segs, pos, | 1685 | written = generic_file_direct_write(iocb, iov, &nr_segs, pos, |
1686 | count, ocount); | 1686 | count, ocount); |
1687 | 1687 | ||
1688 | if (written < 0 || written == count) | 1688 | if (written < 0 || written == count) |
1689 | return written; | 1689 | return written; |
1690 | 1690 | ||
1691 | pos += written; | 1691 | pos += written; |
1692 | count -= written; | 1692 | count -= written; |
1693 | iov_iter_init(&i, iov, nr_segs, count, written); | 1693 | iov_iter_init(&i, iov, nr_segs, count, written); |
1694 | written_buffered = __btrfs_buffered_write(file, &i, pos); | 1694 | written_buffered = __btrfs_buffered_write(file, &i, pos); |
1695 | if (written_buffered < 0) { | 1695 | if (written_buffered < 0) { |
1696 | err = written_buffered; | 1696 | err = written_buffered; |
1697 | goto out; | 1697 | goto out; |
1698 | } | 1698 | } |
1699 | endbyte = pos + written_buffered - 1; | 1699 | endbyte = pos + written_buffered - 1; |
1700 | err = filemap_write_and_wait_range(file->f_mapping, pos, endbyte); | 1700 | err = filemap_write_and_wait_range(file->f_mapping, pos, endbyte); |
1701 | if (err) | 1701 | if (err) |
1702 | goto out; | 1702 | goto out; |
1703 | written += written_buffered; | 1703 | written += written_buffered; |
1704 | iocb->ki_pos = pos + written_buffered; | 1704 | iocb->ki_pos = pos + written_buffered; |
1705 | invalidate_mapping_pages(file->f_mapping, pos >> PAGE_CACHE_SHIFT, | 1705 | invalidate_mapping_pages(file->f_mapping, pos >> PAGE_CACHE_SHIFT, |
1706 | endbyte >> PAGE_CACHE_SHIFT); | 1706 | endbyte >> PAGE_CACHE_SHIFT); |
1707 | out: | 1707 | out: |
1708 | return written ? written : err; | 1708 | return written ? written : err; |
1709 | } | 1709 | } |
1710 | 1710 | ||
1711 | static void update_time_for_write(struct inode *inode) | 1711 | static void update_time_for_write(struct inode *inode) |
1712 | { | 1712 | { |
1713 | struct timespec now; | 1713 | struct timespec now; |
1714 | 1714 | ||
1715 | if (IS_NOCMTIME(inode)) | 1715 | if (IS_NOCMTIME(inode)) |
1716 | return; | 1716 | return; |
1717 | 1717 | ||
1718 | now = current_fs_time(inode->i_sb); | 1718 | now = current_fs_time(inode->i_sb); |
1719 | if (!timespec_equal(&inode->i_mtime, &now)) | 1719 | if (!timespec_equal(&inode->i_mtime, &now)) |
1720 | inode->i_mtime = now; | 1720 | inode->i_mtime = now; |
1721 | 1721 | ||
1722 | if (!timespec_equal(&inode->i_ctime, &now)) | 1722 | if (!timespec_equal(&inode->i_ctime, &now)) |
1723 | inode->i_ctime = now; | 1723 | inode->i_ctime = now; |
1724 | 1724 | ||
1725 | if (IS_I_VERSION(inode)) | 1725 | if (IS_I_VERSION(inode)) |
1726 | inode_inc_iversion(inode); | 1726 | inode_inc_iversion(inode); |
1727 | } | 1727 | } |
1728 | 1728 | ||
/*
 * Main write entry point for btrfs files.
 *
 * Runs the generic pre-write checks and the write itself under
 * i_mutex, expanding a leading hole first when the write starts past
 * i_size, then dispatches to the O_DIRECT or buffered path.  The mutex
 * is dropped before the optional generic_write_sync() at the end.
 *
 * Returns the number of bytes written, or a negative errno if nothing
 * was written.
 */
static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
				    const struct iovec *iov,
				    unsigned long nr_segs, loff_t pos)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file_inode(file);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	u64 start_pos;
	u64 end_pos;
	ssize_t num_written = 0;
	ssize_t err = 0;
	size_t count, ocount;
	bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);

	mutex_lock(&inode->i_mutex);

	err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
	if (err) {
		mutex_unlock(&inode->i_mutex);
		goto out;
	}
	count = ocount;

	current->backing_dev_info = inode->i_mapping->backing_dev_info;
	/* generic_write_checks() may adjust pos and count */
	err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
	if (err) {
		mutex_unlock(&inode->i_mutex);
		goto out;
	}

	if (count == 0) {
		mutex_unlock(&inode->i_mutex);
		goto out;
	}

	/* drop suid/sgid bits as required for a write */
	err = file_remove_suid(file);
	if (err) {
		mutex_unlock(&inode->i_mutex);
		goto out;
	}

	/*
	 * If BTRFS flips readonly due to some impossible error
	 * (fs_info->fs_state now has BTRFS_SUPER_FLAG_ERROR),
	 * although we have opened a file as writable, we have
	 * to stop this write operation to ensure FS consistency.
	 */
	if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) {
		mutex_unlock(&inode->i_mutex);
		err = -EROFS;
		goto out;
	}

	/*
	 * We reserve space for updating the inode when we reserve space for the
	 * extent we are going to write, so we will enospc out there. We don't
	 * need to start yet another transaction to update the inode as we will
	 * update the inode when we finish writing whatever data we write.
	 */
	update_time_for_write(inode);

	start_pos = round_down(pos, root->sectorsize);
	if (start_pos > i_size_read(inode)) {
		/* Expand hole size to cover write data, preventing empty gap */
		end_pos = round_up(pos + count, root->sectorsize);
		err = btrfs_cont_expand(inode, i_size_read(inode), end_pos);
		if (err) {
			mutex_unlock(&inode->i_mutex);
			goto out;
		}
	}

	if (sync)
		atomic_inc(&BTRFS_I(inode)->sync_writers);

	if (unlikely(file->f_flags & O_DIRECT)) {
		num_written = __btrfs_direct_write(iocb, iov, nr_segs,
						   pos, count, ocount);
	} else {
		struct iov_iter i;

		iov_iter_init(&i, iov, nr_segs, count, num_written);

		num_written = __btrfs_buffered_write(file, &i, pos);
		if (num_written > 0)
			iocb->ki_pos = pos + num_written;
	}

	mutex_unlock(&inode->i_mutex);

	/*
	 * we want to make sure fsync finds this change
	 * but we haven't joined a transaction running right now.
	 *
	 * Later on, someone is sure to update the inode and get the
	 * real transid recorded.
	 *
	 * We set last_trans now to the fs_info generation + 1,
	 * this will either be one more than the running transaction
	 * or the generation used for the next transaction if there isn't
	 * one running right now.
	 *
	 * We also have to set last_sub_trans to the current log transid,
	 * otherwise subsequent syncs to a file that's been synced in this
	 * transaction will appear to have already occurred.
	 */
	BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
	BTRFS_I(inode)->last_sub_trans = root->log_transid;
	if (num_written > 0) {
		err = generic_write_sync(file, pos, num_written);
		if (err < 0)
			num_written = err;
	}

	if (sync)
		atomic_dec(&BTRFS_I(inode)->sync_writers);
out:
	current->backing_dev_info = NULL;
	return num_written ? num_written : err;
}
1849 | 1849 | ||
/*
 * ->release() hook, called when the last reference to a struct file is
 * dropped.  If the inode was flagged for ordered-data close (truncate
 * to zero of a non-empty file), queue an ordered operation so the new
 * data gets flushed, and end any ioctl-started transaction attached to
 * this file.
 *
 * Returns 0 on success or a negative errno if the flush transaction
 * could not be started.
 */
int btrfs_release_file(struct inode *inode, struct file *filp)
{
	/*
	 * ordered_data_close is set by setattr when we are about to truncate
	 * a file from a non-zero size to a zero size.  This tries to
	 * flush down new bytes that may have been written if the
	 * application were using truncate to replace a file in place.
	 */
	if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
			       &BTRFS_I(inode)->runtime_flags)) {
		struct btrfs_trans_handle *trans;
		struct btrfs_root *root = BTRFS_I(inode)->root;

		/*
		 * We need to block on a committing transaction to keep us from
		 * throwing a ordered operation on to the list and causing
		 * something like sync to deadlock trying to flush out this
		 * inode.
		 */
		trans = btrfs_start_transaction(root, 0);
		if (IS_ERR(trans))
			return PTR_ERR(trans);
		btrfs_add_ordered_operation(trans, BTRFS_I(inode)->root, inode);
		btrfs_end_transaction(trans, root);
		if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
			filemap_flush(inode->i_mapping);
	}
	if (filp->private_data)
		btrfs_ioctl_trans_end(filp);
	return 0;
}
1881 | 1881 | ||
1882 | /* | 1882 | /* |
1883 | * fsync call for both files and directories. This logs the inode into | 1883 | * fsync call for both files and directories. This logs the inode into |
1884 | * the tree log instead of forcing full commits whenever possible. | 1884 | * the tree log instead of forcing full commits whenever possible. |
1885 | * | 1885 | * |
1886 | * It needs to call filemap_fdatawait so that all ordered extent updates are | 1886 | * It needs to call filemap_fdatawait so that all ordered extent updates are |
1887 | * in the metadata btree are up to date for copying to the log. | 1887 | * in the metadata btree are up to date for copying to the log. |
1888 | * | 1888 | * |
1889 | * It drops the inode mutex before doing the tree log commit. This is an | 1889 | * It drops the inode mutex before doing the tree log commit. This is an |
1890 | * important optimization for directories because holding the mutex prevents | 1890 | * important optimization for directories because holding the mutex prevents |
1891 | * new operations on the dir while we write to disk. | 1891 | * new operations on the dir while we write to disk. |
1892 | */ | 1892 | */ |
1893 | int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | 1893 | int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) |
1894 | { | 1894 | { |
1895 | struct dentry *dentry = file->f_path.dentry; | 1895 | struct dentry *dentry = file->f_path.dentry; |
1896 | struct inode *inode = dentry->d_inode; | 1896 | struct inode *inode = dentry->d_inode; |
1897 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1897 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1898 | struct btrfs_trans_handle *trans; | 1898 | struct btrfs_trans_handle *trans; |
1899 | struct btrfs_log_ctx ctx; | 1899 | struct btrfs_log_ctx ctx; |
1900 | int ret = 0; | 1900 | int ret = 0; |
1901 | bool full_sync = 0; | 1901 | bool full_sync = 0; |
1902 | 1902 | ||
1903 | trace_btrfs_sync_file(file, datasync); | 1903 | trace_btrfs_sync_file(file, datasync); |
1904 | 1904 | ||
1905 | /* | 1905 | /* |
1906 | * We write the dirty pages in the range and wait until they complete | 1906 | * We write the dirty pages in the range and wait until they complete |
1907 | * out of the ->i_mutex. If so, we can flush the dirty pages by | 1907 | * out of the ->i_mutex. If so, we can flush the dirty pages by |
1908 | * multi-task, and make the performance up. See | 1908 | * multi-task, and make the performance up. See |
1909 | * btrfs_wait_ordered_range for an explanation of the ASYNC check. | 1909 | * btrfs_wait_ordered_range for an explanation of the ASYNC check. |
1910 | */ | 1910 | */ |
1911 | atomic_inc(&BTRFS_I(inode)->sync_writers); | 1911 | atomic_inc(&BTRFS_I(inode)->sync_writers); |
1912 | ret = filemap_fdatawrite_range(inode->i_mapping, start, end); | 1912 | ret = filemap_fdatawrite_range(inode->i_mapping, start, end); |
1913 | if (!ret && test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, | 1913 | if (!ret && test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, |
1914 | &BTRFS_I(inode)->runtime_flags)) | 1914 | &BTRFS_I(inode)->runtime_flags)) |
1915 | ret = filemap_fdatawrite_range(inode->i_mapping, start, end); | 1915 | ret = filemap_fdatawrite_range(inode->i_mapping, start, end); |
1916 | atomic_dec(&BTRFS_I(inode)->sync_writers); | 1916 | atomic_dec(&BTRFS_I(inode)->sync_writers); |
1917 | if (ret) | 1917 | if (ret) |
1918 | return ret; | 1918 | return ret; |
1919 | 1919 | ||
1920 | mutex_lock(&inode->i_mutex); | 1920 | mutex_lock(&inode->i_mutex); |
1921 | 1921 | ||
1922 | /* | 1922 | /* |
1923 | * We flush the dirty pages again to avoid some dirty pages in the | 1923 | * We flush the dirty pages again to avoid some dirty pages in the |
1924 | * range being left. | 1924 | * range being left. |
1925 | */ | 1925 | */ |
1926 | atomic_inc(&root->log_batch); | 1926 | atomic_inc(&root->log_batch); |
1927 | full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, | 1927 | full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, |
1928 | &BTRFS_I(inode)->runtime_flags); | 1928 | &BTRFS_I(inode)->runtime_flags); |
1929 | if (full_sync) { | 1929 | if (full_sync) { |
1930 | ret = btrfs_wait_ordered_range(inode, start, end - start + 1); | 1930 | ret = btrfs_wait_ordered_range(inode, start, end - start + 1); |
1931 | if (ret) { | 1931 | if (ret) { |
1932 | mutex_unlock(&inode->i_mutex); | 1932 | mutex_unlock(&inode->i_mutex); |
1933 | goto out; | 1933 | goto out; |
1934 | } | 1934 | } |
1935 | } | 1935 | } |
1936 | atomic_inc(&root->log_batch); | 1936 | atomic_inc(&root->log_batch); |
1937 | 1937 | ||
1938 | /* | 1938 | /* |
1939 | * check the transaction that last modified this inode | 1939 | * check the transaction that last modified this inode |
1940 | * and see if its already been committed | 1940 | * and see if its already been committed |
1941 | */ | 1941 | */ |
1942 | if (!BTRFS_I(inode)->last_trans) { | 1942 | if (!BTRFS_I(inode)->last_trans) { |
1943 | mutex_unlock(&inode->i_mutex); | 1943 | mutex_unlock(&inode->i_mutex); |
1944 | goto out; | 1944 | goto out; |
1945 | } | 1945 | } |
1946 | 1946 | ||
1947 | /* | 1947 | /* |
1948 | * if the last transaction that changed this file was before | 1948 | * if the last transaction that changed this file was before |
1949 | * the current transaction, we can bail out now without any | 1949 | * the current transaction, we can bail out now without any |
1950 | * syncing | 1950 | * syncing |
1951 | */ | 1951 | */ |
1952 | smp_mb(); | 1952 | smp_mb(); |
1953 | if (btrfs_inode_in_log(inode, root->fs_info->generation) || | 1953 | if (btrfs_inode_in_log(inode, root->fs_info->generation) || |
1954 | BTRFS_I(inode)->last_trans <= | 1954 | BTRFS_I(inode)->last_trans <= |
1955 | root->fs_info->last_trans_committed) { | 1955 | root->fs_info->last_trans_committed) { |
1956 | BTRFS_I(inode)->last_trans = 0; | 1956 | BTRFS_I(inode)->last_trans = 0; |
1957 | 1957 | ||
1958 | /* | 1958 | /* |
1959 | * We'v had everything committed since the last time we were | 1959 | * We'v had everything committed since the last time we were |
1960 | * modified so clear this flag in case it was set for whatever | 1960 | * modified so clear this flag in case it was set for whatever |
1961 | * reason, it's no longer relevant. | 1961 | * reason, it's no longer relevant. |
1962 | */ | 1962 | */ |
1963 | clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC, | 1963 | clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC, |
1964 | &BTRFS_I(inode)->runtime_flags); | 1964 | &BTRFS_I(inode)->runtime_flags); |
1965 | mutex_unlock(&inode->i_mutex); | 1965 | mutex_unlock(&inode->i_mutex); |
1966 | goto out; | 1966 | goto out; |
1967 | } | 1967 | } |
1968 | 1968 | ||
1969 | /* | 1969 | /* |
1970 | * ok we haven't committed the transaction yet, lets do a commit | 1970 | * ok we haven't committed the transaction yet, lets do a commit |
1971 | */ | 1971 | */ |
1972 | if (file->private_data) | 1972 | if (file->private_data) |
1973 | btrfs_ioctl_trans_end(file); | 1973 | btrfs_ioctl_trans_end(file); |
1974 | 1974 | ||
1975 | /* | 1975 | /* |
1976 | * We use start here because we will need to wait on the IO to complete | 1976 | * We use start here because we will need to wait on the IO to complete |
1977 | * in btrfs_sync_log, which could require joining a transaction (for | 1977 | * in btrfs_sync_log, which could require joining a transaction (for |
1978 | * example checking cross references in the nocow path). If we use join | 1978 | * example checking cross references in the nocow path). If we use join |
1979 | * here we could get into a situation where we're waiting on IO to | 1979 | * here we could get into a situation where we're waiting on IO to |
1980 | * happen that is blocked on a transaction trying to commit. With start | 1980 | * happen that is blocked on a transaction trying to commit. With start |
1981 | * we inc the extwriter counter, so we wait for all extwriters to exit | 1981 | * we inc the extwriter counter, so we wait for all extwriters to exit |
1982 | * before we start blocking join'ers. This comment is to keep somebody | 1982 | * before we start blocking join'ers. This comment is to keep somebody |
1983 | * from thinking they are super smart and changing this to | 1983 | * from thinking they are super smart and changing this to |
1984 | * btrfs_join_transaction *cough*Josef*cough*. | 1984 | * btrfs_join_transaction *cough*Josef*cough*. |
1985 | */ | 1985 | */ |
1986 | trans = btrfs_start_transaction(root, 0); | 1986 | trans = btrfs_start_transaction(root, 0); |
1987 | if (IS_ERR(trans)) { | 1987 | if (IS_ERR(trans)) { |
1988 | ret = PTR_ERR(trans); | 1988 | ret = PTR_ERR(trans); |
1989 | mutex_unlock(&inode->i_mutex); | 1989 | mutex_unlock(&inode->i_mutex); |
1990 | goto out; | 1990 | goto out; |
1991 | } | 1991 | } |
1992 | trans->sync = true; | 1992 | trans->sync = true; |
1993 | 1993 | ||
1994 | btrfs_init_log_ctx(&ctx); | 1994 | btrfs_init_log_ctx(&ctx); |
1995 | 1995 | ||
1996 | ret = btrfs_log_dentry_safe(trans, root, dentry, &ctx); | 1996 | ret = btrfs_log_dentry_safe(trans, root, dentry, &ctx); |
1997 | if (ret < 0) { | 1997 | if (ret < 0) { |
1998 | /* Fallthrough and commit/free transaction. */ | 1998 | /* Fallthrough and commit/free transaction. */ |
1999 | ret = 1; | 1999 | ret = 1; |
2000 | } | 2000 | } |
2001 | 2001 | ||
2002 | /* we've logged all the items and now have a consistent | 2002 | /* we've logged all the items and now have a consistent |
2003 | * version of the file in the log. It is possible that | 2003 | * version of the file in the log. It is possible that |
2004 | * someone will come in and modify the file, but that's | 2004 | * someone will come in and modify the file, but that's |
2005 | * fine because the log is consistent on disk, and we | 2005 | * fine because the log is consistent on disk, and we |
2006 | * have references to all of the file's extents | 2006 | * have references to all of the file's extents |
2007 | * | 2007 | * |
2008 | * It is possible that someone will come in and log the | 2008 | * It is possible that someone will come in and log the |
2009 | * file again, but that will end up using the synchronization | 2009 | * file again, but that will end up using the synchronization |
2010 | * inside btrfs_sync_log to keep things safe. | 2010 | * inside btrfs_sync_log to keep things safe. |
2011 | */ | 2011 | */ |
2012 | mutex_unlock(&inode->i_mutex); | 2012 | mutex_unlock(&inode->i_mutex); |
2013 | 2013 | ||
2014 | if (ret != BTRFS_NO_LOG_SYNC) { | 2014 | if (ret != BTRFS_NO_LOG_SYNC) { |
2015 | if (!ret) { | 2015 | if (!ret) { |
2016 | ret = btrfs_sync_log(trans, root, &ctx); | 2016 | ret = btrfs_sync_log(trans, root, &ctx); |
2017 | if (!ret) { | 2017 | if (!ret) { |
2018 | ret = btrfs_end_transaction(trans, root); | 2018 | ret = btrfs_end_transaction(trans, root); |
2019 | goto out; | 2019 | goto out; |
2020 | } | 2020 | } |
2021 | } | 2021 | } |
2022 | if (!full_sync) { | 2022 | if (!full_sync) { |
2023 | ret = btrfs_wait_ordered_range(inode, start, | 2023 | ret = btrfs_wait_ordered_range(inode, start, |
2024 | end - start + 1); | 2024 | end - start + 1); |
2025 | if (ret) | 2025 | if (ret) |
2026 | goto out; | 2026 | goto out; |
2027 | } | 2027 | } |
2028 | ret = btrfs_commit_transaction(trans, root); | 2028 | ret = btrfs_commit_transaction(trans, root); |
2029 | } else { | 2029 | } else { |
2030 | ret = btrfs_end_transaction(trans, root); | 2030 | ret = btrfs_end_transaction(trans, root); |
2031 | } | 2031 | } |
2032 | out: | 2032 | out: |
2033 | return ret > 0 ? -EIO : ret; | 2033 | return ret > 0 ? -EIO : ret; |
2034 | } | 2034 | } |
2035 | 2035 | ||
/*
 * VM operations for mmap'ed btrfs files.  Faulting and read-ahead go
 * through the generic page cache helpers; only page_mkwrite is
 * btrfs-specific (it reserves space / handles COW before a page is
 * made writable).
 */
static const struct vm_operations_struct btrfs_file_vm_ops = {
	.fault		= filemap_fault,
	.map_pages	= filemap_map_pages,
	.page_mkwrite	= btrfs_page_mkwrite,
	.remap_pages	= generic_file_remap_pages,
};
2042 | 2042 | ||
2043 | static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) | 2043 | static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) |
2044 | { | 2044 | { |
2045 | struct address_space *mapping = filp->f_mapping; | 2045 | struct address_space *mapping = filp->f_mapping; |
2046 | 2046 | ||
2047 | if (!mapping->a_ops->readpage) | 2047 | if (!mapping->a_ops->readpage) |
2048 | return -ENOEXEC; | 2048 | return -ENOEXEC; |
2049 | 2049 | ||
2050 | file_accessed(filp); | 2050 | file_accessed(filp); |
2051 | vma->vm_ops = &btrfs_file_vm_ops; | 2051 | vma->vm_ops = &btrfs_file_vm_ops; |
2052 | 2052 | ||
2053 | return 0; | 2053 | return 0; |
2054 | } | 2054 | } |
2055 | 2055 | ||
2056 | static int hole_mergeable(struct inode *inode, struct extent_buffer *leaf, | 2056 | static int hole_mergeable(struct inode *inode, struct extent_buffer *leaf, |
2057 | int slot, u64 start, u64 end) | 2057 | int slot, u64 start, u64 end) |
2058 | { | 2058 | { |
2059 | struct btrfs_file_extent_item *fi; | 2059 | struct btrfs_file_extent_item *fi; |
2060 | struct btrfs_key key; | 2060 | struct btrfs_key key; |
2061 | 2061 | ||
2062 | if (slot < 0 || slot >= btrfs_header_nritems(leaf)) | 2062 | if (slot < 0 || slot >= btrfs_header_nritems(leaf)) |
2063 | return 0; | 2063 | return 0; |
2064 | 2064 | ||
2065 | btrfs_item_key_to_cpu(leaf, &key, slot); | 2065 | btrfs_item_key_to_cpu(leaf, &key, slot); |
2066 | if (key.objectid != btrfs_ino(inode) || | 2066 | if (key.objectid != btrfs_ino(inode) || |
2067 | key.type != BTRFS_EXTENT_DATA_KEY) | 2067 | key.type != BTRFS_EXTENT_DATA_KEY) |
2068 | return 0; | 2068 | return 0; |
2069 | 2069 | ||
2070 | fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); | 2070 | fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); |
2071 | 2071 | ||
2072 | if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG) | 2072 | if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG) |
2073 | return 0; | 2073 | return 0; |
2074 | 2074 | ||
2075 | if (btrfs_file_extent_disk_bytenr(leaf, fi)) | 2075 | if (btrfs_file_extent_disk_bytenr(leaf, fi)) |
2076 | return 0; | 2076 | return 0; |
2077 | 2077 | ||
2078 | if (key.offset == end) | 2078 | if (key.offset == end) |
2079 | return 1; | 2079 | return 1; |
2080 | if (key.offset + btrfs_file_extent_num_bytes(leaf, fi) == start) | 2080 | if (key.offset + btrfs_file_extent_num_bytes(leaf, fi) == start) |
2081 | return 1; | 2081 | return 1; |
2082 | return 0; | 2082 | return 0; |
2083 | } | 2083 | } |
2084 | 2084 | ||
/*
 * After extents in [offset, end) were dropped by a hole punch, insert a
 * hole file extent item covering that range (unless the NO_HOLES incompat
 * feature makes explicit hole items unnecessary), merging with an adjacent
 * hole extent when possible, and replace the cached extent maps for the
 * range with a single hole mapping.
 *
 * @trans:  running transaction
 * @inode:  inode being punched
 * @path:   caller-provided path, released before returning
 * @offset: start of the hole range (inclusive)
 * @end:    end of the hole range (exclusive)
 *
 * Returns 0 on success or a negative errno from the btree operations.
 * Extent-map cache failures are not fatal: they only force a full sync
 * via BTRFS_INODE_NEEDS_FULL_SYNC.
 */
static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode,
		      struct btrfs_path *path, u64 offset, u64 end)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *fi;
	struct extent_map *hole_em;
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	struct btrfs_key key;
	int ret;

	/* With NO_HOLES there is no hole item to insert, only the em cache. */
	if (btrfs_fs_incompat(root->fs_info, NO_HOLES))
		goto out;

	key.objectid = btrfs_ino(inode);
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = offset;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret < 0)
		return ret;
	/* The extents were just dropped, an exact match must not exist. */
	BUG_ON(!ret);

	leaf = path->nodes[0];
	/* Try to extend a hole extent that ends right at @offset. */
	if (hole_mergeable(inode, leaf, path->slots[0]-1, offset, end)) {
		u64 num_bytes;

		path->slots[0]--;
		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		num_bytes = btrfs_file_extent_num_bytes(leaf, fi) +
			end - offset;
		btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
		btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
		btrfs_set_file_extent_offset(leaf, fi, 0);
		btrfs_mark_buffer_dirty(leaf);
		goto out;
	}

	/* Try to extend a hole extent that starts right at @end backwards. */
	if (hole_mergeable(inode, leaf, path->slots[0]+1, offset, end)) {
		u64 num_bytes;

		path->slots[0]++;
		key.offset = offset;
		/* Move the item's key back to @offset to cover the hole. */
		btrfs_set_item_key_safe(root, path, &key);
		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + end -
			offset;
		btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
		btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
		btrfs_set_file_extent_offset(leaf, fi, 0);
		btrfs_mark_buffer_dirty(leaf);
		goto out;
	}
	btrfs_release_path(path);

	/* No mergeable neighbour: insert a brand new hole extent item. */
	ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), offset,
				       0, 0, end - offset, 0, end - offset,
				       0, 0, 0);
	if (ret)
		return ret;

out:
	btrfs_release_path(path);

	/* Replace any cached extent maps in the range with a hole mapping. */
	hole_em = alloc_extent_map();
	if (!hole_em) {
		/* Can't cache the hole; force a full sync on next fsync. */
		btrfs_drop_extent_cache(inode, offset, end - 1, 0);
		set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
			&BTRFS_I(inode)->runtime_flags);
	} else {
		hole_em->start = offset;
		hole_em->len = end - offset;
		hole_em->ram_bytes = hole_em->len;
		hole_em->orig_start = offset;

		hole_em->block_start = EXTENT_MAP_HOLE;
		hole_em->block_len = 0;
		hole_em->orig_block_len = 0;
		hole_em->bdev = root->fs_info->fs_devices->latest_bdev;
		hole_em->compress_type = BTRFS_COMPRESS_NONE;
		hole_em->generation = trans->transid;

		/* Retry while readers race new mappings into the range. */
		do {
			btrfs_drop_extent_cache(inode, offset, end - 1, 0);
			write_lock(&em_tree->lock);
			ret = add_extent_mapping(em_tree, hole_em, 1);
			write_unlock(&em_tree->lock);
		} while (ret == -EEXIST);
		free_extent_map(hole_em);
		if (ret)
			set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
				&BTRFS_I(inode)->runtime_flags);
	}

	return 0;
}
2183 | 2183 | ||
2184 | static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | 2184 | static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) |
2185 | { | 2185 | { |
2186 | struct btrfs_root *root = BTRFS_I(inode)->root; | 2186 | struct btrfs_root *root = BTRFS_I(inode)->root; |
2187 | struct extent_state *cached_state = NULL; | 2187 | struct extent_state *cached_state = NULL; |
2188 | struct btrfs_path *path; | 2188 | struct btrfs_path *path; |
2189 | struct btrfs_block_rsv *rsv; | 2189 | struct btrfs_block_rsv *rsv; |
2190 | struct btrfs_trans_handle *trans; | 2190 | struct btrfs_trans_handle *trans; |
2191 | u64 lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize); | 2191 | u64 lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize); |
2192 | u64 lockend = round_down(offset + len, | 2192 | u64 lockend = round_down(offset + len, |
2193 | BTRFS_I(inode)->root->sectorsize) - 1; | 2193 | BTRFS_I(inode)->root->sectorsize) - 1; |
2194 | u64 cur_offset = lockstart; | 2194 | u64 cur_offset = lockstart; |
2195 | u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); | 2195 | u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); |
2196 | u64 drop_end; | 2196 | u64 drop_end; |
2197 | int ret = 0; | 2197 | int ret = 0; |
2198 | int err = 0; | 2198 | int err = 0; |
2199 | int rsv_count; | 2199 | int rsv_count; |
2200 | bool same_page = ((offset >> PAGE_CACHE_SHIFT) == | 2200 | bool same_page = ((offset >> PAGE_CACHE_SHIFT) == |
2201 | ((offset + len - 1) >> PAGE_CACHE_SHIFT)); | 2201 | ((offset + len - 1) >> PAGE_CACHE_SHIFT)); |
2202 | bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES); | 2202 | bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES); |
2203 | u64 ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE); | 2203 | u64 ino_size; |
2204 | 2204 | ||
2205 | ret = btrfs_wait_ordered_range(inode, offset, len); | 2205 | ret = btrfs_wait_ordered_range(inode, offset, len); |
2206 | if (ret) | 2206 | if (ret) |
2207 | return ret; | 2207 | return ret; |
2208 | 2208 | ||
2209 | mutex_lock(&inode->i_mutex); | 2209 | mutex_lock(&inode->i_mutex); |
2210 | ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE); | ||
2210 | /* | 2211 | /* |
2211 | * We needn't truncate any page which is beyond the end of the file | 2212 | * We needn't truncate any page which is beyond the end of the file |
2212 | * because we are sure there is no data there. | 2213 | * because we are sure there is no data there. |
2213 | */ | 2214 | */ |
2214 | /* | 2215 | /* |
2215 | * Only do this if we are in the same page and we aren't doing the | 2216 | * Only do this if we are in the same page and we aren't doing the |
2216 | * entire page. | 2217 | * entire page. |
2217 | */ | 2218 | */ |
2218 | if (same_page && len < PAGE_CACHE_SIZE) { | 2219 | if (same_page && len < PAGE_CACHE_SIZE) { |
2219 | if (offset < ino_size) | 2220 | if (offset < ino_size) |
2220 | ret = btrfs_truncate_page(inode, offset, len, 0); | 2221 | ret = btrfs_truncate_page(inode, offset, len, 0); |
2221 | mutex_unlock(&inode->i_mutex); | 2222 | mutex_unlock(&inode->i_mutex); |
2222 | return ret; | 2223 | return ret; |
2223 | } | 2224 | } |
2224 | 2225 | ||
2225 | /* zero back part of the first page */ | 2226 | /* zero back part of the first page */ |
2226 | if (offset < ino_size) { | 2227 | if (offset < ino_size) { |
2227 | ret = btrfs_truncate_page(inode, offset, 0, 0); | 2228 | ret = btrfs_truncate_page(inode, offset, 0, 0); |
2228 | if (ret) { | 2229 | if (ret) { |
2229 | mutex_unlock(&inode->i_mutex); | 2230 | mutex_unlock(&inode->i_mutex); |
2230 | return ret; | 2231 | return ret; |
2231 | } | 2232 | } |
2232 | } | 2233 | } |
2233 | 2234 | ||
2234 | /* zero the front end of the last page */ | 2235 | /* zero the front end of the last page */ |
2235 | if (offset + len < ino_size) { | 2236 | if (offset + len < ino_size) { |
2236 | ret = btrfs_truncate_page(inode, offset + len, 0, 1); | 2237 | ret = btrfs_truncate_page(inode, offset + len, 0, 1); |
2237 | if (ret) { | 2238 | if (ret) { |
2238 | mutex_unlock(&inode->i_mutex); | 2239 | mutex_unlock(&inode->i_mutex); |
2239 | return ret; | 2240 | return ret; |
2240 | } | 2241 | } |
2241 | } | 2242 | } |
2242 | 2243 | ||
2243 | if (lockend < lockstart) { | 2244 | if (lockend < lockstart) { |
2244 | mutex_unlock(&inode->i_mutex); | 2245 | mutex_unlock(&inode->i_mutex); |
2245 | return 0; | 2246 | return 0; |
2246 | } | 2247 | } |
2247 | 2248 | ||
2248 | while (1) { | 2249 | while (1) { |
2249 | struct btrfs_ordered_extent *ordered; | 2250 | struct btrfs_ordered_extent *ordered; |
2250 | 2251 | ||
2251 | truncate_pagecache_range(inode, lockstart, lockend); | 2252 | truncate_pagecache_range(inode, lockstart, lockend); |
2252 | 2253 | ||
2253 | lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, | 2254 | lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, |
2254 | 0, &cached_state); | 2255 | 0, &cached_state); |
2255 | ordered = btrfs_lookup_first_ordered_extent(inode, lockend); | 2256 | ordered = btrfs_lookup_first_ordered_extent(inode, lockend); |
2256 | 2257 | ||
2257 | /* | 2258 | /* |
2258 | * We need to make sure we have no ordered extents in this range | 2259 | * We need to make sure we have no ordered extents in this range |
2259 | * and nobody raced in and read a page in this range, if we did | 2260 | * and nobody raced in and read a page in this range, if we did |
2260 | * we need to try again. | 2261 | * we need to try again. |
2261 | */ | 2262 | */ |
2262 | if ((!ordered || | 2263 | if ((!ordered || |
2263 | (ordered->file_offset + ordered->len <= lockstart || | 2264 | (ordered->file_offset + ordered->len <= lockstart || |
2264 | ordered->file_offset > lockend)) && | 2265 | ordered->file_offset > lockend)) && |
2265 | !test_range_bit(&BTRFS_I(inode)->io_tree, lockstart, | 2266 | !test_range_bit(&BTRFS_I(inode)->io_tree, lockstart, |
2266 | lockend, EXTENT_UPTODATE, 0, | 2267 | lockend, EXTENT_UPTODATE, 0, |
2267 | cached_state)) { | 2268 | cached_state)) { |
2268 | if (ordered) | 2269 | if (ordered) |
2269 | btrfs_put_ordered_extent(ordered); | 2270 | btrfs_put_ordered_extent(ordered); |
2270 | break; | 2271 | break; |
2271 | } | 2272 | } |
2272 | if (ordered) | 2273 | if (ordered) |
2273 | btrfs_put_ordered_extent(ordered); | 2274 | btrfs_put_ordered_extent(ordered); |
2274 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, | 2275 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, |
2275 | lockend, &cached_state, GFP_NOFS); | 2276 | lockend, &cached_state, GFP_NOFS); |
2276 | ret = btrfs_wait_ordered_range(inode, lockstart, | 2277 | ret = btrfs_wait_ordered_range(inode, lockstart, |
2277 | lockend - lockstart + 1); | 2278 | lockend - lockstart + 1); |
2278 | if (ret) { | 2279 | if (ret) { |
2279 | mutex_unlock(&inode->i_mutex); | 2280 | mutex_unlock(&inode->i_mutex); |
2280 | return ret; | 2281 | return ret; |
2281 | } | 2282 | } |
2282 | } | 2283 | } |
2283 | 2284 | ||
2284 | path = btrfs_alloc_path(); | 2285 | path = btrfs_alloc_path(); |
2285 | if (!path) { | 2286 | if (!path) { |
2286 | ret = -ENOMEM; | 2287 | ret = -ENOMEM; |
2287 | goto out; | 2288 | goto out; |
2288 | } | 2289 | } |
2289 | 2290 | ||
2290 | rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP); | 2291 | rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP); |
2291 | if (!rsv) { | 2292 | if (!rsv) { |
2292 | ret = -ENOMEM; | 2293 | ret = -ENOMEM; |
2293 | goto out_free; | 2294 | goto out_free; |
2294 | } | 2295 | } |
2295 | rsv->size = btrfs_calc_trunc_metadata_size(root, 1); | 2296 | rsv->size = btrfs_calc_trunc_metadata_size(root, 1); |
2296 | rsv->failfast = 1; | 2297 | rsv->failfast = 1; |
2297 | 2298 | ||
2298 | /* | 2299 | /* |
2299 | * 1 - update the inode | 2300 | * 1 - update the inode |
2300 | * 1 - removing the extents in the range | 2301 | * 1 - removing the extents in the range |
2301 | * 1 - adding the hole extent if no_holes isn't set | 2302 | * 1 - adding the hole extent if no_holes isn't set |
2302 | */ | 2303 | */ |
2303 | rsv_count = no_holes ? 2 : 3; | 2304 | rsv_count = no_holes ? 2 : 3; |
2304 | trans = btrfs_start_transaction(root, rsv_count); | 2305 | trans = btrfs_start_transaction(root, rsv_count); |
2305 | if (IS_ERR(trans)) { | 2306 | if (IS_ERR(trans)) { |
2306 | err = PTR_ERR(trans); | 2307 | err = PTR_ERR(trans); |
2307 | goto out_free; | 2308 | goto out_free; |
2308 | } | 2309 | } |
2309 | 2310 | ||
2310 | ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv, | 2311 | ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv, |
2311 | min_size); | 2312 | min_size); |
2312 | BUG_ON(ret); | 2313 | BUG_ON(ret); |
2313 | trans->block_rsv = rsv; | 2314 | trans->block_rsv = rsv; |
2314 | 2315 | ||
2315 | while (cur_offset < lockend) { | 2316 | while (cur_offset < lockend) { |
2316 | ret = __btrfs_drop_extents(trans, root, inode, path, | 2317 | ret = __btrfs_drop_extents(trans, root, inode, path, |
2317 | cur_offset, lockend + 1, | 2318 | cur_offset, lockend + 1, |
2318 | &drop_end, 1, 0, 0, NULL); | 2319 | &drop_end, 1, 0, 0, NULL); |
2319 | if (ret != -ENOSPC) | 2320 | if (ret != -ENOSPC) |
2320 | break; | 2321 | break; |
2321 | 2322 | ||
2322 | trans->block_rsv = &root->fs_info->trans_block_rsv; | 2323 | trans->block_rsv = &root->fs_info->trans_block_rsv; |
2323 | 2324 | ||
2324 | if (cur_offset < ino_size) { | 2325 | if (cur_offset < ino_size) { |
2325 | ret = fill_holes(trans, inode, path, cur_offset, | 2326 | ret = fill_holes(trans, inode, path, cur_offset, |
2326 | drop_end); | 2327 | drop_end); |
2327 | if (ret) { | 2328 | if (ret) { |
2328 | err = ret; | 2329 | err = ret; |
2329 | break; | 2330 | break; |
2330 | } | 2331 | } |
2331 | } | 2332 | } |
2332 | 2333 | ||
2333 | cur_offset = drop_end; | 2334 | cur_offset = drop_end; |
2334 | 2335 | ||
2335 | ret = btrfs_update_inode(trans, root, inode); | 2336 | ret = btrfs_update_inode(trans, root, inode); |
2336 | if (ret) { | 2337 | if (ret) { |
2337 | err = ret; | 2338 | err = ret; |
2338 | break; | 2339 | break; |
2339 | } | 2340 | } |
2340 | 2341 | ||
2341 | btrfs_end_transaction(trans, root); | 2342 | btrfs_end_transaction(trans, root); |
2342 | btrfs_btree_balance_dirty(root); | 2343 | btrfs_btree_balance_dirty(root); |
2343 | 2344 | ||
2344 | trans = btrfs_start_transaction(root, rsv_count); | 2345 | trans = btrfs_start_transaction(root, rsv_count); |
2345 | if (IS_ERR(trans)) { | 2346 | if (IS_ERR(trans)) { |
2346 | ret = PTR_ERR(trans); | 2347 | ret = PTR_ERR(trans); |
2347 | trans = NULL; | 2348 | trans = NULL; |
2348 | break; | 2349 | break; |
2349 | } | 2350 | } |
2350 | 2351 | ||
2351 | ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, | 2352 | ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, |
2352 | rsv, min_size); | 2353 | rsv, min_size); |
2353 | BUG_ON(ret); /* shouldn't happen */ | 2354 | BUG_ON(ret); /* shouldn't happen */ |
2354 | trans->block_rsv = rsv; | 2355 | trans->block_rsv = rsv; |
2355 | } | 2356 | } |
2356 | 2357 | ||
2357 | if (ret) { | 2358 | if (ret) { |
2358 | err = ret; | 2359 | err = ret; |
2359 | goto out_trans; | 2360 | goto out_trans; |
2360 | } | 2361 | } |
2361 | 2362 | ||
2362 | trans->block_rsv = &root->fs_info->trans_block_rsv; | 2363 | trans->block_rsv = &root->fs_info->trans_block_rsv; |
2363 | /* | 2364 | /* |
2364 | * Don't insert file hole extent item if it's for a range beyond eof | 2365 | * Don't insert file hole extent item if it's for a range beyond eof |
2365 | * (because it's useless) or if it represents a 0 bytes range (when | 2366 | * (because it's useless) or if it represents a 0 bytes range (when |
2366 | * cur_offset == drop_end). | 2367 | * cur_offset == drop_end). |
2367 | */ | 2368 | */ |
2368 | if (cur_offset < ino_size && cur_offset < drop_end) { | 2369 | if (cur_offset < ino_size && cur_offset < drop_end) { |
2369 | ret = fill_holes(trans, inode, path, cur_offset, drop_end); | 2370 | ret = fill_holes(trans, inode, path, cur_offset, drop_end); |
2370 | if (ret) { | 2371 | if (ret) { |
2371 | err = ret; | 2372 | err = ret; |
2372 | goto out_trans; | 2373 | goto out_trans; |
2373 | } | 2374 | } |
2374 | } | 2375 | } |
2375 | 2376 | ||
2376 | out_trans: | 2377 | out_trans: |
2377 | if (!trans) | 2378 | if (!trans) |
2378 | goto out_free; | 2379 | goto out_free; |
2379 | 2380 | ||
2380 | inode_inc_iversion(inode); | 2381 | inode_inc_iversion(inode); |
2381 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 2382 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
2382 | 2383 | ||
2383 | trans->block_rsv = &root->fs_info->trans_block_rsv; | 2384 | trans->block_rsv = &root->fs_info->trans_block_rsv; |
2384 | ret = btrfs_update_inode(trans, root, inode); | 2385 | ret = btrfs_update_inode(trans, root, inode); |
2385 | btrfs_end_transaction(trans, root); | 2386 | btrfs_end_transaction(trans, root); |
2386 | btrfs_btree_balance_dirty(root); | 2387 | btrfs_btree_balance_dirty(root); |
2387 | out_free: | 2388 | out_free: |
2388 | btrfs_free_path(path); | 2389 | btrfs_free_path(path); |
2389 | btrfs_free_block_rsv(root, rsv); | 2390 | btrfs_free_block_rsv(root, rsv); |
2390 | out: | 2391 | out: |
2391 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, | 2392 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, |
2392 | &cached_state, GFP_NOFS); | 2393 | &cached_state, GFP_NOFS); |
2393 | mutex_unlock(&inode->i_mutex); | 2394 | mutex_unlock(&inode->i_mutex); |
2394 | if (ret && !err) | 2395 | if (ret && !err) |
2395 | err = ret; | 2396 | err = ret; |
2396 | return err; | 2397 | return err; |
2397 | } | 2398 | } |
2398 | 2399 | ||
/*
 * btrfs_fallocate - preallocate space for a file (or punch a hole)
 *
 * @file:   file to operate on
 * @mode:   only FALLOC_FL_KEEP_SIZE and FALLOC_FL_PUNCH_HOLE are supported;
 *          any other flag returns -EOPNOTSUPP
 * @offset: byte offset of the requested range
 * @len:    length in bytes of the requested range
 *
 * Hole punching is delegated entirely to btrfs_punch_hole().  For
 * preallocation, the ordering here matters: data space (and optionally
 * qgroup quota) is reserved before taking i_mutex, and the extent range
 * lock is only taken after all ordered IO in the range has completed.
 * Returns 0 on success or a negative errno.
 */
static long btrfs_fallocate(struct file *file, int mode,
			    loff_t offset, loff_t len)
{
	struct inode *inode = file_inode(file);
	struct extent_state *cached_state = NULL;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	u64 cur_offset;
	u64 last_byte;
	u64 alloc_start;
	u64 alloc_end;
	u64 alloc_hint = 0;
	u64 locked_end;
	struct extent_map *em;
	int blocksize = BTRFS_I(inode)->root->sectorsize;
	int ret;

	/* Work on whole sectors: expand the range outward to block bounds. */
	alloc_start = round_down(offset, blocksize);
	alloc_end = round_up(offset + len, blocksize);

	/* Make sure we aren't being give some crap mode */
	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
		return -EOPNOTSUPP;

	if (mode & FALLOC_FL_PUNCH_HOLE)
		return btrfs_punch_hole(inode, offset, len);

	/*
	 * Make sure we have enough space before we do the
	 * allocation.
	 */
	ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start);
	if (ret)
		return ret;
	if (root->fs_info->quota_enabled) {
		ret = btrfs_qgroup_reserve(root, alloc_end - alloc_start);
		if (ret)
			goto out_reserve_fail;
	}

	mutex_lock(&inode->i_mutex);
	ret = inode_newsize_ok(inode, alloc_end);
	if (ret)
		goto out;

	if (alloc_start > inode->i_size) {
		/* Growing the file: fill the gap up to alloc_start first. */
		ret = btrfs_cont_expand(inode, i_size_read(inode),
					alloc_start);
		if (ret)
			goto out;
	} else {
		/*
		 * If we are fallocating from the end of the file onward we
		 * need to zero out the end of the page if i_size lands in the
		 * middle of a page.
		 */
		ret = btrfs_truncate_page(inode, inode->i_size, 0, 0);
		if (ret)
			goto out;
	}

	/*
	 * wait for ordered IO before we have any locks.  We'll loop again
	 * below with the locks held.
	 */
	ret = btrfs_wait_ordered_range(inode, alloc_start,
				       alloc_end - alloc_start);
	if (ret)
		goto out;

	locked_end = alloc_end - 1;
	/*
	 * Take the extent range lock, but back off and wait (unlocked) if an
	 * ordered extent raced in and overlaps the range, then retry.
	 */
	while (1) {
		struct btrfs_ordered_extent *ordered;

		/* the extent lock is ordered inside the running
		 * transaction
		 */
		lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start,
				 locked_end, 0, &cached_state);
		ordered = btrfs_lookup_first_ordered_extent(inode,
							    alloc_end - 1);
		if (ordered &&
		    ordered->file_offset + ordered->len > alloc_start &&
		    ordered->file_offset < alloc_end) {
			btrfs_put_ordered_extent(ordered);
			unlock_extent_cached(&BTRFS_I(inode)->io_tree,
					     alloc_start, locked_end,
					     &cached_state, GFP_NOFS);
			/*
			 * we can't wait on the range with the transaction
			 * running or with the extent lock held
			 */
			ret = btrfs_wait_ordered_range(inode, alloc_start,
						       alloc_end - alloc_start);
			if (ret)
				goto out;
		} else {
			if (ordered)
				btrfs_put_ordered_extent(ordered);
			break;
		}
	}

	/*
	 * Walk the extent maps covering the range; preallocate every hole
	 * (and every region past i_size that is not already preallocated).
	 */
	cur_offset = alloc_start;
	while (1) {
		u64 actual_end;

		em = btrfs_get_extent(inode, NULL, 0, cur_offset,
				      alloc_end - cur_offset, 0);
		if (IS_ERR_OR_NULL(em)) {
			if (!em)
				ret = -ENOMEM;
			else
				ret = PTR_ERR(em);
			break;
		}
		last_byte = min(extent_map_end(em), alloc_end);
		/* actual_end is capped to the caller's (unaligned) range end. */
		actual_end = min_t(u64, extent_map_end(em), offset + len);
		last_byte = ALIGN(last_byte, blocksize);

		if (em->block_start == EXTENT_MAP_HOLE ||
		    (cur_offset >= inode->i_size &&
		     !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
			ret = btrfs_prealloc_file_range(inode, mode, cur_offset,
							last_byte - cur_offset,
							1 << inode->i_blkbits,
							offset + len,
							&alloc_hint);

			if (ret < 0) {
				free_extent_map(em);
				break;
			}
		} else if (actual_end > inode->i_size &&
			   !(mode & FALLOC_FL_KEEP_SIZE)) {
			/*
			 * We didn't need to allocate any more space, but we
			 * still extended the size of the file so we need to
			 * update i_size.
			 */
			inode->i_ctime = CURRENT_TIME;
			i_size_write(inode, actual_end);
			btrfs_ordered_update_i_size(inode, actual_end, NULL);
		}
		free_extent_map(em);

		cur_offset = last_byte;
		if (cur_offset >= alloc_end) {
			ret = 0;
			break;
		}
	}
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
			     &cached_state, GFP_NOFS);
out:
	mutex_unlock(&inode->i_mutex);
	/* Undo the qgroup reservation taken above (if quotas are enabled). */
	if (root->fs_info->quota_enabled)
		btrfs_qgroup_free(root, alloc_end - alloc_start);
out_reserve_fail:
	/* Let go of our reservation. */
	btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
	return ret;
}
2561 | 2562 | ||
/*
 * find_desired_extent - locate the next hole or data region for llseek
 *
 * @inode:  inode being seeked
 * @offset: in: starting offset; out: offset of the found hole/data region
 * @whence: SEEK_HOLE or SEEK_DATA
 *
 * Walks the fiemap-style extent maps from *offset towards i_size, stopping
 * at the first extent matching @whence (a hole/prealloc extent for
 * SEEK_HOLE, a real extent for SEEK_DATA).  The result written to *offset
 * is clamped to i_size.  Returns 0 on success, -ENXIO if SEEK_DATA finds
 * no data before EOF, or a negative errno from extent lookup.
 *
 * NOTE(review): caller is expected to hold i_mutex (btrfs_file_llseek
 * does) — confirm no other callers exist without it.
 */
static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_map *em = NULL;
	struct extent_state *cached_state = NULL;
	u64 lockstart = *offset;
	u64 lockend = i_size_read(inode);
	u64 start = *offset;
	u64 len = i_size_read(inode);
	int ret = 0;

	/* Lock at least one sector, even for tiny or in-tail offsets. */
	lockend = max_t(u64, root->sectorsize, lockend);
	if (lockend <= lockstart)
		lockend = lockstart + root->sectorsize;

	/* lockend is inclusive from here on. */
	lockend--;
	len = lockend - lockstart + 1;

	len = max_t(u64, len, root->sectorsize);
	if (inode->i_size == 0)
		return -ENXIO;

	lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, 0,
			 &cached_state);

	while (start < inode->i_size) {
		em = btrfs_get_extent_fiemap(inode, NULL, 0, start, len, 0);
		if (IS_ERR(em)) {
			ret = PTR_ERR(em);
			em = NULL;
			break;
		}

		/* Prealloc extents read as zeros, so they count as holes. */
		if (whence == SEEK_HOLE &&
		    (em->block_start == EXTENT_MAP_HOLE ||
		     test_bit(EXTENT_FLAG_PREALLOC, &em->flags)))
			break;
		else if (whence == SEEK_DATA &&
			 (em->block_start != EXTENT_MAP_HOLE &&
			  !test_bit(EXTENT_FLAG_PREALLOC, &em->flags)))
			break;

		/* No match; advance past this extent and keep scanning. */
		start = em->start + em->len;
		free_extent_map(em);
		em = NULL;
		cond_resched();
	}
	/* free_extent_map() tolerates NULL (loop may exit with em == NULL). */
	free_extent_map(em);
	if (!ret) {
		if (whence == SEEK_DATA && start >= inode->i_size)
			ret = -ENXIO;
		else
			*offset = min_t(loff_t, start, inode->i_size);
	}
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
			     &cached_state, GFP_NOFS);
	return ret;
}
2620 | 2621 | ||
2621 | static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence) | 2622 | static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence) |
2622 | { | 2623 | { |
2623 | struct inode *inode = file->f_mapping->host; | 2624 | struct inode *inode = file->f_mapping->host; |
2624 | int ret; | 2625 | int ret; |
2625 | 2626 | ||
2626 | mutex_lock(&inode->i_mutex); | 2627 | mutex_lock(&inode->i_mutex); |
2627 | switch (whence) { | 2628 | switch (whence) { |
2628 | case SEEK_END: | 2629 | case SEEK_END: |
2629 | case SEEK_CUR: | 2630 | case SEEK_CUR: |
2630 | offset = generic_file_llseek(file, offset, whence); | 2631 | offset = generic_file_llseek(file, offset, whence); |
2631 | goto out; | 2632 | goto out; |
2632 | case SEEK_DATA: | 2633 | case SEEK_DATA: |
2633 | case SEEK_HOLE: | 2634 | case SEEK_HOLE: |
2634 | if (offset >= i_size_read(inode)) { | 2635 | if (offset >= i_size_read(inode)) { |
2635 | mutex_unlock(&inode->i_mutex); | 2636 | mutex_unlock(&inode->i_mutex); |
2636 | return -ENXIO; | 2637 | return -ENXIO; |
2637 | } | 2638 | } |
2638 | 2639 | ||
2639 | ret = find_desired_extent(inode, &offset, whence); | 2640 | ret = find_desired_extent(inode, &offset, whence); |
2640 | if (ret) { | 2641 | if (ret) { |
2641 | mutex_unlock(&inode->i_mutex); | 2642 | mutex_unlock(&inode->i_mutex); |
2642 | return ret; | 2643 | return ret; |
2643 | } | 2644 | } |
2644 | } | 2645 | } |
2645 | 2646 | ||
2646 | offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); | 2647 | offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); |
2647 | out: | 2648 | out: |
2648 | mutex_unlock(&inode->i_mutex); | 2649 | mutex_unlock(&inode->i_mutex); |
2649 | return offset; | 2650 | return offset; |
2650 | } | 2651 | } |
2651 | 2652 | ||
/*
 * File operations for regular btrfs files.  Reads go through the generic
 * paths; writes, fsync, mmap, fallocate and ioctls use btrfs-specific
 * implementations defined in this file and elsewhere in fs/btrfs/.
 */
const struct file_operations btrfs_file_operations = {
	.llseek		= btrfs_file_llseek,
	.read		= do_sync_read,
	.write		= do_sync_write,
	.aio_read       = generic_file_aio_read,
	.splice_read	= generic_file_splice_read,
	.aio_write	= btrfs_file_aio_write,
	.mmap		= btrfs_file_mmap,
	.open		= generic_file_open,
	.release	= btrfs_release_file,
	.fsync		= btrfs_sync_file,
	.fallocate	= btrfs_fallocate,
	.unlocked_ioctl	= btrfs_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= btrfs_ioctl,
#endif
};
2669 | 2670 | ||
2670 | void btrfs_auto_defrag_exit(void) | 2671 | void btrfs_auto_defrag_exit(void) |
2671 | { | 2672 | { |
2672 | if (btrfs_inode_defrag_cachep) | 2673 | if (btrfs_inode_defrag_cachep) |
2673 | kmem_cache_destroy(btrfs_inode_defrag_cachep); | 2674 | kmem_cache_destroy(btrfs_inode_defrag_cachep); |
2674 | } | 2675 | } |
2675 | 2676 | ||
2676 | int btrfs_auto_defrag_init(void) | 2677 | int btrfs_auto_defrag_init(void) |
2677 | { | 2678 | { |
2678 | btrfs_inode_defrag_cachep = kmem_cache_create("btrfs_inode_defrag", | 2679 | btrfs_inode_defrag_cachep = kmem_cache_create("btrfs_inode_defrag", |
2679 | sizeof(struct inode_defrag), 0, | 2680 | sizeof(struct inode_defrag), 0, |
2680 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, | 2681 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, |
2681 | NULL); | 2682 | NULL); |
2682 | if (!btrfs_inode_defrag_cachep) | 2683 | if (!btrfs_inode_defrag_cachep) |
2683 | return -ENOMEM; | 2684 | return -ENOMEM; |
2684 | 2685 | ||
2685 | return 0; | 2686 | return 0; |
2686 | } | 2687 | } |
2687 | 2688 |