Commit 632dd772fcbde2ba37c0e8983bd38ef4a1eac906

Authored by Arne Jansen
1 parent 8c51032f97

btrfs: reinitialize scrub workers

Scrub starts its worker threads each time a scrub starts and stops them after it
has finished. This patch adds an initialization of the workers before each
start; without it, the workers behave strangely.

Signed-off-by: Arne Jansen <sensille@gmx.net>

Showing 2 changed files with 5 additions and 3 deletions
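
For context, a hedged sketch of the shape of the fix follows; this is the editor's reconstruction, not the verbatim hunks. Before the patch, the scrub worker pool was presumably initialized once at mount time and only started and stopped per scrub; restarting a pool that had been stopped via btrfs_stop_workers() left it in a stale state. The names below (scrub_workers_get(), scrub_workers_refcnt, scrub_lock) follow the btrfs code of this era, and the exact hunk layout is an assumption:

/*
 * Sketch, not the verbatim patch: take a reference on the scrub worker
 * pool, (re)initializing it whenever the first reference is taken.
 * btrfs_stop_workers() at the end of a scrub leaves the pool in a state
 * that btrfs_start_workers() alone cannot safely reuse, hence the
 * btrfs_init_workers() call before each start.
 */
static noinline_for_stack int scrub_workers_get(struct btrfs_root *root)
{
        struct btrfs_fs_info *fs_info = root->fs_info;

        mutex_lock(&fs_info->scrub_lock);
        if (fs_info->scrub_workers_refcnt == 0) {
                btrfs_init_workers(&fs_info->scrub_workers, "scrub",
                                   fs_info->thread_pool_size,
                                   &fs_info->generic_worker);
                fs_info->scrub_workers.idle_thresh = 4;
                btrfs_start_workers(&fs_info->scrub_workers, 1);
        }
        ++fs_info->scrub_workers_refcnt;
        mutex_unlock(&fs_info->scrub_lock);

        return 0;
}

The matching deletion presumably drops the one-time scrub-pool initialization from mount-time setup; the file context shown below is unchanged by the patch.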

/*
 * Copyright (C) 2007 Oracle. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/scatterlist.h>
#include <linux/swap.h>
#include <linux/radix-tree.h>
#include <linux/writeback.h>
#include <linux/buffer_head.h>
#include <linux/workqueue.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/crc32c.h>
#include <linux/slab.h>
#include <linux/migrate.h>
#include <linux/ratelimit.h>
#include <asm/unaligned.h>
#include "compat.h"
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
#include "volumes.h"
#include "print-tree.h"
#include "async-thread.h"
#include "locking.h"
#include "tree-log.h"
#include "free-space-cache.h"
#include "inode-map.h"

static struct extent_io_ops btree_extent_io_ops;
static void end_workqueue_fn(struct btrfs_work *work);
static void free_fs_root(struct btrfs_root *root);
static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
                                    int read_only);
static int btrfs_destroy_ordered_operations(struct btrfs_root *root);
static int btrfs_destroy_ordered_extents(struct btrfs_root *root);
static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
                                      struct btrfs_root *root);
static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t);
static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root);
static int btrfs_destroy_marked_extents(struct btrfs_root *root,
                                        struct extent_io_tree *dirty_pages,
                                        int mark);
static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
                                       struct extent_io_tree *pinned_extents);
static int btrfs_cleanup_transaction(struct btrfs_root *root);

/*
 * end_io_wq structs are used to do processing in task context when an IO is
 * complete. This is used during reads to verify checksums, and it is used
 * by writes to insert metadata for new file extents after IO is complete.
 */
struct end_io_wq {
        struct bio *bio;
        bio_end_io_t *end_io;
        void *private;
        struct btrfs_fs_info *info;
        int error;
        int metadata;
        struct list_head list;
        struct btrfs_work work;
};

/*
 * async submit bios are used to offload expensive checksumming
 * onto the worker threads. They checksum file and metadata bios
 * just before they are sent down the IO stack.
 */
struct async_submit_bio {
        struct inode *inode;
        struct bio *bio;
        struct list_head list;
        extent_submit_bio_hook_t *submit_bio_start;
        extent_submit_bio_hook_t *submit_bio_done;
        int rw;
        int mirror_num;
        unsigned long bio_flags;
        /*
         * bio_offset is optional, can be used if the pages in the bio
         * can't tell us where in the file the bio should go
         */
        u64 bio_offset;
        struct btrfs_work work;
};

/* These are used to set the lockdep class on the extent buffer locks.
 * The class is set by the readpage_end_io_hook after the buffer has
 * passed csum validation but before the pages are unlocked.
 *
 * The lockdep class is also set by btrfs_init_new_buffer on freshly
 * allocated blocks.
 *
 * The class is based on the level in the tree block, which allows lockdep
 * to know that lower nodes nest inside the locks of higher nodes.
 *
 * We also add a check to make sure the highest level of the tree is
 * the same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this
 * code needs update as well.
 */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
# if BTRFS_MAX_LEVEL != 8
# error
# endif
static struct lock_class_key btrfs_eb_class[BTRFS_MAX_LEVEL + 1];
static const char *btrfs_eb_name[BTRFS_MAX_LEVEL + 1] = {
        /* leaf */
        "btrfs-extent-00",
        "btrfs-extent-01",
        "btrfs-extent-02",
        "btrfs-extent-03",
        "btrfs-extent-04",
        "btrfs-extent-05",
        "btrfs-extent-06",
        "btrfs-extent-07",
        /* highest possible level */
        "btrfs-extent-08",
};
#endif

/*
 * extents on the btree inode are pretty simple, there's one extent
 * that covers the entire device
 */
static struct extent_map *btree_get_extent(struct inode *inode,
                struct page *page, size_t pg_offset, u64 start, u64 len,
                int create)
{
        struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
        struct extent_map *em;
        int ret;

        read_lock(&em_tree->lock);
        em = lookup_extent_mapping(em_tree, start, len);
        if (em) {
                em->bdev =
                        BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
                read_unlock(&em_tree->lock);
                goto out;
        }
        read_unlock(&em_tree->lock);

        em = alloc_extent_map();
        if (!em) {
                em = ERR_PTR(-ENOMEM);
                goto out;
        }
        em->start = 0;
        em->len = (u64)-1;
        em->block_len = (u64)-1;
        em->block_start = 0;
        em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;

        write_lock(&em_tree->lock);
        ret = add_extent_mapping(em_tree, em);
        if (ret == -EEXIST) {
                u64 failed_start = em->start;
                u64 failed_len = em->len;

                free_extent_map(em);
                em = lookup_extent_mapping(em_tree, start, len);
                if (em) {
                        ret = 0;
                } else {
                        em = lookup_extent_mapping(em_tree, failed_start,
                                                   failed_len);
                        ret = -EIO;
                }
        } else if (ret) {
                free_extent_map(em);
                em = NULL;
        }
        write_unlock(&em_tree->lock);

        if (ret)
                em = ERR_PTR(ret);
out:
        return em;
}

u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len)
{
        return crc32c(seed, data, len);
}

void btrfs_csum_final(u32 crc, char *result)
{
        put_unaligned_le32(~crc, result);
}

/*
 * compute the csum for a btree block, and either verify it or write it
 * into the csum field of the block.
 */
static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
                           int verify)
{
        u16 csum_size =
                btrfs_super_csum_size(&root->fs_info->super_copy);
        char *result = NULL;
        unsigned long len;
        unsigned long cur_len;
        unsigned long offset = BTRFS_CSUM_SIZE;
        char *map_token = NULL;
        char *kaddr;
        unsigned long map_start;
        unsigned long map_len;
        int err;
        u32 crc = ~(u32)0;
        unsigned long inline_result;

        len = buf->len - offset;
        while (len > 0) {
                err = map_private_extent_buffer(buf, offset, 32,
                                                &map_token, &kaddr,
                                                &map_start, &map_len, KM_USER0);
                if (err)
                        return 1;
                cur_len = min(len, map_len - (offset - map_start));
                crc = btrfs_csum_data(root, kaddr + offset - map_start,
                                      crc, cur_len);
                len -= cur_len;
                offset += cur_len;
                unmap_extent_buffer(buf, map_token, KM_USER0);
        }
        if (csum_size > sizeof(inline_result)) {
                result = kzalloc(csum_size * sizeof(char), GFP_NOFS);
                if (!result)
                        return 1;
        } else {
                result = (char *)&inline_result;
        }

        btrfs_csum_final(crc, result);

        if (verify) {
                if (memcmp_extent_buffer(buf, result, 0, csum_size)) {
                        u32 val;
                        u32 found = 0;
                        memcpy(&found, result, csum_size);

                        read_extent_buffer(buf, &val, 0, csum_size);
                        printk_ratelimited(KERN_INFO "btrfs: %s checksum verify "
                                           "failed on %llu wanted %X found %X "
                                           "level %d\n",
                                           root->fs_info->sb->s_id,
                                           (unsigned long long)buf->start, val, found,
                                           btrfs_header_level(buf));
                        if (result != (char *)&inline_result)
                                kfree(result);
                        return 1;
                }
        } else {
                write_extent_buffer(buf, result, 0, csum_size);
        }
        if (result != (char *)&inline_result)
                kfree(result);
        return 0;
}

/*
 * we can't consider a given block up to date unless the transid of the
 * block matches the transid in the parent node's pointer. This is how we
 * detect blocks that either didn't get written at all or got written
 * in the wrong place.
 */
static int verify_parent_transid(struct extent_io_tree *io_tree,
                                 struct extent_buffer *eb, u64 parent_transid)
{
        struct extent_state *cached_state = NULL;
        int ret;

        if (!parent_transid || btrfs_header_generation(eb) == parent_transid)
                return 0;

        lock_extent_bits(io_tree, eb->start, eb->start + eb->len - 1,
                         0, &cached_state, GFP_NOFS);
        if (extent_buffer_uptodate(io_tree, eb, cached_state) &&
            btrfs_header_generation(eb) == parent_transid) {
                ret = 0;
                goto out;
        }
        printk_ratelimited("parent transid verify failed on %llu wanted %llu "
                           "found %llu\n",
                           (unsigned long long)eb->start,
                           (unsigned long long)parent_transid,
                           (unsigned long long)btrfs_header_generation(eb));
        ret = 1;
        clear_extent_buffer_uptodate(io_tree, eb, &cached_state);
out:
        unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1,
                             &cached_state, GFP_NOFS);
        return ret;
}

/*
 * helper to read a given tree block, doing retries as required when
 * the checksums don't match and we have alternate mirrors to try.
 */
static int btree_read_extent_buffer_pages(struct btrfs_root *root,
                                          struct extent_buffer *eb,
                                          u64 start, u64 parent_transid)
{
        struct extent_io_tree *io_tree;
        int ret;
        int num_copies = 0;
        int mirror_num = 0;

        clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
        io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree;
        while (1) {
                ret = read_extent_buffer_pages(io_tree, eb, start, 1,
                                               btree_get_extent, mirror_num);
                if (!ret &&
                    !verify_parent_transid(io_tree, eb, parent_transid))
                        return ret;

                /*
                 * This buffer's crc is fine, but its contents are corrupted, so
                 * there is no reason to read the other copies, they won't be
                 * any less wrong.
                 */
                if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags))
                        return ret;

                num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
                                              eb->start, eb->len);
                if (num_copies == 1)
                        return ret;

                mirror_num++;
                if (mirror_num > num_copies)
                        return ret;
        }
        return -EIO;
}

/*
 * checksum a dirty tree block before IO. This has extra checks to make sure
 * we only fill in the checksum field in the first page of a multi-page block
 */

static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
{
        struct extent_io_tree *tree;
        u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
        u64 found_start;
        unsigned long len;
        struct extent_buffer *eb;
        int ret;

        tree = &BTRFS_I(page->mapping->host)->io_tree;

        if (page->private == EXTENT_PAGE_PRIVATE) {
                WARN_ON(1);
                goto out;
        }
        if (!page->private) {
                WARN_ON(1);
                goto out;
        }
        len = page->private >> 2;
        WARN_ON(len == 0);

        eb = alloc_extent_buffer(tree, start, len, page);
        if (eb == NULL) {
                WARN_ON(1);
                goto out;
        }
        ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE,
                                             btrfs_header_generation(eb));
        BUG_ON(ret);
        WARN_ON(!btrfs_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN));

        found_start = btrfs_header_bytenr(eb);
        if (found_start != start) {
                WARN_ON(1);
                goto err;
        }
        if (eb->first_page != page) {
                WARN_ON(1);
                goto err;
        }
        if (!PageUptodate(page)) {
                WARN_ON(1);
                goto err;
        }
        csum_tree_block(root, eb, 0);
err:
        free_extent_buffer(eb);
out:
        return 0;
}

static int check_tree_block_fsid(struct btrfs_root *root,
                                 struct extent_buffer *eb)
{
        struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
        u8 fsid[BTRFS_UUID_SIZE];
        int ret = 1;

        read_extent_buffer(eb, fsid, (unsigned long)btrfs_header_fsid(eb),
                           BTRFS_FSID_SIZE);
        while (fs_devices) {
                if (!memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE)) {
                        ret = 0;
                        break;
                }
                fs_devices = fs_devices->seed;
        }
        return ret;
}

#define CORRUPT(reason, eb, root, slot)                                \
        printk(KERN_CRIT "btrfs: corrupt leaf, %s: block=%llu,"        \
               "root=%llu, slot=%d\n", reason,                         \
               (unsigned long long)btrfs_header_bytenr(eb),            \
               (unsigned long long)root->objectid, slot)

static noinline int check_leaf(struct btrfs_root *root,
                               struct extent_buffer *leaf)
{
        struct btrfs_key key;
        struct btrfs_key leaf_key;
        u32 nritems = btrfs_header_nritems(leaf);
        int slot;

        if (nritems == 0)
                return 0;

        /* Check the 0 item */
        if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) !=
            BTRFS_LEAF_DATA_SIZE(root)) {
                CORRUPT("invalid item offset size pair", leaf, root, 0);
                return -EIO;
        }

        /*
         * Check to make sure each item's keys are in the correct order and
         * their offsets make sense. We only have to loop through nritems-1
         * because we check the current slot against the next slot, which
         * verifies the next slot's offset+size makes sense and that the
         * current slot's offset is correct.
         */
        for (slot = 0; slot < nritems - 1; slot++) {
                btrfs_item_key_to_cpu(leaf, &leaf_key, slot);
                btrfs_item_key_to_cpu(leaf, &key, slot + 1);

                /* Make sure the keys are in the right order */
                if (btrfs_comp_cpu_keys(&leaf_key, &key) >= 0) {
                        CORRUPT("bad key order", leaf, root, slot);
                        return -EIO;
                }

                /*
                 * Make sure the offset and ends are right, remember that the
                 * item data starts at the end of the leaf and grows towards the
                 * front.
                 */
                if (btrfs_item_offset_nr(leaf, slot) !=
                    btrfs_item_end_nr(leaf, slot + 1)) {
                        CORRUPT("slot offset bad", leaf, root, slot);
                        return -EIO;
                }

                /*
                 * Check to make sure that we don't point outside of the leaf,
                 * just in case all the items are consistent with each other,
                 * but all point outside of the leaf.
                 */
                if (btrfs_item_end_nr(leaf, slot) >
                    BTRFS_LEAF_DATA_SIZE(root)) {
                        CORRUPT("slot end outside of leaf", leaf, root, slot);
                        return -EIO;
                }
        }

        return 0;
}

#ifdef CONFIG_DEBUG_LOCK_ALLOC
void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level)
{
        lockdep_set_class_and_name(&eb->lock,
                                   &btrfs_eb_class[level],
                                   btrfs_eb_name[level]);
}
#endif

static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
                                      struct extent_state *state)
{
        struct extent_io_tree *tree;
        u64 found_start;
        int found_level;
        unsigned long len;
        struct extent_buffer *eb;
        struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
        int ret = 0;

        tree = &BTRFS_I(page->mapping->host)->io_tree;
        if (page->private == EXTENT_PAGE_PRIVATE)
                goto out;
        if (!page->private)
                goto out;

        len = page->private >> 2;
        WARN_ON(len == 0);

        eb = alloc_extent_buffer(tree, start, len, page);
        if (eb == NULL) {
                ret = -EIO;
                goto out;
        }

        found_start = btrfs_header_bytenr(eb);
        if (found_start != start) {
                printk_ratelimited(KERN_INFO "btrfs bad tree block start "
                                   "%llu %llu\n",
                                   (unsigned long long)found_start,
                                   (unsigned long long)eb->start);
                ret = -EIO;
                goto err;
        }
        if (eb->first_page != page) {
                printk(KERN_INFO "btrfs bad first page %lu %lu\n",
                       eb->first_page->index, page->index);
                WARN_ON(1);
                ret = -EIO;
                goto err;
        }
        if (check_tree_block_fsid(root, eb)) {
                printk_ratelimited(KERN_INFO "btrfs bad fsid on block %llu\n",
                                   (unsigned long long)eb->start);
                ret = -EIO;
                goto err;
        }
        found_level = btrfs_header_level(eb);

        btrfs_set_buffer_lockdep_class(eb, found_level);

        ret = csum_tree_block(root, eb, 1);
        if (ret) {
                ret = -EIO;
                goto err;
        }

        /*
         * If this is a leaf block and it is corrupt, set the corrupt bit so
         * that we don't try and read the other copies of this block, just
         * return -EIO.
         */
        if (found_level == 0 && check_leaf(root, eb)) {
                set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
                ret = -EIO;
        }

        end = min_t(u64, eb->len, PAGE_CACHE_SIZE);
        end = eb->start + end - 1;
err:
        free_extent_buffer(eb);
out:
        return ret;
}

static void end_workqueue_bio(struct bio *bio, int err)
{
        struct end_io_wq *end_io_wq = bio->bi_private;
        struct btrfs_fs_info *fs_info;

        fs_info = end_io_wq->info;
        end_io_wq->error = err;
        end_io_wq->work.func = end_workqueue_fn;
        end_io_wq->work.flags = 0;

        if (bio->bi_rw & REQ_WRITE) {
                if (end_io_wq->metadata == 1)
                        btrfs_queue_worker(&fs_info->endio_meta_write_workers,
                                           &end_io_wq->work);
                else if (end_io_wq->metadata == 2)
                        btrfs_queue_worker(&fs_info->endio_freespace_worker,
                                           &end_io_wq->work);
                else
                        btrfs_queue_worker(&fs_info->endio_write_workers,
                                           &end_io_wq->work);
        } else {
                if (end_io_wq->metadata)
                        btrfs_queue_worker(&fs_info->endio_meta_workers,
                                           &end_io_wq->work);
                else
                        btrfs_queue_worker(&fs_info->endio_workers,
                                           &end_io_wq->work);
        }
}

/*
 * For the metadata arg you want
 *
 * 0 - if data
 * 1 - if normal metadata
 * 2 - if writing to the free space cache area
 */
int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
                        int metadata)
{
        struct end_io_wq *end_io_wq;
        end_io_wq = kmalloc(sizeof(*end_io_wq), GFP_NOFS);
        if (!end_io_wq)
                return -ENOMEM;

        end_io_wq->private = bio->bi_private;
        end_io_wq->end_io = bio->bi_end_io;
        end_io_wq->info = info;
        end_io_wq->error = 0;
        end_io_wq->bio = bio;
        end_io_wq->metadata = metadata;

        bio->bi_private = end_io_wq;
        bio->bi_end_io = end_workqueue_bio;
        return 0;
}

unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info)
{
        unsigned long limit = min_t(unsigned long,
                                    info->workers.max_workers,
                                    info->fs_devices->open_devices);
        return 256 * limit;
}

static void run_one_async_start(struct btrfs_work *work)
{
        struct async_submit_bio *async;

        async = container_of(work, struct async_submit_bio, work);
        async->submit_bio_start(async->inode, async->rw, async->bio,
                                async->mirror_num, async->bio_flags,
                                async->bio_offset);
}

static void run_one_async_done(struct btrfs_work *work)
{
        struct btrfs_fs_info *fs_info;
        struct async_submit_bio *async;
        int limit;

        async = container_of(work, struct async_submit_bio, work);
        fs_info = BTRFS_I(async->inode)->root->fs_info;

        limit = btrfs_async_submit_limit(fs_info);
        limit = limit * 2 / 3;

        atomic_dec(&fs_info->nr_async_submits);

        if (atomic_read(&fs_info->nr_async_submits) < limit &&
            waitqueue_active(&fs_info->async_submit_wait))
                wake_up(&fs_info->async_submit_wait);

        async->submit_bio_done(async->inode, async->rw, async->bio,
                               async->mirror_num, async->bio_flags,
                               async->bio_offset);
}

static void run_one_async_free(struct btrfs_work *work)
{
        struct async_submit_bio *async;

        async = container_of(work, struct async_submit_bio, work);
        kfree(async);
}

int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
                        int rw, struct bio *bio, int mirror_num,
                        unsigned long bio_flags,
                        u64 bio_offset,
                        extent_submit_bio_hook_t *submit_bio_start,
                        extent_submit_bio_hook_t *submit_bio_done)
{
        struct async_submit_bio *async;

        async = kmalloc(sizeof(*async), GFP_NOFS);
        if (!async)
                return -ENOMEM;

        async->inode = inode;
        async->rw = rw;
        async->bio = bio;
        async->mirror_num = mirror_num;
        async->submit_bio_start = submit_bio_start;
        async->submit_bio_done = submit_bio_done;

        async->work.func = run_one_async_start;
        async->work.ordered_func = run_one_async_done;
        async->work.ordered_free = run_one_async_free;

        async->work.flags = 0;
        async->bio_flags = bio_flags;
        async->bio_offset = bio_offset;

        atomic_inc(&fs_info->nr_async_submits);

        if (rw & REQ_SYNC)
                btrfs_set_work_high_prio(&async->work);

        btrfs_queue_worker(&fs_info->workers, &async->work);

        while (atomic_read(&fs_info->async_submit_draining) &&
               atomic_read(&fs_info->nr_async_submits)) {
                wait_event(fs_info->async_submit_wait,
                           (atomic_read(&fs_info->nr_async_submits) == 0));
        }

        return 0;
}

static int btree_csum_one_bio(struct bio *bio)
{
        struct bio_vec *bvec = bio->bi_io_vec;
        int bio_index = 0;
        struct btrfs_root *root;

        WARN_ON(bio->bi_vcnt <= 0);
        while (bio_index < bio->bi_vcnt) {
                root = BTRFS_I(bvec->bv_page->mapping->host)->root;
                csum_dirty_buffer(root, bvec->bv_page);
                bio_index++;
                bvec++;
        }
        return 0;
}

static int __btree_submit_bio_start(struct inode *inode, int rw,
                                    struct bio *bio, int mirror_num,
                                    unsigned long bio_flags,
                                    u64 bio_offset)
{
        /*
         * when we're called for a write, we're already in the async
         * submission context. Just jump into btrfs_map_bio
         */
        btree_csum_one_bio(bio);
        return 0;
}

static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
                                   int mirror_num, unsigned long bio_flags,
                                   u64 bio_offset)
{
        /*
         * when we're called for a write, we're already in the async
         * submission context. Just jump into btrfs_map_bio
         */
        return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1);
}

static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
                                 int mirror_num, unsigned long bio_flags,
                                 u64 bio_offset)
{
        int ret;

        ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info,
                                  bio, 1);
        BUG_ON(ret);

        if (!(rw & REQ_WRITE)) {
                /*
                 * called for a read, do the setup so that checksum validation
                 * can happen in the async kernel threads
                 */
                return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
                                     mirror_num, 0);
        }

        /*
         * kthread helpers are used to submit writes so that checksumming
         * can happen in parallel across all CPUs
         */
        return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
                                   inode, rw, bio, mirror_num, 0,
                                   bio_offset,
                                   __btree_submit_bio_start,
                                   __btree_submit_bio_done);
}

#ifdef CONFIG_MIGRATION
static int btree_migratepage(struct address_space *mapping,
                             struct page *newpage, struct page *page)
{
        /*
         * we can't safely write a btree page from here,
         * we haven't done the locking hook
         */
        if (PageDirty(page))
                return -EAGAIN;
        /*
         * Buffers may be managed in a filesystem specific way.
         * We must have no buffers or drop them.
         */
        if (page_has_private(page) &&
            !try_to_release_page(page, GFP_KERNEL))
                return -EAGAIN;
        return migrate_page(mapping, newpage, page);
}
#endif

static int btree_writepage(struct page *page, struct writeback_control *wbc)
{
        struct extent_io_tree *tree;
        struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
        struct extent_buffer *eb;
        int was_dirty;

        tree = &BTRFS_I(page->mapping->host)->io_tree;
        if (!(current->flags & PF_MEMALLOC)) {
                return extent_write_full_page(tree, page,
                                              btree_get_extent, wbc);
        }

        redirty_page_for_writepage(wbc, page);
        eb = btrfs_find_tree_block(root, page_offset(page), PAGE_CACHE_SIZE);
        WARN_ON(!eb);

        was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
        if (!was_dirty) {
                spin_lock(&root->fs_info->delalloc_lock);
                root->fs_info->dirty_metadata_bytes += PAGE_CACHE_SIZE;
                spin_unlock(&root->fs_info->delalloc_lock);
        }
        free_extent_buffer(eb);

        unlock_page(page);
        return 0;
}

static int btree_writepages(struct address_space *mapping,
                            struct writeback_control *wbc)
{
        struct extent_io_tree *tree;
        tree = &BTRFS_I(mapping->host)->io_tree;
        if (wbc->sync_mode == WB_SYNC_NONE) {
                struct btrfs_root *root = BTRFS_I(mapping->host)->root;
                u64 num_dirty;
                unsigned long thresh = 32 * 1024 * 1024;

                if (wbc->for_kupdate)
                        return 0;

                /* this is a bit racy, but that's ok */
                num_dirty = root->fs_info->dirty_metadata_bytes;
                if (num_dirty < thresh)
                        return 0;
        }
        return extent_writepages(tree, mapping, btree_get_extent, wbc);
}

static int btree_readpage(struct file *file, struct page *page)
{
        struct extent_io_tree *tree;
        tree = &BTRFS_I(page->mapping->host)->io_tree;
        return extent_read_full_page(tree, page, btree_get_extent);
}

static int btree_releasepage(struct page *page, gfp_t gfp_flags)
{
        struct extent_io_tree *tree;
        struct extent_map_tree *map;
        int ret;

        if (PageWriteback(page) || PageDirty(page))
                return 0;

        tree = &BTRFS_I(page->mapping->host)->io_tree;
        map = &BTRFS_I(page->mapping->host)->extent_tree;

        ret = try_release_extent_state(map, tree, page, gfp_flags);
        if (!ret)
                return 0;

        ret = try_release_extent_buffer(tree, page);
        if (ret == 1) {
                ClearPagePrivate(page);
                set_page_private(page, 0);
                page_cache_release(page);
        }

        return ret;
}

static void btree_invalidatepage(struct page *page, unsigned long offset)
{
        struct extent_io_tree *tree;
        tree = &BTRFS_I(page->mapping->host)->io_tree;
        extent_invalidatepage(tree, page, offset);
        btree_releasepage(page, GFP_NOFS);
        if (PagePrivate(page)) {
                printk(KERN_WARNING "btrfs warning page private not zero "
                       "on page %llu\n", (unsigned long long)page_offset(page));
                ClearPagePrivate(page);
                set_page_private(page, 0);
                page_cache_release(page);
        }
}

static const struct address_space_operations btree_aops = {
        .readpage       = btree_readpage,
        .writepage      = btree_writepage,
        .writepages     = btree_writepages,
        .releasepage    = btree_releasepage,
        .invalidatepage = btree_invalidatepage,
#ifdef CONFIG_MIGRATION
        .migratepage    = btree_migratepage,
#endif
};

932 int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, 932 int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize,
933 u64 parent_transid) 933 u64 parent_transid)
934 { 934 {
935 struct extent_buffer *buf = NULL; 935 struct extent_buffer *buf = NULL;
936 struct inode *btree_inode = root->fs_info->btree_inode; 936 struct inode *btree_inode = root->fs_info->btree_inode;
937 int ret = 0; 937 int ret = 0;
938 938
939 buf = btrfs_find_create_tree_block(root, bytenr, blocksize); 939 buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
940 if (!buf) 940 if (!buf)
941 return 0; 941 return 0;
942 read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, 942 read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
943 buf, 0, 0, btree_get_extent, 0); 943 buf, 0, 0, btree_get_extent, 0);
944 free_extent_buffer(buf); 944 free_extent_buffer(buf);
945 return ret; 945 return ret;
946 } 946 }
947 947
948 struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, 948 struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
949 u64 bytenr, u32 blocksize) 949 u64 bytenr, u32 blocksize)
950 { 950 {
951 struct inode *btree_inode = root->fs_info->btree_inode; 951 struct inode *btree_inode = root->fs_info->btree_inode;
952 struct extent_buffer *eb; 952 struct extent_buffer *eb;
953 eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree, 953 eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree,
954 bytenr, blocksize); 954 bytenr, blocksize);
955 return eb; 955 return eb;
956 } 956 }
957 957
958 struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, 958 struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
959 u64 bytenr, u32 blocksize) 959 u64 bytenr, u32 blocksize)
960 { 960 {
961 struct inode *btree_inode = root->fs_info->btree_inode; 961 struct inode *btree_inode = root->fs_info->btree_inode;
962 struct extent_buffer *eb; 962 struct extent_buffer *eb;
963 963
964 eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree, 964 eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree,
965 bytenr, blocksize, NULL); 965 bytenr, blocksize, NULL);
966 return eb; 966 return eb;
967 } 967 }
968 968
969 969
970 int btrfs_write_tree_block(struct extent_buffer *buf) 970 int btrfs_write_tree_block(struct extent_buffer *buf)
971 { 971 {
972 return filemap_fdatawrite_range(buf->first_page->mapping, buf->start, 972 return filemap_fdatawrite_range(buf->first_page->mapping, buf->start,
973 buf->start + buf->len - 1); 973 buf->start + buf->len - 1);
974 } 974 }
975 975
976 int btrfs_wait_tree_block_writeback(struct extent_buffer *buf) 976 int btrfs_wait_tree_block_writeback(struct extent_buffer *buf)
977 { 977 {
978 return filemap_fdatawait_range(buf->first_page->mapping, 978 return filemap_fdatawait_range(buf->first_page->mapping,
979 buf->start, buf->start + buf->len - 1); 979 buf->start, buf->start + buf->len - 1);
980 } 980 }
981 981
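/*
 * read a tree block and verify it against parent_transid; on success
 * the uptodate bit is set. Note that a buffer is returned even when
 * the read fails, so callers must check uptodateness themselves.
 */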
982 struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, 982 struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
983 u32 blocksize, u64 parent_transid) 983 u32 blocksize, u64 parent_transid)
984 { 984 {
985 struct extent_buffer *buf = NULL; 985 struct extent_buffer *buf = NULL;
986 int ret; 986 int ret;
987 987
988 buf = btrfs_find_create_tree_block(root, bytenr, blocksize); 988 buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
989 if (!buf) 989 if (!buf)
990 return NULL; 990 return NULL;
991 991
992 ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); 992 ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
993 993
994 if (ret == 0) 994 if (ret == 0)
995 set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags); 995 set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags);
996 return buf; 996 return buf;
997 997
998 } 998 }
999 999
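/*
 * undirty a tree block that belongs to the running transaction: clear
 * its dirty bit, keep dirty_metadata_bytes in sync and strip the dirty
 * state from the underlying pages. Blocks dirtied by an older
 * transaction are left untouched.
 */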
1000 int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, 1000 int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1001 struct extent_buffer *buf) 1001 struct extent_buffer *buf)
1002 { 1002 {
1003 struct inode *btree_inode = root->fs_info->btree_inode; 1003 struct inode *btree_inode = root->fs_info->btree_inode;
1004 if (btrfs_header_generation(buf) == 1004 if (btrfs_header_generation(buf) ==
1005 root->fs_info->running_transaction->transid) { 1005 root->fs_info->running_transaction->transid) {
1006 btrfs_assert_tree_locked(buf); 1006 btrfs_assert_tree_locked(buf);
1007 1007
1008 if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) { 1008 if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) {
1009 spin_lock(&root->fs_info->delalloc_lock); 1009 spin_lock(&root->fs_info->delalloc_lock);
1010 if (root->fs_info->dirty_metadata_bytes >= buf->len) 1010 if (root->fs_info->dirty_metadata_bytes >= buf->len)
1011 root->fs_info->dirty_metadata_bytes -= buf->len; 1011 root->fs_info->dirty_metadata_bytes -= buf->len;
1012 else 1012 else
1013 WARN_ON(1); 1013 WARN_ON(1);
1014 spin_unlock(&root->fs_info->delalloc_lock); 1014 spin_unlock(&root->fs_info->delalloc_lock);
1015 } 1015 }
1016 1016
1017 /* ugh, clear_extent_buffer_dirty needs to lock the page */ 1017 /* ugh, clear_extent_buffer_dirty needs to lock the page */
1018 btrfs_set_lock_blocking(buf); 1018 btrfs_set_lock_blocking(buf);
1019 clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, 1019 clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree,
1020 buf); 1020 buf);
1021 } 1021 }
1022 return 0; 1022 return 0;
1023 } 1023 }
1024 1024
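/*
 * initialize all the in-memory state of a btrfs_root -- block sizes,
 * flags, locks, lists and wait queues, plus the log tree bookkeeping.
 * No disk I/O happens here.
 */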
1025 static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, 1025 static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
1026 u32 stripesize, struct btrfs_root *root, 1026 u32 stripesize, struct btrfs_root *root,
1027 struct btrfs_fs_info *fs_info, 1027 struct btrfs_fs_info *fs_info,
1028 u64 objectid) 1028 u64 objectid)
1029 { 1029 {
1030 root->node = NULL; 1030 root->node = NULL;
1031 root->commit_root = NULL; 1031 root->commit_root = NULL;
1032 root->sectorsize = sectorsize; 1032 root->sectorsize = sectorsize;
1033 root->nodesize = nodesize; 1033 root->nodesize = nodesize;
1034 root->leafsize = leafsize; 1034 root->leafsize = leafsize;
1035 root->stripesize = stripesize; 1035 root->stripesize = stripesize;
1036 root->ref_cows = 0; 1036 root->ref_cows = 0;
1037 root->track_dirty = 0; 1037 root->track_dirty = 0;
1038 root->in_radix = 0; 1038 root->in_radix = 0;
1039 root->orphan_item_inserted = 0; 1039 root->orphan_item_inserted = 0;
1040 root->orphan_cleanup_state = 0; 1040 root->orphan_cleanup_state = 0;
1041 1041
1042 root->fs_info = fs_info; 1042 root->fs_info = fs_info;
1043 root->objectid = objectid; 1043 root->objectid = objectid;
1044 root->last_trans = 0; 1044 root->last_trans = 0;
1045 root->highest_objectid = 0; 1045 root->highest_objectid = 0;
1046 root->name = NULL; 1046 root->name = NULL;
1047 root->in_sysfs = 0; 1047 root->in_sysfs = 0;
1048 root->inode_tree = RB_ROOT; 1048 root->inode_tree = RB_ROOT;
1049 INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC); 1049 INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC);
1050 root->block_rsv = NULL; 1050 root->block_rsv = NULL;
1051 root->orphan_block_rsv = NULL; 1051 root->orphan_block_rsv = NULL;
1052 1052
1053 INIT_LIST_HEAD(&root->dirty_list); 1053 INIT_LIST_HEAD(&root->dirty_list);
1054 INIT_LIST_HEAD(&root->orphan_list); 1054 INIT_LIST_HEAD(&root->orphan_list);
1055 INIT_LIST_HEAD(&root->root_list); 1055 INIT_LIST_HEAD(&root->root_list);
1056 spin_lock_init(&root->orphan_lock); 1056 spin_lock_init(&root->orphan_lock);
1057 spin_lock_init(&root->inode_lock); 1057 spin_lock_init(&root->inode_lock);
1058 spin_lock_init(&root->accounting_lock); 1058 spin_lock_init(&root->accounting_lock);
1059 mutex_init(&root->objectid_mutex); 1059 mutex_init(&root->objectid_mutex);
1060 mutex_init(&root->log_mutex); 1060 mutex_init(&root->log_mutex);
1061 init_waitqueue_head(&root->log_writer_wait); 1061 init_waitqueue_head(&root->log_writer_wait);
1062 init_waitqueue_head(&root->log_commit_wait[0]); 1062 init_waitqueue_head(&root->log_commit_wait[0]);
1063 init_waitqueue_head(&root->log_commit_wait[1]); 1063 init_waitqueue_head(&root->log_commit_wait[1]);
1064 atomic_set(&root->log_commit[0], 0); 1064 atomic_set(&root->log_commit[0], 0);
1065 atomic_set(&root->log_commit[1], 0); 1065 atomic_set(&root->log_commit[1], 0);
1066 atomic_set(&root->log_writers, 0); 1066 atomic_set(&root->log_writers, 0);
1067 root->log_batch = 0; 1067 root->log_batch = 0;
1068 root->log_transid = 0; 1068 root->log_transid = 0;
1069 root->last_log_commit = 0; 1069 root->last_log_commit = 0;
1070 extent_io_tree_init(&root->dirty_log_pages, 1070 extent_io_tree_init(&root->dirty_log_pages,
1071 fs_info->btree_inode->i_mapping); 1071 fs_info->btree_inode->i_mapping);
1072 1072
1073 memset(&root->root_key, 0, sizeof(root->root_key)); 1073 memset(&root->root_key, 0, sizeof(root->root_key));
1074 memset(&root->root_item, 0, sizeof(root->root_item)); 1074 memset(&root->root_item, 0, sizeof(root->root_item));
1075 memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); 1075 memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
1076 memset(&root->root_kobj, 0, sizeof(root->root_kobj)); 1076 memset(&root->root_kobj, 0, sizeof(root->root_kobj));
1077 root->defrag_trans_start = fs_info->generation; 1077 root->defrag_trans_start = fs_info->generation;
1078 init_completion(&root->kobj_unregister); 1078 init_completion(&root->kobj_unregister);
1079 root->defrag_running = 0; 1079 root->defrag_running = 0;
1080 root->root_key.objectid = objectid; 1080 root->root_key.objectid = objectid;
1081 root->anon_super.s_root = NULL; 1081 root->anon_super.s_root = NULL;
1082 root->anon_super.s_dev = 0; 1082 root->anon_super.s_dev = 0;
1083 INIT_LIST_HEAD(&root->anon_super.s_list); 1083 INIT_LIST_HEAD(&root->anon_super.s_list);
1084 INIT_LIST_HEAD(&root->anon_super.s_instances); 1084 INIT_LIST_HEAD(&root->anon_super.s_instances);
1085 init_rwsem(&root->anon_super.s_umount); 1085 init_rwsem(&root->anon_super.s_umount);
1086 1086
1087 return 0; 1087 return 0;
1088 } 1088 }
1089 1089
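/*
 * look up a root item in the tree root by objectid and read its node
 * from disk: -ENOENT if the root item is missing, -EIO if the node
 * cannot be read or fails the generation check.
 */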
1090 static int find_and_setup_root(struct btrfs_root *tree_root, 1090 static int find_and_setup_root(struct btrfs_root *tree_root,
1091 struct btrfs_fs_info *fs_info, 1091 struct btrfs_fs_info *fs_info,
1092 u64 objectid, 1092 u64 objectid,
1093 struct btrfs_root *root) 1093 struct btrfs_root *root)
1094 { 1094 {
1095 int ret; 1095 int ret;
1096 u32 blocksize; 1096 u32 blocksize;
1097 u64 generation; 1097 u64 generation;
1098 1098
1099 __setup_root(tree_root->nodesize, tree_root->leafsize, 1099 __setup_root(tree_root->nodesize, tree_root->leafsize,
1100 tree_root->sectorsize, tree_root->stripesize, 1100 tree_root->sectorsize, tree_root->stripesize,
1101 root, fs_info, objectid); 1101 root, fs_info, objectid);
1102 ret = btrfs_find_last_root(tree_root, objectid, 1102 ret = btrfs_find_last_root(tree_root, objectid,
1103 &root->root_item, &root->root_key); 1103 &root->root_item, &root->root_key);
1104 if (ret > 0) 1104 if (ret > 0)
1105 return -ENOENT; 1105 return -ENOENT;
1106 BUG_ON(ret); 1106 BUG_ON(ret);
1107 1107
1108 generation = btrfs_root_generation(&root->root_item); 1108 generation = btrfs_root_generation(&root->root_item);
1109 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); 1109 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
1110 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), 1110 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
1111 blocksize, generation); 1111 blocksize, generation);
1112 if (!root->node || !btrfs_buffer_uptodate(root->node, generation)) { 1112 if (!root->node || !btrfs_buffer_uptodate(root->node, generation)) {
1113 free_extent_buffer(root->node); 1113 free_extent_buffer(root->node);
1114 return -EIO; 1114 return -EIO;
1115 } 1115 }
1116 root->commit_root = btrfs_root_node(root); 1116 root->commit_root = btrfs_root_node(root);
1117 return 0; 1117 return 0;
1118 } 1118 }
1119 1119
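/*
 * allocate an in-memory log tree root together with its empty first
 * leaf. The leaf header is stamped with the current transid and the
 * log tree objectid, and the buffer is left dirty for the caller.
 */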
1120 static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, 1120 static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
1121 struct btrfs_fs_info *fs_info) 1121 struct btrfs_fs_info *fs_info)
1122 { 1122 {
1123 struct btrfs_root *root; 1123 struct btrfs_root *root;
1124 struct btrfs_root *tree_root = fs_info->tree_root; 1124 struct btrfs_root *tree_root = fs_info->tree_root;
1125 struct extent_buffer *leaf; 1125 struct extent_buffer *leaf;
1126 1126
1127 root = kzalloc(sizeof(*root), GFP_NOFS); 1127 root = kzalloc(sizeof(*root), GFP_NOFS);
1128 if (!root) 1128 if (!root)
1129 return ERR_PTR(-ENOMEM); 1129 return ERR_PTR(-ENOMEM);
1130 1130
1131 __setup_root(tree_root->nodesize, tree_root->leafsize, 1131 __setup_root(tree_root->nodesize, tree_root->leafsize,
1132 tree_root->sectorsize, tree_root->stripesize, 1132 tree_root->sectorsize, tree_root->stripesize,
1133 root, fs_info, BTRFS_TREE_LOG_OBJECTID); 1133 root, fs_info, BTRFS_TREE_LOG_OBJECTID);
1134 1134
1135 root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID; 1135 root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID;
1136 root->root_key.type = BTRFS_ROOT_ITEM_KEY; 1136 root->root_key.type = BTRFS_ROOT_ITEM_KEY;
1137 root->root_key.offset = BTRFS_TREE_LOG_OBJECTID; 1137 root->root_key.offset = BTRFS_TREE_LOG_OBJECTID;
1138 /* 1138 /*
1139 * log trees do not get reference counted because they go away 1139 * log trees do not get reference counted because they go away
1140 * before a real commit is actually done. They do store pointers 1140 * before a real commit is actually done. They do store pointers
1141 * to file data extents, and those reference counts still get 1141 * to file data extents, and those reference counts still get
1142 * updated (along with back refs to the log tree). 1142 * updated (along with back refs to the log tree).
1143 */ 1143 */
1144 root->ref_cows = 0; 1144 root->ref_cows = 0;
1145 1145
1146 leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, 1146 leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
1147 BTRFS_TREE_LOG_OBJECTID, NULL, 0, 0, 0); 1147 BTRFS_TREE_LOG_OBJECTID, NULL, 0, 0, 0);
1148 if (IS_ERR(leaf)) { 1148 if (IS_ERR(leaf)) {
1149 kfree(root); 1149 kfree(root);
1150 return ERR_CAST(leaf); 1150 return ERR_CAST(leaf);
1151 } 1151 }
1152 1152
1153 memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header)); 1153 memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header));
1154 btrfs_set_header_bytenr(leaf, leaf->start); 1154 btrfs_set_header_bytenr(leaf, leaf->start);
1155 btrfs_set_header_generation(leaf, trans->transid); 1155 btrfs_set_header_generation(leaf, trans->transid);
1156 btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV); 1156 btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
1157 btrfs_set_header_owner(leaf, BTRFS_TREE_LOG_OBJECTID); 1157 btrfs_set_header_owner(leaf, BTRFS_TREE_LOG_OBJECTID);
1158 root->node = leaf; 1158 root->node = leaf;
1159 1159
1160 write_extent_buffer(root->node, root->fs_info->fsid, 1160 write_extent_buffer(root->node, root->fs_info->fsid,
1161 (unsigned long)btrfs_header_fsid(root->node), 1161 (unsigned long)btrfs_header_fsid(root->node),
1162 BTRFS_FSID_SIZE); 1162 BTRFS_FSID_SIZE);
1163 btrfs_mark_buffer_dirty(root->node); 1163 btrfs_mark_buffer_dirty(root->node);
1164 btrfs_tree_unlock(root->node); 1164 btrfs_tree_unlock(root->node);
1165 return root; 1165 return root;
1166 } 1166 }
1167 1167
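/*
 * btrfs_init_log_root_tree creates the single per-fs log root tree;
 * btrfs_add_log_tree below creates the per-subvolume log root and
 * fakes up just enough of an inode item for log replay.
 */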
1168 int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, 1168 int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
1169 struct btrfs_fs_info *fs_info) 1169 struct btrfs_fs_info *fs_info)
1170 { 1170 {
1171 struct btrfs_root *log_root; 1171 struct btrfs_root *log_root;
1172 1172
1173 log_root = alloc_log_tree(trans, fs_info); 1173 log_root = alloc_log_tree(trans, fs_info);
1174 if (IS_ERR(log_root)) 1174 if (IS_ERR(log_root))
1175 return PTR_ERR(log_root); 1175 return PTR_ERR(log_root);
1176 WARN_ON(fs_info->log_root_tree); 1176 WARN_ON(fs_info->log_root_tree);
1177 fs_info->log_root_tree = log_root; 1177 fs_info->log_root_tree = log_root;
1178 return 0; 1178 return 0;
1179 } 1179 }
1180 1180
1181 int btrfs_add_log_tree(struct btrfs_trans_handle *trans, 1181 int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
1182 struct btrfs_root *root) 1182 struct btrfs_root *root)
1183 { 1183 {
1184 struct btrfs_root *log_root; 1184 struct btrfs_root *log_root;
1185 struct btrfs_inode_item *inode_item; 1185 struct btrfs_inode_item *inode_item;
1186 1186
1187 log_root = alloc_log_tree(trans, root->fs_info); 1187 log_root = alloc_log_tree(trans, root->fs_info);
1188 if (IS_ERR(log_root)) 1188 if (IS_ERR(log_root))
1189 return PTR_ERR(log_root); 1189 return PTR_ERR(log_root);
1190 1190
1191 log_root->last_trans = trans->transid; 1191 log_root->last_trans = trans->transid;
1192 log_root->root_key.offset = root->root_key.objectid; 1192 log_root->root_key.offset = root->root_key.objectid;
1193 1193
1194 inode_item = &log_root->root_item.inode; 1194 inode_item = &log_root->root_item.inode;
1195 inode_item->generation = cpu_to_le64(1); 1195 inode_item->generation = cpu_to_le64(1);
1196 inode_item->size = cpu_to_le64(3); 1196 inode_item->size = cpu_to_le64(3);
1197 inode_item->nlink = cpu_to_le32(1); 1197 inode_item->nlink = cpu_to_le32(1);
1198 inode_item->nbytes = cpu_to_le64(root->leafsize); 1198 inode_item->nbytes = cpu_to_le64(root->leafsize);
1199 inode_item->mode = cpu_to_le32(S_IFDIR | 0755); 1199 inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
1200 1200
1201 btrfs_set_root_node(&log_root->root_item, log_root->node); 1201 btrfs_set_root_node(&log_root->root_item, log_root->node);
1202 1202
1203 WARN_ON(root->log_root); 1203 WARN_ON(root->log_root);
1204 root->log_root = log_root; 1204 root->log_root = log_root;
1205 root->log_transid = 0; 1205 root->log_transid = 0;
1206 root->last_log_commit = 0; 1206 root->last_log_commit = 0;
1207 return 0; 1207 return 0;
1208 } 1208 }
1209 1209
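/*
 * read a root straight from disk, bypassing the fs_roots radix tree.
 * location->offset == -1 means "take the latest root item"; any other
 * offset is looked up as an exact key.
 */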
1210 struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, 1210 struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
1211 struct btrfs_key *location) 1211 struct btrfs_key *location)
1212 { 1212 {
1213 struct btrfs_root *root; 1213 struct btrfs_root *root;
1214 struct btrfs_fs_info *fs_info = tree_root->fs_info; 1214 struct btrfs_fs_info *fs_info = tree_root->fs_info;
1215 struct btrfs_path *path; 1215 struct btrfs_path *path;
1216 struct extent_buffer *l; 1216 struct extent_buffer *l;
1217 u64 generation; 1217 u64 generation;
1218 u32 blocksize; 1218 u32 blocksize;
1219 int ret = 0; 1219 int ret = 0;
1220 1220
1221 root = kzalloc(sizeof(*root), GFP_NOFS); 1221 root = kzalloc(sizeof(*root), GFP_NOFS);
1222 if (!root) 1222 if (!root)
1223 return ERR_PTR(-ENOMEM); 1223 return ERR_PTR(-ENOMEM);
1224 if (location->offset == (u64)-1) { 1224 if (location->offset == (u64)-1) {
1225 ret = find_and_setup_root(tree_root, fs_info, 1225 ret = find_and_setup_root(tree_root, fs_info,
1226 location->objectid, root); 1226 location->objectid, root);
1227 if (ret) { 1227 if (ret) {
1228 kfree(root); 1228 kfree(root);
1229 return ERR_PTR(ret); 1229 return ERR_PTR(ret);
1230 } 1230 }
1231 goto out; 1231 goto out;
1232 } 1232 }
1233 1233
1234 __setup_root(tree_root->nodesize, tree_root->leafsize, 1234 __setup_root(tree_root->nodesize, tree_root->leafsize,
1235 tree_root->sectorsize, tree_root->stripesize, 1235 tree_root->sectorsize, tree_root->stripesize,
1236 root, fs_info, location->objectid); 1236 root, fs_info, location->objectid);
1237 1237
1238 path = btrfs_alloc_path(); 1238 path = btrfs_alloc_path();
1239 if (!path) { 1239 if (!path) {
1240 kfree(root); 1240 kfree(root);
1241 return ERR_PTR(-ENOMEM); 1241 return ERR_PTR(-ENOMEM);
1242 } 1242 }
1243 ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); 1243 ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
1244 if (ret == 0) { 1244 if (ret == 0) {
1245 l = path->nodes[0]; 1245 l = path->nodes[0];
1246 read_extent_buffer(l, &root->root_item, 1246 read_extent_buffer(l, &root->root_item,
1247 btrfs_item_ptr_offset(l, path->slots[0]), 1247 btrfs_item_ptr_offset(l, path->slots[0]),
1248 sizeof(root->root_item)); 1248 sizeof(root->root_item));
1249 memcpy(&root->root_key, location, sizeof(*location)); 1249 memcpy(&root->root_key, location, sizeof(*location));
1250 } 1250 }
1251 btrfs_free_path(path); 1251 btrfs_free_path(path);
1252 if (ret) { 1252 if (ret) {
1253 kfree(root); 1253 kfree(root);
1254 if (ret > 0) 1254 if (ret > 0)
1255 ret = -ENOENT; 1255 ret = -ENOENT;
1256 return ERR_PTR(ret); 1256 return ERR_PTR(ret);
1257 } 1257 }
1258 1258
1259 generation = btrfs_root_generation(&root->root_item); 1259 generation = btrfs_root_generation(&root->root_item);
1260 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); 1260 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
1261 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), 1261 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
1262 blocksize, generation); 1262 blocksize, generation);
1263 root->commit_root = btrfs_root_node(root); 1263 root->commit_root = btrfs_root_node(root);
1264 BUG_ON(!root->node); 1264 BUG_ON(!root->node);
1265 out: 1265 out:
1266 if (location->objectid != BTRFS_TREE_LOG_OBJECTID) { 1266 if (location->objectid != BTRFS_TREE_LOG_OBJECTID) {
1267 root->ref_cows = 1; 1267 root->ref_cows = 1;
1268 btrfs_check_and_init_root_item(&root->root_item); 1268 btrfs_check_and_init_root_item(&root->root_item);
1269 } 1269 }
1270 1270
1271 return root; 1271 return root;
1272 } 1272 }
1273 1273
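/*
 * the cached variant: the well-known roots are returned directly,
 * everything else is looked up in the radix tree first and only read
 * from disk on a miss. Losing the insertion race (-EEXIST) frees the
 * freshly read root and retries the lookup.
 */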
1274 struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, 1274 struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
1275 struct btrfs_key *location) 1275 struct btrfs_key *location)
1276 { 1276 {
1277 struct btrfs_root *root; 1277 struct btrfs_root *root;
1278 int ret; 1278 int ret;
1279 1279
1280 if (location->objectid == BTRFS_ROOT_TREE_OBJECTID) 1280 if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
1281 return fs_info->tree_root; 1281 return fs_info->tree_root;
1282 if (location->objectid == BTRFS_EXTENT_TREE_OBJECTID) 1282 if (location->objectid == BTRFS_EXTENT_TREE_OBJECTID)
1283 return fs_info->extent_root; 1283 return fs_info->extent_root;
1284 if (location->objectid == BTRFS_CHUNK_TREE_OBJECTID) 1284 if (location->objectid == BTRFS_CHUNK_TREE_OBJECTID)
1285 return fs_info->chunk_root; 1285 return fs_info->chunk_root;
1286 if (location->objectid == BTRFS_DEV_TREE_OBJECTID) 1286 if (location->objectid == BTRFS_DEV_TREE_OBJECTID)
1287 return fs_info->dev_root; 1287 return fs_info->dev_root;
1288 if (location->objectid == BTRFS_CSUM_TREE_OBJECTID) 1288 if (location->objectid == BTRFS_CSUM_TREE_OBJECTID)
1289 return fs_info->csum_root; 1289 return fs_info->csum_root;
1290 again: 1290 again:
1291 spin_lock(&fs_info->fs_roots_radix_lock); 1291 spin_lock(&fs_info->fs_roots_radix_lock);
1292 root = radix_tree_lookup(&fs_info->fs_roots_radix, 1292 root = radix_tree_lookup(&fs_info->fs_roots_radix,
1293 (unsigned long)location->objectid); 1293 (unsigned long)location->objectid);
1294 spin_unlock(&fs_info->fs_roots_radix_lock); 1294 spin_unlock(&fs_info->fs_roots_radix_lock);
1295 if (root) 1295 if (root)
1296 return root; 1296 return root;
1297 1297
1298 root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location); 1298 root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location);
1299 if (IS_ERR(root)) 1299 if (IS_ERR(root))
1300 return root; 1300 return root;
1301 1301
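/*
 * note: if either of the two allocations below fails, we reach the
 * fail label with 'ret' still uninitialized; initializing ret to
 * -ENOMEM first would arguably be safer (a hypothetical fix, not part
 * of this commit).
 */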
1302 root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS); 1302 root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS);
1303 if (!root->free_ino_ctl) 1303 if (!root->free_ino_ctl)
1304 goto fail; 1304 goto fail;
1305 root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned), 1305 root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned),
1306 GFP_NOFS); 1306 GFP_NOFS);
1307 if (!root->free_ino_pinned) 1307 if (!root->free_ino_pinned)
1308 goto fail; 1308 goto fail;
1309 1309
1310 btrfs_init_free_ino_ctl(root); 1310 btrfs_init_free_ino_ctl(root);
1311 mutex_init(&root->fs_commit_mutex); 1311 mutex_init(&root->fs_commit_mutex);
1312 spin_lock_init(&root->cache_lock); 1312 spin_lock_init(&root->cache_lock);
1313 init_waitqueue_head(&root->cache_wait); 1313 init_waitqueue_head(&root->cache_wait);
1314 1314
1315 set_anon_super(&root->anon_super, NULL); 1315 set_anon_super(&root->anon_super, NULL);
1316 1316
1317 if (btrfs_root_refs(&root->root_item) == 0) { 1317 if (btrfs_root_refs(&root->root_item) == 0) {
1318 ret = -ENOENT; 1318 ret = -ENOENT;
1319 goto fail; 1319 goto fail;
1320 } 1320 }
1321 1321
1322 ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid); 1322 ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid);
1323 if (ret < 0) 1323 if (ret < 0)
1324 goto fail; 1324 goto fail;
1325 if (ret == 0) 1325 if (ret == 0)
1326 root->orphan_item_inserted = 1; 1326 root->orphan_item_inserted = 1;
1327 1327
1328 ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); 1328 ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
1329 if (ret) 1329 if (ret)
1330 goto fail; 1330 goto fail;
1331 1331
1332 spin_lock(&fs_info->fs_roots_radix_lock); 1332 spin_lock(&fs_info->fs_roots_radix_lock);
1333 ret = radix_tree_insert(&fs_info->fs_roots_radix, 1333 ret = radix_tree_insert(&fs_info->fs_roots_radix,
1334 (unsigned long)root->root_key.objectid, 1334 (unsigned long)root->root_key.objectid,
1335 root); 1335 root);
1336 if (ret == 0) 1336 if (ret == 0)
1337 root->in_radix = 1; 1337 root->in_radix = 1;
1338 1338
1339 spin_unlock(&fs_info->fs_roots_radix_lock); 1339 spin_unlock(&fs_info->fs_roots_radix_lock);
1340 radix_tree_preload_end(); 1340 radix_tree_preload_end();
1341 if (ret) { 1341 if (ret) {
1342 if (ret == -EEXIST) { 1342 if (ret == -EEXIST) {
1343 free_fs_root(root); 1343 free_fs_root(root);
1344 goto again; 1344 goto again;
1345 } 1345 }
1346 goto fail; 1346 goto fail;
1347 } 1347 }
1348 1348
1349 ret = btrfs_find_dead_roots(fs_info->tree_root, 1349 ret = btrfs_find_dead_roots(fs_info->tree_root,
1350 root->root_key.objectid); 1350 root->root_key.objectid);
1351 WARN_ON(ret); 1351 WARN_ON(ret);
1352 return root; 1352 return root;
1353 fail: 1353 fail:
1354 free_fs_root(root); 1354 free_fs_root(root);
1355 return ERR_PTR(ret); 1355 return ERR_PTR(ret);
1356 } 1356 }
1357 1357
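/*
 * congestion callback for the btrfs bdi: report congestion as soon as
 * any backing device of the filesystem is congested.
 */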
1358 static int btrfs_congested_fn(void *congested_data, int bdi_bits) 1358 static int btrfs_congested_fn(void *congested_data, int bdi_bits)
1359 { 1359 {
1360 struct btrfs_fs_info *info = (struct btrfs_fs_info *)congested_data; 1360 struct btrfs_fs_info *info = (struct btrfs_fs_info *)congested_data;
1361 int ret = 0; 1361 int ret = 0;
1362 struct btrfs_device *device; 1362 struct btrfs_device *device;
1363 struct backing_dev_info *bdi; 1363 struct backing_dev_info *bdi;
1364 1364
1365 rcu_read_lock(); 1365 rcu_read_lock();
1366 list_for_each_entry_rcu(device, &info->fs_devices->devices, dev_list) { 1366 list_for_each_entry_rcu(device, &info->fs_devices->devices, dev_list) {
1367 if (!device->bdev) 1367 if (!device->bdev)
1368 continue; 1368 continue;
1369 bdi = blk_get_backing_dev_info(device->bdev); 1369 bdi = blk_get_backing_dev_info(device->bdev);
1370 if (bdi && bdi_congested(bdi, bdi_bits)) { 1370 if (bdi && bdi_congested(bdi, bdi_bits)) {
1371 ret = 1; 1371 ret = 1;
1372 break; 1372 break;
1373 } 1373 }
1374 } 1374 }
1375 rcu_read_unlock(); 1375 rcu_read_unlock();
1376 return ret; 1376 return ret;
1377 } 1377 }
1378 1378
1379 /* 1379 /*
1380 * If this fails, caller must call bdi_destroy() to get rid of the 1380 * If this fails, caller must call bdi_destroy() to get rid of the
1381 * bdi again. 1381 * bdi again.
1382 */ 1382 */
1383 static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) 1383 static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
1384 { 1384 {
1385 int err; 1385 int err;
1386 1386
1387 bdi->capabilities = BDI_CAP_MAP_COPY; 1387 bdi->capabilities = BDI_CAP_MAP_COPY;
1388 err = bdi_setup_and_register(bdi, "btrfs", BDI_CAP_MAP_COPY); 1388 err = bdi_setup_and_register(bdi, "btrfs", BDI_CAP_MAP_COPY);
1389 if (err) 1389 if (err)
1390 return err; 1390 return err;
1391 1391
1392 bdi->ra_pages = default_backing_dev_info.ra_pages; 1392 bdi->ra_pages = default_backing_dev_info.ra_pages;
1393 bdi->congested_fn = btrfs_congested_fn; 1393 bdi->congested_fn = btrfs_congested_fn;
1394 bdi->congested_data = info; 1394 bdi->congested_data = info;
1395 return 0; 1395 return 0;
1396 } 1396 }
1397 1397
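/*
 * check whether the tree block a read bio belongs to is fully in the
 * page cache and uptodate, so that the whole block can be checksummed
 * in one pass; only relevant when a block spans more than one page.
 */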
1398 static int bio_ready_for_csum(struct bio *bio) 1398 static int bio_ready_for_csum(struct bio *bio)
1399 { 1399 {
1400 u64 length = 0; 1400 u64 length = 0;
1401 u64 buf_len = 0; 1401 u64 buf_len = 0;
1402 u64 start = 0; 1402 u64 start = 0;
1403 struct page *page; 1403 struct page *page;
1404 struct extent_io_tree *io_tree = NULL; 1404 struct extent_io_tree *io_tree = NULL;
1405 struct bio_vec *bvec; 1405 struct bio_vec *bvec;
1406 int i; 1406 int i;
1407 int ret; 1407 int ret;
1408 1408
1409 bio_for_each_segment(bvec, bio, i) { 1409 bio_for_each_segment(bvec, bio, i) {
1410 page = bvec->bv_page; 1410 page = bvec->bv_page;
1411 if (page->private == EXTENT_PAGE_PRIVATE) { 1411 if (page->private == EXTENT_PAGE_PRIVATE) {
1412 length += bvec->bv_len; 1412 length += bvec->bv_len;
1413 continue; 1413 continue;
1414 } 1414 }
1415 if (!page->private) { 1415 if (!page->private) {
1416 length += bvec->bv_len; 1416 length += bvec->bv_len;
1417 continue; 1417 continue;
1418 } 1418 }
1419 length = bvec->bv_len; 1419 length = bvec->bv_len;
1420 buf_len = page->private >> 2; 1420 buf_len = page->private >> 2;
1421 start = page_offset(page) + bvec->bv_offset; 1421 start = page_offset(page) + bvec->bv_offset;
1422 io_tree = &BTRFS_I(page->mapping->host)->io_tree; 1422 io_tree = &BTRFS_I(page->mapping->host)->io_tree;
1423 } 1423 }
1424 /* are we fully contained in this bio? */ 1424 /* are we fully contained in this bio? */
1425 if (buf_len <= length) 1425 if (buf_len <= length)
1426 return 1; 1426 return 1;
1427 1427
1428 ret = extent_range_uptodate(io_tree, start + length, 1428 ret = extent_range_uptodate(io_tree, start + length,
1429 start + buf_len - 1); 1429 start + buf_len - 1);
1430 return ret; 1430 return ret;
1431 } 1431 }
1432 1432
1433 /* 1433 /*
1434 * called by the kthread helper functions to finally call the bio end_io 1434 * called by the kthread helper functions to finally call the bio end_io
1435 * functions. This is where read checksum verification actually happens 1435 * functions. This is where read checksum verification actually happens
1436 */ 1436 */
1437 static void end_workqueue_fn(struct btrfs_work *work) 1437 static void end_workqueue_fn(struct btrfs_work *work)
1438 { 1438 {
1439 struct bio *bio; 1439 struct bio *bio;
1440 struct end_io_wq *end_io_wq; 1440 struct end_io_wq *end_io_wq;
1441 struct btrfs_fs_info *fs_info; 1441 struct btrfs_fs_info *fs_info;
1442 int error; 1442 int error;
1443 1443
1444 end_io_wq = container_of(work, struct end_io_wq, work); 1444 end_io_wq = container_of(work, struct end_io_wq, work);
1445 bio = end_io_wq->bio; 1445 bio = end_io_wq->bio;
1446 fs_info = end_io_wq->info; 1446 fs_info = end_io_wq->info;
1447 1447
1448 /* metadata bio reads are special because the whole tree block must 1448 /* metadata bio reads are special because the whole tree block must
1449 * be checksummed at once. This makes sure the entire block is in 1449 * be checksummed at once. This makes sure the entire block is in
1450 * RAM and up to date before trying to verify things. For 1450 * RAM and up to date before trying to verify things. For
1451 * blocksize <= pagesize, it is basically a no-op 1451 * blocksize <= pagesize, it is basically a no-op
1452 */ 1452 */
1453 if (!(bio->bi_rw & REQ_WRITE) && end_io_wq->metadata && 1453 if (!(bio->bi_rw & REQ_WRITE) && end_io_wq->metadata &&
1454 !bio_ready_for_csum(bio)) { 1454 !bio_ready_for_csum(bio)) {
1455 btrfs_queue_worker(&fs_info->endio_meta_workers, 1455 btrfs_queue_worker(&fs_info->endio_meta_workers,
1456 &end_io_wq->work); 1456 &end_io_wq->work);
1457 return; 1457 return;
1458 } 1458 }
1459 error = end_io_wq->error; 1459 error = end_io_wq->error;
1460 bio->bi_private = end_io_wq->private; 1460 bio->bi_private = end_io_wq->private;
1461 bio->bi_end_io = end_io_wq->end_io; 1461 bio->bi_end_io = end_io_wq->end_io;
1462 kfree(end_io_wq); 1462 kfree(end_io_wq);
1463 bio_endio(bio, error); 1463 bio_endio(bio, error);
1464 } 1464 }
1465 1465
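/*
 * background kthread: whenever it can grab the cleaner mutex on a
 * writable fs it runs delayed iputs, drops old snapshots and kicks the
 * inode defragger, then goes back to sleep until woken.
 */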
1466 static int cleaner_kthread(void *arg) 1466 static int cleaner_kthread(void *arg)
1467 { 1467 {
1468 struct btrfs_root *root = arg; 1468 struct btrfs_root *root = arg;
1469 1469
1470 do { 1470 do {
1471 vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); 1471 vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
1472 1472
1473 if (!(root->fs_info->sb->s_flags & MS_RDONLY) && 1473 if (!(root->fs_info->sb->s_flags & MS_RDONLY) &&
1474 mutex_trylock(&root->fs_info->cleaner_mutex)) { 1474 mutex_trylock(&root->fs_info->cleaner_mutex)) {
1475 btrfs_run_delayed_iputs(root); 1475 btrfs_run_delayed_iputs(root);
1476 btrfs_clean_old_snapshots(root); 1476 btrfs_clean_old_snapshots(root);
1477 mutex_unlock(&root->fs_info->cleaner_mutex); 1477 mutex_unlock(&root->fs_info->cleaner_mutex);
1478 btrfs_run_defrag_inodes(root->fs_info); 1478 btrfs_run_defrag_inodes(root->fs_info);
1479 } 1479 }
1480 1480
1481 if (freezing(current)) { 1481 if (freezing(current)) {
1482 refrigerator(); 1482 refrigerator();
1483 } else { 1483 } else {
1484 set_current_state(TASK_INTERRUPTIBLE); 1484 set_current_state(TASK_INTERRUPTIBLE);
1485 if (!kthread_should_stop()) 1485 if (!kthread_should_stop())
1486 schedule(); 1486 schedule();
1487 __set_current_state(TASK_RUNNING); 1487 __set_current_state(TASK_RUNNING);
1488 } 1488 }
1489 } while (!kthread_should_stop()); 1489 } while (!kthread_should_stop());
1490 return 0; 1490 return 0;
1491 } 1491 }
1492 1492
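/*
 * background kthread that commits the running transaction once it is
 * roughly 30 seconds old; a young, unblocked transaction is left alone
 * and rechecked after a 5 second nap.
 */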
1493 static int transaction_kthread(void *arg) 1493 static int transaction_kthread(void *arg)
1494 { 1494 {
1495 struct btrfs_root *root = arg; 1495 struct btrfs_root *root = arg;
1496 struct btrfs_trans_handle *trans; 1496 struct btrfs_trans_handle *trans;
1497 struct btrfs_transaction *cur; 1497 struct btrfs_transaction *cur;
1498 u64 transid; 1498 u64 transid;
1499 unsigned long now; 1499 unsigned long now;
1500 unsigned long delay; 1500 unsigned long delay;
1501 int ret; 1501 int ret;
1502 1502
1503 do { 1503 do {
1504 delay = HZ * 30; 1504 delay = HZ * 30;
1505 vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); 1505 vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
1506 mutex_lock(&root->fs_info->transaction_kthread_mutex); 1506 mutex_lock(&root->fs_info->transaction_kthread_mutex);
1507 1507
1508 spin_lock(&root->fs_info->trans_lock); 1508 spin_lock(&root->fs_info->trans_lock);
1509 cur = root->fs_info->running_transaction; 1509 cur = root->fs_info->running_transaction;
1510 if (!cur) { 1510 if (!cur) {
1511 spin_unlock(&root->fs_info->trans_lock); 1511 spin_unlock(&root->fs_info->trans_lock);
1512 goto sleep; 1512 goto sleep;
1513 } 1513 }
1514 1514
1515 now = get_seconds(); 1515 now = get_seconds();
1516 if (!cur->blocked && 1516 if (!cur->blocked &&
1517 (now < cur->start_time || now - cur->start_time < 30)) { 1517 (now < cur->start_time || now - cur->start_time < 30)) {
1518 spin_unlock(&root->fs_info->trans_lock); 1518 spin_unlock(&root->fs_info->trans_lock);
1519 delay = HZ * 5; 1519 delay = HZ * 5;
1520 goto sleep; 1520 goto sleep;
1521 } 1521 }
1522 transid = cur->transid; 1522 transid = cur->transid;
1523 spin_unlock(&root->fs_info->trans_lock); 1523 spin_unlock(&root->fs_info->trans_lock);
1524 1524
1525 trans = btrfs_join_transaction(root); 1525 trans = btrfs_join_transaction(root);
1526 BUG_ON(IS_ERR(trans)); 1526 BUG_ON(IS_ERR(trans));
1527 if (transid == trans->transid) { 1527 if (transid == trans->transid) {
1528 ret = btrfs_commit_transaction(trans, root); 1528 ret = btrfs_commit_transaction(trans, root);
1529 BUG_ON(ret); 1529 BUG_ON(ret);
1530 } else { 1530 } else {
1531 btrfs_end_transaction(trans, root); 1531 btrfs_end_transaction(trans, root);
1532 } 1532 }
1533 sleep: 1533 sleep:
1534 wake_up_process(root->fs_info->cleaner_kthread); 1534 wake_up_process(root->fs_info->cleaner_kthread);
1535 mutex_unlock(&root->fs_info->transaction_kthread_mutex); 1535 mutex_unlock(&root->fs_info->transaction_kthread_mutex);
1536 1536
1537 if (freezing(current)) { 1537 if (freezing(current)) {
1538 refrigerator(); 1538 refrigerator();
1539 } else { 1539 } else {
1540 set_current_state(TASK_INTERRUPTIBLE); 1540 set_current_state(TASK_INTERRUPTIBLE);
1541 if (!kthread_should_stop() && 1541 if (!kthread_should_stop() &&
1542 !btrfs_transaction_blocked(root->fs_info)) 1542 !btrfs_transaction_blocked(root->fs_info))
1543 schedule_timeout(delay); 1543 schedule_timeout(delay);
1544 __set_current_state(TASK_RUNNING); 1544 __set_current_state(TASK_RUNNING);
1545 } 1545 }
1546 } while (!kthread_should_stop()); 1546 } while (!kthread_should_stop());
1547 return 0; 1547 return 0;
1548 } 1548 }
1549 1549
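/*
 * open_ctree does the heavy lifting of mounting: set up fs_info and
 * the core in-memory roots, read and sanity-check the super block,
 * start the worker thread pools and then read the chunk tree and the
 * remaining roots from disk.
 */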
1550 struct btrfs_root *open_ctree(struct super_block *sb, 1550 struct btrfs_root *open_ctree(struct super_block *sb,
1551 struct btrfs_fs_devices *fs_devices, 1551 struct btrfs_fs_devices *fs_devices,
1552 char *options) 1552 char *options)
1553 { 1553 {
1554 u32 sectorsize; 1554 u32 sectorsize;
1555 u32 nodesize; 1555 u32 nodesize;
1556 u32 leafsize; 1556 u32 leafsize;
1557 u32 blocksize; 1557 u32 blocksize;
1558 u32 stripesize; 1558 u32 stripesize;
1559 u64 generation; 1559 u64 generation;
1560 u64 features; 1560 u64 features;
1561 struct btrfs_key location; 1561 struct btrfs_key location;
1562 struct buffer_head *bh; 1562 struct buffer_head *bh;
1563 struct btrfs_root *extent_root = kzalloc(sizeof(struct btrfs_root), 1563 struct btrfs_root *extent_root = kzalloc(sizeof(struct btrfs_root),
1564 GFP_NOFS); 1564 GFP_NOFS);
1565 struct btrfs_root *csum_root = kzalloc(sizeof(struct btrfs_root), 1565 struct btrfs_root *csum_root = kzalloc(sizeof(struct btrfs_root),
1566 GFP_NOFS); 1566 GFP_NOFS);
1567 struct btrfs_root *tree_root = btrfs_sb(sb); 1567 struct btrfs_root *tree_root = btrfs_sb(sb);
1568 struct btrfs_fs_info *fs_info = NULL; 1568 struct btrfs_fs_info *fs_info = NULL;
1569 struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root), 1569 struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root),
1570 GFP_NOFS); 1570 GFP_NOFS);
1571 struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root), 1571 struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root),
1572 GFP_NOFS); 1572 GFP_NOFS);
1573 struct btrfs_root *log_tree_root; 1573 struct btrfs_root *log_tree_root;
1574 1574
1575 int ret; 1575 int ret;
1576 int err = -EINVAL; 1576 int err = -EINVAL;
1577 1577
1578 struct btrfs_super_block *disk_super; 1578 struct btrfs_super_block *disk_super;
1579 1579
1580 if (!extent_root || !tree_root || !tree_root->fs_info || 1580 if (!extent_root || !tree_root || !tree_root->fs_info ||
1581 !chunk_root || !dev_root || !csum_root) { 1581 !chunk_root || !dev_root || !csum_root) {
1582 err = -ENOMEM; 1582 err = -ENOMEM;
1583 goto fail; 1583 goto fail;
1584 } 1584 }
1585 fs_info = tree_root->fs_info; 1585 fs_info = tree_root->fs_info;
1586 1586
1587 ret = init_srcu_struct(&fs_info->subvol_srcu); 1587 ret = init_srcu_struct(&fs_info->subvol_srcu);
1588 if (ret) { 1588 if (ret) {
1589 err = ret; 1589 err = ret;
1590 goto fail; 1590 goto fail;
1591 } 1591 }
1592 1592
1593 ret = setup_bdi(fs_info, &fs_info->bdi); 1593 ret = setup_bdi(fs_info, &fs_info->bdi);
1594 if (ret) { 1594 if (ret) {
1595 err = ret; 1595 err = ret;
1596 goto fail_srcu; 1596 goto fail_srcu;
1597 } 1597 }
1598 1598
1599 fs_info->btree_inode = new_inode(sb); 1599 fs_info->btree_inode = new_inode(sb);
1600 if (!fs_info->btree_inode) { 1600 if (!fs_info->btree_inode) {
1601 err = -ENOMEM; 1601 err = -ENOMEM;
1602 goto fail_bdi; 1602 goto fail_bdi;
1603 } 1603 }
1604 1604
1605 fs_info->btree_inode->i_mapping->flags &= ~__GFP_FS; 1605 fs_info->btree_inode->i_mapping->flags &= ~__GFP_FS;
1606 1606
1607 INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); 1607 INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
1608 INIT_LIST_HEAD(&fs_info->trans_list); 1608 INIT_LIST_HEAD(&fs_info->trans_list);
1609 INIT_LIST_HEAD(&fs_info->dead_roots); 1609 INIT_LIST_HEAD(&fs_info->dead_roots);
1610 INIT_LIST_HEAD(&fs_info->delayed_iputs); 1610 INIT_LIST_HEAD(&fs_info->delayed_iputs);
1611 INIT_LIST_HEAD(&fs_info->hashers); 1611 INIT_LIST_HEAD(&fs_info->hashers);
1612 INIT_LIST_HEAD(&fs_info->delalloc_inodes); 1612 INIT_LIST_HEAD(&fs_info->delalloc_inodes);
1613 INIT_LIST_HEAD(&fs_info->ordered_operations); 1613 INIT_LIST_HEAD(&fs_info->ordered_operations);
1614 INIT_LIST_HEAD(&fs_info->caching_block_groups); 1614 INIT_LIST_HEAD(&fs_info->caching_block_groups);
1615 spin_lock_init(&fs_info->delalloc_lock); 1615 spin_lock_init(&fs_info->delalloc_lock);
1616 spin_lock_init(&fs_info->trans_lock); 1616 spin_lock_init(&fs_info->trans_lock);
1617 spin_lock_init(&fs_info->ref_cache_lock); 1617 spin_lock_init(&fs_info->ref_cache_lock);
1618 spin_lock_init(&fs_info->fs_roots_radix_lock); 1618 spin_lock_init(&fs_info->fs_roots_radix_lock);
1619 spin_lock_init(&fs_info->delayed_iput_lock); 1619 spin_lock_init(&fs_info->delayed_iput_lock);
1620 spin_lock_init(&fs_info->defrag_inodes_lock); 1620 spin_lock_init(&fs_info->defrag_inodes_lock);
1621 1621
1622 init_completion(&fs_info->kobj_unregister); 1622 init_completion(&fs_info->kobj_unregister);
1623 fs_info->tree_root = tree_root; 1623 fs_info->tree_root = tree_root;
1624 fs_info->extent_root = extent_root; 1624 fs_info->extent_root = extent_root;
1625 fs_info->csum_root = csum_root; 1625 fs_info->csum_root = csum_root;
1626 fs_info->chunk_root = chunk_root; 1626 fs_info->chunk_root = chunk_root;
1627 fs_info->dev_root = dev_root; 1627 fs_info->dev_root = dev_root;
1628 fs_info->fs_devices = fs_devices; 1628 fs_info->fs_devices = fs_devices;
1629 INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); 1629 INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
1630 INIT_LIST_HEAD(&fs_info->space_info); 1630 INIT_LIST_HEAD(&fs_info->space_info);
1631 btrfs_mapping_init(&fs_info->mapping_tree); 1631 btrfs_mapping_init(&fs_info->mapping_tree);
1632 btrfs_init_block_rsv(&fs_info->global_block_rsv); 1632 btrfs_init_block_rsv(&fs_info->global_block_rsv);
1633 btrfs_init_block_rsv(&fs_info->delalloc_block_rsv); 1633 btrfs_init_block_rsv(&fs_info->delalloc_block_rsv);
1634 btrfs_init_block_rsv(&fs_info->trans_block_rsv); 1634 btrfs_init_block_rsv(&fs_info->trans_block_rsv);
1635 btrfs_init_block_rsv(&fs_info->chunk_block_rsv); 1635 btrfs_init_block_rsv(&fs_info->chunk_block_rsv);
1636 btrfs_init_block_rsv(&fs_info->empty_block_rsv); 1636 btrfs_init_block_rsv(&fs_info->empty_block_rsv);
1637 INIT_LIST_HEAD(&fs_info->durable_block_rsv_list); 1637 INIT_LIST_HEAD(&fs_info->durable_block_rsv_list);
1638 mutex_init(&fs_info->durable_block_rsv_mutex); 1638 mutex_init(&fs_info->durable_block_rsv_mutex);
1639 atomic_set(&fs_info->nr_async_submits, 0); 1639 atomic_set(&fs_info->nr_async_submits, 0);
1640 atomic_set(&fs_info->async_delalloc_pages, 0); 1640 atomic_set(&fs_info->async_delalloc_pages, 0);
1641 atomic_set(&fs_info->async_submit_draining, 0); 1641 atomic_set(&fs_info->async_submit_draining, 0);
1642 atomic_set(&fs_info->nr_async_bios, 0); 1642 atomic_set(&fs_info->nr_async_bios, 0);
1643 atomic_set(&fs_info->defrag_running, 0); 1643 atomic_set(&fs_info->defrag_running, 0);
1644 fs_info->sb = sb; 1644 fs_info->sb = sb;
1645 fs_info->max_inline = 8192 * 1024; 1645 fs_info->max_inline = 8192 * 1024;
1646 fs_info->metadata_ratio = 0; 1646 fs_info->metadata_ratio = 0;
1647 fs_info->defrag_inodes = RB_ROOT; 1647 fs_info->defrag_inodes = RB_ROOT;
1648 fs_info->trans_no_join = 0; 1648 fs_info->trans_no_join = 0;
1649 1649
1650 fs_info->thread_pool_size = min_t(unsigned long, 1650 fs_info->thread_pool_size = min_t(unsigned long,
1651 num_online_cpus() + 2, 8); 1651 num_online_cpus() + 2, 8);
1652 1652
1653 INIT_LIST_HEAD(&fs_info->ordered_extents); 1653 INIT_LIST_HEAD(&fs_info->ordered_extents);
1654 spin_lock_init(&fs_info->ordered_extent_lock); 1654 spin_lock_init(&fs_info->ordered_extent_lock);
1655 fs_info->delayed_root = kmalloc(sizeof(struct btrfs_delayed_root), 1655 fs_info->delayed_root = kmalloc(sizeof(struct btrfs_delayed_root),
1656 GFP_NOFS); 1656 GFP_NOFS);
1657 if (!fs_info->delayed_root) { 1657 if (!fs_info->delayed_root) {
1658 err = -ENOMEM; 1658 err = -ENOMEM;
1659 goto fail_iput; 1659 goto fail_iput;
1660 } 1660 }
1661 btrfs_init_delayed_root(fs_info->delayed_root); 1661 btrfs_init_delayed_root(fs_info->delayed_root);
1662 1662
1663 mutex_init(&fs_info->scrub_lock); 1663 mutex_init(&fs_info->scrub_lock);
1664 atomic_set(&fs_info->scrubs_running, 0); 1664 atomic_set(&fs_info->scrubs_running, 0);
1665 atomic_set(&fs_info->scrub_pause_req, 0); 1665 atomic_set(&fs_info->scrub_pause_req, 0);
1666 atomic_set(&fs_info->scrubs_paused, 0); 1666 atomic_set(&fs_info->scrubs_paused, 0);
1667 atomic_set(&fs_info->scrub_cancel_req, 0); 1667 atomic_set(&fs_info->scrub_cancel_req, 0);
1668 init_waitqueue_head(&fs_info->scrub_pause_wait); 1668 init_waitqueue_head(&fs_info->scrub_pause_wait);
1669 init_rwsem(&fs_info->scrub_super_lock); 1669 init_rwsem(&fs_info->scrub_super_lock);
1670 fs_info->scrub_workers_refcnt = 0; 1670 fs_info->scrub_workers_refcnt = 0;
1671 btrfs_init_workers(&fs_info->scrub_workers, "scrub",
1672 fs_info->thread_pool_size, &fs_info->generic_worker);
1673 1671
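/*
 * the two left-column lines above are what this hunk deletes:
 * open_ctree no longer initializes the scrub worker pool once at mount
 * time. The matching btrfs_init_workers() call presumably moves into
 * the scrub code itself (the second file changed by this commit), so
 * the pool is set up fresh before each use.
 */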
1674 sb->s_blocksize = 4096; 1672 sb->s_blocksize = 4096;
1675 sb->s_blocksize_bits = blksize_bits(4096); 1673 sb->s_blocksize_bits = blksize_bits(4096);
1676 sb->s_bdi = &fs_info->bdi; 1674 sb->s_bdi = &fs_info->bdi;
1677 1675
1678 fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID; 1676 fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID;
1679 fs_info->btree_inode->i_nlink = 1; 1677 fs_info->btree_inode->i_nlink = 1;
1680 /* 1678 /*
1681 * we set the i_size on the btree inode to the max possible offset. 1679 * we set the i_size on the btree inode to the max possible offset.
1682 * the real end of the address space is determined by all of 1680 * the real end of the address space is determined by all of
1683 * the devices in the system 1681 * the devices in the system
1684 */ 1682 */
1685 fs_info->btree_inode->i_size = OFFSET_MAX; 1683 fs_info->btree_inode->i_size = OFFSET_MAX;
1686 fs_info->btree_inode->i_mapping->a_ops = &btree_aops; 1684 fs_info->btree_inode->i_mapping->a_ops = &btree_aops;
1687 fs_info->btree_inode->i_mapping->backing_dev_info = &fs_info->bdi; 1685 fs_info->btree_inode->i_mapping->backing_dev_info = &fs_info->bdi;
1688 1686
1689 RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node); 1687 RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node);
1690 extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree, 1688 extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree,
1691 fs_info->btree_inode->i_mapping); 1689 fs_info->btree_inode->i_mapping);
1692 extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree); 1690 extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree);
1693 1691
1694 BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops; 1692 BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops;
1695 1693
1696 BTRFS_I(fs_info->btree_inode)->root = tree_root; 1694 BTRFS_I(fs_info->btree_inode)->root = tree_root;
1697 memset(&BTRFS_I(fs_info->btree_inode)->location, 0, 1695 memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
1698 sizeof(struct btrfs_key)); 1696 sizeof(struct btrfs_key));
1699 BTRFS_I(fs_info->btree_inode)->dummy_inode = 1; 1697 BTRFS_I(fs_info->btree_inode)->dummy_inode = 1;
1700 insert_inode_hash(fs_info->btree_inode); 1698 insert_inode_hash(fs_info->btree_inode);
1701 1699
1702 spin_lock_init(&fs_info->block_group_cache_lock); 1700 spin_lock_init(&fs_info->block_group_cache_lock);
1703 fs_info->block_group_cache_tree = RB_ROOT; 1701 fs_info->block_group_cache_tree = RB_ROOT;
1704 1702
1705 extent_io_tree_init(&fs_info->freed_extents[0], 1703 extent_io_tree_init(&fs_info->freed_extents[0],
1706 fs_info->btree_inode->i_mapping); 1704 fs_info->btree_inode->i_mapping);
1707 extent_io_tree_init(&fs_info->freed_extents[1], 1705 extent_io_tree_init(&fs_info->freed_extents[1],
1708 fs_info->btree_inode->i_mapping); 1706 fs_info->btree_inode->i_mapping);
1709 fs_info->pinned_extents = &fs_info->freed_extents[0]; 1707 fs_info->pinned_extents = &fs_info->freed_extents[0];
1710 fs_info->do_barriers = 1; 1708 fs_info->do_barriers = 1;
1711 1709
1712 1710
1713 mutex_init(&fs_info->ordered_operations_mutex); 1711 mutex_init(&fs_info->ordered_operations_mutex);
1714 mutex_init(&fs_info->tree_log_mutex); 1712 mutex_init(&fs_info->tree_log_mutex);
1715 mutex_init(&fs_info->chunk_mutex); 1713 mutex_init(&fs_info->chunk_mutex);
1716 mutex_init(&fs_info->transaction_kthread_mutex); 1714 mutex_init(&fs_info->transaction_kthread_mutex);
1717 mutex_init(&fs_info->cleaner_mutex); 1715 mutex_init(&fs_info->cleaner_mutex);
1718 mutex_init(&fs_info->volume_mutex); 1716 mutex_init(&fs_info->volume_mutex);
1719 init_rwsem(&fs_info->extent_commit_sem); 1717 init_rwsem(&fs_info->extent_commit_sem);
1720 init_rwsem(&fs_info->cleanup_work_sem); 1718 init_rwsem(&fs_info->cleanup_work_sem);
1721 init_rwsem(&fs_info->subvol_sem); 1719 init_rwsem(&fs_info->subvol_sem);
1722 1720
1723 btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); 1721 btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
1724 btrfs_init_free_cluster(&fs_info->data_alloc_cluster); 1722 btrfs_init_free_cluster(&fs_info->data_alloc_cluster);
1725 1723
1726 init_waitqueue_head(&fs_info->transaction_throttle); 1724 init_waitqueue_head(&fs_info->transaction_throttle);
1727 init_waitqueue_head(&fs_info->transaction_wait); 1725 init_waitqueue_head(&fs_info->transaction_wait);
1728 init_waitqueue_head(&fs_info->transaction_blocked_wait); 1726 init_waitqueue_head(&fs_info->transaction_blocked_wait);
1729 init_waitqueue_head(&fs_info->async_submit_wait); 1727 init_waitqueue_head(&fs_info->async_submit_wait);
1730 1728
1731 __setup_root(4096, 4096, 4096, 4096, tree_root, 1729 __setup_root(4096, 4096, 4096, 4096, tree_root,
1732 fs_info, BTRFS_ROOT_TREE_OBJECTID); 1730 fs_info, BTRFS_ROOT_TREE_OBJECTID);
1733 1731
1734 bh = btrfs_read_dev_super(fs_devices->latest_bdev); 1732 bh = btrfs_read_dev_super(fs_devices->latest_bdev);
1735 if (!bh) { 1733 if (!bh) {
1736 err = -EINVAL; 1734 err = -EINVAL;
1737 goto fail_alloc; 1735 goto fail_alloc;
1738 } 1736 }
1739 1737
1740 memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy)); 1738 memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy));
1741 memcpy(&fs_info->super_for_commit, &fs_info->super_copy, 1739 memcpy(&fs_info->super_for_commit, &fs_info->super_copy,
1742 sizeof(fs_info->super_for_commit)); 1740 sizeof(fs_info->super_for_commit));
1743 brelse(bh); 1741 brelse(bh);
1744 1742
1745 memcpy(fs_info->fsid, fs_info->super_copy.fsid, BTRFS_FSID_SIZE); 1743 memcpy(fs_info->fsid, fs_info->super_copy.fsid, BTRFS_FSID_SIZE);
1746 1744
1747 disk_super = &fs_info->super_copy; 1745 disk_super = &fs_info->super_copy;
1748 if (!btrfs_super_root(disk_super)) 1746 if (!btrfs_super_root(disk_super))
1749 goto fail_alloc; 1747 goto fail_alloc;
1750 1748
1751 /* check FS state, whether FS is broken. */ 1749 /* check FS state, whether FS is broken. */
1752 fs_info->fs_state |= btrfs_super_flags(disk_super); 1750 fs_info->fs_state |= btrfs_super_flags(disk_super);
1753 1751
1754 btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); 1752 btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
1755 1753
1756 /* 1754 /*
1757 * In the long term, we'll store the compression type in the super 1755 * In the long term, we'll store the compression type in the super
1758 * block, and it'll be used for per file compression control. 1756 * block, and it'll be used for per file compression control.
1759 */ 1757 */
1760 fs_info->compress_type = BTRFS_COMPRESS_ZLIB; 1758 fs_info->compress_type = BTRFS_COMPRESS_ZLIB;
1761 1759
1762 ret = btrfs_parse_options(tree_root, options); 1760 ret = btrfs_parse_options(tree_root, options);
1763 if (ret) { 1761 if (ret) {
1764 err = ret; 1762 err = ret;
1765 goto fail_alloc; 1763 goto fail_alloc;
1766 } 1764 }
1767 1765
1768 features = btrfs_super_incompat_flags(disk_super) & 1766 features = btrfs_super_incompat_flags(disk_super) &
1769 ~BTRFS_FEATURE_INCOMPAT_SUPP; 1767 ~BTRFS_FEATURE_INCOMPAT_SUPP;
1770 if (features) { 1768 if (features) {
1771 printk(KERN_ERR "BTRFS: couldn't mount because of " 1769 printk(KERN_ERR "BTRFS: couldn't mount because of "
1772 "unsupported optional features (%Lx).\n", 1770 "unsupported optional features (%Lx).\n",
1773 (unsigned long long)features); 1771 (unsigned long long)features);
1774 err = -EINVAL; 1772 err = -EINVAL;
1775 goto fail_alloc; 1773 goto fail_alloc;
1776 } 1774 }
1777 1775
1778 features = btrfs_super_incompat_flags(disk_super); 1776 features = btrfs_super_incompat_flags(disk_super);
1779 features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; 1777 features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
1780 if (tree_root->fs_info->compress_type & BTRFS_COMPRESS_LZO) 1778 if (tree_root->fs_info->compress_type & BTRFS_COMPRESS_LZO)
1781 features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; 1779 features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
1782 btrfs_set_super_incompat_flags(disk_super, features); 1780 btrfs_set_super_incompat_flags(disk_super, features);
1783 1781
1784 features = btrfs_super_compat_ro_flags(disk_super) & 1782 features = btrfs_super_compat_ro_flags(disk_super) &
1785 ~BTRFS_FEATURE_COMPAT_RO_SUPP; 1783 ~BTRFS_FEATURE_COMPAT_RO_SUPP;
1786 if (!(sb->s_flags & MS_RDONLY) && features) { 1784 if (!(sb->s_flags & MS_RDONLY) && features) {
1787 printk(KERN_ERR "BTRFS: couldn't mount RDWR because of " 1785 printk(KERN_ERR "BTRFS: couldn't mount RDWR because of "
1788 "unsupported option features (%Lx).\n", 1786 "unsupported option features (%Lx).\n",
1789 (unsigned long long)features); 1787 (unsigned long long)features);
1790 err = -EINVAL; 1788 err = -EINVAL;
1791 goto fail_alloc; 1789 goto fail_alloc;
1792 } 1790 }
1793 1791
1794 btrfs_init_workers(&fs_info->generic_worker, 1792 btrfs_init_workers(&fs_info->generic_worker,
1795 "genwork", 1, NULL); 1793 "genwork", 1, NULL);
1796 1794
1797 btrfs_init_workers(&fs_info->workers, "worker", 1795 btrfs_init_workers(&fs_info->workers, "worker",
1798 fs_info->thread_pool_size, 1796 fs_info->thread_pool_size,
1799 &fs_info->generic_worker); 1797 &fs_info->generic_worker);
1800 1798
1801 btrfs_init_workers(&fs_info->delalloc_workers, "delalloc", 1799 btrfs_init_workers(&fs_info->delalloc_workers, "delalloc",
1802 fs_info->thread_pool_size, 1800 fs_info->thread_pool_size,
1803 &fs_info->generic_worker); 1801 &fs_info->generic_worker);
1804 1802
1805 btrfs_init_workers(&fs_info->submit_workers, "submit", 1803 btrfs_init_workers(&fs_info->submit_workers, "submit",
1806 min_t(u64, fs_devices->num_devices, 1804 min_t(u64, fs_devices->num_devices,
1807 fs_info->thread_pool_size), 1805 fs_info->thread_pool_size),
1808 &fs_info->generic_worker); 1806 &fs_info->generic_worker);
1809 1807
1810 /* a higher idle thresh on the submit workers makes it much more 1808 /* a higher idle thresh on the submit workers makes it much more
1811 * likely that bios will be sent down in a sane order to the 1809 * likely that bios will be sent down in a sane order to the
1812 * devices 1810 * devices
1813 */ 1811 */
1814 fs_info->submit_workers.idle_thresh = 64; 1812 fs_info->submit_workers.idle_thresh = 64;
1815 1813
1816 fs_info->workers.idle_thresh = 16; 1814 fs_info->workers.idle_thresh = 16;
1817 fs_info->workers.ordered = 1; 1815 fs_info->workers.ordered = 1;
1818 1816
	fs_info->delalloc_workers.idle_thresh = 2;
	fs_info->delalloc_workers.ordered = 1;

	btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1,
			   &fs_info->generic_worker);
	btrfs_init_workers(&fs_info->endio_workers, "endio",
			   fs_info->thread_pool_size,
			   &fs_info->generic_worker);
	btrfs_init_workers(&fs_info->endio_meta_workers, "endio-meta",
			   fs_info->thread_pool_size,
			   &fs_info->generic_worker);
	btrfs_init_workers(&fs_info->endio_meta_write_workers,
			   "endio-meta-write", fs_info->thread_pool_size,
			   &fs_info->generic_worker);
	btrfs_init_workers(&fs_info->endio_write_workers, "endio-write",
			   fs_info->thread_pool_size,
			   &fs_info->generic_worker);
	btrfs_init_workers(&fs_info->endio_freespace_worker, "freespace-write",
			   1, &fs_info->generic_worker);
	btrfs_init_workers(&fs_info->delayed_workers, "delayed-meta",
			   fs_info->thread_pool_size,
			   &fs_info->generic_worker);

	/*
	 * endios are largely parallel and should have a very
	 * low idle thresh
	 */
	fs_info->endio_workers.idle_thresh = 4;
	fs_info->endio_meta_workers.idle_thresh = 4;

	fs_info->endio_write_workers.idle_thresh = 2;
	fs_info->endio_meta_write_workers.idle_thresh = 2;

	btrfs_start_workers(&fs_info->workers, 1);
	btrfs_start_workers(&fs_info->generic_worker, 1);
	btrfs_start_workers(&fs_info->submit_workers, 1);
	btrfs_start_workers(&fs_info->delalloc_workers, 1);
	btrfs_start_workers(&fs_info->fixup_workers, 1);
	btrfs_start_workers(&fs_info->endio_workers, 1);
	btrfs_start_workers(&fs_info->endio_meta_workers, 1);
	btrfs_start_workers(&fs_info->endio_meta_write_workers, 1);
	btrfs_start_workers(&fs_info->endio_write_workers, 1);
	btrfs_start_workers(&fs_info->endio_freespace_worker, 1);
	btrfs_start_workers(&fs_info->delayed_workers, 1);

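For readers unfamiliar with the btrfs_workers helpers, the pattern above is always init-then-start: btrfs_init_workers() fills in a pool's name, size, and tuning fields, and btrfs_start_workers() actually spawns threads. A minimal userspace analogue of that lifecycle, with all names hypothetical and pthreads standing in for kernel threads:

	/* toy analogue of the init -> start -> stop pool lifecycle */
	#include <pthread.h>
	#include <stdio.h>

	struct toy_workers {
		const char *name;
		int max_active;   /* analogous to the size passed at init */
		int idle_thresh;  /* analogous to the idle_thresh tuning */
		pthread_t thread;
		int started;
	};

	static void *toy_loop(void *arg)
	{
		struct toy_workers *w = arg;
		printf("%s: worker running (idle_thresh=%d)\n",
		       w->name, w->idle_thresh);
		return NULL;
	}

	static void toy_init_workers(struct toy_workers *w,
				     const char *name, int max)
	{
		w->name = name;
		w->max_active = max;
		w->idle_thresh = 64;  /* a default; callers may lower it */
		w->started = 0;
	}

	static int toy_start_workers(struct toy_workers *w, int num)
	{
		(void)num;  /* this toy only ever spawns one thread */
		w->started = 1;
		return pthread_create(&w->thread, NULL, toy_loop, w);
	}

	static void toy_stop_workers(struct toy_workers *w)
	{
		if (w->started)
			pthread_join(w->thread, NULL);
		w->started = 0;
	}

	int main(void)
	{
		struct toy_workers endio;

		toy_init_workers(&endio, "endio", 4);
		endio.idle_thresh = 4;  /* mirrors the tuning done above */
		toy_start_workers(&endio, 1);
		toy_stop_workers(&endio);
		return 0;
	}
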
	fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
	fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
				    4 * 1024 * 1024 / PAGE_CACHE_SIZE);

	nodesize = btrfs_super_nodesize(disk_super);
	leafsize = btrfs_super_leafsize(disk_super);
	sectorsize = btrfs_super_sectorsize(disk_super);
	stripesize = btrfs_super_stripesize(disk_super);
	tree_root->nodesize = nodesize;
	tree_root->leafsize = leafsize;
	tree_root->sectorsize = sectorsize;
	tree_root->stripesize = stripesize;

	sb->s_blocksize = sectorsize;
	sb->s_blocksize_bits = blksize_bits(sectorsize);

	if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
		    sizeof(disk_super->magic))) {
		printk(KERN_INFO "btrfs: valid FS not found on %s\n", sb->s_id);
		goto fail_sb_buffer;
	}

	mutex_lock(&fs_info->chunk_mutex);
	ret = btrfs_read_sys_array(tree_root);
	mutex_unlock(&fs_info->chunk_mutex);
	if (ret) {
		printk(KERN_WARNING "btrfs: failed to read the system "
		       "array on %s\n", sb->s_id);
		goto fail_sb_buffer;
	}

	blocksize = btrfs_level_size(tree_root,
				     btrfs_super_chunk_root_level(disk_super));
	generation = btrfs_super_chunk_root_generation(disk_super);

	__setup_root(nodesize, leafsize, sectorsize, stripesize,
		     chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID);

	chunk_root->node = read_tree_block(chunk_root,
					   btrfs_super_chunk_root(disk_super),
					   blocksize, generation);
	BUG_ON(!chunk_root->node);
	if (!test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) {
		printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n",
		       sb->s_id);
		goto fail_chunk_root;
	}
	btrfs_set_root_node(&chunk_root->root_item, chunk_root->node);
	chunk_root->commit_root = btrfs_root_node(chunk_root);

	read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid,
	   (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node),
	   BTRFS_UUID_SIZE);

	mutex_lock(&fs_info->chunk_mutex);
	ret = btrfs_read_chunk_tree(chunk_root);
	mutex_unlock(&fs_info->chunk_mutex);
	if (ret) {
		printk(KERN_WARNING "btrfs: failed to read chunk tree on %s\n",
		       sb->s_id);
		goto fail_chunk_root;
	}

	btrfs_close_extra_devices(fs_devices);

	blocksize = btrfs_level_size(tree_root,
				     btrfs_super_root_level(disk_super));
	generation = btrfs_super_generation(disk_super);

	tree_root->node = read_tree_block(tree_root,
					  btrfs_super_root(disk_super),
					  blocksize, generation);
	if (!tree_root->node)
		goto fail_chunk_root;
	if (!test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) {
		printk(KERN_WARNING "btrfs: failed to read tree root on %s\n",
		       sb->s_id);
		goto fail_tree_root;
	}
	btrfs_set_root_node(&tree_root->root_item, tree_root->node);
	tree_root->commit_root = btrfs_root_node(tree_root);

	ret = find_and_setup_root(tree_root, fs_info,
				  BTRFS_EXTENT_TREE_OBJECTID, extent_root);
	if (ret)
		goto fail_tree_root;
	extent_root->track_dirty = 1;

	ret = find_and_setup_root(tree_root, fs_info,
				  BTRFS_DEV_TREE_OBJECTID, dev_root);
	if (ret)
		goto fail_extent_root;
	dev_root->track_dirty = 1;

	ret = find_and_setup_root(tree_root, fs_info,
				  BTRFS_CSUM_TREE_OBJECTID, csum_root);
	if (ret)
		goto fail_dev_root;

	csum_root->track_dirty = 1;

	fs_info->generation = generation;
	fs_info->last_trans_committed = generation;
	fs_info->data_alloc_profile = (u64)-1;
	fs_info->metadata_alloc_profile = (u64)-1;
	fs_info->system_alloc_profile = fs_info->metadata_alloc_profile;

	ret = btrfs_init_space_info(fs_info);
	if (ret) {
		printk(KERN_ERR "Failed to initialize space info: %d\n", ret);
		goto fail_block_groups;
	}

	ret = btrfs_read_block_groups(extent_root);
	if (ret) {
		printk(KERN_ERR "Failed to read block groups: %d\n", ret);
		goto fail_block_groups;
	}

	fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
					       "btrfs-cleaner");
	if (IS_ERR(fs_info->cleaner_kthread))
		goto fail_block_groups;

	fs_info->transaction_kthread = kthread_run(transaction_kthread,
						   tree_root,
						   "btrfs-transaction");
	if (IS_ERR(fs_info->transaction_kthread))
		goto fail_cleaner;

	if (!btrfs_test_opt(tree_root, SSD) &&
	    !btrfs_test_opt(tree_root, NOSSD) &&
	    !fs_info->fs_devices->rotating) {
		printk(KERN_INFO "Btrfs detected SSD devices, enabling SSD "
		       "mode\n");
		btrfs_set_opt(fs_info->mount_opt, SSD);
	}

	/* do not make disk changes in broken FS */
	if (btrfs_super_log_root(disk_super) != 0 &&
	    !(fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)) {
		u64 bytenr = btrfs_super_log_root(disk_super);

		if (fs_devices->rw_devices == 0) {
			printk(KERN_WARNING "Btrfs log replay required "
			       "on RO media\n");
			err = -EIO;
			goto fail_trans_kthread;
		}
		blocksize =
		     btrfs_level_size(tree_root,
				      btrfs_super_log_root_level(disk_super));

		log_tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
		if (!log_tree_root) {
			err = -ENOMEM;
			goto fail_trans_kthread;
		}

		__setup_root(nodesize, leafsize, sectorsize, stripesize,
			     log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID);

		log_tree_root->node = read_tree_block(tree_root, bytenr,
						      blocksize,
						      generation + 1);
		ret = btrfs_recover_log_trees(log_tree_root);
		BUG_ON(ret);

		if (sb->s_flags & MS_RDONLY) {
			ret = btrfs_commit_super(tree_root);
			BUG_ON(ret);
		}
	}

	ret = btrfs_find_orphan_roots(tree_root);
	BUG_ON(ret);

	if (!(sb->s_flags & MS_RDONLY)) {
		ret = btrfs_cleanup_fs_roots(fs_info);
		BUG_ON(ret);

		ret = btrfs_recover_relocation(tree_root);
		if (ret < 0) {
			printk(KERN_WARNING
			       "btrfs: failed to recover relocation\n");
			err = -EINVAL;
			goto fail_trans_kthread;
		}
	}

	location.objectid = BTRFS_FS_TREE_OBJECTID;
	location.type = BTRFS_ROOT_ITEM_KEY;
	location.offset = (u64)-1;

	fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location);
	if (!fs_info->fs_root)
		goto fail_trans_kthread;
	if (IS_ERR(fs_info->fs_root)) {
		err = PTR_ERR(fs_info->fs_root);
		goto fail_trans_kthread;
	}

	if (!(sb->s_flags & MS_RDONLY)) {
		down_read(&fs_info->cleanup_work_sem);
		err = btrfs_orphan_cleanup(fs_info->fs_root);
		if (!err)
			err = btrfs_orphan_cleanup(fs_info->tree_root);
		up_read(&fs_info->cleanup_work_sem);
		if (err) {
			close_ctree(tree_root);
			return ERR_PTR(err);
		}
	}

	return tree_root;

fail_trans_kthread:
	kthread_stop(fs_info->transaction_kthread);
fail_cleaner:
	kthread_stop(fs_info->cleaner_kthread);

	/*
	 * make sure we're done with the btree inode before we stop our
	 * kthreads
	 */
	filemap_write_and_wait(fs_info->btree_inode->i_mapping);
	invalidate_inode_pages2(fs_info->btree_inode->i_mapping);

fail_block_groups:
	btrfs_free_block_groups(fs_info);
	free_extent_buffer(csum_root->node);
	free_extent_buffer(csum_root->commit_root);
fail_dev_root:
	free_extent_buffer(dev_root->node);
	free_extent_buffer(dev_root->commit_root);
fail_extent_root:
	free_extent_buffer(extent_root->node);
	free_extent_buffer(extent_root->commit_root);
fail_tree_root:
	free_extent_buffer(tree_root->node);
	free_extent_buffer(tree_root->commit_root);
fail_chunk_root:
	free_extent_buffer(chunk_root->node);
	free_extent_buffer(chunk_root->commit_root);
fail_sb_buffer:
	btrfs_stop_workers(&fs_info->generic_worker);
	btrfs_stop_workers(&fs_info->fixup_workers);
	btrfs_stop_workers(&fs_info->delalloc_workers);
	btrfs_stop_workers(&fs_info->workers);
	btrfs_stop_workers(&fs_info->endio_workers);
	btrfs_stop_workers(&fs_info->endio_meta_workers);
	btrfs_stop_workers(&fs_info->endio_meta_write_workers);
	btrfs_stop_workers(&fs_info->endio_write_workers);
	btrfs_stop_workers(&fs_info->endio_freespace_worker);
	btrfs_stop_workers(&fs_info->submit_workers);
	btrfs_stop_workers(&fs_info->delayed_workers);
fail_alloc:
	kfree(fs_info->delayed_root);
fail_iput:
	invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
	iput(fs_info->btree_inode);

	btrfs_close_devices(fs_info->fs_devices);
	btrfs_mapping_tree_free(&fs_info->mapping_tree);
fail_bdi:
	bdi_destroy(&fs_info->bdi);
fail_srcu:
	cleanup_srcu_struct(&fs_info->subvol_srcu);
fail:
	kfree(extent_root);
	kfree(tree_root);
	kfree(fs_info);
	kfree(chunk_root);
	kfree(dev_root);
	kfree(csum_root);
	return ERR_PTR(err);
}

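The long run of fail_* labels above is the usual kernel staged-unwind idiom: each label releases exactly what had been set up by the time the jump was taken, in reverse order of acquisition, so a failure at any step frees everything acquired so far and nothing more. A minimal sketch of the idiom, with hypothetical names:

	/* staged error unwinding with gotos: a minimal sketch */
	#include <stdlib.h>

	static int setup_a(void **a) { *a = malloc(16); return *a ? 0 : -1; }
	static int setup_b(void **b) { *b = malloc(16); return *b ? 0 : -1; }

	static int open_thing(void)
	{
		void *a = NULL, *b = NULL;
		int err = -1;

		if (setup_a(&a))
			goto fail;       /* nothing to undo yet */
		if (setup_b(&b))
			goto fail_a;     /* only a was set up */

		/* ... use a and b, then tear down normally ... */
		free(b);
		free(a);
		return 0;

	fail_a:
		free(a);
	fail:
		return err;
	}

	int main(void) { return open_thing() ? 1 : 0; }
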
static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
{
	char b[BDEVNAME_SIZE];

	if (uptodate) {
		set_buffer_uptodate(bh);
	} else {
		printk_ratelimited(KERN_WARNING "lost page write due to "
				   "I/O error on %s\n",
				   bdevname(bh->b_bdev, b));
		/* note, we don't set_buffer_write_io_error because we have
		 * our own ways of dealing with the IO errors
		 */
		clear_buffer_uptodate(bh);
	}
	unlock_buffer(bh);
	put_bh(bh);
}

struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
{
	struct buffer_head *bh;
	struct buffer_head *latest = NULL;
	struct btrfs_super_block *super;
	int i;
	u64 transid = 0;
	u64 bytenr;

	/* we would like to check all the supers, but that would make
	 * a btrfs mount succeed after a mkfs from a different FS.
	 * So, we need to add a special mount option to scan for
	 * later supers, using BTRFS_SUPER_MIRROR_MAX instead
	 */
	for (i = 0; i < 1; i++) {
		bytenr = btrfs_sb_offset(i);
		if (bytenr + 4096 >= i_size_read(bdev->bd_inode))
			break;
		bh = __bread(bdev, bytenr / 4096, 4096);
		if (!bh)
			continue;

		super = (struct btrfs_super_block *)bh->b_data;
		if (btrfs_super_bytenr(super) != bytenr ||
		    strncmp((char *)(&super->magic), BTRFS_MAGIC,
			    sizeof(super->magic))) {
			brelse(bh);
			continue;
		}

		if (!latest || btrfs_super_generation(super) > transid) {
			brelse(latest);
			latest = bh;
			transid = btrfs_super_generation(super);
		} else {
			brelse(bh);
		}
	}
	return latest;
}

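The selection logic above keeps whichever superblock copy carries the highest generation number and drops the reference on every loser. A userspace-style sketch of that "newest super wins" scan; the three mirror offsets (64 KiB, 64 MiB, 256 GiB) are an assumption of this sketch, modelled on btrfs_sb_offset(), not read from the patch:

	/* illustrative "pick the latest super" scan, userspace toy */
	#include <stdint.h>
	#include <stdio.h>

	struct super_copy {
		uint64_t bytenr;     /* where this copy claims to live */
		uint64_t generation; /* transaction id when written */
		int      valid;      /* magic and bytenr matched */
	};

	/* keep the valid copy with the highest generation */
	static const struct super_copy *
	pick_latest(const struct super_copy *s, int n)
	{
		const struct super_copy *latest = NULL;
		for (int i = 0; i < n; i++) {
			if (!s[i].valid)
				continue;
			if (!latest || s[i].generation > latest->generation)
				latest = &s[i];
		}
		return latest;
	}

	int main(void)
	{
		struct super_copy mirrors[3] = {
			{ 64ULL << 10,  42, 1 },  /* 64 KiB mirror */
			{ 64ULL << 20,  41, 1 },  /* 64 MiB mirror */
			{ 256ULL << 30,  0, 0 },  /* 256 GiB, invalid here */
		};
		const struct super_copy *best = pick_latest(mirrors, 3);
		if (best)
			printf("latest super: bytenr=%llu gen=%llu\n",
			       (unsigned long long)best->bytenr,
			       (unsigned long long)best->generation);
		return 0;
	}
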
/*
 * this should be called twice, once with wait == 0 and
 * once with wait == 1. When wait == 0 is done, all the buffer heads
 * we write are pinned.
 *
 * They are released when wait == 1 is done.
 * max_mirrors must be the same for both runs, and it indicates how
 * many supers on this one device should be written.
 *
 * max_mirrors == 0 means to write them all.
 */
static int write_dev_supers(struct btrfs_device *device,
			    struct btrfs_super_block *sb,
			    int do_barriers, int wait, int max_mirrors)
{
	struct buffer_head *bh;
	int i;
	int ret;
	int errors = 0;
	u32 crc;
	u64 bytenr;
	int last_barrier = 0;

	if (max_mirrors == 0)
		max_mirrors = BTRFS_SUPER_MIRROR_MAX;

	/* make sure only the last submit_bh does a barrier */
	if (do_barriers) {
		for (i = 0; i < max_mirrors; i++) {
			bytenr = btrfs_sb_offset(i);
			if (bytenr + BTRFS_SUPER_INFO_SIZE >=
			    device->total_bytes)
				break;
			last_barrier = i;
		}
	}

	for (i = 0; i < max_mirrors; i++) {
		bytenr = btrfs_sb_offset(i);
		if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes)
			break;

		if (wait) {
			bh = __find_get_block(device->bdev, bytenr / 4096,
					      BTRFS_SUPER_INFO_SIZE);
			BUG_ON(!bh);
			wait_on_buffer(bh);
			if (!buffer_uptodate(bh))
				errors++;

			/* drop our reference */
			brelse(bh);

			/* drop the reference from the wait == 0 run */
			brelse(bh);
			continue;
		} else {
			btrfs_set_super_bytenr(sb, bytenr);

			crc = ~(u32)0;
			crc = btrfs_csum_data(NULL, (char *)sb +
					      BTRFS_CSUM_SIZE, crc,
					      BTRFS_SUPER_INFO_SIZE -
					      BTRFS_CSUM_SIZE);
			btrfs_csum_final(crc, sb->csum);

			/*
			 * one reference for us, and we leave it for the
			 * caller
			 */
			bh = __getblk(device->bdev, bytenr / 4096,
				      BTRFS_SUPER_INFO_SIZE);
			memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE);

			/* one reference for submit_bh */
			get_bh(bh);

			set_buffer_uptodate(bh);
			lock_buffer(bh);
			bh->b_end_io = btrfs_end_buffer_write_sync;
		}

		if (i == last_barrier && do_barriers)
			ret = submit_bh(WRITE_FLUSH_FUA, bh);
		else
			ret = submit_bh(WRITE_SYNC, bh);

		if (ret)
			errors++;
	}
	return errors < i ? 0 : -1;
}

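The checksum step in the wait == 0 branch computes a CRC32C over the superblock while skipping the leading checksum field itself, then btrfs_csum_final() stores the inverted result. A self-contained userspace sketch of that calculation; the bitwise CRC32C, the 4096-byte super size, the 32-byte checksum area, and the host-order store are assumptions of the sketch, not the kernel's implementation:

	/* userspace sketch: CRC32C of a superblock, skipping its csum field */
	#include <stdint.h>
	#include <stddef.h>
	#include <string.h>
	#include <stdio.h>

	#define SB_SIZE   4096   /* assumed BTRFS_SUPER_INFO_SIZE */
	#define CSUM_SIZE 32     /* assumed BTRFS_CSUM_SIZE */

	/* reflected CRC32C (Castagnoli polynomial), one bit at a time */
	static uint32_t crc32c(uint32_t crc, const uint8_t *p, size_t len)
	{
		while (len--) {
			crc ^= *p++;
			for (int k = 0; k < 8; k++)
				crc = (crc >> 1) ^
				      (0x82F63B78u & (0u - (crc & 1u)));
		}
		return crc;
	}

	int main(void)
	{
		uint8_t sb[SB_SIZE] = { 0 };
		uint32_t crc = ~(uint32_t)0;  /* same seed as above */

		/* fake payload after the checksum area */
		memset(sb + CSUM_SIZE, 0xab, SB_SIZE - CSUM_SIZE);
		crc = crc32c(crc, sb + CSUM_SIZE, SB_SIZE - CSUM_SIZE);
		crc = ~crc;  /* final inversion, as btrfs_csum_final does */
		/* btrfs stores it little-endian; this toy uses host order */
		memcpy(sb, &crc, sizeof(crc));
		printf("csum = 0x%08x\n", (unsigned)crc);
		return 0;
	}
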
int write_all_supers(struct btrfs_root *root, int max_mirrors)
{
	struct list_head *head;
	struct btrfs_device *dev;
	struct btrfs_super_block *sb;
	struct btrfs_dev_item *dev_item;
	int ret;
	int do_barriers;
	int max_errors;
	int total_errors = 0;
	u64 flags;

	max_errors = btrfs_super_num_devices(&root->fs_info->super_copy) - 1;
	do_barriers = !btrfs_test_opt(root, NOBARRIER);

	sb = &root->fs_info->super_for_commit;
	dev_item = &sb->dev_item;

	mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
	head = &root->fs_info->fs_devices->devices;
	list_for_each_entry_rcu(dev, head, dev_list) {
		if (!dev->bdev) {
			total_errors++;
			continue;
		}
		if (!dev->in_fs_metadata || !dev->writeable)
			continue;

		btrfs_set_stack_device_generation(dev_item, 0);
		btrfs_set_stack_device_type(dev_item, dev->type);
		btrfs_set_stack_device_id(dev_item, dev->devid);
		btrfs_set_stack_device_total_bytes(dev_item, dev->total_bytes);
		btrfs_set_stack_device_bytes_used(dev_item, dev->bytes_used);
		btrfs_set_stack_device_io_align(dev_item, dev->io_align);
		btrfs_set_stack_device_io_width(dev_item, dev->io_width);
		btrfs_set_stack_device_sector_size(dev_item, dev->sector_size);
		memcpy(dev_item->uuid, dev->uuid, BTRFS_UUID_SIZE);
		memcpy(dev_item->fsid, dev->fs_devices->fsid, BTRFS_UUID_SIZE);

		flags = btrfs_super_flags(sb);
		btrfs_set_super_flags(sb, flags | BTRFS_HEADER_FLAG_WRITTEN);

		ret = write_dev_supers(dev, sb, do_barriers, 0, max_mirrors);
		if (ret)
			total_errors++;
	}
	if (total_errors > max_errors) {
		printk(KERN_ERR "btrfs: %d errors while writing supers\n",
		       total_errors);
		BUG();
	}

	total_errors = 0;
	list_for_each_entry_rcu(dev, head, dev_list) {
		if (!dev->bdev)
			continue;
		if (!dev->in_fs_metadata || !dev->writeable)
			continue;

		ret = write_dev_supers(dev, sb, do_barriers, 1, max_mirrors);
		if (ret)
			total_errors++;
	}
	mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
	if (total_errors > max_errors) {
		printk(KERN_ERR "btrfs: %d errors while writing supers\n",
		       total_errors);
		BUG();
	}
	return 0;
}

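write_all_supers() drives write_dev_supers() exactly as the earlier comment prescribes: one pass with wait == 0 to submit and pin, one pass with wait == 1 to collect completions, tolerating up to num_devices - 1 failing devices because the super is mirrored across all of them. A toy model of that control flow (purely illustrative; no real I/O, all names hypothetical):

	/* toy model of the two-pass submit-then-wait super write */
	#include <stdio.h>

	struct toy_dev { const char *name; int write_ok; int pending; };

	static int dev_supers(struct toy_dev *d, int wait)
	{
		if (!wait) {            /* pass 0: submit and "pin" */
			d->pending = 1;
			return 0;
		}
		d->pending = 0;         /* pass 1: wait and "release" */
		return d->write_ok ? 0 : -1;
	}

	int main(void)
	{
		struct toy_dev devs[] = {
			{ "sda", 1, 0 }, { "sdb", 0, 0 }, { "sdc", 1, 0 },
		};
		int n = 3, max_errors = n - 1, total_errors = 0;

		for (int i = 0; i < n; i++)
			if (dev_supers(&devs[i], 0))
				total_errors++;
		for (int i = 0; i < n; i++)
			if (dev_supers(&devs[i], 1))
				total_errors++;

		printf("%d error(s), tolerated up to %d\n",
		       total_errors, max_errors);
		return total_errors > max_errors;
	}
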
int write_ctree_super(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root, int max_mirrors)
{
	int ret;

	ret = write_all_supers(root, max_mirrors);
	return ret;
}

int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
{
	spin_lock(&fs_info->fs_roots_radix_lock);
	radix_tree_delete(&fs_info->fs_roots_radix,
			  (unsigned long)root->root_key.objectid);
	spin_unlock(&fs_info->fs_roots_radix_lock);

	if (btrfs_root_refs(&root->root_item) == 0)
		synchronize_srcu(&fs_info->subvol_srcu);

	__btrfs_remove_free_space_cache(root->free_ino_pinned);
	__btrfs_remove_free_space_cache(root->free_ino_ctl);
	free_fs_root(root);
	return 0;
}

static void free_fs_root(struct btrfs_root *root)
{
	iput(root->cache_inode);
	WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
	if (root->anon_super.s_dev) {
		down_write(&root->anon_super.s_umount);
		kill_anon_super(&root->anon_super);
	}
	free_extent_buffer(root->node);
	free_extent_buffer(root->commit_root);
	kfree(root->free_ino_ctl);
	kfree(root->free_ino_pinned);
	kfree(root->name);
	kfree(root);
}

static int del_fs_roots(struct btrfs_fs_info *fs_info)
{
	int ret;
	struct btrfs_root *gang[8];
	int i;

	while (!list_empty(&fs_info->dead_roots)) {
		gang[0] = list_entry(fs_info->dead_roots.next,
				     struct btrfs_root, root_list);
		list_del(&gang[0]->root_list);

		if (gang[0]->in_radix) {
			btrfs_free_fs_root(fs_info, gang[0]);
		} else {
			free_extent_buffer(gang[0]->node);
			free_extent_buffer(gang[0]->commit_root);
			kfree(gang[0]);
		}
	}

	while (1) {
		ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
					     (void **)gang, 0,
					     ARRAY_SIZE(gang));
		if (!ret)
			break;
		for (i = 0; i < ret; i++)
			btrfs_free_fs_root(fs_info, gang[i]);
	}
	return 0;
}

int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
{
	u64 root_objectid = 0;
	struct btrfs_root *gang[8];
	int i;
	int ret;

	while (1) {
		ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
					     (void **)gang, root_objectid,
					     ARRAY_SIZE(gang));
		if (!ret)
			break;

		root_objectid = gang[ret - 1]->root_key.objectid + 1;
		for (i = 0; i < ret; i++) {
			int err;

			root_objectid = gang[i]->root_key.objectid;
			err = btrfs_orphan_cleanup(gang[i]);
			if (err)
				return err;
		}
		root_objectid++;
	}
	return 0;
}

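Both loops above drain the fs_roots radix tree in batches of eight via radix_tree_gang_lookup(), resuming each pass just past the last key returned (or from zero in del_fs_roots(), since freed roots are removed from the tree as it goes). A toy sketch of that batched-drain pattern, with a plain array standing in for the radix tree:

	/* toy batched "gang lookup" drain over an array-backed index */
	#include <stdio.h>
	#include <stddef.h>

	#define GANG 8

	static int items[20];
	static size_t nitems = 20;

	/* return up to max keys >= first; stand-in for the radix lookup */
	static int gang_lookup(int *out, size_t first, int max)
	{
		int found = 0;
		for (size_t i = first; i < nitems && found < max; i++)
			out[found++] = (int)i;
		(void)items;
		return found;
	}

	int main(void)
	{
		int gang[GANG];
		size_t next = 0;

		while (1) {
			int ret = gang_lookup(gang, next, GANG);
			if (!ret)
				break;
			for (int i = 0; i < ret; i++)
				printf("processing key %d\n", gang[i]);
			next = gang[ret - 1] + 1;  /* resume past the batch */
		}
		return 0;
	}
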
int btrfs_commit_super(struct btrfs_root *root)
{
	struct btrfs_trans_handle *trans;
	int ret;

	mutex_lock(&root->fs_info->cleaner_mutex);
	btrfs_run_delayed_iputs(root);
	btrfs_clean_old_snapshots(root);
	mutex_unlock(&root->fs_info->cleaner_mutex);

	/* wait until any ongoing cleanup work is done */
	down_write(&root->fs_info->cleanup_work_sem);
	up_write(&root->fs_info->cleanup_work_sem);

	trans = btrfs_join_transaction(root);
	if (IS_ERR(trans))
		return PTR_ERR(trans);
	ret = btrfs_commit_transaction(trans, root);
	BUG_ON(ret);
	/* run commit again to drop the original snapshot */
	trans = btrfs_join_transaction(root);
	if (IS_ERR(trans))
		return PTR_ERR(trans);
	btrfs_commit_transaction(trans, root);
	ret = btrfs_write_and_wait_transaction(NULL, root);
	BUG_ON(ret);

	ret = write_ctree_super(NULL, root, 0);
	return ret;
}

int close_ctree(struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	int ret;

	fs_info->closing = 1;
	smp_mb();

	btrfs_scrub_cancel(root);

	/* wait for any defraggers to finish */
	wait_event(fs_info->transaction_wait,
		   (atomic_read(&fs_info->defrag_running) == 0));

	/* clear out the rbtree of defraggable inodes */
	btrfs_run_defrag_inodes(root->fs_info);

	btrfs_put_block_group_cache(fs_info);

	/*
	 * There are two situations in which a broken btrfs flips
	 * read-only:
	 *
	 * 1. btrfs flipped read-only somewhere else before
	 *    btrfs_commit_super; sb->s_flags has the MS_RDONLY flag
	 *    set, and btrfs skips writing the sb directly so that the
	 *    ERROR state is kept on disk.
	 *
	 * 2. btrfs flips read-only just in btrfs_commit_super; in that
	 *    case btrfs cannot write the sb via btrfs_commit_super, and
	 *    since fs_state has the BTRFS_SUPER_FLAG_ERROR flag set,
	 *    btrfs cleans up all FS resources first and writes the sb
	 *    afterwards.
	 */
	if (!(fs_info->sb->s_flags & MS_RDONLY)) {
		ret = btrfs_commit_super(root);
		if (ret)
			printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
	}

	if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
		ret = btrfs_error_commit_super(root);
		if (ret)
			printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
	}

	kthread_stop(root->fs_info->transaction_kthread);
	kthread_stop(root->fs_info->cleaner_kthread);

	fs_info->closing = 2;
	smp_mb();

	if (fs_info->delalloc_bytes) {
		printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n",
		       (unsigned long long)fs_info->delalloc_bytes);
	}
	if (fs_info->total_ref_cache_size) {
		printk(KERN_INFO "btrfs: at unmount reference cache size %llu\n",
		       (unsigned long long)fs_info->total_ref_cache_size);
	}

	free_extent_buffer(fs_info->extent_root->node);
	free_extent_buffer(fs_info->extent_root->commit_root);
	free_extent_buffer(fs_info->tree_root->node);
	free_extent_buffer(fs_info->tree_root->commit_root);
	free_extent_buffer(root->fs_info->chunk_root->node);
	free_extent_buffer(root->fs_info->chunk_root->commit_root);
	free_extent_buffer(root->fs_info->dev_root->node);
	free_extent_buffer(root->fs_info->dev_root->commit_root);
	free_extent_buffer(root->fs_info->csum_root->node);
	free_extent_buffer(root->fs_info->csum_root->commit_root);

	btrfs_free_block_groups(root->fs_info);

	del_fs_roots(fs_info);

	iput(fs_info->btree_inode);
	kfree(fs_info->delayed_root);

	btrfs_stop_workers(&fs_info->generic_worker);
	btrfs_stop_workers(&fs_info->fixup_workers);
	btrfs_stop_workers(&fs_info->delalloc_workers);
	btrfs_stop_workers(&fs_info->workers);
	btrfs_stop_workers(&fs_info->endio_workers);
	btrfs_stop_workers(&fs_info->endio_meta_workers);
	btrfs_stop_workers(&fs_info->endio_meta_write_workers);
	btrfs_stop_workers(&fs_info->endio_write_workers);
	btrfs_stop_workers(&fs_info->endio_freespace_worker);
	btrfs_stop_workers(&fs_info->submit_workers);
	btrfs_stop_workers(&fs_info->delayed_workers);

	btrfs_close_devices(fs_info->fs_devices);
	btrfs_mapping_tree_free(&fs_info->mapping_tree);

	bdi_destroy(&fs_info->bdi);
	cleanup_srcu_struct(&fs_info->subvol_srcu);

	kfree(fs_info->extent_root);
	kfree(fs_info->tree_root);
	kfree(fs_info->chunk_root);
	kfree(fs_info->dev_root);
	kfree(fs_info->csum_root);
	kfree(fs_info);

	return 0;
}

int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid)
{
	int ret;
	struct inode *btree_inode = buf->first_page->mapping->host;

	ret = extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf,
				     NULL);
	if (!ret)
		return ret;

	ret = verify_parent_transid(&BTRFS_I(btree_inode)->io_tree, buf,
				    parent_transid);
	return !ret;
}

int btrfs_set_buffer_uptodate(struct extent_buffer *buf)
{
	struct inode *btree_inode = buf->first_page->mapping->host;
	return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree,
					  buf);
}

void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
{
	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
	u64 transid = btrfs_header_generation(buf);
	struct inode *btree_inode = root->fs_info->btree_inode;
	int was_dirty;

	btrfs_assert_tree_locked(buf);
	if (transid != root->fs_info->generation) {
		printk(KERN_CRIT "btrfs transid mismatch buffer %llu, "
		       "found %llu running %llu\n",
		       (unsigned long long)buf->start,
		       (unsigned long long)transid,
		       (unsigned long long)root->fs_info->generation);
		WARN_ON(1);
	}
	was_dirty = set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree,
					    buf);
	if (!was_dirty) {
		spin_lock(&root->fs_info->delalloc_lock);
		root->fs_info->dirty_metadata_bytes += buf->len;
		spin_unlock(&root->fs_info->delalloc_lock);
	}
}

void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
{
	/*
	 * looks as though older kernels can get into trouble with
	 * this code, they end up stuck in balance_dirty_pages forever
	 */
	u64 num_dirty;
	unsigned long thresh = 32 * 1024 * 1024;

	if (current->flags & PF_MEMALLOC)
		return;

	btrfs_balance_delayed_items(root);

	num_dirty = root->fs_info->dirty_metadata_bytes;

	if (num_dirty > thresh) {
		balance_dirty_pages_ratelimited_nr(
				   root->fs_info->btree_inode->i_mapping, 1);
	}
	return;
}

void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
{
	/*
	 * looks as though older kernels can get into trouble with
	 * this code, they end up stuck in balance_dirty_pages forever
	 */
	u64 num_dirty;
	unsigned long thresh = 32 * 1024 * 1024;

	if (current->flags & PF_MEMALLOC)
		return;

	num_dirty = root->fs_info->dirty_metadata_bytes;

	if (num_dirty > thresh) {
		balance_dirty_pages_ratelimited_nr(
				   root->fs_info->btree_inode->i_mapping, 1);
	}
	return;
}

int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
{
	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
	int ret;
	ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
	if (ret == 0)
		set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags);
	return ret;
}

int btree_lock_page_hook(struct page *page)
{
	struct inode *inode = page->mapping->host;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct extent_buffer *eb;
	unsigned long len;
	u64 bytenr = page_offset(page);

	if (page->private == EXTENT_PAGE_PRIVATE)
		goto out;

	len = page->private >> 2;
	eb = find_extent_buffer(io_tree, bytenr, len);
	if (!eb)
		goto out;

	btrfs_tree_lock(eb);
	btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);

	if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
		spin_lock(&root->fs_info->delalloc_lock);
		if (root->fs_info->dirty_metadata_bytes >= eb->len)
			root->fs_info->dirty_metadata_bytes -= eb->len;
		else
			WARN_ON(1);
		spin_unlock(&root->fs_info->delalloc_lock);
	}

	btrfs_tree_unlock(eb);
	free_extent_buffer(eb);
out:
	lock_page(page);
	return 0;
}

static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
				    int read_only)
{
	if (read_only)
		return;

	if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
		printk(KERN_WARNING "warning: mount fs with errors, "
		       "running btrfsck is recommended\n");
}

int btrfs_error_commit_super(struct btrfs_root *root)
{
	int ret;

	mutex_lock(&root->fs_info->cleaner_mutex);
	btrfs_run_delayed_iputs(root);
	mutex_unlock(&root->fs_info->cleaner_mutex);

	down_write(&root->fs_info->cleanup_work_sem);
	up_write(&root->fs_info->cleanup_work_sem);

	/* cleanup FS via transaction */
	btrfs_cleanup_transaction(root);

	ret = write_ctree_super(NULL, root, 0);

	return ret;
}

2771 static int btrfs_destroy_ordered_operations(struct btrfs_root *root) 2769 static int btrfs_destroy_ordered_operations(struct btrfs_root *root)
2772 { 2770 {
2773 struct btrfs_inode *btrfs_inode; 2771 struct btrfs_inode *btrfs_inode;
2774 struct list_head splice; 2772 struct list_head splice;
2775 2773
2776 INIT_LIST_HEAD(&splice); 2774 INIT_LIST_HEAD(&splice);
2777 2775
2778 mutex_lock(&root->fs_info->ordered_operations_mutex); 2776 mutex_lock(&root->fs_info->ordered_operations_mutex);
2779 spin_lock(&root->fs_info->ordered_extent_lock); 2777 spin_lock(&root->fs_info->ordered_extent_lock);
2780 2778
2781 list_splice_init(&root->fs_info->ordered_operations, &splice); 2779 list_splice_init(&root->fs_info->ordered_operations, &splice);
2782 while (!list_empty(&splice)) { 2780 while (!list_empty(&splice)) {
2783 btrfs_inode = list_entry(splice.next, struct btrfs_inode, 2781 btrfs_inode = list_entry(splice.next, struct btrfs_inode,
2784 ordered_operations); 2782 ordered_operations);
2785 2783
2786 list_del_init(&btrfs_inode->ordered_operations); 2784 list_del_init(&btrfs_inode->ordered_operations);
2787 2785
2788 btrfs_invalidate_inodes(btrfs_inode->root); 2786 btrfs_invalidate_inodes(btrfs_inode->root);
2789 } 2787 }
2790 2788
2791 spin_unlock(&root->fs_info->ordered_extent_lock); 2789 spin_unlock(&root->fs_info->ordered_extent_lock);
2792 mutex_unlock(&root->fs_info->ordered_operations_mutex); 2790 mutex_unlock(&root->fs_info->ordered_operations_mutex);
2793 2791
2794 return 0; 2792 return 0;
2795 } 2793 }
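
The teardown above uses the splice-and-drain idiom: under the lock the whole shared list is moved in O(1) onto a stack-local head with list_splice_init(), leaving the shared list empty and reusable, and the entries are then processed from the private copy. A minimal user-space sketch of the idiom, with a hand-rolled circular doubly linked list standing in for the kernel's list_head (all names here are illustrative, not kernel API):

    #include <stdio.h>
    #include <stdlib.h>

    struct node {
        struct node *prev, *next;
        int payload;
    };

    static void list_init(struct node *head)
    {
        head->prev = head->next = head;
    }

    static void list_add_tail(struct node *head, struct node *n)
    {
        n->prev = head->prev;
        n->next = head;
        head->prev->next = n;
        head->prev = n;
    }

    static int list_empty(struct node *head)
    {
        return head->next == head;
    }

    /* move everything from src onto the empty dst, reinit src: O(1) */
    static void splice_init(struct node *src, struct node *dst)
    {
        if (list_empty(src))
            return;
        dst->next = src->next;
        dst->prev = src->prev;
        src->next->prev = dst;
        src->prev->next = dst;
        list_init(src);
    }

    int main(void)
    {
        struct node shared, splice, *n;
        int i;

        list_init(&shared);
        list_init(&splice);
        for (i = 0; i < 3; i++) {
            n = malloc(sizeof(*n));
            n->payload = i;
            list_add_tail(&shared, n);
        }

        /* in the kernel this single step happens under the lock;
         * afterwards the shared list is empty and usable again */
        splice_init(&shared, &splice);

        while (!list_empty(&splice)) {
            n = splice.next;
            n->prev->next = n->next;    /* list_del_init equivalent */
            n->next->prev = n->prev;
            printf("draining %d\n", n->payload);
            free(n);
        }
        return 0;
    }
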
2796 2794
2797 static int btrfs_destroy_ordered_extents(struct btrfs_root *root) 2795 static int btrfs_destroy_ordered_extents(struct btrfs_root *root)
2798 { 2796 {
2799 struct list_head splice; 2797 struct list_head splice;
2800 struct btrfs_ordered_extent *ordered; 2798 struct btrfs_ordered_extent *ordered;
2801 struct inode *inode; 2799 struct inode *inode;
2802 2800
2803 INIT_LIST_HEAD(&splice); 2801 INIT_LIST_HEAD(&splice);
2804 2802
2805 spin_lock(&root->fs_info->ordered_extent_lock); 2803 spin_lock(&root->fs_info->ordered_extent_lock);
2806 2804
2807 list_splice_init(&root->fs_info->ordered_extents, &splice); 2805 list_splice_init(&root->fs_info->ordered_extents, &splice);
2808 while (!list_empty(&splice)) { 2806 while (!list_empty(&splice)) {
2809 ordered = list_entry(splice.next, struct btrfs_ordered_extent, 2807 ordered = list_entry(splice.next, struct btrfs_ordered_extent,
2810 root_extent_list); 2808 root_extent_list);
2811 2809
2812 list_del_init(&ordered->root_extent_list); 2810 list_del_init(&ordered->root_extent_list);
2813 atomic_inc(&ordered->refs); 2811 atomic_inc(&ordered->refs);
2814 2812
2815 /* the inode may be getting freed (in sys_unlink path). */ 2813 /* the inode may be getting freed (in sys_unlink path). */
2816 inode = igrab(ordered->inode); 2814 inode = igrab(ordered->inode);
2817 2815
2818 spin_unlock(&root->fs_info->ordered_extent_lock); 2816 spin_unlock(&root->fs_info->ordered_extent_lock);
2819 if (inode) 2817 if (inode)
2820 iput(inode); 2818 iput(inode);
2821 2819
2822 atomic_set(&ordered->refs, 1); 2820 atomic_set(&ordered->refs, 1);
2823 btrfs_put_ordered_extent(ordered); 2821 btrfs_put_ordered_extent(ordered);
2824 2822
2825 spin_lock(&root->fs_info->ordered_extent_lock); 2823 spin_lock(&root->fs_info->ordered_extent_lock);
2826 } 2824 }
2827 2825
2828 spin_unlock(&root->fs_info->ordered_extent_lock); 2826 spin_unlock(&root->fs_info->ordered_extent_lock);
2829 2827
2830 return 0; 2828 return 0;
2831 } 2829 }
2832 2830
2833 static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, 2831 static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
2834 struct btrfs_root *root) 2832 struct btrfs_root *root)
2835 { 2833 {
2836 struct rb_node *node; 2834 struct rb_node *node;
2837 struct btrfs_delayed_ref_root *delayed_refs; 2835 struct btrfs_delayed_ref_root *delayed_refs;
2838 struct btrfs_delayed_ref_node *ref; 2836 struct btrfs_delayed_ref_node *ref;
2839 int ret = 0; 2837 int ret = 0;
2840 2838
2841 delayed_refs = &trans->delayed_refs; 2839 delayed_refs = &trans->delayed_refs;
2842 2840
2843 spin_lock(&delayed_refs->lock); 2841 spin_lock(&delayed_refs->lock);
2844 if (delayed_refs->num_entries == 0) { 2842 if (delayed_refs->num_entries == 0) {
2845 spin_unlock(&delayed_refs->lock); 2843 spin_unlock(&delayed_refs->lock);
2846 printk(KERN_INFO "delayed_refs has NO entry\n"); 2844 printk(KERN_INFO "delayed_refs has NO entry\n");
2847 return ret; 2845 return ret;
2848 } 2846 }
2849 2847
2850 node = rb_first(&delayed_refs->root); 2848 node = rb_first(&delayed_refs->root);
2851 while (node) { 2849 while (node) {
2852 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 2850 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
2853 node = rb_next(node); 2851 node = rb_next(node);
2854 2852
2855 ref->in_tree = 0; 2853 ref->in_tree = 0;
2856 rb_erase(&ref->rb_node, &delayed_refs->root); 2854 rb_erase(&ref->rb_node, &delayed_refs->root);
2857 delayed_refs->num_entries--; 2855 delayed_refs->num_entries--;
2858 2856
2859 atomic_set(&ref->refs, 1); 2857 atomic_set(&ref->refs, 1);
2860 if (btrfs_delayed_ref_is_head(ref)) { 2858 if (btrfs_delayed_ref_is_head(ref)) {
2861 struct btrfs_delayed_ref_head *head; 2859 struct btrfs_delayed_ref_head *head;
2862 2860
2863 head = btrfs_delayed_node_to_head(ref); 2861 head = btrfs_delayed_node_to_head(ref);
2864 mutex_lock(&head->mutex); 2862 mutex_lock(&head->mutex);
2865 kfree(head->extent_op); 2863 kfree(head->extent_op);
2866 delayed_refs->num_heads--; 2864 delayed_refs->num_heads--;
2867 if (list_empty(&head->cluster)) 2865 if (list_empty(&head->cluster))
2868 delayed_refs->num_heads_ready--; 2866 delayed_refs->num_heads_ready--;
2869 list_del_init(&head->cluster); 2867 list_del_init(&head->cluster);
2870 mutex_unlock(&head->mutex); 2868 mutex_unlock(&head->mutex);
2871 } 2869 }
2872 2870
2873 spin_unlock(&delayed_refs->lock); 2871 spin_unlock(&delayed_refs->lock);
2874 btrfs_put_delayed_ref(ref); 2872 btrfs_put_delayed_ref(ref);
2875 2873
2876 cond_resched(); 2874 cond_resched();
2877 spin_lock(&delayed_refs->lock); 2875 spin_lock(&delayed_refs->lock);
2878 } 2876 }
2879 2877
2880 spin_unlock(&delayed_refs->lock); 2878 spin_unlock(&delayed_refs->lock);
2881 2879
2882 return ret; 2880 return ret;
2883 } 2881 }
2884 2882
2885 static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t) 2883 static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t)
2886 { 2884 {
2887 struct btrfs_pending_snapshot *snapshot; 2885 struct btrfs_pending_snapshot *snapshot;
2888 struct list_head splice; 2886 struct list_head splice;
2889 2887
2890 INIT_LIST_HEAD(&splice); 2888 INIT_LIST_HEAD(&splice);
2891 2889
2892 list_splice_init(&t->pending_snapshots, &splice); 2890 list_splice_init(&t->pending_snapshots, &splice);
2893 2891
2894 while (!list_empty(&splice)) { 2892 while (!list_empty(&splice)) {
2895 snapshot = list_entry(splice.next, 2893 snapshot = list_entry(splice.next,
2896 struct btrfs_pending_snapshot, 2894 struct btrfs_pending_snapshot,
2897 list); 2895 list);
2898 2896
2899 list_del_init(&snapshot->list); 2897 list_del_init(&snapshot->list);
2900 2898
2901 kfree(snapshot); 2899 kfree(snapshot);
2902 } 2900 }
2903 2901
2904 return 0; 2902 return 0;
2905 } 2903 }
2906 2904
2907 static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root) 2905 static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
2908 { 2906 {
2909 struct btrfs_inode *btrfs_inode; 2907 struct btrfs_inode *btrfs_inode;
2910 struct list_head splice; 2908 struct list_head splice;
2911 2909
2912 INIT_LIST_HEAD(&splice); 2910 INIT_LIST_HEAD(&splice);
2913 2911
2914 list_splice_init(&root->fs_info->delalloc_inodes, &splice); 2912 list_splice_init(&root->fs_info->delalloc_inodes, &splice);
2915 2913
2916 spin_lock(&root->fs_info->delalloc_lock); 2914 spin_lock(&root->fs_info->delalloc_lock);
2917 2915
2918 while (!list_empty(&splice)) { 2916 while (!list_empty(&splice)) {
2919 btrfs_inode = list_entry(splice.next, struct btrfs_inode, 2917 btrfs_inode = list_entry(splice.next, struct btrfs_inode,
2920 delalloc_inodes); 2918 delalloc_inodes);
2921 2919
2922 list_del_init(&btrfs_inode->delalloc_inodes); 2920 list_del_init(&btrfs_inode->delalloc_inodes);
2923 2921
2924 btrfs_invalidate_inodes(btrfs_inode->root); 2922 btrfs_invalidate_inodes(btrfs_inode->root);
2925 } 2923 }
2926 2924
2927 spin_unlock(&root->fs_info->delalloc_lock); 2925 spin_unlock(&root->fs_info->delalloc_lock);
2928 2926
2929 return 0; 2927 return 0;
2930 } 2928 }
2931 2929
2932 static int btrfs_destroy_marked_extents(struct btrfs_root *root, 2930 static int btrfs_destroy_marked_extents(struct btrfs_root *root,
2933 struct extent_io_tree *dirty_pages, 2931 struct extent_io_tree *dirty_pages,
2934 int mark) 2932 int mark)
2935 { 2933 {
2936 int ret; 2934 int ret;
2937 struct page *page; 2935 struct page *page;
2938 struct inode *btree_inode = root->fs_info->btree_inode; 2936 struct inode *btree_inode = root->fs_info->btree_inode;
2939 struct extent_buffer *eb; 2937 struct extent_buffer *eb;
2940 u64 start = 0; 2938 u64 start = 0;
2941 u64 end; 2939 u64 end;
2942 u64 offset; 2940 u64 offset;
2943 unsigned long index; 2941 unsigned long index;
2944 2942
2945 while (1) { 2943 while (1) {
2946 ret = find_first_extent_bit(dirty_pages, start, &start, &end, 2944 ret = find_first_extent_bit(dirty_pages, start, &start, &end,
2947 mark); 2945 mark);
2948 if (ret) 2946 if (ret)
2949 break; 2947 break;
2950 2948
2951 clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS); 2949 clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS);
2952 while (start <= end) { 2950 while (start <= end) {
2953 index = start >> PAGE_CACHE_SHIFT; 2951 index = start >> PAGE_CACHE_SHIFT;
2954 start = (u64)(index + 1) << PAGE_CACHE_SHIFT; 2952 start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
2955 page = find_get_page(btree_inode->i_mapping, index); 2953 page = find_get_page(btree_inode->i_mapping, index);
2956 if (!page) 2954 if (!page)
2957 continue; 2955 continue;
2958 offset = page_offset(page); 2956 offset = page_offset(page);
2959 2957
2960 spin_lock(&dirty_pages->buffer_lock); 2958 spin_lock(&dirty_pages->buffer_lock);
2961 eb = radix_tree_lookup( 2959 eb = radix_tree_lookup(
2962 &(&BTRFS_I(page->mapping->host)->io_tree)->buffer, 2960 &(&BTRFS_I(page->mapping->host)->io_tree)->buffer,
2963 offset >> PAGE_CACHE_SHIFT); 2961 offset >> PAGE_CACHE_SHIFT);
2964 spin_unlock(&dirty_pages->buffer_lock); 2962 spin_unlock(&dirty_pages->buffer_lock);
2965 if (eb) { 2963 if (eb) {
2966 ret = test_and_clear_bit(EXTENT_BUFFER_DIRTY, 2964 ret = test_and_clear_bit(EXTENT_BUFFER_DIRTY,
2967 &eb->bflags); 2965 &eb->bflags);
2968 atomic_set(&eb->refs, 1); 2966 atomic_set(&eb->refs, 1);
2969 } 2967 }
2970 if (PageWriteback(page)) 2968 if (PageWriteback(page))
2971 end_page_writeback(page); 2969 end_page_writeback(page);
2972 2970
2973 lock_page(page); 2971 lock_page(page);
2974 if (PageDirty(page)) { 2972 if (PageDirty(page)) {
2975 clear_page_dirty_for_io(page); 2973 clear_page_dirty_for_io(page);
2976 spin_lock_irq(&page->mapping->tree_lock); 2974 spin_lock_irq(&page->mapping->tree_lock);
2977 radix_tree_tag_clear(&page->mapping->page_tree, 2975 radix_tree_tag_clear(&page->mapping->page_tree,
2978 page_index(page), 2976 page_index(page),
2979 PAGECACHE_TAG_DIRTY); 2977 PAGECACHE_TAG_DIRTY);
2980 spin_unlock_irq(&page->mapping->tree_lock); 2978 spin_unlock_irq(&page->mapping->tree_lock);
2981 } 2979 }
2982 2980
2983 page->mapping->a_ops->invalidatepage(page, 0); 2981 page->mapping->a_ops->invalidatepage(page, 0);
2984 unlock_page(page); 2982 unlock_page(page);
2985 } 2983 }
2986 } 2984 }
2987 2985
2988 return ret; 2986 return ret;
2989 } 2987 }
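
The inner loop above converts a byte range into page-cache indices: index = start >> PAGE_CACHE_SHIFT names the page containing start, and bumping start to (index + 1) << PAGE_CACHE_SHIFT guarantees each page in [start, end] is visited exactly once even when the range straddles page boundaries. A standalone sketch of that arithmetic, assuming 4 KiB pages (PAGE_SHIFT = 12 is an assumption for illustration, not taken from the source):

    #include <stdio.h>
    #include <stdint.h>

    #define PAGE_SHIFT 12    /* assume 4 KiB pages */

    int main(void)
    {
        uint64_t start = 0x1800;    /* range straddles page boundaries */
        uint64_t end = 0x4fff;
        uint64_t index;

        while (start <= end) {
            index = start >> PAGE_SHIFT;
            /* jump to the first byte of the next page, so a partially
             * covered page is visited exactly once */
            start = (index + 1) << PAGE_SHIFT;
            printf("page index %llu\n", (unsigned long long)index);
        }
        return 0;    /* prints indices 1, 2, 3, 4 */
    }
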
2990 2988
2991 static int btrfs_destroy_pinned_extent(struct btrfs_root *root, 2989 static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
2992 struct extent_io_tree *pinned_extents) 2990 struct extent_io_tree *pinned_extents)
2993 { 2991 {
2994 struct extent_io_tree *unpin; 2992 struct extent_io_tree *unpin;
2995 u64 start; 2993 u64 start;
2996 u64 end; 2994 u64 end;
2997 int ret; 2995 int ret;
2998 2996
2999 unpin = pinned_extents; 2997 unpin = pinned_extents;
3000 while (1) { 2998 while (1) {
3001 ret = find_first_extent_bit(unpin, 0, &start, &end, 2999 ret = find_first_extent_bit(unpin, 0, &start, &end,
3002 EXTENT_DIRTY); 3000 EXTENT_DIRTY);
3003 if (ret) 3001 if (ret)
3004 break; 3002 break;
3005 3003
3006 /* opt_discard */ 3004 /* opt_discard */
3007 if (btrfs_test_opt(root, DISCARD)) 3005 if (btrfs_test_opt(root, DISCARD))
3008 ret = btrfs_error_discard_extent(root, start, 3006 ret = btrfs_error_discard_extent(root, start,
3009 end + 1 - start, 3007 end + 1 - start,
3010 NULL); 3008 NULL);
3011 3009
3012 clear_extent_dirty(unpin, start, end, GFP_NOFS); 3010 clear_extent_dirty(unpin, start, end, GFP_NOFS);
3013 btrfs_error_unpin_extent_range(root, start, end); 3011 btrfs_error_unpin_extent_range(root, start, end);
3014 cond_resched(); 3012 cond_resched();
3015 } 3013 }
3016 3014
3017 return 0; 3015 return 0;
3018 } 3016 }
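
Both destroy helpers above walk an extent_io_tree with the same find/clear loop: look up the first range carrying the bit at or after the cursor, act on it, clear the bit, and repeat until the lookup fails. btrfs_destroy_pinned_extent can restart the search at 0 every round precisely because the range it just handled had its EXTENT_DIRTY bit cleared. A toy version of the calling convention over a plain bitmap (find_first_run and its names are invented for illustration):

    #include <stdio.h>

    #define NBITS 32

    /* find the first run of set bits at or after the cursor; returns 0
     * and fills the bounds on success, nonzero when the map is empty,
     * mirroring the find_first_extent_bit() calling convention */
    static int find_first_run(unsigned int map, int *start, int *end)
    {
        int i = *start;

        while (i < NBITS && !(map & (1u << i)))
            i++;
        if (i == NBITS)
            return 1;
        *start = i;
        while (i + 1 < NBITS && (map & (1u << (i + 1))))
            i++;
        *end = i;
        return 0;
    }

    int main(void)
    {
        unsigned int dirty = 0x0f30;    /* runs at bits 4-5 and 8-11 */
        int start = 0, end, i;

        while (!find_first_run(dirty, &start, &end)) {
            printf("range [%d, %d]\n", start, end);
            for (i = start; i <= end; i++)    /* clear_extent_dirty() */
                dirty &= ~(1u << i);
            start = end + 1;
        }
        return 0;
    }
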
3019 3017
3020 static int btrfs_cleanup_transaction(struct btrfs_root *root) 3018 static int btrfs_cleanup_transaction(struct btrfs_root *root)
3021 { 3019 {
3022 struct btrfs_transaction *t; 3020 struct btrfs_transaction *t;
3023 LIST_HEAD(list); 3021 LIST_HEAD(list);
3024 3022
3025 WARN_ON(1); 3023 WARN_ON(1);
3026 3024
3027 mutex_lock(&root->fs_info->transaction_kthread_mutex); 3025 mutex_lock(&root->fs_info->transaction_kthread_mutex);
3028 3026
3029 spin_lock(&root->fs_info->trans_lock); 3027 spin_lock(&root->fs_info->trans_lock);
3030 list_splice_init(&root->fs_info->trans_list, &list); 3028 list_splice_init(&root->fs_info->trans_list, &list);
3031 root->fs_info->trans_no_join = 1; 3029 root->fs_info->trans_no_join = 1;
3032 spin_unlock(&root->fs_info->trans_lock); 3030 spin_unlock(&root->fs_info->trans_lock);
3033 3031
3034 while (!list_empty(&list)) { 3032 while (!list_empty(&list)) {
3035 t = list_entry(list.next, struct btrfs_transaction, list); 3033 t = list_entry(list.next, struct btrfs_transaction, list);
3036 if (!t) 3034 if (!t)
3037 break; 3035 break;
3038 3036
3039 btrfs_destroy_ordered_operations(root); 3037 btrfs_destroy_ordered_operations(root);
3040 3038
3041 btrfs_destroy_ordered_extents(root); 3039 btrfs_destroy_ordered_extents(root);
3042 3040
3043 btrfs_destroy_delayed_refs(t, root); 3041 btrfs_destroy_delayed_refs(t, root);
3044 3042
3045 btrfs_block_rsv_release(root, 3043 btrfs_block_rsv_release(root,
3046 &root->fs_info->trans_block_rsv, 3044 &root->fs_info->trans_block_rsv,
3047 t->dirty_pages.dirty_bytes); 3045 t->dirty_pages.dirty_bytes);
3048 3046
3049 /* FIXME: cleanup wait for commit */ 3047 /* FIXME: cleanup wait for commit */
3050 t->in_commit = 1; 3048 t->in_commit = 1;
3051 t->blocked = 1; 3049 t->blocked = 1;
3052 if (waitqueue_active(&root->fs_info->transaction_blocked_wait)) 3050 if (waitqueue_active(&root->fs_info->transaction_blocked_wait))
3053 wake_up(&root->fs_info->transaction_blocked_wait); 3051 wake_up(&root->fs_info->transaction_blocked_wait);
3054 3052
3055 t->blocked = 0; 3053 t->blocked = 0;
3056 if (waitqueue_active(&root->fs_info->transaction_wait)) 3054 if (waitqueue_active(&root->fs_info->transaction_wait))
3057 wake_up(&root->fs_info->transaction_wait); 3055 wake_up(&root->fs_info->transaction_wait);
3058 3056
3059 t->commit_done = 1; 3057 t->commit_done = 1;
3060 if (waitqueue_active(&t->commit_wait)) 3058 if (waitqueue_active(&t->commit_wait))
3061 wake_up(&t->commit_wait); 3059 wake_up(&t->commit_wait);
3062 3060
3063 btrfs_destroy_pending_snapshots(t); 3061 btrfs_destroy_pending_snapshots(t);
3064 3062
3065 btrfs_destroy_delalloc_inodes(root); 3063 btrfs_destroy_delalloc_inodes(root);
3066 3064
3067 spin_lock(&root->fs_info->trans_lock); 3065 spin_lock(&root->fs_info->trans_lock);
3068 root->fs_info->running_transaction = NULL; 3066 root->fs_info->running_transaction = NULL;
3069 spin_unlock(&root->fs_info->trans_lock); 3067 spin_unlock(&root->fs_info->trans_lock);
3070 3068
3071 btrfs_destroy_marked_extents(root, &t->dirty_pages, 3069 btrfs_destroy_marked_extents(root, &t->dirty_pages,
3072 EXTENT_DIRTY); 3070 EXTENT_DIRTY);
3073 3071
3074 btrfs_destroy_pinned_extent(root, 3072 btrfs_destroy_pinned_extent(root,
3075 root->fs_info->pinned_extents); 3073 root->fs_info->pinned_extents);
3076 3074
3077 atomic_set(&t->use_count, 0); 3075 atomic_set(&t->use_count, 0);
3078 list_del_init(&t->list); 3076 list_del_init(&t->list);
3079 memset(t, 0, sizeof(*t)); 3077 memset(t, 0, sizeof(*t));
3080 kmem_cache_free(btrfs_transaction_cachep, t); 3078 kmem_cache_free(btrfs_transaction_cachep, t);
3081 } 3079 }
3082 3080
3083 spin_lock(&root->fs_info->trans_lock); 3081 spin_lock(&root->fs_info->trans_lock);
3084 root->fs_info->trans_no_join = 0; 3082 root->fs_info->trans_no_join = 0;
3085 spin_unlock(&root->fs_info->trans_lock); 3083 spin_unlock(&root->fs_info->trans_lock);
3086 mutex_unlock(&root->fs_info->transaction_kthread_mutex); 3084 mutex_unlock(&root->fs_info->transaction_kthread_mutex);
3087 3085
3088 return 0; 3086 return 0;
3089 } 3087 }
3090 3088
3091 static struct extent_io_ops btree_extent_io_ops = { 3089 static struct extent_io_ops btree_extent_io_ops = {
3092 .write_cache_pages_lock_hook = btree_lock_page_hook, 3090 .write_cache_pages_lock_hook = btree_lock_page_hook,
3093 .readpage_end_io_hook = btree_readpage_end_io_hook, 3091 .readpage_end_io_hook = btree_readpage_end_io_hook,
3094 .submit_bio_hook = btree_submit_bio_hook, 3092 .submit_bio_hook = btree_submit_bio_hook,
3095 /* note we're sharing with inode.c for the merge bio hook */ 3093 /* note we're sharing with inode.c for the merge bio hook */
3096 .merge_bio_hook = btrfs_merge_bio_hook, 3094 .merge_bio_hook = btrfs_merge_bio_hook,
3097 }; 3095 };
3098 3096
1 /* 1 /*
2 * Copyright (C) 2011 STRATO. All rights reserved. 2 * Copyright (C) 2011 STRATO. All rights reserved.
3 * 3 *
4 * This program is free software; you can redistribute it and/or 4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public 5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation. 6 * License v2 as published by the Free Software Foundation.
7 * 7 *
8 * This program is distributed in the hope that it will be useful, 8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details. 11 * General Public License for more details.
12 * 12 *
13 * You should have received a copy of the GNU General Public 13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the 14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA. 16 * Boston, MA 021110-1307, USA.
17 */ 17 */
18 18
19 #include <linux/sched.h> 19 #include <linux/sched.h>
20 #include <linux/pagemap.h> 20 #include <linux/pagemap.h>
21 #include <linux/writeback.h> 21 #include <linux/writeback.h>
22 #include <linux/blkdev.h> 22 #include <linux/blkdev.h>
23 #include <linux/rbtree.h> 23 #include <linux/rbtree.h>
24 #include <linux/slab.h> 24 #include <linux/slab.h>
25 #include <linux/workqueue.h> 25 #include <linux/workqueue.h>
26 #include "ctree.h" 26 #include "ctree.h"
27 #include "volumes.h" 27 #include "volumes.h"
28 #include "disk-io.h" 28 #include "disk-io.h"
29 #include "ordered-data.h" 29 #include "ordered-data.h"
30 30
31 /* 31 /*
32 * This is only the first step towards a full-features scrub. It reads all 32 * This is only the first step towards a full-features scrub. It reads all
33 * extent and super block and verifies the checksums. In case a bad checksum 33 * extent and super block and verifies the checksums. In case a bad checksum
34 * is found or the extent cannot be read, good data will be written back if 34 * is found or the extent cannot be read, good data will be written back if
35 * any can be found. 35 * any can be found.
36 * 36 *
37 * Future enhancements: 37 * Future enhancements:
38 * - To enhance the performance, better read-ahead strategies for the 38 * - To enhance the performance, better read-ahead strategies for the
39 * extent-tree can be employed. 39 * extent-tree can be employed.
40 * - In case an unrepairable extent is encountered, track which files are 40 * - In case an unrepairable extent is encountered, track which files are
41 * affected and report them 41 * affected and report them
42 * - In case of a read error on files with nodatasum, map the file and read 42 * - In case of a read error on files with nodatasum, map the file and read
43 * the extent to trigger a writeback of the good copy 43 * the extent to trigger a writeback of the good copy
44 * - track and record media errors, throw out bad devices 44 * - track and record media errors, throw out bad devices
45 * - add a mode to also read unallocated space 45 * - add a mode to also read unallocated space
46 * - make the prefetch cancellable 46 * - make the prefetch cancellable
47 */ 47 */
48 48
49 struct scrub_bio; 49 struct scrub_bio;
50 struct scrub_page; 50 struct scrub_page;
51 struct scrub_dev; 51 struct scrub_dev;
52 static void scrub_bio_end_io(struct bio *bio, int err); 52 static void scrub_bio_end_io(struct bio *bio, int err);
53 static void scrub_checksum(struct btrfs_work *work); 53 static void scrub_checksum(struct btrfs_work *work);
54 static int scrub_checksum_data(struct scrub_dev *sdev, 54 static int scrub_checksum_data(struct scrub_dev *sdev,
55 struct scrub_page *spag, void *buffer); 55 struct scrub_page *spag, void *buffer);
56 static int scrub_checksum_tree_block(struct scrub_dev *sdev, 56 static int scrub_checksum_tree_block(struct scrub_dev *sdev,
57 struct scrub_page *spag, u64 logical, 57 struct scrub_page *spag, u64 logical,
58 void *buffer); 58 void *buffer);
59 static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer); 59 static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer);
60 static int scrub_fixup_check(struct scrub_bio *sbio, int ix); 60 static int scrub_fixup_check(struct scrub_bio *sbio, int ix);
61 static void scrub_fixup_end_io(struct bio *bio, int err); 61 static void scrub_fixup_end_io(struct bio *bio, int err);
62 static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector, 62 static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector,
63 struct page *page); 63 struct page *page);
64 static void scrub_fixup(struct scrub_bio *sbio, int ix); 64 static void scrub_fixup(struct scrub_bio *sbio, int ix);
65 65
66 #define SCRUB_PAGES_PER_BIO 16 /* 64k per bio */ 66 #define SCRUB_PAGES_PER_BIO 16 /* 64k per bio */
67 #define SCRUB_BIOS_PER_DEV 16 /* 1 MB per device in flight */ 67 #define SCRUB_BIOS_PER_DEV 16 /* 1 MB per device in flight */
68 68
69 struct scrub_page { 69 struct scrub_page {
70 u64 flags; /* extent flags */ 70 u64 flags; /* extent flags */
71 u64 generation; 71 u64 generation;
72 u64 mirror_num; 72 u64 mirror_num;
73 int have_csum; 73 int have_csum;
74 u8 csum[BTRFS_CSUM_SIZE]; 74 u8 csum[BTRFS_CSUM_SIZE];
75 }; 75 };
76 76
77 struct scrub_bio { 77 struct scrub_bio {
78 int index; 78 int index;
79 struct scrub_dev *sdev; 79 struct scrub_dev *sdev;
80 struct bio *bio; 80 struct bio *bio;
81 int err; 81 int err;
82 u64 logical; 82 u64 logical;
83 u64 physical; 83 u64 physical;
84 struct scrub_page spag[SCRUB_PAGES_PER_BIO]; 84 struct scrub_page spag[SCRUB_PAGES_PER_BIO];
85 u64 count; 85 u64 count;
86 int next_free; 86 int next_free;
87 struct btrfs_work work; 87 struct btrfs_work work;
88 }; 88 };
89 89
90 struct scrub_dev { 90 struct scrub_dev {
91 struct scrub_bio *bios[SCRUB_BIOS_PER_DEV]; 91 struct scrub_bio *bios[SCRUB_BIOS_PER_DEV];
92 struct btrfs_device *dev; 92 struct btrfs_device *dev;
93 int first_free; 93 int first_free;
94 int curr; 94 int curr;
95 atomic_t in_flight; 95 atomic_t in_flight;
96 spinlock_t list_lock; 96 spinlock_t list_lock;
97 wait_queue_head_t list_wait; 97 wait_queue_head_t list_wait;
98 u16 csum_size; 98 u16 csum_size;
99 struct list_head csum_list; 99 struct list_head csum_list;
100 atomic_t cancel_req; 100 atomic_t cancel_req;
101 int readonly; 101 int readonly;
102 /* 102 /*
103 * statistics 103 * statistics
104 */ 104 */
105 struct btrfs_scrub_progress stat; 105 struct btrfs_scrub_progress stat;
106 spinlock_t stat_lock; 106 spinlock_t stat_lock;
107 }; 107 };
108 108
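
sdev->bios is managed as an index-linked free list: first_free heads the list, each scrub_bio's next_free points at the next free slot, and -1 terminates it, so a slot can be taken or returned under list_lock with two assignments and no pointer juggling. A self-contained sketch of the same structure (slot_get/slot_put are illustrative names; the kernel code sleeps on list_wait instead of returning -1 to the caller):

    #include <stdio.h>

    #define NSLOTS 4

    struct slot {
        int next_free;    /* index of next free slot, -1 terminates */
        int payload;
    };

    static struct slot slots[NSLOTS];
    static int first_free;

    static void freelist_init(void)
    {
        int i;

        for (i = 0; i < NSLOTS; i++)
            slots[i].next_free = (i == NSLOTS - 1) ? -1 : i + 1;
        first_free = 0;
    }

    static int slot_get(void)
    {
        int i = first_free;

        if (i != -1) {
            first_free = slots[i].next_free;
            slots[i].next_free = -1;
        }
        return i;    /* -1 means empty: the caller must wait */
    }

    static void slot_put(int i)
    {
        slots[i].next_free = first_free;
        first_free = i;
    }

    int main(void)
    {
        int a, b;

        freelist_init();
        a = slot_get();
        b = slot_get();
        printf("got %d and %d\n", a, b);   /* 0 and 1 */
        slot_put(a);
        printf("reuse %d\n", slot_get());  /* 0 again: LIFO reuse */
        return 0;
    }
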
109 static void scrub_free_csums(struct scrub_dev *sdev) 109 static void scrub_free_csums(struct scrub_dev *sdev)
110 { 110 {
111 while (!list_empty(&sdev->csum_list)) { 111 while (!list_empty(&sdev->csum_list)) {
112 struct btrfs_ordered_sum *sum; 112 struct btrfs_ordered_sum *sum;
113 sum = list_first_entry(&sdev->csum_list, 113 sum = list_first_entry(&sdev->csum_list,
114 struct btrfs_ordered_sum, list); 114 struct btrfs_ordered_sum, list);
115 list_del(&sum->list); 115 list_del(&sum->list);
116 kfree(sum); 116 kfree(sum);
117 } 117 }
118 } 118 }
119 119
120 static void scrub_free_bio(struct bio *bio) 120 static void scrub_free_bio(struct bio *bio)
121 { 121 {
122 int i; 122 int i;
123 struct page *last_page = NULL; 123 struct page *last_page = NULL;
124 124
125 if (!bio) 125 if (!bio)
126 return; 126 return;
127 127
128 for (i = 0; i < bio->bi_vcnt; ++i) { 128 for (i = 0; i < bio->bi_vcnt; ++i) {
129 if (bio->bi_io_vec[i].bv_page == last_page) 129 if (bio->bi_io_vec[i].bv_page == last_page)
130 continue; 130 continue;
131 last_page = bio->bi_io_vec[i].bv_page; 131 last_page = bio->bi_io_vec[i].bv_page;
132 __free_page(last_page); 132 __free_page(last_page);
133 } 133 }
134 bio_put(bio); 134 bio_put(bio);
135 } 135 }
136 136
137 static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev) 137 static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev)
138 { 138 {
139 int i; 139 int i;
140 140
141 if (!sdev) 141 if (!sdev)
142 return; 142 return;
143 143
144 for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) { 144 for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
145 struct scrub_bio *sbio = sdev->bios[i]; 145 struct scrub_bio *sbio = sdev->bios[i];
146 146
147 if (!sbio) 147 if (!sbio)
148 break; 148 break;
149 149
150 scrub_free_bio(sbio->bio); 150 scrub_free_bio(sbio->bio);
151 kfree(sbio); 151 kfree(sbio);
152 } 152 }
153 153
154 scrub_free_csums(sdev); 154 scrub_free_csums(sdev);
155 kfree(sdev); 155 kfree(sdev);
156 } 156 }
157 157
158 static noinline_for_stack 158 static noinline_for_stack
159 struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev) 159 struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
160 { 160 {
161 struct scrub_dev *sdev; 161 struct scrub_dev *sdev;
162 int i; 162 int i;
163 struct btrfs_fs_info *fs_info = dev->dev_root->fs_info; 163 struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;
164 164
165 sdev = kzalloc(sizeof(*sdev), GFP_NOFS); 165 sdev = kzalloc(sizeof(*sdev), GFP_NOFS);
166 if (!sdev) 166 if (!sdev)
167 goto nomem; 167 goto nomem;
168 sdev->dev = dev; 168 sdev->dev = dev;
169 for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) { 169 for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
170 struct scrub_bio *sbio; 170 struct scrub_bio *sbio;
171 171
172 sbio = kzalloc(sizeof(*sbio), GFP_NOFS); 172 sbio = kzalloc(sizeof(*sbio), GFP_NOFS);
173 if (!sbio) 173 if (!sbio)
174 goto nomem; 174 goto nomem;
175 sdev->bios[i] = sbio; 175 sdev->bios[i] = sbio;
176 176
177 sbio->index = i; 177 sbio->index = i;
178 sbio->sdev = sdev; 178 sbio->sdev = sdev;
179 sbio->count = 0; 179 sbio->count = 0;
180 sbio->work.func = scrub_checksum; 180 sbio->work.func = scrub_checksum;
181 181
182 if (i != SCRUB_BIOS_PER_DEV-1) 182 if (i != SCRUB_BIOS_PER_DEV-1)
183 sdev->bios[i]->next_free = i + 1; 183 sdev->bios[i]->next_free = i + 1;
184 else 184 else
185 sdev->bios[i]->next_free = -1; 185 sdev->bios[i]->next_free = -1;
186 } 186 }
187 sdev->first_free = 0; 187 sdev->first_free = 0;
188 sdev->curr = -1; 188 sdev->curr = -1;
189 atomic_set(&sdev->in_flight, 0); 189 atomic_set(&sdev->in_flight, 0);
190 atomic_set(&sdev->cancel_req, 0); 190 atomic_set(&sdev->cancel_req, 0);
191 sdev->csum_size = btrfs_super_csum_size(&fs_info->super_copy); 191 sdev->csum_size = btrfs_super_csum_size(&fs_info->super_copy);
192 INIT_LIST_HEAD(&sdev->csum_list); 192 INIT_LIST_HEAD(&sdev->csum_list);
193 193
194 spin_lock_init(&sdev->list_lock); 194 spin_lock_init(&sdev->list_lock);
195 spin_lock_init(&sdev->stat_lock); 195 spin_lock_init(&sdev->stat_lock);
196 init_waitqueue_head(&sdev->list_wait); 196 init_waitqueue_head(&sdev->list_wait);
197 return sdev; 197 return sdev;
198 198
199 nomem: 199 nomem:
200 scrub_free_dev(sdev); 200 scrub_free_dev(sdev);
201 return ERR_PTR(-ENOMEM); 201 return ERR_PTR(-ENOMEM);
202 } 202 }
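
scrub_setup_dev relies on a single error label: every allocation failure jumps to nomem, and scrub_free_dev() copes with a partially built device because kzalloc left unfilled bios[] slots NULL and the free loop stops at the first one. The same idiom in miniature (pool_setup/pool_free are invented names for illustration):

    #include <stdlib.h>

    #define N 4

    struct pool {
        char *bufs[N];    /* zeroed, so unfilled entries stay NULL */
    };

    static void pool_free(struct pool *p)
    {
        int i;

        if (!p)
            return;
        for (i = 0; i < N; i++) {
            if (!p->bufs[i])
                break;    /* first NULL marks the end of what exists */
            free(p->bufs[i]);
        }
        free(p);
    }

    static struct pool *pool_setup(void)
    {
        struct pool *p;
        int i;

        p = calloc(1, sizeof(*p));    /* like kzalloc: all slots NULL */
        if (!p)
            goto nomem;
        for (i = 0; i < N; i++) {
            p->bufs[i] = malloc(64);
            if (!p->bufs[i])
                goto nomem;    /* one label handles every failure */
        }
        return p;

    nomem:
        pool_free(p);    /* safe on NULL and on partial setup */
        return NULL;
    }

    int main(void)
    {
        struct pool *p = pool_setup();
        pool_free(p);
        return 0;
    }
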
203 203
204 /* 204 /*
205 * scrub_recheck_error gets called when either verification of the page 205 * scrub_recheck_error gets called when either verification of the page
206 * failed or the bio failed to read, e.g. with EIO. In the latter case, 206 * failed or the bio failed to read, e.g. with EIO. In the latter case,
207 * recheck_error gets called for every page in the bio, even though only 207 * recheck_error gets called for every page in the bio, even though only
208 * one may be bad 208 * one may be bad
209 */ 209 */
210 static void scrub_recheck_error(struct scrub_bio *sbio, int ix) 210 static void scrub_recheck_error(struct scrub_bio *sbio, int ix)
211 { 211 {
212 if (sbio->err) { 212 if (sbio->err) {
213 if (scrub_fixup_io(READ, sbio->sdev->dev->bdev, 213 if (scrub_fixup_io(READ, sbio->sdev->dev->bdev,
214 (sbio->physical + ix * PAGE_SIZE) >> 9, 214 (sbio->physical + ix * PAGE_SIZE) >> 9,
215 sbio->bio->bi_io_vec[ix].bv_page) == 0) { 215 sbio->bio->bi_io_vec[ix].bv_page) == 0) {
216 if (scrub_fixup_check(sbio, ix) == 0) 216 if (scrub_fixup_check(sbio, ix) == 0)
217 return; 217 return;
218 } 218 }
219 } 219 }
220 220
221 scrub_fixup(sbio, ix); 221 scrub_fixup(sbio, ix);
222 } 222 }
223 223
224 static int scrub_fixup_check(struct scrub_bio *sbio, int ix) 224 static int scrub_fixup_check(struct scrub_bio *sbio, int ix)
225 { 225 {
226 int ret = 1; 226 int ret = 1;
227 struct page *page; 227 struct page *page;
228 void *buffer; 228 void *buffer;
229 u64 flags = sbio->spag[ix].flags; 229 u64 flags = sbio->spag[ix].flags;
230 230
231 page = sbio->bio->bi_io_vec[ix].bv_page; 231 page = sbio->bio->bi_io_vec[ix].bv_page;
232 buffer = kmap_atomic(page, KM_USER0); 232 buffer = kmap_atomic(page, KM_USER0);
233 if (flags & BTRFS_EXTENT_FLAG_DATA) { 233 if (flags & BTRFS_EXTENT_FLAG_DATA) {
234 ret = scrub_checksum_data(sbio->sdev, 234 ret = scrub_checksum_data(sbio->sdev,
235 sbio->spag + ix, buffer); 235 sbio->spag + ix, buffer);
236 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { 236 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
237 ret = scrub_checksum_tree_block(sbio->sdev, 237 ret = scrub_checksum_tree_block(sbio->sdev,
238 sbio->spag + ix, 238 sbio->spag + ix,
239 sbio->logical + ix * PAGE_SIZE, 239 sbio->logical + ix * PAGE_SIZE,
240 buffer); 240 buffer);
241 } else { 241 } else {
242 WARN_ON(1); 242 WARN_ON(1);
243 } 243 }
244 kunmap_atomic(buffer, KM_USER0); 244 kunmap_atomic(buffer, KM_USER0);
245 245
246 return ret; 246 return ret;
247 } 247 }
248 248
249 static void scrub_fixup_end_io(struct bio *bio, int err) 249 static void scrub_fixup_end_io(struct bio *bio, int err)
250 { 250 {
251 complete((struct completion *)bio->bi_private); 251 complete((struct completion *)bio->bi_private);
252 } 252 }
253 253
254 static void scrub_fixup(struct scrub_bio *sbio, int ix) 254 static void scrub_fixup(struct scrub_bio *sbio, int ix)
255 { 255 {
256 struct scrub_dev *sdev = sbio->sdev; 256 struct scrub_dev *sdev = sbio->sdev;
257 struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info; 257 struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
258 struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; 258 struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
259 struct btrfs_multi_bio *multi = NULL; 259 struct btrfs_multi_bio *multi = NULL;
260 u64 logical = sbio->logical + ix * PAGE_SIZE; 260 u64 logical = sbio->logical + ix * PAGE_SIZE;
261 u64 length; 261 u64 length;
262 int i; 262 int i;
263 int ret; 263 int ret;
264 DECLARE_COMPLETION_ONSTACK(complete); 264 DECLARE_COMPLETION_ONSTACK(complete);
265 265
266 if ((sbio->spag[ix].flags & BTRFS_EXTENT_FLAG_DATA) && 266 if ((sbio->spag[ix].flags & BTRFS_EXTENT_FLAG_DATA) &&
267 (sbio->spag[ix].have_csum == 0)) { 267 (sbio->spag[ix].have_csum == 0)) {
268 /* 268 /*
269 * nodatasum, don't try to fix anything 269 * nodatasum, don't try to fix anything
270 * FIXME: we can do better, open the inode and trigger a 270 * FIXME: we can do better, open the inode and trigger a
271 * writeback 271 * writeback
272 */ 272 */
273 goto uncorrectable; 273 goto uncorrectable;
274 } 274 }
275 275
276 length = PAGE_SIZE; 276 length = PAGE_SIZE;
277 ret = btrfs_map_block(map_tree, REQ_WRITE, logical, &length, 277 ret = btrfs_map_block(map_tree, REQ_WRITE, logical, &length,
278 &multi, 0); 278 &multi, 0);
279 if (ret || !multi || length < PAGE_SIZE) { 279 if (ret || !multi || length < PAGE_SIZE) {
280 printk(KERN_ERR 280 printk(KERN_ERR
281 "scrub_fixup: btrfs_map_block failed us for %llu\n", 281 "scrub_fixup: btrfs_map_block failed us for %llu\n",
282 (unsigned long long)logical); 282 (unsigned long long)logical);
283 WARN_ON(1); 283 WARN_ON(1);
284 return; 284 return;
285 } 285 }
286 286
287 if (multi->num_stripes == 1) 287 if (multi->num_stripes == 1)
288 /* there aren't any replicas */ 288 /* there aren't any replicas */
289 goto uncorrectable; 289 goto uncorrectable;
290 290
291 /* 291 /*
292 * first find a good copy 292 * first find a good copy
293 */ 293 */
294 for (i = 0; i < multi->num_stripes; ++i) { 294 for (i = 0; i < multi->num_stripes; ++i) {
295 if (i == sbio->spag[ix].mirror_num) 295 if (i == sbio->spag[ix].mirror_num)
296 continue; 296 continue;
297 297
298 if (scrub_fixup_io(READ, multi->stripes[i].dev->bdev, 298 if (scrub_fixup_io(READ, multi->stripes[i].dev->bdev,
299 multi->stripes[i].physical >> 9, 299 multi->stripes[i].physical >> 9,
300 sbio->bio->bi_io_vec[ix].bv_page)) { 300 sbio->bio->bi_io_vec[ix].bv_page)) {
301 /* I/O-error, this is not a good copy */ 301 /* I/O-error, this is not a good copy */
302 continue; 302 continue;
303 } 303 }
304 304
305 if (scrub_fixup_check(sbio, ix) == 0) 305 if (scrub_fixup_check(sbio, ix) == 0)
306 break; 306 break;
307 } 307 }
308 if (i == multi->num_stripes) 308 if (i == multi->num_stripes)
309 goto uncorrectable; 309 goto uncorrectable;
310 310
311 if (!sdev->readonly) { 311 if (!sdev->readonly) {
312 /* 312 /*
313 * bi_io_vec[ix].bv_page now contains good data, write it back 313 * bi_io_vec[ix].bv_page now contains good data, write it back
314 */ 314 */
315 if (scrub_fixup_io(WRITE, sdev->dev->bdev, 315 if (scrub_fixup_io(WRITE, sdev->dev->bdev,
316 (sbio->physical + ix * PAGE_SIZE) >> 9, 316 (sbio->physical + ix * PAGE_SIZE) >> 9,
317 sbio->bio->bi_io_vec[ix].bv_page)) { 317 sbio->bio->bi_io_vec[ix].bv_page)) {
318 /* I/O-error, writeback failed, give up */ 318 /* I/O-error, writeback failed, give up */
319 goto uncorrectable; 319 goto uncorrectable;
320 } 320 }
321 } 321 }
322 322
323 kfree(multi); 323 kfree(multi);
324 spin_lock(&sdev->stat_lock); 324 spin_lock(&sdev->stat_lock);
325 ++sdev->stat.corrected_errors; 325 ++sdev->stat.corrected_errors;
326 spin_unlock(&sdev->stat_lock); 326 spin_unlock(&sdev->stat_lock);
327 327
328 if (printk_ratelimit()) 328 if (printk_ratelimit())
329 printk(KERN_ERR "btrfs: fixed up at %llu\n", 329 printk(KERN_ERR "btrfs: fixed up at %llu\n",
330 (unsigned long long)logical); 330 (unsigned long long)logical);
331 return; 331 return;
332 332
333 uncorrectable: 333 uncorrectable:
334 kfree(multi); 334 kfree(multi);
335 spin_lock(&sdev->stat_lock); 335 spin_lock(&sdev->stat_lock);
336 ++sdev->stat.uncorrectable_errors; 336 ++sdev->stat.uncorrectable_errors;
337 spin_unlock(&sdev->stat_lock); 337 spin_unlock(&sdev->stat_lock);
338 338
339 if (printk_ratelimit()) 339 if (printk_ratelimit())
340 printk(KERN_ERR "btrfs: unable to fixup at %llu\n", 340 printk(KERN_ERR "btrfs: unable to fixup at %llu\n",
341 (unsigned long long)logical); 341 (unsigned long long)logical);
342 } 342 }
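
The repair path above is: skip the mirror the bad read came from, re-read the page from each remaining stripe until one passes verification, then write the good page back over the failing location (unless the scrub is read-only). A user-space simulation of that control flow, with in-memory mirrors and a trivial verifier standing in for scrub_fixup_check (all of it illustrative):

    #include <stdio.h>
    #include <string.h>

    #define NMIRRORS 3
    #define BLKSZ 8

    /* stand-in for scrub_fixup_check(): 0 means the copy verified ok */
    static int verify(const char *buf)
    {
        return memcmp(buf, "gooddata", BLKSZ) != 0;
    }

    int main(void)
    {
        char mirrors[NMIRRORS][BLKSZ];
        char page[BLKSZ];
        int bad = 1;    /* the copy the failing read came from */
        int i;

        memcpy(mirrors[0], "gooddata", BLKSZ);
        memcpy(mirrors[1], "badXdata", BLKSZ);    /* corrupted copy */
        memcpy(mirrors[2], "gooddata", BLKSZ);

        /* first find a good copy, skipping the mirror that failed */
        for (i = 0; i < NMIRRORS; i++) {
            if (i == bad)
                continue;
            memcpy(page, mirrors[i], BLKSZ);    /* the re-read */
            if (verify(page) == 0)
                break;
        }
        if (i == NMIRRORS) {
            printf("uncorrectable\n");
            return 1;
        }

        /* page holds good data now; write it back over the bad copy */
        memcpy(mirrors[bad], page, BLKSZ);
        printf("fixed up from mirror %d\n", i);
        return 0;
    }
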
343 343
344 static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector, 344 static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector,
345 struct page *page) 345 struct page *page)
346 { 346 {
347 struct bio *bio = NULL; 347 struct bio *bio = NULL;
348 int ret; 348 int ret;
349 DECLARE_COMPLETION_ONSTACK(complete); 349 DECLARE_COMPLETION_ONSTACK(complete);
350 350
351 bio = bio_alloc(GFP_NOFS, 1); 351 bio = bio_alloc(GFP_NOFS, 1);
352 bio->bi_bdev = bdev; 352 bio->bi_bdev = bdev;
353 bio->bi_sector = sector; 353 bio->bi_sector = sector;
354 bio_add_page(bio, page, PAGE_SIZE, 0); 354 bio_add_page(bio, page, PAGE_SIZE, 0);
355 bio->bi_end_io = scrub_fixup_end_io; 355 bio->bi_end_io = scrub_fixup_end_io;
356 bio->bi_private = &complete; 356 bio->bi_private = &complete;
357 submit_bio(rw, bio); 357 submit_bio(rw, bio);
358 358
359 /* this will also unplug the queue */ 359 /* this will also unplug the queue */
360 wait_for_completion(&complete); 360 wait_for_completion(&complete);
361 361
362 ret = !test_bit(BIO_UPTODATE, &bio->bi_flags); 362 ret = !test_bit(BIO_UPTODATE, &bio->bi_flags);
363 bio_put(bio); 363 bio_put(bio);
364 return ret; 364 return ret;
365 } 365 }
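
scrub_fixup_io turns the asynchronous bio interface into a synchronous read or write: the completion is declared on the stack, the end_io callback fires complete(), and the submitter blocks in wait_for_completion() until then. A rough user-space analog of that handshake built on pthreads (compile with -lpthread; the completion struct here is hand-rolled for illustration, not a library type):

    #include <pthread.h>
    #include <stdio.h>

    struct completion {
        pthread_mutex_t lock;
        pthread_cond_t cond;
        int done;
    };

    static void complete(struct completion *c)
    {
        pthread_mutex_lock(&c->lock);
        c->done = 1;
        pthread_cond_signal(&c->cond);
        pthread_mutex_unlock(&c->lock);
    }

    static void wait_for_completion(struct completion *c)
    {
        pthread_mutex_lock(&c->lock);
        while (!c->done)
            pthread_cond_wait(&c->cond, &c->lock);
        pthread_mutex_unlock(&c->lock);
    }

    static void *fake_end_io(void *arg)
    {
        /* stands in for scrub_fixup_end_io() running at I/O completion */
        complete(arg);
        return NULL;
    }

    int main(void)
    {
        struct completion c = {
            PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0
        };
        pthread_t t;

        pthread_create(&t, NULL, fake_end_io, &c);
        wait_for_completion(&c);    /* returns once end_io has run */
        pthread_join(t, NULL);
        printf("I/O complete\n");
        return 0;
    }
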
366 366
367 static void scrub_bio_end_io(struct bio *bio, int err) 367 static void scrub_bio_end_io(struct bio *bio, int err)
368 { 368 {
369 struct scrub_bio *sbio = bio->bi_private; 369 struct scrub_bio *sbio = bio->bi_private;
370 struct scrub_dev *sdev = sbio->sdev; 370 struct scrub_dev *sdev = sbio->sdev;
371 struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info; 371 struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
372 372
373 sbio->err = err; 373 sbio->err = err;
374 sbio->bio = bio; 374 sbio->bio = bio;
375 375
376 btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work); 376 btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work);
377 } 377 }
378 378
379 static void scrub_checksum(struct btrfs_work *work) 379 static void scrub_checksum(struct btrfs_work *work)
380 { 380 {
381 struct scrub_bio *sbio = container_of(work, struct scrub_bio, work); 381 struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
382 struct scrub_dev *sdev = sbio->sdev; 382 struct scrub_dev *sdev = sbio->sdev;
383 struct page *page; 383 struct page *page;
384 void *buffer; 384 void *buffer;
385 int i; 385 int i;
386 u64 flags; 386 u64 flags;
387 u64 logical; 387 u64 logical;
388 int ret; 388 int ret;
389 389
390 if (sbio->err) { 390 if (sbio->err) {
391 for (i = 0; i < sbio->count; ++i) 391 for (i = 0; i < sbio->count; ++i)
392 scrub_recheck_error(sbio, i); 392 scrub_recheck_error(sbio, i);
393 393
394 sbio->bio->bi_flags &= ~(BIO_POOL_MASK - 1); 394 sbio->bio->bi_flags &= ~(BIO_POOL_MASK - 1);
395 sbio->bio->bi_flags |= 1 << BIO_UPTODATE; 395 sbio->bio->bi_flags |= 1 << BIO_UPTODATE;
396 sbio->bio->bi_phys_segments = 0; 396 sbio->bio->bi_phys_segments = 0;
397 sbio->bio->bi_idx = 0; 397 sbio->bio->bi_idx = 0;
398 398
399 for (i = 0; i < sbio->count; i++) { 399 for (i = 0; i < sbio->count; i++) {
400 struct bio_vec *bi; 400 struct bio_vec *bi;
401 bi = &sbio->bio->bi_io_vec[i]; 401 bi = &sbio->bio->bi_io_vec[i];
402 bi->bv_offset = 0; 402 bi->bv_offset = 0;
403 bi->bv_len = PAGE_SIZE; 403 bi->bv_len = PAGE_SIZE;
404 } 404 }
405 405
406 spin_lock(&sdev->stat_lock); 406 spin_lock(&sdev->stat_lock);
407 ++sdev->stat.read_errors; 407 ++sdev->stat.read_errors;
408 spin_unlock(&sdev->stat_lock); 408 spin_unlock(&sdev->stat_lock);
409 goto out; 409 goto out;
410 } 410 }
411 for (i = 0; i < sbio->count; ++i) { 411 for (i = 0; i < sbio->count; ++i) {
412 page = sbio->bio->bi_io_vec[i].bv_page; 412 page = sbio->bio->bi_io_vec[i].bv_page;
413 buffer = kmap_atomic(page, KM_USER0); 413 buffer = kmap_atomic(page, KM_USER0);
414 flags = sbio->spag[i].flags; 414 flags = sbio->spag[i].flags;
415 logical = sbio->logical + i * PAGE_SIZE; 415 logical = sbio->logical + i * PAGE_SIZE;
416 ret = 0; 416 ret = 0;
417 if (flags & BTRFS_EXTENT_FLAG_DATA) { 417 if (flags & BTRFS_EXTENT_FLAG_DATA) {
418 ret = scrub_checksum_data(sdev, sbio->spag + i, buffer); 418 ret = scrub_checksum_data(sdev, sbio->spag + i, buffer);
419 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { 419 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
420 ret = scrub_checksum_tree_block(sdev, sbio->spag + i, 420 ret = scrub_checksum_tree_block(sdev, sbio->spag + i,
421 logical, buffer); 421 logical, buffer);
422 } else if (flags & BTRFS_EXTENT_FLAG_SUPER) { 422 } else if (flags & BTRFS_EXTENT_FLAG_SUPER) {
423 BUG_ON(i); 423 BUG_ON(i);
424 (void)scrub_checksum_super(sbio, buffer); 424 (void)scrub_checksum_super(sbio, buffer);
425 } else { 425 } else {
426 WARN_ON(1); 426 WARN_ON(1);
427 } 427 }
428 kunmap_atomic(buffer, KM_USER0); 428 kunmap_atomic(buffer, KM_USER0);
429 if (ret) 429 if (ret)
430 scrub_recheck_error(sbio, i); 430 scrub_recheck_error(sbio, i);
431 } 431 }
432 432
433 out: 433 out:
434 scrub_free_bio(sbio->bio); 434 scrub_free_bio(sbio->bio);
435 sbio->bio = NULL; 435 sbio->bio = NULL;
436 spin_lock(&sdev->list_lock); 436 spin_lock(&sdev->list_lock);
437 sbio->next_free = sdev->first_free; 437 sbio->next_free = sdev->first_free;
438 sdev->first_free = sbio->index; 438 sdev->first_free = sbio->index;
439 spin_unlock(&sdev->list_lock); 439 spin_unlock(&sdev->list_lock);
440 atomic_dec(&sdev->in_flight); 440 atomic_dec(&sdev->in_flight);
441 wake_up(&sdev->list_wait); 441 wake_up(&sdev->list_wait);
442 } 442 }
443 443
444 static int scrub_checksum_data(struct scrub_dev *sdev, 444 static int scrub_checksum_data(struct scrub_dev *sdev,
445 struct scrub_page *spag, void *buffer) 445 struct scrub_page *spag, void *buffer)
446 { 446 {
447 u8 csum[BTRFS_CSUM_SIZE]; 447 u8 csum[BTRFS_CSUM_SIZE];
448 u32 crc = ~(u32)0; 448 u32 crc = ~(u32)0;
449 int fail = 0; 449 int fail = 0;
450 struct btrfs_root *root = sdev->dev->dev_root; 450 struct btrfs_root *root = sdev->dev->dev_root;
451 451
452 if (!spag->have_csum) 452 if (!spag->have_csum)
453 return 0; 453 return 0;
454 454
455 crc = btrfs_csum_data(root, buffer, crc, PAGE_SIZE); 455 crc = btrfs_csum_data(root, buffer, crc, PAGE_SIZE);
456 btrfs_csum_final(crc, csum); 456 btrfs_csum_final(crc, csum);
457 if (memcmp(csum, spag->csum, sdev->csum_size)) 457 if (memcmp(csum, spag->csum, sdev->csum_size))
458 fail = 1; 458 fail = 1;
459 459
460 spin_lock(&sdev->stat_lock); 460 spin_lock(&sdev->stat_lock);
461 ++sdev->stat.data_extents_scrubbed; 461 ++sdev->stat.data_extents_scrubbed;
462 sdev->stat.data_bytes_scrubbed += PAGE_SIZE; 462 sdev->stat.data_bytes_scrubbed += PAGE_SIZE;
463 if (fail) 463 if (fail)
464 ++sdev->stat.csum_errors; 464 ++sdev->stat.csum_errors;
465 spin_unlock(&sdev->stat_lock); 465 spin_unlock(&sdev->stat_lock);
466 466
467 return fail; 467 return fail;
468 } 468 }
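
Data checksumming here is CRC32C seeded with ~0; btrfs_csum_final() then applies the final inversion and stores the result little-endian, and the scrub compares csum_size bytes of that against what the csum tree recorded. A standalone sketch of the verify step with a bitwise CRC32C (the seed/invert/byte-order details follow my reading of the helpers and should be treated as assumptions):

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    /* bitwise CRC32C (Castagnoli, reflected polynomial 0x82F63B78) */
    static uint32_t crc32c(uint32_t crc, const void *buf, size_t len)
    {
        const uint8_t *p = buf;
        int i;

        while (len--) {
            crc ^= *p++;
            for (i = 0; i < 8; i++)
                crc = (crc >> 1) ^ (0x82F63B78 & -(crc & 1));
        }
        return crc;
    }

    /* the btrfs_csum_final() step: invert and store little-endian */
    static void csum_final(uint32_t crc, uint8_t *out)
    {
        crc = ~crc;
        out[0] = (uint8_t)crc;
        out[1] = (uint8_t)(crc >> 8);
        out[2] = (uint8_t)(crc >> 16);
        out[3] = (uint8_t)(crc >> 24);
    }

    int main(void)
    {
        char block[4096] = "some data";
        uint8_t stored[4], now[4];

        /* what was recorded in the csum tree at write time */
        csum_final(crc32c(~0u, block, sizeof(block)), stored);

        /* the scrub-side recomputation and compare */
        csum_final(crc32c(~0u, block, sizeof(block)), now);
        printf("%s\n", memcmp(now, stored, sizeof(now)) ?
               "csum error" : "csum ok");
        return 0;
    }
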
469 469
470 static int scrub_checksum_tree_block(struct scrub_dev *sdev, 470 static int scrub_checksum_tree_block(struct scrub_dev *sdev,
471 struct scrub_page *spag, u64 logical, 471 struct scrub_page *spag, u64 logical,
472 void *buffer) 472 void *buffer)
473 { 473 {
474 struct btrfs_header *h; 474 struct btrfs_header *h;
475 struct btrfs_root *root = sdev->dev->dev_root; 475 struct btrfs_root *root = sdev->dev->dev_root;
476 struct btrfs_fs_info *fs_info = root->fs_info; 476 struct btrfs_fs_info *fs_info = root->fs_info;
477 u8 csum[BTRFS_CSUM_SIZE]; 477 u8 csum[BTRFS_CSUM_SIZE];
478 u32 crc = ~(u32)0; 478 u32 crc = ~(u32)0;
479 int fail = 0; 479 int fail = 0;
480 int crc_fail = 0; 480 int crc_fail = 0;
481 481
482 /* 482 /*
483 * we don't use the getter functions here, as we 483 * we don't use the getter functions here, as we
484 * a) don't have an extent buffer and 484 * a) don't have an extent buffer and
485 * b) the page is already kmapped 485 * b) the page is already kmapped
486 */ 486 */
487 h = (struct btrfs_header *)buffer; 487 h = (struct btrfs_header *)buffer;
488 488
489 if (logical != le64_to_cpu(h->bytenr)) 489 if (logical != le64_to_cpu(h->bytenr))
490 ++fail; 490 ++fail;
491 491
492 if (spag->generation != le64_to_cpu(h->generation)) 492 if (spag->generation != le64_to_cpu(h->generation))
493 ++fail; 493 ++fail;
494 494
495 if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE)) 495 if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
496 ++fail; 496 ++fail;
497 497
498 if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid, 498 if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
499 BTRFS_UUID_SIZE)) 499 BTRFS_UUID_SIZE))
500 ++fail; 500 ++fail;
501 501
502 crc = btrfs_csum_data(root, buffer + BTRFS_CSUM_SIZE, crc, 502 crc = btrfs_csum_data(root, buffer + BTRFS_CSUM_SIZE, crc,
503 PAGE_SIZE - BTRFS_CSUM_SIZE); 503 PAGE_SIZE - BTRFS_CSUM_SIZE);
504 btrfs_csum_final(crc, csum); 504 btrfs_csum_final(crc, csum);
505 if (memcmp(csum, h->csum, sdev->csum_size)) 505 if (memcmp(csum, h->csum, sdev->csum_size))
506 ++crc_fail; 506 ++crc_fail;
507 507
508 spin_lock(&sdev->stat_lock); 508 spin_lock(&sdev->stat_lock);
509 ++sdev->stat.tree_extents_scrubbed; 509 ++sdev->stat.tree_extents_scrubbed;
510 sdev->stat.tree_bytes_scrubbed += PAGE_SIZE; 510 sdev->stat.tree_bytes_scrubbed += PAGE_SIZE;
511 if (crc_fail) 511 if (crc_fail)
512 ++sdev->stat.csum_errors; 512 ++sdev->stat.csum_errors;
513 if (fail) 513 if (fail)
514 ++sdev->stat.verify_errors; 514 ++sdev->stat.verify_errors;
515 spin_unlock(&sdev->stat_lock); 515 spin_unlock(&sdev->stat_lock);
516 516
517 return fail || crc_fail; 517 return fail || crc_fail;
518 } 518 }
519 519
520 static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer) 520 static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer)
521 { 521 {
522 struct btrfs_super_block *s; 522 struct btrfs_super_block *s;
523 u64 logical; 523 u64 logical;
524 struct scrub_dev *sdev = sbio->sdev; 524 struct scrub_dev *sdev = sbio->sdev;
525 struct btrfs_root *root = sdev->dev->dev_root; 525 struct btrfs_root *root = sdev->dev->dev_root;
526 struct btrfs_fs_info *fs_info = root->fs_info; 526 struct btrfs_fs_info *fs_info = root->fs_info;
527 u8 csum[BTRFS_CSUM_SIZE]; 527 u8 csum[BTRFS_CSUM_SIZE];
528 u32 crc = ~(u32)0; 528 u32 crc = ~(u32)0;
529 int fail = 0; 529 int fail = 0;
530 530
531 s = (struct btrfs_super_block *)buffer; 531 s = (struct btrfs_super_block *)buffer;
532 logical = sbio->logical; 532 logical = sbio->logical;
533 533
534 if (logical != le64_to_cpu(s->bytenr)) 534 if (logical != le64_to_cpu(s->bytenr))
535 ++fail; 535 ++fail;
536 536
537 if (sbio->spag[0].generation != le64_to_cpu(s->generation)) 537 if (sbio->spag[0].generation != le64_to_cpu(s->generation))
538 ++fail; 538 ++fail;
539 539
540 if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE)) 540 if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
541 ++fail; 541 ++fail;
542 542
543 crc = btrfs_csum_data(root, buffer + BTRFS_CSUM_SIZE, crc, 543 crc = btrfs_csum_data(root, buffer + BTRFS_CSUM_SIZE, crc,
544 PAGE_SIZE - BTRFS_CSUM_SIZE); 544 PAGE_SIZE - BTRFS_CSUM_SIZE);
545 btrfs_csum_final(crc, csum); 545 btrfs_csum_final(crc, csum);
546 if (memcmp(csum, s->csum, sbio->sdev->csum_size)) 546 if (memcmp(csum, s->csum, sbio->sdev->csum_size))
547 ++fail; 547 ++fail;
548 548
549 if (fail) { 549 if (fail) {
550 /* 550 /*
551 * if we find an error in a super block, we just report it. 551 * if we find an error in a super block, we just report it.
552 * They will get written with the next transaction commit 552 * They will get written with the next transaction commit
553 * anyway 553 * anyway
554 */ 554 */
555 spin_lock(&sdev->stat_lock); 555 spin_lock(&sdev->stat_lock);
556 ++sdev->stat.super_errors; 556 ++sdev->stat.super_errors;
557 spin_unlock(&sdev->stat_lock); 557 spin_unlock(&sdev->stat_lock);
558 } 558 }
559 559
560 return fail; 560 return fail;
561 } 561 }
562 562
563 static int scrub_submit(struct scrub_dev *sdev) 563 static int scrub_submit(struct scrub_dev *sdev)
564 { 564 {
565 struct scrub_bio *sbio; 565 struct scrub_bio *sbio;
566 struct bio *bio; 566 struct bio *bio;
567 int i; 567 int i;
568 568
569 if (sdev->curr == -1) 569 if (sdev->curr == -1)
570 return 0; 570 return 0;
571 571
572 sbio = sdev->bios[sdev->curr]; 572 sbio = sdev->bios[sdev->curr];
573 573
574 bio = bio_alloc(GFP_NOFS, sbio->count); 574 bio = bio_alloc(GFP_NOFS, sbio->count);
575 if (!bio) 575 if (!bio)
576 goto nomem; 576 goto nomem;
577 577
578 bio->bi_private = sbio; 578 bio->bi_private = sbio;
579 bio->bi_end_io = scrub_bio_end_io; 579 bio->bi_end_io = scrub_bio_end_io;
580 bio->bi_bdev = sdev->dev->bdev; 580 bio->bi_bdev = sdev->dev->bdev;
581 bio->bi_sector = sbio->physical >> 9; 581 bio->bi_sector = sbio->physical >> 9;
582 582
583 for (i = 0; i < sbio->count; ++i) { 583 for (i = 0; i < sbio->count; ++i) {
584 struct page *page; 584 struct page *page;
585 int ret; 585 int ret;
586 586
587 page = alloc_page(GFP_NOFS); 587 page = alloc_page(GFP_NOFS);
588 if (!page) 588 if (!page)
589 goto nomem; 589 goto nomem;
590 590
591 ret = bio_add_page(bio, page, PAGE_SIZE, 0); 591 ret = bio_add_page(bio, page, PAGE_SIZE, 0);
592 if (!ret) { 592 if (!ret) {
593 __free_page(page); 593 __free_page(page);
594 goto nomem; 594 goto nomem;
595 } 595 }
596 } 596 }
597 597
598 sbio->err = 0; 598 sbio->err = 0;
599 sdev->curr = -1; 599 sdev->curr = -1;
600 atomic_inc(&sdev->in_flight); 600 atomic_inc(&sdev->in_flight);
601 601
602 submit_bio(READ, bio); 602 submit_bio(READ, bio);
603 603
604 return 0; 604 return 0;
605 605
606 nomem: 606 nomem:
607 scrub_free_bio(bio); 607 scrub_free_bio(bio);
608 608
609 return -ENOMEM; 609 return -ENOMEM;
610 } 610 }
611 611
612 static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len, 612 static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len,
613 u64 physical, u64 flags, u64 gen, u64 mirror_num, 613 u64 physical, u64 flags, u64 gen, u64 mirror_num,
614 u8 *csum, int force) 614 u8 *csum, int force)
615 { 615 {
616 struct scrub_bio *sbio; 616 struct scrub_bio *sbio;
617 617
618 again: 618 again:
619 /* 619 /*
620 * grab a fresh bio or wait for one to become available 620 * grab a fresh bio or wait for one to become available
621 */ 621 */
622 while (sdev->curr == -1) { 622 while (sdev->curr == -1) {
623 spin_lock(&sdev->list_lock); 623 spin_lock(&sdev->list_lock);
624 sdev->curr = sdev->first_free; 624 sdev->curr = sdev->first_free;
625 if (sdev->curr != -1) { 625 if (sdev->curr != -1) {
626 sdev->first_free = sdev->bios[sdev->curr]->next_free; 626 sdev->first_free = sdev->bios[sdev->curr]->next_free;
627 sdev->bios[sdev->curr]->next_free = -1; 627 sdev->bios[sdev->curr]->next_free = -1;
628 sdev->bios[sdev->curr]->count = 0; 628 sdev->bios[sdev->curr]->count = 0;
629 spin_unlock(&sdev->list_lock); 629 spin_unlock(&sdev->list_lock);
630 } else { 630 } else {
631 spin_unlock(&sdev->list_lock); 631 spin_unlock(&sdev->list_lock);
632 wait_event(sdev->list_wait, sdev->first_free != -1); 632 wait_event(sdev->list_wait, sdev->first_free != -1);
633 } 633 }
634 } 634 }
635 sbio = sdev->bios[sdev->curr]; 635 sbio = sdev->bios[sdev->curr];
636 if (sbio->count == 0) { 636 if (sbio->count == 0) {
637 sbio->physical = physical; 637 sbio->physical = physical;
638 sbio->logical = logical; 638 sbio->logical = logical;
639 } else if (sbio->physical + sbio->count * PAGE_SIZE != physical || 639 } else if (sbio->physical + sbio->count * PAGE_SIZE != physical ||
640 sbio->logical + sbio->count * PAGE_SIZE != logical) { 640 sbio->logical + sbio->count * PAGE_SIZE != logical) {
641 int ret; 641 int ret;
642 642
643 ret = scrub_submit(sdev); 643 ret = scrub_submit(sdev);
644 if (ret) 644 if (ret)
645 return ret; 645 return ret;
646 goto again; 646 goto again;
647 } 647 }
648 sbio->spag[sbio->count].flags = flags; 648 sbio->spag[sbio->count].flags = flags;
649 sbio->spag[sbio->count].generation = gen; 649 sbio->spag[sbio->count].generation = gen;
650 sbio->spag[sbio->count].have_csum = 0; 650 sbio->spag[sbio->count].have_csum = 0;
651 sbio->spag[sbio->count].mirror_num = mirror_num; 651 sbio->spag[sbio->count].mirror_num = mirror_num;
652 if (csum) { 652 if (csum) {
653 sbio->spag[sbio->count].have_csum = 1; 653 sbio->spag[sbio->count].have_csum = 1;
654 memcpy(sbio->spag[sbio->count].csum, csum, sdev->csum_size); 654 memcpy(sbio->spag[sbio->count].csum, csum, sdev->csum_size);
655 } 655 }
656 ++sbio->count; 656 ++sbio->count;
657 if (sbio->count == SCRUB_PAGES_PER_BIO || force) { 657 if (sbio->count == SCRUB_PAGES_PER_BIO || force) {
658 int ret; 658 int ret;
659 659
660 ret = scrub_submit(sdev); 660 ret = scrub_submit(sdev);
661 if (ret) 661 if (ret)
662 return ret; 662 return ret;
663 } 663 }
664 664
665 return 0; 665 return 0;
666 } 666 }
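
scrub_page is a batcher: a page joins the current bio only while it extends it contiguously in both the physical and the logical address; otherwise the bio is submitted and a fresh one started, and a full bio (SCRUB_PAGES_PER_BIO pages) is submitted immediately. The contiguity test in isolation (struct batch and its names are illustrative):

    #include <stdio.h>
    #include <stdint.h>

    #define PAGE_SIZE 4096u
    #define MAX_PAGES 16    /* like SCRUB_PAGES_PER_BIO: 64k per batch */

    struct batch {
        uint64_t logical, physical;
        unsigned int count;
    };

    static void submit(struct batch *b)
    {
        if (b->count)
            printf("submit %u page(s) at physical %llu\n",
                   b->count, (unsigned long long)b->physical);
        b->count = 0;
    }

    /* queue one page; flush first when it does not extend the batch */
    static void queue_page(struct batch *b, uint64_t logical,
                           uint64_t physical)
    {
        if (b->count &&
            (b->physical + b->count * PAGE_SIZE != physical ||
             b->logical + b->count * PAGE_SIZE != logical))
            submit(b);
        if (b->count == 0) {
            b->physical = physical;
            b->logical = logical;
        }
        b->count++;
        if (b->count == MAX_PAGES)
            submit(b);
    }

    int main(void)
    {
        struct batch b = { 0, 0, 0 };

        queue_page(&b, 0, 4096);        /* starts a batch */
        queue_page(&b, 4096, 8192);     /* contiguous, joins it */
        queue_page(&b, 65536, 131072);  /* gap: forces a flush */
        submit(&b);                     /* drain what is left */
        return 0;
    }
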
667 667
668 static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len, 668 static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len,
669 u8 *csum) 669 u8 *csum)
670 { 670 {
671 struct btrfs_ordered_sum *sum = NULL; 671 struct btrfs_ordered_sum *sum = NULL;
672 int ret = 0; 672 int ret = 0;
673 unsigned long i; 673 unsigned long i;
674 unsigned long num_sectors; 674 unsigned long num_sectors;
675 u32 sectorsize = sdev->dev->dev_root->sectorsize; 675 u32 sectorsize = sdev->dev->dev_root->sectorsize;
676 676
677 while (!list_empty(&sdev->csum_list)) { 677 while (!list_empty(&sdev->csum_list)) {
678 sum = list_first_entry(&sdev->csum_list, 678 sum = list_first_entry(&sdev->csum_list,
679 struct btrfs_ordered_sum, list); 679 struct btrfs_ordered_sum, list);
680 if (sum->bytenr > logical) 680 if (sum->bytenr > logical)
681 return 0; 681 return 0;
682 if (sum->bytenr + sum->len > logical) 682 if (sum->bytenr + sum->len > logical)
683 break; 683 break;
684 684
685 ++sdev->stat.csum_discards; 685 ++sdev->stat.csum_discards;
686 list_del(&sum->list); 686 list_del(&sum->list);
687 kfree(sum); 687 kfree(sum);
688 sum = NULL; 688 sum = NULL;
689 } 689 }
690 if (!sum) 690 if (!sum)
691 return 0; 691 return 0;
692 692
693 num_sectors = sum->len / sectorsize; 693 num_sectors = sum->len / sectorsize;
694 for (i = 0; i < num_sectors; ++i) { 694 for (i = 0; i < num_sectors; ++i) {
695 if (sum->sums[i].bytenr == logical) { 695 if (sum->sums[i].bytenr == logical) {
696 memcpy(csum, &sum->sums[i].sum, sdev->csum_size); 696 memcpy(csum, &sum->sums[i].sum, sdev->csum_size);
697 ret = 1; 697 ret = 1;
698 break; 698 break;
699 } 699 }
700 } 700 }
701 if (ret && i == num_sectors - 1) { 701 if (ret && i == num_sectors - 1) {
702 list_del(&sum->list); 702 list_del(&sum->list);
703 kfree(sum); 703 kfree(sum);
704 } 704 }
705 return ret; 705 return ret;
706 } 706 }
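
scrub_find_csum exploits the fact that scrub walks logical addresses in increasing order: ordered-sum records wholly behind the cursor can be freed on the spot, a record starting beyond it means the page has no checksum, and otherwise the record covering it is searched sector by sector. The head-of-list consumption logic, reduced to ranges (sum_rec/find_csum are invented names):

    #include <stdio.h>
    #include <stdint.h>

    struct sum_rec {
        uint64_t bytenr;    /* start of the covered range */
        uint64_t len;
    };

    /* records sorted by bytenr, consumed in increasing logical order */
    static struct sum_rec recs[] = {
        { 0,     8192 },
        { 16384, 4096 },
    };
    static unsigned int head;    /* first record still alive */

    static int find_csum(uint64_t logical)
    {
        while (head < sizeof(recs) / sizeof(recs[0])) {
            struct sum_rec *s = &recs[head];

            if (s->bytenr > logical)
                return 0;    /* hole: nothing covers logical */
            if (s->bytenr + s->len > logical)
                return 1;    /* this record covers logical */
            head++;          /* wholly behind the cursor: drop it */
        }
        return 0;
    }

    int main(void)
    {
        printf("%d\n", find_csum(4096));   /* 1: inside first record */
        printf("%d\n", find_csum(12288));  /* 0: gap; record 0 dropped */
        printf("%d\n", find_csum(16384));  /* 1: second record */
        return 0;
    }
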
707 707
708 /* scrub extent tries to collect up to 64 kB for each bio */ 708 /* scrub extent tries to collect up to 64 kB for each bio */
709 static int scrub_extent(struct scrub_dev *sdev, u64 logical, u64 len, 709 static int scrub_extent(struct scrub_dev *sdev, u64 logical, u64 len,
710 u64 physical, u64 flags, u64 gen, u64 mirror_num) 710 u64 physical, u64 flags, u64 gen, u64 mirror_num)
711 { 711 {
712 int ret; 712 int ret;
713 u8 csum[BTRFS_CSUM_SIZE]; 713 u8 csum[BTRFS_CSUM_SIZE];
714 714
715 while (len) { 715 while (len) {
716 u64 l = min_t(u64, len, PAGE_SIZE); 716 u64 l = min_t(u64, len, PAGE_SIZE);
717 int have_csum = 0; 717 int have_csum = 0;
718 718
719 if (flags & BTRFS_EXTENT_FLAG_DATA) { 719 if (flags & BTRFS_EXTENT_FLAG_DATA) {
720 /* push csums to sbio */ 720 /* push csums to sbio */
721 have_csum = scrub_find_csum(sdev, logical, l, csum); 721 have_csum = scrub_find_csum(sdev, logical, l, csum);
722 if (have_csum == 0) 722 if (have_csum == 0)
723 ++sdev->stat.no_csum; 723 ++sdev->stat.no_csum;
724 } 724 }
725 ret = scrub_page(sdev, logical, l, physical, flags, gen, 725 ret = scrub_page(sdev, logical, l, physical, flags, gen,
726 mirror_num, have_csum ? csum : NULL, 0); 726 mirror_num, have_csum ? csum : NULL, 0);
727 if (ret) 727 if (ret)
728 return ret; 728 return ret;
729 len -= l; 729 len -= l;
730 logical += l; 730 logical += l;
731 physical += l; 731 physical += l;
732 } 732 }
733 return 0; 733 return 0;
734 } 734 }
735 735
736 static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev, 736 static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
737 struct map_lookup *map, int num, u64 base, u64 length) 737 struct map_lookup *map, int num, u64 base, u64 length)
738 { 738 {
739 struct btrfs_path *path; 739 struct btrfs_path *path;
740 struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info; 740 struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
741 struct btrfs_root *root = fs_info->extent_root; 741 struct btrfs_root *root = fs_info->extent_root;
742 struct btrfs_root *csum_root = fs_info->csum_root; 742 struct btrfs_root *csum_root = fs_info->csum_root;
743 struct btrfs_extent_item *extent; 743 struct btrfs_extent_item *extent;
744 struct blk_plug plug; 744 struct blk_plug plug;
745 u64 flags; 745 u64 flags;
746 int ret; 746 int ret;
747 int slot; 747 int slot;
748 int i; 748 int i;
749 u64 nstripes; 749 u64 nstripes;
750 int start_stripe; 750 int start_stripe;
751 struct extent_buffer *l; 751 struct extent_buffer *l;
752 struct btrfs_key key; 752 struct btrfs_key key;
753 u64 physical; 753 u64 physical;
754 u64 logical; 754 u64 logical;
755 u64 generation; 755 u64 generation;
756 u64 mirror_num; 756 u64 mirror_num;
757 757
758 u64 increment = map->stripe_len; 758 u64 increment = map->stripe_len;
759 u64 offset; 759 u64 offset;
760 760
761 nstripes = length; 761 nstripes = length;
762 offset = 0; 762 offset = 0;
763 do_div(nstripes, map->stripe_len); 763 do_div(nstripes, map->stripe_len);
764 if (map->type & BTRFS_BLOCK_GROUP_RAID0) { 764 if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
765 offset = map->stripe_len * num; 765 offset = map->stripe_len * num;
766 increment = map->stripe_len * map->num_stripes; 766 increment = map->stripe_len * map->num_stripes;
767 mirror_num = 0; 767 mirror_num = 0;
768 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { 768 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
769 int factor = map->num_stripes / map->sub_stripes; 769 int factor = map->num_stripes / map->sub_stripes;
770 offset = map->stripe_len * (num / map->sub_stripes); 770 offset = map->stripe_len * (num / map->sub_stripes);
771 increment = map->stripe_len * factor; 771 increment = map->stripe_len * factor;
772 mirror_num = num % map->sub_stripes; 772 mirror_num = num % map->sub_stripes;
773 } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) { 773 } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
774 increment = map->stripe_len; 774 increment = map->stripe_len;
775 mirror_num = num % map->num_stripes; 775 mirror_num = num % map->num_stripes;
776 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { 776 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
777 increment = map->stripe_len; 777 increment = map->stripe_len;
778 mirror_num = num % map->num_stripes; 778 mirror_num = num % map->num_stripes;
779 } else { 779 } else {
780 increment = map->stripe_len; 780 increment = map->stripe_len;
781 mirror_num = 0; 781 mirror_num = 0;
782 } 782 }
783 783
784 path = btrfs_alloc_path(); 784 path = btrfs_alloc_path();
785 if (!path) 785 if (!path)
786 return -ENOMEM; 786 return -ENOMEM;
787 787
788 path->reada = 2; 788 path->reada = 2;
789 path->search_commit_root = 1; 789 path->search_commit_root = 1;
790 path->skip_locking = 1; 790 path->skip_locking = 1;
791 791
792 /* 792 /*
793 * find all extents for each stripe and just read them to get 793 * find all extents for each stripe and just read them to get
794 * them into the page cache 794 * them into the page cache
795 * FIXME: we can do better: build more intelligent prefetching 795 * FIXME: we can do better: build more intelligent prefetching
796 */ 796 */
797 logical = base + offset; 797 logical = base + offset;
798 physical = map->stripes[num].physical; 798 physical = map->stripes[num].physical;
799 ret = 0; 799 ret = 0;
800 for (i = 0; i < nstripes; ++i) { 800 for (i = 0; i < nstripes; ++i) {
801 key.objectid = logical; 801 key.objectid = logical;
802 key.type = BTRFS_EXTENT_ITEM_KEY; 802 key.type = BTRFS_EXTENT_ITEM_KEY;
803 key.offset = (u64)0; 803 key.offset = (u64)0;
804 804
805 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 805 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
806 if (ret < 0) 806 if (ret < 0)
807 goto out_noplug; 807 goto out_noplug;
808 808
809 /* 809 /*
810 * we might miss half an extent here, but that doesn't matter, 810 * we might miss half an extent here, but that doesn't matter,
811 * as it's only the prefetch 811 * as it's only the prefetch
812 */ 812 */
813 while (1) { 813 while (1) {
814 l = path->nodes[0]; 814 l = path->nodes[0];
815 slot = path->slots[0]; 815 slot = path->slots[0];
816 if (slot >= btrfs_header_nritems(l)) { 816 if (slot >= btrfs_header_nritems(l)) {
817 ret = btrfs_next_leaf(root, path); 817 ret = btrfs_next_leaf(root, path);
818 if (ret == 0) 818 if (ret == 0)
819 continue; 819 continue;
820 if (ret < 0) 820 if (ret < 0)
821 goto out_noplug; 821 goto out_noplug;
822 822
823 break; 823 break;
824 } 824 }
825 btrfs_item_key_to_cpu(l, &key, slot); 825 btrfs_item_key_to_cpu(l, &key, slot);
826 826
827 if (key.objectid >= logical + map->stripe_len) 827 if (key.objectid >= logical + map->stripe_len)
828 break; 828 break;
829 829
830 path->slots[0]++; 830 path->slots[0]++;
831 } 831 }
832 btrfs_release_path(path); 832 btrfs_release_path(path);
833 logical += increment; 833 logical += increment;
834 physical += map->stripe_len; 834 physical += map->stripe_len;
835 cond_resched(); 835 cond_resched();
836 } 836 }
837 837
838 /* 838 /*
839 * collect all data csums for the stripe to avoid seeking during 839 * collect all data csums for the stripe to avoid seeking during
840 * the scrub. This might currently (crc32) end up being about 1MB 840 * the scrub. This might currently (crc32) end up being about 1MB
841 */ 841 */
842 start_stripe = 0; 842 start_stripe = 0;
843 blk_start_plug(&plug); 843 blk_start_plug(&plug);
844 again: 844 again:
845 logical = base + offset + start_stripe * increment; 845 logical = base + offset + start_stripe * increment;
846 for (i = start_stripe; i < nstripes; ++i) { 846 for (i = start_stripe; i < nstripes; ++i) {
847 ret = btrfs_lookup_csums_range(csum_root, logical, 847 ret = btrfs_lookup_csums_range(csum_root, logical,
848 logical + map->stripe_len - 1, 848 logical + map->stripe_len - 1,
849 &sdev->csum_list, 1); 849 &sdev->csum_list, 1);
850 if (ret) 850 if (ret)
851 goto out; 851 goto out;
852 852
853 logical += increment; 853 logical += increment;
854 cond_resched(); 854 cond_resched();
855 } 855 }
856 /* 856 /*
857 * now find all extents for each stripe and scrub them 857 * now find all extents for each stripe and scrub them
858 */ 858 */
859 logical = base + offset + start_stripe * increment; 859 logical = base + offset + start_stripe * increment;
860 physical = map->stripes[num].physical + start_stripe * map->stripe_len; 860 physical = map->stripes[num].physical + start_stripe * map->stripe_len;
861 ret = 0; 861 ret = 0;
862 for (i = start_stripe; i < nstripes; ++i) { 862 for (i = start_stripe; i < nstripes; ++i) {
863 /* 863 /*
864 * canceled? 864 * canceled?
865 */ 865 */
866 if (atomic_read(&fs_info->scrub_cancel_req) || 866 if (atomic_read(&fs_info->scrub_cancel_req) ||
867 atomic_read(&sdev->cancel_req)) { 867 atomic_read(&sdev->cancel_req)) {
868 ret = -ECANCELED; 868 ret = -ECANCELED;
869 goto out; 869 goto out;
870 } 870 }
871 /* 871 /*
872 * check to see if we have to pause 872 * check to see if we have to pause
873 */ 873 */
874 if (atomic_read(&fs_info->scrub_pause_req)) { 874 if (atomic_read(&fs_info->scrub_pause_req)) {
875 /* push queued extents */ 875 /* push queued extents */
876 scrub_submit(sdev); 876 scrub_submit(sdev);
877 wait_event(sdev->list_wait, 877 wait_event(sdev->list_wait,
878 atomic_read(&sdev->in_flight) == 0); 878 atomic_read(&sdev->in_flight) == 0);
879 atomic_inc(&fs_info->scrubs_paused); 879 atomic_inc(&fs_info->scrubs_paused);
880 wake_up(&fs_info->scrub_pause_wait); 880 wake_up(&fs_info->scrub_pause_wait);
881 mutex_lock(&fs_info->scrub_lock); 881 mutex_lock(&fs_info->scrub_lock);
882 while (atomic_read(&fs_info->scrub_pause_req)) { 882 while (atomic_read(&fs_info->scrub_pause_req)) {
883 mutex_unlock(&fs_info->scrub_lock); 883 mutex_unlock(&fs_info->scrub_lock);
884 wait_event(fs_info->scrub_pause_wait, 884 wait_event(fs_info->scrub_pause_wait,
885 atomic_read(&fs_info->scrub_pause_req) == 0); 885 atomic_read(&fs_info->scrub_pause_req) == 0);
886 mutex_lock(&fs_info->scrub_lock); 886 mutex_lock(&fs_info->scrub_lock);
887 } 887 }
888 atomic_dec(&fs_info->scrubs_paused); 888 atomic_dec(&fs_info->scrubs_paused);
889 mutex_unlock(&fs_info->scrub_lock); 889 mutex_unlock(&fs_info->scrub_lock);
890 wake_up(&fs_info->scrub_pause_wait); 890 wake_up(&fs_info->scrub_pause_wait);
891 scrub_free_csums(sdev); 891 scrub_free_csums(sdev);
892 start_stripe = i; 892 start_stripe = i;
893 goto again; 893 goto again;
894 } 894 }
895 895
896 key.objectid = logical; 896 key.objectid = logical;
897 key.type = BTRFS_EXTENT_ITEM_KEY; 897 key.type = BTRFS_EXTENT_ITEM_KEY;
898 key.offset = (u64)0; 898 key.offset = (u64)0;
899 899
900 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 900 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
901 if (ret < 0) 901 if (ret < 0)
902 goto out; 902 goto out;
903 if (ret > 0) { 903 if (ret > 0) {
904 ret = btrfs_previous_item(root, path, 0, 904 ret = btrfs_previous_item(root, path, 0,
905 BTRFS_EXTENT_ITEM_KEY); 905 BTRFS_EXTENT_ITEM_KEY);
906 if (ret < 0) 906 if (ret < 0)
907 goto out; 907 goto out;
908 if (ret > 0) { 908 if (ret > 0) {
909 /* there's no smaller item, so stick with the 909 /* there's no smaller item, so stick with the
910 * larger one */ 910 * larger one */
911 btrfs_release_path(path); 911 btrfs_release_path(path);
912 ret = btrfs_search_slot(NULL, root, &key, 912 ret = btrfs_search_slot(NULL, root, &key,
913 path, 0, 0); 913 path, 0, 0);
914 if (ret < 0) 914 if (ret < 0)
915 goto out; 915 goto out;
916 } 916 }
917 } 917 }
918 918
919 while (1) { 919 while (1) {
920 l = path->nodes[0]; 920 l = path->nodes[0];
921 slot = path->slots[0]; 921 slot = path->slots[0];
922 if (slot >= btrfs_header_nritems(l)) { 922 if (slot >= btrfs_header_nritems(l)) {
923 ret = btrfs_next_leaf(root, path); 923 ret = btrfs_next_leaf(root, path);
924 if (ret == 0) 924 if (ret == 0)
925 continue; 925 continue;
926 if (ret < 0) 926 if (ret < 0)
927 goto out; 927 goto out;
928 928
929 break; 929 break;
930 } 930 }
931 btrfs_item_key_to_cpu(l, &key, slot); 931 btrfs_item_key_to_cpu(l, &key, slot);
932 932
933 if (key.objectid + key.offset <= logical) 933 if (key.objectid + key.offset <= logical)
934 goto next; 934 goto next;
935 935
936 if (key.objectid >= logical + map->stripe_len) 936 if (key.objectid >= logical + map->stripe_len)
937 break; 937 break;
938 938
939 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) 939 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY)
940 goto next; 940 goto next;
941 941
942 extent = btrfs_item_ptr(l, slot, 942 extent = btrfs_item_ptr(l, slot,
943 struct btrfs_extent_item); 943 struct btrfs_extent_item);
944 flags = btrfs_extent_flags(l, extent); 944 flags = btrfs_extent_flags(l, extent);
945 generation = btrfs_extent_generation(l, extent); 945 generation = btrfs_extent_generation(l, extent);
946 946
947 if (key.objectid < logical && 947 if (key.objectid < logical &&
948 (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) { 948 (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
949 printk(KERN_ERR 949 printk(KERN_ERR
950 "btrfs scrub: tree block %llu spanning " 950 "btrfs scrub: tree block %llu spanning "
951 "stripes, ignored. logical=%llu\n", 951 "stripes, ignored. logical=%llu\n",
952 (unsigned long long)key.objectid, 952 (unsigned long long)key.objectid,
953 (unsigned long long)logical); 953 (unsigned long long)logical);
954 goto next; 954 goto next;
955 } 955 }
956 956
957 /* 957 /*
958 * trim extent to this stripe 958 * trim extent to this stripe
959 */ 959 */
960 if (key.objectid < logical) { 960 if (key.objectid < logical) {
961 key.offset -= logical - key.objectid; 961 key.offset -= logical - key.objectid;
962 key.objectid = logical; 962 key.objectid = logical;
963 } 963 }
964 if (key.objectid + key.offset > 964 if (key.objectid + key.offset >
965 logical + map->stripe_len) { 965 logical + map->stripe_len) {
966 key.offset = logical + map->stripe_len - 966 key.offset = logical + map->stripe_len -
967 key.objectid; 967 key.objectid;
968 } 968 }
969 969
970 ret = scrub_extent(sdev, key.objectid, key.offset, 970 ret = scrub_extent(sdev, key.objectid, key.offset,
971 key.objectid - logical + physical, 971 key.objectid - logical + physical,
972 flags, generation, mirror_num); 972 flags, generation, mirror_num);
973 if (ret) 973 if (ret)
974 goto out; 974 goto out;
975 975
976 next: 976 next:
977 path->slots[0]++; 977 path->slots[0]++;
978 } 978 }
979 btrfs_release_path(path); 979 btrfs_release_path(path);
980 logical += increment; 980 logical += increment;
981 physical += map->stripe_len; 981 physical += map->stripe_len;
982 spin_lock(&sdev->stat_lock); 982 spin_lock(&sdev->stat_lock);
983 sdev->stat.last_physical = physical; 983 sdev->stat.last_physical = physical;
984 spin_unlock(&sdev->stat_lock); 984 spin_unlock(&sdev->stat_lock);
985 } 985 }
986 /* push queued extents */ 986 /* push queued extents */
987 scrub_submit(sdev); 987 scrub_submit(sdev);
988 988
989 out: 989 out:
990 blk_finish_plug(&plug); 990 blk_finish_plug(&plug);
991 out_noplug: 991 out_noplug:
992 btrfs_free_path(path); 992 btrfs_free_path(path);
993 return ret < 0 ? ret : 0; 993 return ret < 0 ? ret : 0;
994 } 994 }
995 995
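The offset/increment/mirror_num setup at the top of scrub_stripe is modular arithmetic over the chunk layout: a RAID0 device sees every num_stripes-th stripe, a RAID10 device every (num_stripes / sub_stripes)-th, and RAID1/DUP devices walk every stripe, each as a different mirror. A standalone sketch of that math, with a simplified stand-in for map_lookup:

#include <stdint.h>
#include <stdio.h>

enum layout { RAID0, RAID10, RAID1_OR_DUP, SINGLE };

struct stripe_map {		/* simplified stand-in for map_lookup */
	enum layout type;
	uint64_t stripe_len;
	int num_stripes;
	int sub_stripes;	/* RAID10 only */
};

/* compute where stripe walking starts (offset), how far the logical
 * cursor advances per stripe (increment) and which mirror this is */
static void stripe_params(const struct stripe_map *map, int num,
			  uint64_t *offset, uint64_t *increment,
			  uint64_t *mirror_num)
{
	*offset = 0;
	switch (map->type) {
	case RAID0:
		*offset = map->stripe_len * num;
		*increment = map->stripe_len * map->num_stripes;
		*mirror_num = 0;
		break;
	case RAID10:
		*offset = map->stripe_len * (num / map->sub_stripes);
		*increment = map->stripe_len *
			     (map->num_stripes / map->sub_stripes);
		*mirror_num = num % map->sub_stripes;
		break;
	case RAID1_OR_DUP:
		*increment = map->stripe_len;
		*mirror_num = num % map->num_stripes;
		break;
	default:
		*increment = map->stripe_len;
		*mirror_num = 0;
	}
}

int main(void)
{
	struct stripe_map m = { RAID10, 65536, 4, 2 };
	uint64_t off, inc, mirror;

	/* stripe 3 of a 4-disk RAID10: offset 65536, increment 131072,
	 * mirror 1 */
	stripe_params(&m, 3, &off, &inc, &mirror);
	printf("offset=%llu increment=%llu mirror=%llu\n",
	       (unsigned long long)off, (unsigned long long)inc,
	       (unsigned long long)mirror);
	return 0;
}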
996 static noinline_for_stack int scrub_chunk(struct scrub_dev *sdev, 996 static noinline_for_stack int scrub_chunk(struct scrub_dev *sdev,
997 u64 chunk_tree, u64 chunk_objectid, u64 chunk_offset, u64 length) 997 u64 chunk_tree, u64 chunk_objectid, u64 chunk_offset, u64 length)
998 { 998 {
999 struct btrfs_mapping_tree *map_tree = 999 struct btrfs_mapping_tree *map_tree =
1000 &sdev->dev->dev_root->fs_info->mapping_tree; 1000 &sdev->dev->dev_root->fs_info->mapping_tree;
1001 struct map_lookup *map; 1001 struct map_lookup *map;
1002 struct extent_map *em; 1002 struct extent_map *em;
1003 int i; 1003 int i;
1004 int ret = -EINVAL; 1004 int ret = -EINVAL;
1005 1005
1006 read_lock(&map_tree->map_tree.lock); 1006 read_lock(&map_tree->map_tree.lock);
1007 em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1); 1007 em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
1008 read_unlock(&map_tree->map_tree.lock); 1008 read_unlock(&map_tree->map_tree.lock);
1009 1009
1010 if (!em) 1010 if (!em)
1011 return -EINVAL; 1011 return -EINVAL;
1012 1012
1013 map = (struct map_lookup *)em->bdev; 1013 map = (struct map_lookup *)em->bdev;
1014 if (em->start != chunk_offset) 1014 if (em->start != chunk_offset)
1015 goto out; 1015 goto out;
1016 1016
1017 if (em->len < length) 1017 if (em->len < length)
1018 goto out; 1018 goto out;
1019 1019
1020 for (i = 0; i < map->num_stripes; ++i) { 1020 for (i = 0; i < map->num_stripes; ++i) {
1021 if (map->stripes[i].dev == sdev->dev) { 1021 if (map->stripes[i].dev == sdev->dev) {
1022 ret = scrub_stripe(sdev, map, i, chunk_offset, length); 1022 ret = scrub_stripe(sdev, map, i, chunk_offset, length);
1023 if (ret) 1023 if (ret)
1024 goto out; 1024 goto out;
1025 } 1025 }
1026 } 1026 }
1027 out: 1027 out:
1028 free_extent_map(em); 1028 free_extent_map(em);
1029 1029
1030 return ret; 1030 return ret;
1031 } 1031 }
1032 1032
1033 static noinline_for_stack 1033 static noinline_for_stack
1034 int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end) 1034 int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
1035 { 1035 {
1036 struct btrfs_dev_extent *dev_extent = NULL; 1036 struct btrfs_dev_extent *dev_extent = NULL;
1037 struct btrfs_path *path; 1037 struct btrfs_path *path;
1038 struct btrfs_root *root = sdev->dev->dev_root; 1038 struct btrfs_root *root = sdev->dev->dev_root;
1039 struct btrfs_fs_info *fs_info = root->fs_info; 1039 struct btrfs_fs_info *fs_info = root->fs_info;
1040 u64 length; 1040 u64 length;
1041 u64 chunk_tree; 1041 u64 chunk_tree;
1042 u64 chunk_objectid; 1042 u64 chunk_objectid;
1043 u64 chunk_offset; 1043 u64 chunk_offset;
1044 int ret; 1044 int ret;
1045 int slot; 1045 int slot;
1046 struct extent_buffer *l; 1046 struct extent_buffer *l;
1047 struct btrfs_key key; 1047 struct btrfs_key key;
1048 struct btrfs_key found_key; 1048 struct btrfs_key found_key;
1049 struct btrfs_block_group_cache *cache; 1049 struct btrfs_block_group_cache *cache;
1050 1050
1051 path = btrfs_alloc_path(); 1051 path = btrfs_alloc_path();
1052 if (!path) 1052 if (!path)
1053 return -ENOMEM; 1053 return -ENOMEM;
1054 1054
1055 path->reada = 2; 1055 path->reada = 2;
1056 path->search_commit_root = 1; 1056 path->search_commit_root = 1;
1057 path->skip_locking = 1; 1057 path->skip_locking = 1;
1058 1058
1059 key.objectid = sdev->dev->devid; 1059 key.objectid = sdev->dev->devid;
1060 key.offset = 0ull; 1060 key.offset = 0ull;
1061 key.type = BTRFS_DEV_EXTENT_KEY; 1061 key.type = BTRFS_DEV_EXTENT_KEY;
1062 1062
1063 1063
1064 while (1) { 1064 while (1) {
1065 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 1065 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1066 if (ret < 0) 1066 if (ret < 0)
1067 break; 1067 break;
1068 if (ret > 0) { 1068 if (ret > 0) {
1069 if (path->slots[0] >= 1069 if (path->slots[0] >=
1070 btrfs_header_nritems(path->nodes[0])) { 1070 btrfs_header_nritems(path->nodes[0])) {
1071 ret = btrfs_next_leaf(root, path); 1071 ret = btrfs_next_leaf(root, path);
1072 if (ret) 1072 if (ret)
1073 break; 1073 break;
1074 } 1074 }
1075 } 1075 }
1076 1076
1077 l = path->nodes[0]; 1077 l = path->nodes[0];
1078 slot = path->slots[0]; 1078 slot = path->slots[0];
1079 1079
1080 btrfs_item_key_to_cpu(l, &found_key, slot); 1080 btrfs_item_key_to_cpu(l, &found_key, slot);
1081 1081
1082 if (found_key.objectid != sdev->dev->devid) 1082 if (found_key.objectid != sdev->dev->devid)
1083 break; 1083 break;
1084 1084
1085 if (btrfs_key_type(&found_key) != BTRFS_DEV_EXTENT_KEY) 1085 if (btrfs_key_type(&found_key) != BTRFS_DEV_EXTENT_KEY)
1086 break; 1086 break;
1087 1087
1088 if (found_key.offset >= end) 1088 if (found_key.offset >= end)
1089 break; 1089 break;
1090 1090
1091 if (found_key.offset < key.offset) 1091 if (found_key.offset < key.offset)
1092 break; 1092 break;
1093 1093
1094 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); 1094 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
1095 length = btrfs_dev_extent_length(l, dev_extent); 1095 length = btrfs_dev_extent_length(l, dev_extent);
1096 1096
1097 if (found_key.offset + length <= start) { 1097 if (found_key.offset + length <= start) {
1098 key.offset = found_key.offset + length; 1098 key.offset = found_key.offset + length;
1099 btrfs_release_path(path); 1099 btrfs_release_path(path);
1100 continue; 1100 continue;
1101 } 1101 }
1102 1102
1103 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent); 1103 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
1104 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent); 1104 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
1105 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent); 1105 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
1106 1106
1107 /* 1107 /*
1108 * get a reference on the corresponding block group to prevent 1108 * get a reference on the corresponding block group to prevent
1109 * the chunk from going away while we scrub it 1109 * the chunk from going away while we scrub it
1110 */ 1110 */
1111 cache = btrfs_lookup_block_group(fs_info, chunk_offset); 1111 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
1112 if (!cache) { 1112 if (!cache) {
1113 ret = -ENOENT; 1113 ret = -ENOENT;
1114 break; 1114 break;
1115 } 1115 }
1116 ret = scrub_chunk(sdev, chunk_tree, chunk_objectid, 1116 ret = scrub_chunk(sdev, chunk_tree, chunk_objectid,
1117 chunk_offset, length); 1117 chunk_offset, length);
1118 btrfs_put_block_group(cache); 1118 btrfs_put_block_group(cache);
1119 if (ret) 1119 if (ret)
1120 break; 1120 break;
1121 1121
1122 key.offset = found_key.offset + length; 1122 key.offset = found_key.offset + length;
1123 btrfs_release_path(path); 1123 btrfs_release_path(path);
1124 } 1124 }
1125 1125
1126 btrfs_free_path(path); 1126 btrfs_free_path(path);
1127 1127
1128 /* 1128 /*
1129 * ret can still be 1 from search_slot or next_leaf; 1129 * ret can still be 1 from search_slot or next_leaf;
1130 * that's not an error 1130 * that's not an error
1131 */ 1131 */
1132 return ret < 0 ? ret : 0; 1132 return ret < 0 ? ret : 0;
1133 } 1133 }
1134 1134
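scrub_enumerate_chunks is a cursor walk over a sorted index: search from (devid, key.offset), skip dev extents that end at or before 'start', stop at 'end', and re-search from found_key.offset + length after each chunk. The btree cursor dance collapses to a plain array walk in this sketch; the types are hypothetical stand-ins for the btree items, not the real structures:

#include <stdint.h>
#include <stdio.h>

struct dev_extent {		/* hypothetical flattened dev extent */
	uint64_t offset;	/* start on the device */
	uint64_t length;
};

static int scrub_one(const struct dev_extent *de)
{
	printf("scrub [%llu, %llu)\n", (unsigned long long)de->offset,
	       (unsigned long long)(de->offset + de->length));
	return 0;
}

/* walk all extents overlapping [start, end), like scrub_enumerate_chunks */
static int enumerate(const struct dev_extent *ext, int n,
		     uint64_t start, uint64_t end)
{
	for (int i = 0; i < n; i++) {
		const struct dev_extent *de = &ext[i];

		if (de->offset >= end)			/* past the range */
			break;
		if (de->offset + de->length <= start)	/* before the range */
			continue;
		int ret = scrub_one(de);
		if (ret)
			return ret;
	}
	return 0;
}

int main(void)
{
	struct dev_extent ext[] = {
		{ 0, 4096 }, { 4096, 8192 }, { 16384, 4096 },
	};
	/* skips the first extent, scrubs the other two */
	return enumerate(ext, 3, 4096, 20480);
}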
1135 static noinline_for_stack int scrub_supers(struct scrub_dev *sdev) 1135 static noinline_for_stack int scrub_supers(struct scrub_dev *sdev)
1136 { 1136 {
1137 int i; 1137 int i;
1138 u64 bytenr; 1138 u64 bytenr;
1139 u64 gen; 1139 u64 gen;
1140 int ret; 1140 int ret;
1141 struct btrfs_device *device = sdev->dev; 1141 struct btrfs_device *device = sdev->dev;
1142 struct btrfs_root *root = device->dev_root; 1142 struct btrfs_root *root = device->dev_root;
1143 1143
1144 gen = root->fs_info->last_trans_committed; 1144 gen = root->fs_info->last_trans_committed;
1145 1145
1146 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { 1146 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
1147 bytenr = btrfs_sb_offset(i); 1147 bytenr = btrfs_sb_offset(i);
1148 if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes) 1148 if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes)
1149 break; 1149 break;
1150 1150
1151 ret = scrub_page(sdev, bytenr, PAGE_SIZE, bytenr, 1151 ret = scrub_page(sdev, bytenr, PAGE_SIZE, bytenr,
1152 BTRFS_EXTENT_FLAG_SUPER, gen, i, NULL, 1); 1152 BTRFS_EXTENT_FLAG_SUPER, gen, i, NULL, 1);
1153 if (ret) 1153 if (ret)
1154 return ret; 1154 return ret;
1155 } 1155 }
1156 wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0); 1156 wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0);
1157 1157
1158 return 0; 1158 return 0;
1159 } 1159 }
1160 1160
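scrub_supers reads each superblock copy in place, skipping copies that would fall past the end of the device. In mainline btrfs the copies sit exponentially far apart: BTRFS_SUPER_INFO_OFFSET (64 KiB) shifted left by BTRFS_SUPER_MIRROR_SHIFT (12) bits per mirror, i.e. 64 KiB, 256 MiB and 1 TiB. A sketch of the placement check; the constants are copied from that layout as I understand it and should be verified against ctree.h of this tree:

#include <stdint.h>
#include <stdio.h>

#define SB_OFFSET	65536ULL	/* BTRFS_SUPER_INFO_OFFSET */
#define SB_MIRROR_SHIFT	12		/* BTRFS_SUPER_MIRROR_SHIFT */
#define SB_MIRROR_MAX	3		/* BTRFS_SUPER_MIRROR_MAX */
#define SB_SIZE		4096ULL		/* BTRFS_SUPER_INFO_SIZE */

static uint64_t sb_offset(int mirror)
{
	return SB_OFFSET << (SB_MIRROR_SHIFT * mirror);
}

int main(void)
{
	uint64_t total_bytes = 500ULL << 30;	/* a 500 GiB device */

	for (int i = 0; i < SB_MIRROR_MAX; i++) {
		uint64_t bytenr = sb_offset(i);

		if (bytenr + SB_SIZE >= total_bytes)
			break;	/* mirror would fall past the device */
		printf("mirror %d at %llu\n", i, (unsigned long long)bytenr);
	}
	return 0;
}

On a 500 GiB device only the 64 KiB and 256 MiB copies exist, so the loop scrubs two mirrors and breaks, just as the kernel loop does.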
1161 /* 1161 /*
1162 * get a reference count on fs_info->scrub_workers. start workers if necessary 1162 * get a reference count on fs_info->scrub_workers. start workers if necessary
1163 */ 1163 */
1164 static noinline_for_stack int scrub_workers_get(struct btrfs_root *root) 1164 static noinline_for_stack int scrub_workers_get(struct btrfs_root *root)
1165 { 1165 {
1166 struct btrfs_fs_info *fs_info = root->fs_info; 1166 struct btrfs_fs_info *fs_info = root->fs_info;
1167 1167
1168 mutex_lock(&fs_info->scrub_lock); 1168 mutex_lock(&fs_info->scrub_lock);
1169 if (fs_info->scrub_workers_refcnt == 0) 1169 if (fs_info->scrub_workers_refcnt == 0) {
1170 btrfs_init_workers(&fs_info->scrub_workers, "scrub",
1171 fs_info->thread_pool_size, &fs_info->generic_worker);
1172 fs_info->scrub_workers.idle_thresh = 4;
1170 btrfs_start_workers(&fs_info->scrub_workers, 1); 1173 btrfs_start_workers(&fs_info->scrub_workers, 1);
1174 }
1171 ++fs_info->scrub_workers_refcnt; 1175 ++fs_info->scrub_workers_refcnt;
1172 mutex_unlock(&fs_info->scrub_lock); 1176 mutex_unlock(&fs_info->scrub_lock);
1173 1177
1174 return 0; 1178 return 0;
1175 } 1179 }
1176 1180
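scrub_workers_get is the function this commit changes: on every 0 -> 1 transition of the refcount the worker pool is now initialized with btrfs_init_workers() before btrfs_start_workers() runs, because a pool that was stopped by scrub_workers_put() retains state and, per the commit message, "behaves strangely" if it is merely restarted. A userspace model of the fixed lifecycle; the pool type and helpers are stand-ins, not the kernel API:

#include <pthread.h>
#include <stdio.h>

struct pool { int live; };		/* stand-in for btrfs_workers */

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct pool workers;
static int refcnt;

static void pool_init(struct pool *p)  { p->live = 0; puts("init"); }
static void pool_start(struct pool *p) { p->live = 1; puts("start"); }
static void pool_stop(struct pool *p)  { p->live = 0; puts("stop"); }

static void workers_get(void)
{
	pthread_mutex_lock(&lock);
	if (refcnt == 0) {
		pool_init(&workers);	/* the fix: reinit before each start */
		pool_start(&workers);
	}
	++refcnt;
	pthread_mutex_unlock(&lock);
}

static void workers_put(void)
{
	pthread_mutex_lock(&lock);
	if (--refcnt == 0)
		pool_stop(&workers);
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	workers_get();	/* first scrub: init + start */
	workers_put();	/* last user gone: stop */
	workers_get();	/* second scrub: init + start again, no stale state */
	workers_put();
	return 0;
}

Running it shows init + start on each 0 -> 1 transition, which is exactly what the added lines guarantee.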
1177 static noinline_for_stack void scrub_workers_put(struct btrfs_root *root) 1181 static noinline_for_stack void scrub_workers_put(struct btrfs_root *root)
1178 { 1182 {
1179 struct btrfs_fs_info *fs_info = root->fs_info; 1183 struct btrfs_fs_info *fs_info = root->fs_info;
1180 1184
1181 mutex_lock(&fs_info->scrub_lock); 1185 mutex_lock(&fs_info->scrub_lock);
1182 if (--fs_info->scrub_workers_refcnt == 0) 1186 if (--fs_info->scrub_workers_refcnt == 0)
1183 btrfs_stop_workers(&fs_info->scrub_workers); 1187 btrfs_stop_workers(&fs_info->scrub_workers);
1184 WARN_ON(fs_info->scrub_workers_refcnt < 0); 1188 WARN_ON(fs_info->scrub_workers_refcnt < 0);
1185 mutex_unlock(&fs_info->scrub_lock); 1189 mutex_unlock(&fs_info->scrub_lock);
1186 } 1190 }
1187 1191
1188 1192
1189 int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end, 1193 int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end,
1190 struct btrfs_scrub_progress *progress, int readonly) 1194 struct btrfs_scrub_progress *progress, int readonly)
1191 { 1195 {
1192 struct scrub_dev *sdev; 1196 struct scrub_dev *sdev;
1193 struct btrfs_fs_info *fs_info = root->fs_info; 1197 struct btrfs_fs_info *fs_info = root->fs_info;
1194 int ret; 1198 int ret;
1195 struct btrfs_device *dev; 1199 struct btrfs_device *dev;
1196 1200
1197 if (btrfs_fs_closing(root->fs_info)) 1201 if (btrfs_fs_closing(root->fs_info))
1198 return -EINVAL; 1202 return -EINVAL;
1199 1203
1200 /* 1204 /*
1201 * check some assumptions 1205 * check some assumptions
1202 */ 1206 */
1203 if (root->sectorsize != PAGE_SIZE || 1207 if (root->sectorsize != PAGE_SIZE ||
1204 root->sectorsize != root->leafsize || 1208 root->sectorsize != root->leafsize ||
1205 root->sectorsize != root->nodesize) { 1209 root->sectorsize != root->nodesize) {
1206 printk(KERN_ERR "btrfs_scrub: size assumptions fail\n"); 1210 printk(KERN_ERR "btrfs_scrub: size assumptions fail\n");
1207 return -EINVAL; 1211 return -EINVAL;
1208 } 1212 }
1209 1213
1210 ret = scrub_workers_get(root); 1214 ret = scrub_workers_get(root);
1211 if (ret) 1215 if (ret)
1212 return ret; 1216 return ret;
1213 1217
1214 mutex_lock(&root->fs_info->fs_devices->device_list_mutex); 1218 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
1215 dev = btrfs_find_device(root, devid, NULL, NULL); 1219 dev = btrfs_find_device(root, devid, NULL, NULL);
1216 if (!dev || dev->missing) { 1220 if (!dev || dev->missing) {
1217 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); 1221 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1218 scrub_workers_put(root); 1222 scrub_workers_put(root);
1219 return -ENODEV; 1223 return -ENODEV;
1220 } 1224 }
1221 mutex_lock(&fs_info->scrub_lock); 1225 mutex_lock(&fs_info->scrub_lock);
1222 1226
1223 if (!dev->in_fs_metadata) { 1227 if (!dev->in_fs_metadata) {
1224 mutex_unlock(&fs_info->scrub_lock); 1228 mutex_unlock(&fs_info->scrub_lock);
1225 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); 1229 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1226 scrub_workers_put(root); 1230 scrub_workers_put(root);
1227 return -ENODEV; 1231 return -ENODEV;
1228 } 1232 }
1229 1233
1230 if (dev->scrub_device) { 1234 if (dev->scrub_device) {
1231 mutex_unlock(&fs_info->scrub_lock); 1235 mutex_unlock(&fs_info->scrub_lock);
1232 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); 1236 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1233 scrub_workers_put(root); 1237 scrub_workers_put(root);
1234 return -EINPROGRESS; 1238 return -EINPROGRESS;
1235 } 1239 }
1236 sdev = scrub_setup_dev(dev); 1240 sdev = scrub_setup_dev(dev);
1237 if (IS_ERR(sdev)) { 1241 if (IS_ERR(sdev)) {
1238 mutex_unlock(&fs_info->scrub_lock); 1242 mutex_unlock(&fs_info->scrub_lock);
1239 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); 1243 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1240 scrub_workers_put(root); 1244 scrub_workers_put(root);
1241 return PTR_ERR(sdev); 1245 return PTR_ERR(sdev);
1242 } 1246 }
1243 sdev->readonly = readonly; 1247 sdev->readonly = readonly;
1244 dev->scrub_device = sdev; 1248 dev->scrub_device = sdev;
1245 1249
1246 atomic_inc(&fs_info->scrubs_running); 1250 atomic_inc(&fs_info->scrubs_running);
1247 mutex_unlock(&fs_info->scrub_lock); 1251 mutex_unlock(&fs_info->scrub_lock);
1248 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); 1252 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1249 1253
1250 down_read(&fs_info->scrub_super_lock); 1254 down_read(&fs_info->scrub_super_lock);
1251 ret = scrub_supers(sdev); 1255 ret = scrub_supers(sdev);
1252 up_read(&fs_info->scrub_super_lock); 1256 up_read(&fs_info->scrub_super_lock);
1253 1257
1254 if (!ret) 1258 if (!ret)
1255 ret = scrub_enumerate_chunks(sdev, start, end); 1259 ret = scrub_enumerate_chunks(sdev, start, end);
1256 1260
1257 wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0); 1261 wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0);
1258 1262
1259 atomic_dec(&fs_info->scrubs_running); 1263 atomic_dec(&fs_info->scrubs_running);
1260 wake_up(&fs_info->scrub_pause_wait); 1264 wake_up(&fs_info->scrub_pause_wait);
1261 1265
1262 if (progress) 1266 if (progress)
1263 memcpy(progress, &sdev->stat, sizeof(*progress)); 1267 memcpy(progress, &sdev->stat, sizeof(*progress));
1264 1268
1265 mutex_lock(&fs_info->scrub_lock); 1269 mutex_lock(&fs_info->scrub_lock);
1266 dev->scrub_device = NULL; 1270 dev->scrub_device = NULL;
1267 mutex_unlock(&fs_info->scrub_lock); 1271 mutex_unlock(&fs_info->scrub_lock);
1268 1272
1269 scrub_free_dev(sdev); 1273 scrub_free_dev(sdev);
1270 scrub_workers_put(root); 1274 scrub_workers_put(root);
1271 1275
1272 return ret; 1276 return ret;
1273 } 1277 }
1274 1278
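btrfs_scrub_dev repeats the same unlock/unlock/scrub_workers_put sequence in four separate error paths. Kernel code usually folds that into goto-based unwinding; a purely illustrative sketch of the same control flow in that style, with stand-in helpers (note that on success the real function drops the workers reference only after the scrub finishes):

#include <errno.h>
#include <stdio.h>

/* stand-ins for the locks and helpers used by btrfs_scrub_dev */
static void device_list_lock(void)   { puts("device_list_mutex lock"); }
static void device_list_unlock(void) { puts("device_list_mutex unlock"); }
static void scrub_lock(void)         { puts("scrub_lock lock"); }
static void scrub_unlock(void)       { puts("scrub_lock unlock"); }
static void workers_put(void)        { puts("scrub_workers_put"); }

static int device_found(void)  { return 1; }
static int device_usable(void) { return 1; }
static int scrub_running(void) { return 0; }

static int scrub_dev_sketch(void)
{
	int ret = 0;

	device_list_lock();
	if (!device_found()) {
		ret = -ENODEV;
		goto out_device_list;
	}
	scrub_lock();
	if (!device_usable()) {
		ret = -ENODEV;
		goto out_scrub;
	}
	if (scrub_running()) {
		ret = -EINPROGRESS;
		goto out_scrub;
	}
	/* ... set up sdev and start the scrub here ... */
out_scrub:
	scrub_unlock();
out_device_list:
	device_list_unlock();
	if (ret)
		workers_put();	/* every error path puts the workers ref */
	return ret;
}

int main(void)
{
	return scrub_dev_sketch() ? 1 : 0;
}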
1275 int btrfs_scrub_pause(struct btrfs_root *root) 1279 int btrfs_scrub_pause(struct btrfs_root *root)
1276 { 1280 {
1277 struct btrfs_fs_info *fs_info = root->fs_info; 1281 struct btrfs_fs_info *fs_info = root->fs_info;
1278 1282
1279 mutex_lock(&fs_info->scrub_lock); 1283 mutex_lock(&fs_info->scrub_lock);
1280 atomic_inc(&fs_info->scrub_pause_req); 1284 atomic_inc(&fs_info->scrub_pause_req);
1281 while (atomic_read(&fs_info->scrubs_paused) != 1285 while (atomic_read(&fs_info->scrubs_paused) !=
1282 atomic_read(&fs_info->scrubs_running)) { 1286 atomic_read(&fs_info->scrubs_running)) {
1283 mutex_unlock(&fs_info->scrub_lock); 1287 mutex_unlock(&fs_info->scrub_lock);
1284 wait_event(fs_info->scrub_pause_wait, 1288 wait_event(fs_info->scrub_pause_wait,
1285 atomic_read(&fs_info->scrubs_paused) == 1289 atomic_read(&fs_info->scrubs_paused) ==
1286 atomic_read(&fs_info->scrubs_running)); 1290 atomic_read(&fs_info->scrubs_running));
1287 mutex_lock(&fs_info->scrub_lock); 1291 mutex_lock(&fs_info->scrub_lock);
1288 } 1292 }
1289 mutex_unlock(&fs_info->scrub_lock); 1293 mutex_unlock(&fs_info->scrub_lock);
1290 1294
1291 return 0; 1295 return 0;
1292 } 1296 }
1293 1297
1294 int btrfs_scrub_continue(struct btrfs_root *root) 1298 int btrfs_scrub_continue(struct btrfs_root *root)
1295 { 1299 {
1296 struct btrfs_fs_info *fs_info = root->fs_info; 1300 struct btrfs_fs_info *fs_info = root->fs_info;
1297 1301
1298 atomic_dec(&fs_info->scrub_pause_req); 1302 atomic_dec(&fs_info->scrub_pause_req);
1299 wake_up(&fs_info->scrub_pause_wait); 1303 wake_up(&fs_info->scrub_pause_wait);
1300 return 0; 1304 return 0;
1301 } 1305 }
1302 1306
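btrfs_scrub_pause and btrfs_scrub_continue implement a rendezvous on two counters: the pauser raises scrub_pause_req and waits until scrubs_paused catches up with scrubs_running, while each scrub thread (see the pause check in scrub_stripe above) flushes its in-flight I/O, bumps scrubs_paused and sleeps until the request drops. A compact userspace model of the handshake, using one pthread mutex and condition variable in place of the kernel's wait queues:

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int pause_req, paused, running;

/* scrub side: the per-stripe check scrub_stripe performs */
static void maybe_pause(void)
{
	pthread_mutex_lock(&lock);
	if (pause_req) {
		paused++;
		pthread_cond_broadcast(&cond);
		while (pause_req)
			pthread_cond_wait(&cond, &lock);
		paused--;
	}
	pthread_mutex_unlock(&lock);
}

static void *scrub_thread(void *arg)
{
	(void)arg;
	for (int stripe = 0; stripe < 5; stripe++) {
		maybe_pause();
		printf("scrubbing stripe %d\n", stripe);
		usleep(1000);
	}
	pthread_mutex_lock(&lock);
	running--;
	pthread_cond_broadcast(&cond);
	pthread_mutex_unlock(&lock);
	return NULL;
}

/* commit side: like btrfs_scrub_pause/btrfs_scrub_continue */
static void scrub_pause(void)
{
	pthread_mutex_lock(&lock);
	pause_req++;
	while (paused != running)
		pthread_cond_wait(&cond, &lock);
	pthread_mutex_unlock(&lock);
}

static void scrub_continue(void)
{
	pthread_mutex_lock(&lock);
	pause_req--;
	pthread_cond_broadcast(&cond);
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	pthread_t t;

	running = 1;
	pthread_create(&t, NULL, scrub_thread, NULL);
	scrub_pause();
	puts("paused: safe to commit");
	scrub_continue();
	pthread_join(&t, NULL);
	return 0;
}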
1303 int btrfs_scrub_pause_super(struct btrfs_root *root) 1307 int btrfs_scrub_pause_super(struct btrfs_root *root)
1304 { 1308 {
1305 down_write(&root->fs_info->scrub_super_lock); 1309 down_write(&root->fs_info->scrub_super_lock);
1306 return 0; 1310 return 0;
1307 } 1311 }
1308 1312
1309 int btrfs_scrub_continue_super(struct btrfs_root *root) 1313 int btrfs_scrub_continue_super(struct btrfs_root *root)
1310 { 1314 {
1311 up_write(&root->fs_info->scrub_super_lock); 1315 up_write(&root->fs_info->scrub_super_lock);
1312 return 0; 1316 return 0;
1313 } 1317 }
1314 1318
1315 int btrfs_scrub_cancel(struct btrfs_root *root) 1319 int btrfs_scrub_cancel(struct btrfs_root *root)
1316 { 1320 {
1317 struct btrfs_fs_info *fs_info = root->fs_info; 1321 struct btrfs_fs_info *fs_info = root->fs_info;
1318 1322
1319 mutex_lock(&fs_info->scrub_lock); 1323 mutex_lock(&fs_info->scrub_lock);
1320 if (!atomic_read(&fs_info->scrubs_running)) { 1324 if (!atomic_read(&fs_info->scrubs_running)) {
1321 mutex_unlock(&fs_info->scrub_lock); 1325 mutex_unlock(&fs_info->scrub_lock);
1322 return -ENOTCONN; 1326 return -ENOTCONN;
1323 } 1327 }
1324 1328
1325 atomic_inc(&fs_info->scrub_cancel_req); 1329 atomic_inc(&fs_info->scrub_cancel_req);
1326 while (atomic_read(&fs_info->scrubs_running)) { 1330 while (atomic_read(&fs_info->scrubs_running)) {
1327 mutex_unlock(&fs_info->scrub_lock); 1331 mutex_unlock(&fs_info->scrub_lock);
1328 wait_event(fs_info->scrub_pause_wait, 1332 wait_event(fs_info->scrub_pause_wait,
1329 atomic_read(&fs_info->scrubs_running) == 0); 1333 atomic_read(&fs_info->scrubs_running) == 0);
1330 mutex_lock(&fs_info->scrub_lock); 1334 mutex_lock(&fs_info->scrub_lock);
1331 } 1335 }
1332 atomic_dec(&fs_info->scrub_cancel_req); 1336 atomic_dec(&fs_info->scrub_cancel_req);
1333 mutex_unlock(&fs_info->scrub_lock); 1337 mutex_unlock(&fs_info->scrub_lock);
1334 1338
1335 return 0; 1339 return 0;
1336 } 1340 }
1337 1341
1338 int btrfs_scrub_cancel_dev(struct btrfs_root *root, struct btrfs_device *dev) 1342 int btrfs_scrub_cancel_dev(struct btrfs_root *root, struct btrfs_device *dev)
1339 { 1343 {
1340 struct btrfs_fs_info *fs_info = root->fs_info; 1344 struct btrfs_fs_info *fs_info = root->fs_info;
1341 struct scrub_dev *sdev; 1345 struct scrub_dev *sdev;
1342 1346
1343 mutex_lock(&fs_info->scrub_lock); 1347 mutex_lock(&fs_info->scrub_lock);
1344 sdev = dev->scrub_device; 1348 sdev = dev->scrub_device;
1345 if (!sdev) { 1349 if (!sdev) {
1346 mutex_unlock(&fs_info->scrub_lock); 1350 mutex_unlock(&fs_info->scrub_lock);
1347 return -ENOTCONN; 1351 return -ENOTCONN;
1348 } 1352 }
1349 atomic_inc(&sdev->cancel_req); 1353 atomic_inc(&sdev->cancel_req);
1350 while (dev->scrub_device) { 1354 while (dev->scrub_device) {
1351 mutex_unlock(&fs_info->scrub_lock); 1355 mutex_unlock(&fs_info->scrub_lock);
1352 wait_event(fs_info->scrub_pause_wait, 1356 wait_event(fs_info->scrub_pause_wait,
1353 dev->scrub_device == NULL); 1357 dev->scrub_device == NULL);
1354 mutex_lock(&fs_info->scrub_lock); 1358 mutex_lock(&fs_info->scrub_lock);
1355 } 1359 }
1356 mutex_unlock(&fs_info->scrub_lock); 1360 mutex_unlock(&fs_info->scrub_lock);
1357 1361
1358 return 0; 1362 return 0;
1359 } 1363 }
1360 int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid) 1364 int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid)
1361 { 1365 {
1362 struct btrfs_fs_info *fs_info = root->fs_info; 1366 struct btrfs_fs_info *fs_info = root->fs_info;
1363 struct btrfs_device *dev; 1367 struct btrfs_device *dev;
1364 int ret; 1368 int ret;
1365 1369
1366 /* 1370 /*
1367 * we have to hold the device_list_mutex here so the device 1371 * we have to hold the device_list_mutex here so the device
1368 * does not go away in cancel_dev. FIXME: find a better solution 1372 * does not go away in cancel_dev. FIXME: find a better solution
1369 */ 1373 */
1370 mutex_lock(&fs_info->fs_devices->device_list_mutex); 1374 mutex_lock(&fs_info->fs_devices->device_list_mutex);
1371 dev = btrfs_find_device(root, devid, NULL, NULL); 1375 dev = btrfs_find_device(root, devid, NULL, NULL);
1372 if (!dev) { 1376 if (!dev) {
1373 mutex_unlock(&fs_info->fs_devices->device_list_mutex); 1377 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
1374 return -ENODEV; 1378 return -ENODEV;
1375 } 1379 }
1376 ret = btrfs_scrub_cancel_dev(root, dev); 1380 ret = btrfs_scrub_cancel_dev(root, dev);
1377 mutex_unlock(&fs_info->fs_devices->device_list_mutex); 1381 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
1378 1382
1379 return ret; 1383 return ret;
1380 } 1384 }
1381 1385
1382 int btrfs_scrub_progress(struct btrfs_root *root, u64 devid, 1386 int btrfs_scrub_progress(struct btrfs_root *root, u64 devid,
1383 struct btrfs_scrub_progress *progress) 1387 struct btrfs_scrub_progress *progress)
1384 { 1388 {
1385 struct btrfs_device *dev; 1389 struct btrfs_device *dev;
1386 struct scrub_dev *sdev = NULL; 1390 struct scrub_dev *sdev = NULL;
1387 1391
1388 mutex_lock(&root->fs_info->fs_devices->device_list_mutex); 1392 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
1389 dev = btrfs_find_device(root, devid, NULL, NULL); 1393 dev = btrfs_find_device(root, devid, NULL, NULL);
1390 if (dev) 1394 if (dev)
1391 sdev = dev->scrub_device; 1395 sdev = dev->scrub_device;
1392 if (sdev) 1396 if (sdev)
1393 memcpy(progress, &sdev->stat, sizeof(*progress)); 1397 memcpy(progress, &sdev->stat, sizeof(*progress));
1394 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); 1398 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1395 1399
1396 return dev ? (sdev ? 0 : -ENOTCONN) : -ENODEV; 1400 return dev ? (sdev ? 0 : -ENOTCONN) : -ENODEV;
1397 } 1401 }
1398 1402