Commit 4613ad180d19082f99551477dcb13cb23d23661b

Authored by Al Viro
1 parent f7699f2b01

ext3: move headers to fs/ext3/

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

Showing 24 changed files with 1344 additions and 1575 deletions Inline Diff

1 /* 1 /*
2 * linux/fs/ext3/acl.c 2 * linux/fs/ext3/acl.c
3 * 3 *
4 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> 4 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
5 */ 5 */
6 6
7 #include <linux/init.h> 7 #include "ext3.h"
8 #include <linux/sched.h>
9 #include <linux/slab.h>
10 #include <linux/capability.h>
11 #include <linux/fs.h>
12 #include <linux/ext3_jbd.h>
13 #include <linux/ext3_fs.h>
14 #include "xattr.h" 8 #include "xattr.h"
15 #include "acl.h" 9 #include "acl.h"
16 10
/*
 * Convert from filesystem to in-memory representation.
 *
 * Parses the on-disk ACL xattr blob (a version header followed by a
 * mix of short and long entries) into a freshly allocated posix_acl.
 * Returns NULL for an absent or empty ACL, ERR_PTR(-EINVAL) for any
 * malformed input, or ERR_PTR(-ENOMEM) on allocation failure.
 */
static struct posix_acl *
ext3_acl_from_disk(const void *value, size_t size)
{
	const char *end = (char *)value + size;
	int n, count;
	struct posix_acl *acl;

	if (!value)
		return NULL;
	if (size < sizeof(ext3_acl_header))
		return ERR_PTR(-EINVAL);
	if (((ext3_acl_header *)value)->a_version !=
	    cpu_to_le32(EXT3_ACL_VERSION))
		return ERR_PTR(-EINVAL);
	value = (char *)value + sizeof(ext3_acl_header);
	/* ext3_acl_count() derives the entry count from the blob size;
	 * a negative result means the size cannot correspond to a
	 * whole number of entries. */
	count = ext3_acl_count(size);
	if (count < 0)
		return ERR_PTR(-EINVAL);
	if (count == 0)
		return NULL;
	acl = posix_acl_alloc(count, GFP_NOFS);
	if (!acl)
		return ERR_PTR(-ENOMEM);
	for (n=0; n < count; n++) {
		ext3_acl_entry *entry =
			(ext3_acl_entry *)value;
		/* Bounds-check the short (common) part before reading
		 * e_tag/e_perm from it. */
		if ((char *)value + sizeof(ext3_acl_entry_short) > end)
			goto fail;
		acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag);
		acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm);
		switch(acl->a_entries[n].e_tag) {
			case ACL_USER_OBJ:
			case ACL_GROUP_OBJ:
			case ACL_MASK:
			case ACL_OTHER:
				/* Short entry: no qualifier stored. */
				value = (char *)value +
					sizeof(ext3_acl_entry_short);
				acl->a_entries[n].e_id = ACL_UNDEFINED_ID;
				break;

			case ACL_USER:
			case ACL_GROUP:
				/* Long entry: carries a 32-bit uid/gid,
				 * so re-check against the blob end. */
				value = (char *)value + sizeof(ext3_acl_entry);
				if ((char *)value > end)
					goto fail;
				acl->a_entries[n].e_id =
					le32_to_cpu(entry->e_id);
				break;

			default:
				goto fail;
		}
	}
	/* Reject trailing garbage after the last entry. */
	if (value != end)
		goto fail;
	return acl;

fail:
	posix_acl_release(acl);
	return ERR_PTR(-EINVAL);
}
81 75
82 /* 76 /*
83 * Convert from in-memory to filesystem representation. 77 * Convert from in-memory to filesystem representation.
84 */ 78 */
85 static void * 79 static void *
86 ext3_acl_to_disk(const struct posix_acl *acl, size_t *size) 80 ext3_acl_to_disk(const struct posix_acl *acl, size_t *size)
87 { 81 {
88 ext3_acl_header *ext_acl; 82 ext3_acl_header *ext_acl;
89 char *e; 83 char *e;
90 size_t n; 84 size_t n;
91 85
92 *size = ext3_acl_size(acl->a_count); 86 *size = ext3_acl_size(acl->a_count);
93 ext_acl = kmalloc(sizeof(ext3_acl_header) + acl->a_count * 87 ext_acl = kmalloc(sizeof(ext3_acl_header) + acl->a_count *
94 sizeof(ext3_acl_entry), GFP_NOFS); 88 sizeof(ext3_acl_entry), GFP_NOFS);
95 if (!ext_acl) 89 if (!ext_acl)
96 return ERR_PTR(-ENOMEM); 90 return ERR_PTR(-ENOMEM);
97 ext_acl->a_version = cpu_to_le32(EXT3_ACL_VERSION); 91 ext_acl->a_version = cpu_to_le32(EXT3_ACL_VERSION);
98 e = (char *)ext_acl + sizeof(ext3_acl_header); 92 e = (char *)ext_acl + sizeof(ext3_acl_header);
99 for (n=0; n < acl->a_count; n++) { 93 for (n=0; n < acl->a_count; n++) {
100 ext3_acl_entry *entry = (ext3_acl_entry *)e; 94 ext3_acl_entry *entry = (ext3_acl_entry *)e;
101 entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag); 95 entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag);
102 entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm); 96 entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm);
103 switch(acl->a_entries[n].e_tag) { 97 switch(acl->a_entries[n].e_tag) {
104 case ACL_USER: 98 case ACL_USER:
105 case ACL_GROUP: 99 case ACL_GROUP:
106 entry->e_id = 100 entry->e_id =
107 cpu_to_le32(acl->a_entries[n].e_id); 101 cpu_to_le32(acl->a_entries[n].e_id);
108 e += sizeof(ext3_acl_entry); 102 e += sizeof(ext3_acl_entry);
109 break; 103 break;
110 104
111 case ACL_USER_OBJ: 105 case ACL_USER_OBJ:
112 case ACL_GROUP_OBJ: 106 case ACL_GROUP_OBJ:
113 case ACL_MASK: 107 case ACL_MASK:
114 case ACL_OTHER: 108 case ACL_OTHER:
115 e += sizeof(ext3_acl_entry_short); 109 e += sizeof(ext3_acl_entry_short);
116 break; 110 break;
117 111
118 default: 112 default:
119 goto fail; 113 goto fail;
120 } 114 }
121 } 115 }
122 return (char *)ext_acl; 116 return (char *)ext_acl;
123 117
124 fail: 118 fail:
125 kfree(ext_acl); 119 kfree(ext_acl);
126 return ERR_PTR(-EINVAL); 120 return ERR_PTR(-EINVAL);
127 } 121 }
128 122
/*
 * Inode operation get_posix_acl().
 *
 * Read the access or default ACL of @inode from its extended
 * attributes and return it as an in-memory posix_acl: NULL when no
 * ACL is stored (or ACLs are disabled on this mount), an ERR_PTR on
 * failure.  Successfully read ACLs are cached on the inode.
 *
 * inode->i_mutex: don't care
 */
struct posix_acl *
ext3_get_acl(struct inode *inode, int type)
{
	int name_index;
	char *value = NULL;
	struct posix_acl *acl;
	int retval;

	/* ACLs disabled by mount option: behave as if none exist. */
	if (!test_opt(inode->i_sb, POSIX_ACL))
		return NULL;

	acl = get_cached_acl(inode, type);
	if (acl != ACL_NOT_CACHED)
		return acl;

	switch (type) {
	case ACL_TYPE_ACCESS:
		name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS;
		break;
	case ACL_TYPE_DEFAULT:
		name_index = EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT;
		break;
	default:
		BUG();
	}

	/* Two-call protocol: first call sizes the attribute, second
	 * call fetches it into the freshly allocated buffer. */
	retval = ext3_xattr_get(inode, name_index, "", NULL, 0);
	if (retval > 0) {
		value = kmalloc(retval, GFP_NOFS);
		if (!value)
			return ERR_PTR(-ENOMEM);
		retval = ext3_xattr_get(inode, name_index, "", value, retval);
	}
	if (retval > 0)
		acl = ext3_acl_from_disk(value, retval);
	else if (retval == -ENODATA || retval == -ENOSYS)
		acl = NULL;	/* attribute absent: no ACL */
	else
		acl = ERR_PTR(retval);
	kfree(value);

	/* NULL is a valid cacheable result ("no ACL"). */
	if (!IS_ERR(acl))
		set_cached_acl(inode, type, acl);

	return acl;
}
180 174
/*
 * Set the access or default ACL of an inode.
 *
 * A NULL @acl removes the attribute.  For ACL_TYPE_ACCESS the mode
 * bits are updated first; an ACL that is fully representable in
 * i_mode is not stored as an xattr at all.  The caller supplies a
 * running journal @handle and is responsible for releasing @acl.
 *
 * inode->i_mutex: down unless called from ext3_new_inode
 */
static int
ext3_set_acl(handle_t *handle, struct inode *inode, int type,
	     struct posix_acl *acl)
{
	int name_index;
	void *value = NULL;
	size_t size = 0;
	int error;

	/* Symlinks never carry ACLs. */
	if (S_ISLNK(inode->i_mode))
		return -EOPNOTSUPP;

	switch(type) {
		case ACL_TYPE_ACCESS:
			name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS;
			if (acl) {
				error = posix_acl_equiv_mode(acl, &inode->i_mode);
				if (error < 0)
					return error;
				else {
					inode->i_ctime = CURRENT_TIME_SEC;
					ext3_mark_inode_dirty(handle, inode);
					/* error == 0: the ACL is fully
					 * expressed by i_mode, so drop
					 * the xattr entirely. */
					if (error == 0)
						acl = NULL;
				}
			}
			break;

		case ACL_TYPE_DEFAULT:
			name_index = EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT;
			/* Default ACLs only make sense on directories;
			 * removing a nonexistent one succeeds. */
			if (!S_ISDIR(inode->i_mode))
				return acl ? -EACCES : 0;
			break;

		default:
			return -EINVAL;
	}
	if (acl) {
		value = ext3_acl_to_disk(acl, &size);
		if (IS_ERR(value))
			return (int)PTR_ERR(value);
	}

	/* value == NULL, size == 0 deletes the attribute. */
	error = ext3_xattr_set_handle(handle, inode, name_index, "",
				      value, size, 0);

	kfree(value);

	if (!error)
		set_cached_acl(inode, type, acl);

	return error;
}
239 233
/*
 * Initialize the ACLs of a new inode. Called from ext3_new_inode.
 *
 * Inherits the parent directory's default ACL (a new directory also
 * copies it as its own default ACL); when there is nothing to
 * inherit, the umask is applied to i_mode instead.
 *
 * dir->i_mutex: down
 * inode->i_mutex: up (access to inode is still exclusive)
 */
int
ext3_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
{
	struct posix_acl *acl = NULL;
	int error = 0;

	if (!S_ISLNK(inode->i_mode)) {
		if (test_opt(dir->i_sb, POSIX_ACL)) {
			acl = ext3_get_acl(dir, ACL_TYPE_DEFAULT);
			if (IS_ERR(acl))
				return PTR_ERR(acl);
		}
		/* No default ACL to inherit: fall back to the umask. */
		if (!acl)
			inode->i_mode &= ~current_umask();
	}
	if (test_opt(inode->i_sb, POSIX_ACL) && acl) {
		if (S_ISDIR(inode->i_mode)) {
			error = ext3_set_acl(handle, inode,
					     ACL_TYPE_DEFAULT, acl);
			if (error)
				goto cleanup;
		}
		/* posix_acl_create() replaces @acl with the effective
		 * access ACL and folds permissions into i_mode.
		 * NOTE(review): the direct return on error assumes the
		 * helper has already dropped @acl — confirm against the
		 * posix_acl_create() implementation. */
		error = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode);
		if (error < 0)
			return error;

		if (error > 0) {
			/* This is an extended ACL */
			error = ext3_set_acl(handle, inode, ACL_TYPE_ACCESS, acl);
		}
	}
cleanup:
	posix_acl_release(acl);
	return error;
}
281 275
/*
 * Does chmod for an inode that may have an Access Control List. The
 * inode->i_mode field must be updated to the desired value by the caller
 * before calling this function.
 * Returns 0 on success, or a negative error number.
 *
 * We change the ACL rather than storing some ACL entries in the file
 * mode permission bits (which would be more efficient), because that
 * would break once additional permissions (like ACL_APPEND, ACL_DELETE
 * for directories) are added. There are no more bits available in the
 * file mode.
 *
 * inode->i_mutex: down
 */
int
ext3_acl_chmod(struct inode *inode)
{
	struct posix_acl *acl;
	handle_t *handle;
	int retries = 0;
	int error;

	if (S_ISLNK(inode->i_mode))
		return -EOPNOTSUPP;
	if (!test_opt(inode->i_sb, POSIX_ACL))
		return 0;
	/* PTR_ERR(NULL) is 0, so "no ACL" returns success here. */
	acl = ext3_get_acl(inode, ACL_TYPE_ACCESS);
	if (IS_ERR(acl) || !acl)
		return PTR_ERR(acl);
	/* Rewrites @acl to reflect the new i_mode.
	 * NOTE(review): the early return assumes posix_acl_chmod()
	 * drops the old ACL on failure so nothing leaks — confirm. */
	error = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode);
	if (error)
		return error;
retry:
	handle = ext3_journal_start(inode,
			EXT3_DATA_TRANS_BLOCKS(inode->i_sb));
	if (IS_ERR(handle)) {
		error = PTR_ERR(handle);
		ext3_std_error(inode->i_sb, error);
		goto out;
	}
	error = ext3_set_acl(handle, inode, ACL_TYPE_ACCESS, acl);
	ext3_journal_stop(handle);
	/* A transient ENOSPC may clear once the journal commits and
	 * frees blocks; retry while the fs says it is worth it. */
	if (error == -ENOSPC &&
	    ext3_should_retry_alloc(inode->i_sb, &retries))
		goto retry;
out:
	posix_acl_release(acl);
	return error;
}
331 325
332 /* 326 /*
333 * Extended attribute handlers 327 * Extended attribute handlers
334 */ 328 */
335 static size_t 329 static size_t
336 ext3_xattr_list_acl_access(struct dentry *dentry, char *list, size_t list_len, 330 ext3_xattr_list_acl_access(struct dentry *dentry, char *list, size_t list_len,
337 const char *name, size_t name_len, int type) 331 const char *name, size_t name_len, int type)
338 { 332 {
339 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); 333 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
340 334
341 if (!test_opt(dentry->d_sb, POSIX_ACL)) 335 if (!test_opt(dentry->d_sb, POSIX_ACL))
342 return 0; 336 return 0;
343 if (list && size <= list_len) 337 if (list && size <= list_len)
344 memcpy(list, POSIX_ACL_XATTR_ACCESS, size); 338 memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
345 return size; 339 return size;
346 } 340 }
347 341
348 static size_t 342 static size_t
349 ext3_xattr_list_acl_default(struct dentry *dentry, char *list, size_t list_len, 343 ext3_xattr_list_acl_default(struct dentry *dentry, char *list, size_t list_len,
350 const char *name, size_t name_len, int type) 344 const char *name, size_t name_len, int type)
351 { 345 {
352 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); 346 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
353 347
354 if (!test_opt(dentry->d_sb, POSIX_ACL)) 348 if (!test_opt(dentry->d_sb, POSIX_ACL))
355 return 0; 349 return 0;
356 if (list && size <= list_len) 350 if (list && size <= list_len)
357 memcpy(list, POSIX_ACL_XATTR_DEFAULT, size); 351 memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
358 return size; 352 return size;
359 } 353 }
360 354
361 static int 355 static int
362 ext3_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer, 356 ext3_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer,
363 size_t size, int type) 357 size_t size, int type)
364 { 358 {
365 struct posix_acl *acl; 359 struct posix_acl *acl;
366 int error; 360 int error;
367 361
368 if (strcmp(name, "") != 0) 362 if (strcmp(name, "") != 0)
369 return -EINVAL; 363 return -EINVAL;
370 if (!test_opt(dentry->d_sb, POSIX_ACL)) 364 if (!test_opt(dentry->d_sb, POSIX_ACL))
371 return -EOPNOTSUPP; 365 return -EOPNOTSUPP;
372 366
373 acl = ext3_get_acl(dentry->d_inode, type); 367 acl = ext3_get_acl(dentry->d_inode, type);
374 if (IS_ERR(acl)) 368 if (IS_ERR(acl))
375 return PTR_ERR(acl); 369 return PTR_ERR(acl);
376 if (acl == NULL) 370 if (acl == NULL)
377 return -ENODATA; 371 return -ENODATA;
378 error = posix_acl_to_xattr(acl, buffer, size); 372 error = posix_acl_to_xattr(acl, buffer, size);
379 posix_acl_release(acl); 373 posix_acl_release(acl);
380 374
381 return error; 375 return error;
382 } 376 }
383 377
384 static int 378 static int
385 ext3_xattr_set_acl(struct dentry *dentry, const char *name, const void *value, 379 ext3_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
386 size_t size, int flags, int type) 380 size_t size, int flags, int type)
387 { 381 {
388 struct inode *inode = dentry->d_inode; 382 struct inode *inode = dentry->d_inode;
389 handle_t *handle; 383 handle_t *handle;
390 struct posix_acl *acl; 384 struct posix_acl *acl;
391 int error, retries = 0; 385 int error, retries = 0;
392 386
393 if (strcmp(name, "") != 0) 387 if (strcmp(name, "") != 0)
394 return -EINVAL; 388 return -EINVAL;
395 if (!test_opt(inode->i_sb, POSIX_ACL)) 389 if (!test_opt(inode->i_sb, POSIX_ACL))
396 return -EOPNOTSUPP; 390 return -EOPNOTSUPP;
397 if (!inode_owner_or_capable(inode)) 391 if (!inode_owner_or_capable(inode))
398 return -EPERM; 392 return -EPERM;
399 393
400 if (value) { 394 if (value) {
401 acl = posix_acl_from_xattr(value, size); 395 acl = posix_acl_from_xattr(value, size);
402 if (IS_ERR(acl)) 396 if (IS_ERR(acl))
403 return PTR_ERR(acl); 397 return PTR_ERR(acl);
404 else if (acl) { 398 else if (acl) {
405 error = posix_acl_valid(acl); 399 error = posix_acl_valid(acl);
406 if (error) 400 if (error)
407 goto release_and_out; 401 goto release_and_out;
408 } 402 }
409 } else 403 } else
410 acl = NULL; 404 acl = NULL;
411 405
412 retry: 406 retry:
413 handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS(inode->i_sb)); 407 handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS(inode->i_sb));
414 if (IS_ERR(handle)) 408 if (IS_ERR(handle))
415 return PTR_ERR(handle); 409 return PTR_ERR(handle);
416 error = ext3_set_acl(handle, inode, type, acl); 410 error = ext3_set_acl(handle, inode, type, acl);
417 ext3_journal_stop(handle); 411 ext3_journal_stop(handle);
418 if (error == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) 412 if (error == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
419 goto retry; 413 goto retry;
420 414
421 release_and_out: 415 release_and_out:
422 posix_acl_release(acl); 416 posix_acl_release(acl);
423 return error; 417 return error;
424 } 418 }
425 419
/* xattr handler wiring for the access-ACL attribute name. */
const struct xattr_handler ext3_xattr_acl_access_handler = {
	.prefix	= POSIX_ACL_XATTR_ACCESS,
	.flags	= ACL_TYPE_ACCESS,
	.list	= ext3_xattr_list_acl_access,
	.get	= ext3_xattr_get_acl,
	.set	= ext3_xattr_set_acl,
};
433 427
/* xattr handler wiring for the default-ACL attribute name. */
const struct xattr_handler ext3_xattr_acl_default_handler = {
	.prefix	= POSIX_ACL_XATTR_DEFAULT,
	.flags	= ACL_TYPE_DEFAULT,
	.list	= ext3_xattr_list_acl_default,
	.get	= ext3_xattr_get_acl,
	.set	= ext3_xattr_set_acl,
};
441 435
1 /* 1 /*
2 * linux/fs/ext3/balloc.c 2 * linux/fs/ext3/balloc.c
3 * 3 *
4 * Copyright (C) 1992, 1993, 1994, 1995 4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr) 5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal 6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI) 7 * Universite Pierre et Marie Curie (Paris VI)
8 * 8 *
9 * Enhanced block allocation by Stephen Tweedie (sct@redhat.com), 1993 9 * Enhanced block allocation by Stephen Tweedie (sct@redhat.com), 1993
10 * Big-endian to little-endian byte-swapping/bitmaps by 10 * Big-endian to little-endian byte-swapping/bitmaps by
11 * David S. Miller (davem@caip.rutgers.edu), 1995 11 * David S. Miller (davem@caip.rutgers.edu), 1995
12 */ 12 */
13 13
14 #include <linux/time.h>
15 #include <linux/capability.h>
16 #include <linux/fs.h>
17 #include <linux/slab.h>
18 #include <linux/jbd.h>
19 #include <linux/ext3_fs.h>
20 #include <linux/ext3_jbd.h>
21 #include <linux/quotaops.h> 14 #include <linux/quotaops.h>
22 #include <linux/buffer_head.h>
23 #include <linux/blkdev.h> 15 #include <linux/blkdev.h>
24 #include <trace/events/ext3.h> 16 #include "ext3.h"
25 17
26 /* 18 /*
27 * balloc.c contains the blocks allocation and deallocation routines 19 * balloc.c contains the blocks allocation and deallocation routines
28 */ 20 */
29 21
30 /* 22 /*
31 * The free blocks are managed by bitmaps. A file system contains several 23 * The free blocks are managed by bitmaps. A file system contains several
32 * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap 24 * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap
33 * block for inodes, N blocks for the inode table and data blocks. 25 * block for inodes, N blocks for the inode table and data blocks.
34 * 26 *
35 * The file system contains group descriptors which are located after the 27 * The file system contains group descriptors which are located after the
36 * super block. Each descriptor contains the number of the bitmap block and 28 * super block. Each descriptor contains the number of the bitmap block and
37 * the free blocks count in the block. The descriptors are loaded in memory 29 * the free blocks count in the block. The descriptors are loaded in memory
38 * when a file system is mounted (see ext3_fill_super). 30 * when a file system is mounted (see ext3_fill_super).
39 */ 31 */
40 32
41 33
42 #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) 34 #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
43 35
44 /* 36 /*
45 * Calculate the block group number and offset, given a block number 37 * Calculate the block group number and offset, given a block number
46 */ 38 */
47 static void ext3_get_group_no_and_offset(struct super_block *sb, 39 static void ext3_get_group_no_and_offset(struct super_block *sb,
48 ext3_fsblk_t blocknr, unsigned long *blockgrpp, ext3_grpblk_t *offsetp) 40 ext3_fsblk_t blocknr, unsigned long *blockgrpp, ext3_grpblk_t *offsetp)
49 { 41 {
50 struct ext3_super_block *es = EXT3_SB(sb)->s_es; 42 struct ext3_super_block *es = EXT3_SB(sb)->s_es;
51 43
52 blocknr = blocknr - le32_to_cpu(es->s_first_data_block); 44 blocknr = blocknr - le32_to_cpu(es->s_first_data_block);
53 if (offsetp) 45 if (offsetp)
54 *offsetp = blocknr % EXT3_BLOCKS_PER_GROUP(sb); 46 *offsetp = blocknr % EXT3_BLOCKS_PER_GROUP(sb);
55 if (blockgrpp) 47 if (blockgrpp)
56 *blockgrpp = blocknr / EXT3_BLOCKS_PER_GROUP(sb); 48 *blockgrpp = blocknr / EXT3_BLOCKS_PER_GROUP(sb);
57 } 49 }
58 50
/**
 * ext3_get_group_desc() -- load group descriptor from disk
 * @sb:			super block
 * @block_group:	given block group
 * @bh:			pointer to the buffer head to store the block
 *			group descriptor
 *
 * Returns a pointer into the in-memory descriptor block (optionally
 * handing back the buffer head holding it via @bh), or NULL if
 * @block_group is out of range or its descriptor block was never
 * loaded; both failure cases are reported via ext3_error().
 */
struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
					     unsigned int block_group,
					     struct buffer_head ** bh)
{
	unsigned long group_desc;
	unsigned long offset;
	struct ext3_group_desc * desc;
	struct ext3_sb_info *sbi = EXT3_SB(sb);

	if (block_group >= sbi->s_groups_count) {
		ext3_error (sb, "ext3_get_group_desc",
			    "block_group >= groups_count - "
			    "block_group = %d, groups_count = %lu",
			    block_group, sbi->s_groups_count);

		return NULL;
	}
	/* NOTE(review): presumably pairs with a write barrier where
	 * s_groups_count is grown (online resize) — confirm. */
	smp_rmb();

	/* Which cached descriptor block, and which slot within it. */
	group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(sb);
	offset = block_group & (EXT3_DESC_PER_BLOCK(sb) - 1);
	if (!sbi->s_group_desc[group_desc]) {
		ext3_error (sb, "ext3_get_group_desc",
			    "Group descriptor not loaded - "
			    "block_group = %d, group_desc = %lu, desc = %lu",
			    block_group, group_desc, offset);
		return NULL;
	}

	desc = (struct ext3_group_desc *) sbi->s_group_desc[group_desc]->b_data;
	if (bh)
		*bh = sbi->s_group_desc[group_desc];
	return desc + offset;
}
100 92
/*
 * Sanity-check a block bitmap: the bits covering this group's own
 * metadata (block bitmap, inode bitmap, inode table blocks) must all
 * be set.  Returns 1 if the bitmap looks valid, 0 after reporting the
 * corruption via ext3_error().
 */
static int ext3_valid_block_bitmap(struct super_block *sb,
					struct ext3_group_desc *desc,
					unsigned int block_group,
					struct buffer_head *bh)
{
	ext3_grpblk_t offset;
	ext3_grpblk_t next_zero_bit;
	ext3_fsblk_t bitmap_blk;
	ext3_fsblk_t group_first_block;

	group_first_block = ext3_group_first_block_no(sb, block_group);

	/* check whether block bitmap block number is set */
	bitmap_blk = le32_to_cpu(desc->bg_block_bitmap);
	offset = bitmap_blk - group_first_block;
	if (!ext3_test_bit(offset, bh->b_data))
		/* bad block bitmap */
		goto err_out;

	/* check whether the inode bitmap block number is set */
	bitmap_blk = le32_to_cpu(desc->bg_inode_bitmap);
	offset = bitmap_blk - group_first_block;
	if (!ext3_test_bit(offset, bh->b_data))
		/* bad block bitmap */
		goto err_out;

	/* check whether the inode table block number is set */
	bitmap_blk = le32_to_cpu(desc->bg_inode_table);
	offset = bitmap_blk - group_first_block;
	/* Every inode-table block must be marked in use: scan its
	 * range for a stray zero bit. */
	next_zero_bit = ext3_find_next_zero_bit(bh->b_data,
				offset + EXT3_SB(sb)->s_itb_per_group,
				offset);
	if (next_zero_bit >= offset + EXT3_SB(sb)->s_itb_per_group)
		/* good bitmap for inode tables */
		return 1;

err_out:
	/* bitmap_blk here is whichever metadata block failed the check */
	ext3_error(sb, __func__,
			"Invalid block bitmap - "
			"block_group = %d, block = %lu",
			block_group, bitmap_blk);
	return 0;
}
144 136
/**
 * read_block_bitmap()
 * @sb:			super block
 * @block_group:	given block group
 *
 * Read the bitmap for a given block_group,and validate the
 * bits for block/inode/inode tables are set in the bitmaps
 *
 * Return buffer_head on success or NULL in case of failure.
 * The caller is responsible for brelse() on the returned buffer.
 */
static struct buffer_head *
read_block_bitmap(struct super_block *sb, unsigned int block_group)
{
	struct ext3_group_desc * desc;
	struct buffer_head * bh = NULL;
	ext3_fsblk_t bitmap_blk;

	desc = ext3_get_group_desc(sb, block_group, NULL);
	if (!desc)
		return NULL;
	trace_ext3_read_block_bitmap(sb, block_group);
	bitmap_blk = le32_to_cpu(desc->bg_block_bitmap);
	bh = sb_getblk(sb, bitmap_blk);
	if (unlikely(!bh)) {
		ext3_error(sb, __func__,
			    "Cannot read block bitmap - "
			    "block_group = %d, block_bitmap = %u",
			    block_group, le32_to_cpu(desc->bg_block_bitmap));
		return NULL;
	}
	/* Fast path: buffer already up to date; otherwise the buffer
	 * comes back locked for the submit below. */
	if (likely(bh_uptodate_or_lock(bh)))
		return bh;

	if (bh_submit_read(bh) < 0) {
		brelse(bh);
		ext3_error(sb, __func__,
			    "Cannot read block bitmap - "
			    "block_group = %d, block_bitmap = %u",
			    block_group, le32_to_cpu(desc->bg_block_bitmap));
		return NULL;
	}
	/* Validation failure is reported inside the helper but is
	 * deliberately not treated as fatal here: */
	ext3_valid_block_bitmap(sb, desc, block_group, bh);
	/*
	 * file system mounted not to panic on error, continue with corrupt
	 * bitmap
	 */
	return bh;
}
193 /* 185 /*
194 * The reservation window structure operations 186 * The reservation window structure operations
195 * -------------------------------------------- 187 * --------------------------------------------
196 * Operations include: 188 * Operations include:
197 * dump, find, add, remove, is_empty, find_next_reservable_window, etc. 189 * dump, find, add, remove, is_empty, find_next_reservable_window, etc.
198 * 190 *
199 * We use a red-black tree to represent per-filesystem reservation 191 * We use a red-black tree to represent per-filesystem reservation
200 * windows. 192 * windows.
201 * 193 *
202 */ 194 */
203 195
/**
 * __rsv_window_dump() -- Dump the filesystem block allocation reservation map
 * @rb_root: root of per-filesystem reservation rb tree
 * @verbose: verbose mode
 * @fn: function which wishes to dump the reservation map
 *
 * If verbose is turned on, it will print the whole block reservation
 * windows(start, end). Otherwise, it will only print out the "bad" windows,
 * those windows that overlap with their immediate neighbors.
 */
#if 1
static void __rsv_window_dump(struct rb_root *root, int verbose,
			const char *fn)
{
	struct rb_node *n;
	struct ext3_reserve_window_node *rsv, *prev;
	int bad;

restart:
	n = rb_first(root);
	bad = 0;
	prev = NULL;

	printk("Block Allocation Reservation Windows Map (%s):\n", fn);
	/* In-order walk: windows come out sorted by start block. */
	while (n) {
		rsv = rb_entry(n, struct ext3_reserve_window_node, rsv_node);
		if (verbose)
			printk("reservation window 0x%p "
			       "start: %lu, end: %lu\n",
			       rsv, rsv->rsv_start, rsv->rsv_end);
		/* A non-empty window must satisfy start < end. */
		if (rsv->rsv_start && rsv->rsv_start >= rsv->rsv_end) {
			printk("Bad reservation %p (start >= end)\n",
			       rsv);
			bad = 1;
		}
		/* Consecutive windows must not overlap. */
		if (prev && prev->rsv_end >= rsv->rsv_start) {
			printk("Bad reservation %p (prev->end >= start)\n",
			       rsv);
			bad = 1;
		}
		if (bad) {
			if (!verbose) {
				/* Re-walk verbosely so the full map is in
				 * the log before BUG_ON() fires below. */
				printk("Restarting reservation walk in verbose mode\n");
				verbose = 1;
				goto restart;
			}
		}
		n = rb_next(n);
		prev = rsv;
	}
	printk("Window map complete.\n");
	BUG_ON(bad);
}
#define rsv_window_dump(root, verbose) \
	__rsv_window_dump((root), (verbose), __func__)
#else
#define rsv_window_dump(root, verbose) do {} while (0)
#endif
262 254
263 /** 255 /**
264 * goal_in_my_reservation() 256 * goal_in_my_reservation()
265 * @rsv: inode's reservation window 257 * @rsv: inode's reservation window
266 * @grp_goal: given goal block relative to the allocation block group 258 * @grp_goal: given goal block relative to the allocation block group
267 * @group: the current allocation block group 259 * @group: the current allocation block group
268 * @sb: filesystem super block 260 * @sb: filesystem super block
269 * 261 *
270 * Test if the given goal block (group relative) is within the file's 262 * Test if the given goal block (group relative) is within the file's
271 * own block reservation window range. 263 * own block reservation window range.
272 * 264 *
273 * If the reservation window is outside the goal allocation group, return 0; 265 * If the reservation window is outside the goal allocation group, return 0;
274 * grp_goal (given goal block) could be -1, which means no specific 266 * grp_goal (given goal block) could be -1, which means no specific
275 * goal block. In this case, always return 1. 267 * goal block. In this case, always return 1.
276 * If the goal block is within the reservation window, return 1; 268 * If the goal block is within the reservation window, return 1;
277 * otherwise, return 0; 269 * otherwise, return 0;
278 */ 270 */
279 static int 271 static int
280 goal_in_my_reservation(struct ext3_reserve_window *rsv, ext3_grpblk_t grp_goal, 272 goal_in_my_reservation(struct ext3_reserve_window *rsv, ext3_grpblk_t grp_goal,
281 unsigned int group, struct super_block * sb) 273 unsigned int group, struct super_block * sb)
282 { 274 {
283 ext3_fsblk_t group_first_block, group_last_block; 275 ext3_fsblk_t group_first_block, group_last_block;
284 276
285 group_first_block = ext3_group_first_block_no(sb, group); 277 group_first_block = ext3_group_first_block_no(sb, group);
286 group_last_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1); 278 group_last_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1);
287 279
288 if ((rsv->_rsv_start > group_last_block) || 280 if ((rsv->_rsv_start > group_last_block) ||
289 (rsv->_rsv_end < group_first_block)) 281 (rsv->_rsv_end < group_first_block))
290 return 0; 282 return 0;
291 if ((grp_goal >= 0) && ((grp_goal + group_first_block < rsv->_rsv_start) 283 if ((grp_goal >= 0) && ((grp_goal + group_first_block < rsv->_rsv_start)
292 || (grp_goal + group_first_block > rsv->_rsv_end))) 284 || (grp_goal + group_first_block > rsv->_rsv_end)))
293 return 0; 285 return 0;
294 return 1; 286 return 1;
295 } 287 }
296 288
297 /** 289 /**
298 * search_reserve_window() 290 * search_reserve_window()
299 * @rb_root: root of reservation tree 291 * @rb_root: root of reservation tree
300 * @goal: target allocation block 292 * @goal: target allocation block
301 * 293 *
302 * Find the reserved window which includes the goal, or the previous one 294 * Find the reserved window which includes the goal, or the previous one
303 * if the goal is not in any window. 295 * if the goal is not in any window.
304 * Returns NULL if there are no windows or if all windows start after the goal. 296 * Returns NULL if there are no windows or if all windows start after the goal.
305 */ 297 */
306 static struct ext3_reserve_window_node * 298 static struct ext3_reserve_window_node *
307 search_reserve_window(struct rb_root *root, ext3_fsblk_t goal) 299 search_reserve_window(struct rb_root *root, ext3_fsblk_t goal)
308 { 300 {
309 struct rb_node *n = root->rb_node; 301 struct rb_node *n = root->rb_node;
310 struct ext3_reserve_window_node *rsv; 302 struct ext3_reserve_window_node *rsv;
311 303
312 if (!n) 304 if (!n)
313 return NULL; 305 return NULL;
314 306
315 do { 307 do {
316 rsv = rb_entry(n, struct ext3_reserve_window_node, rsv_node); 308 rsv = rb_entry(n, struct ext3_reserve_window_node, rsv_node);
317 309
318 if (goal < rsv->rsv_start) 310 if (goal < rsv->rsv_start)
319 n = n->rb_left; 311 n = n->rb_left;
320 else if (goal > rsv->rsv_end) 312 else if (goal > rsv->rsv_end)
321 n = n->rb_right; 313 n = n->rb_right;
322 else 314 else
323 return rsv; 315 return rsv;
324 } while (n); 316 } while (n);
325 /* 317 /*
326 * We've fallen off the end of the tree: the goal wasn't inside 318 * We've fallen off the end of the tree: the goal wasn't inside
327 * any particular node. OK, the previous node must be to one 319 * any particular node. OK, the previous node must be to one
328 * side of the interval containing the goal. If it's the RHS, 320 * side of the interval containing the goal. If it's the RHS,
329 * we need to back up one. 321 * we need to back up one.
330 */ 322 */
331 if (rsv->rsv_start > goal) { 323 if (rsv->rsv_start > goal) {
332 n = rb_prev(&rsv->rsv_node); 324 n = rb_prev(&rsv->rsv_node);
333 rsv = rb_entry(n, struct ext3_reserve_window_node, rsv_node); 325 rsv = rb_entry(n, struct ext3_reserve_window_node, rsv_node);
334 } 326 }
335 return rsv; 327 return rsv;
336 } 328 }
337 329
338 /** 330 /**
339 * ext3_rsv_window_add() -- Insert a window to the block reservation rb tree. 331 * ext3_rsv_window_add() -- Insert a window to the block reservation rb tree.
340 * @sb: super block 332 * @sb: super block
341 * @rsv: reservation window to add 333 * @rsv: reservation window to add
342 * 334 *
343 * Must be called with rsv_lock hold. 335 * Must be called with rsv_lock hold.
344 */ 336 */
345 void ext3_rsv_window_add(struct super_block *sb, 337 void ext3_rsv_window_add(struct super_block *sb,
346 struct ext3_reserve_window_node *rsv) 338 struct ext3_reserve_window_node *rsv)
347 { 339 {
348 struct rb_root *root = &EXT3_SB(sb)->s_rsv_window_root; 340 struct rb_root *root = &EXT3_SB(sb)->s_rsv_window_root;
349 struct rb_node *node = &rsv->rsv_node; 341 struct rb_node *node = &rsv->rsv_node;
350 ext3_fsblk_t start = rsv->rsv_start; 342 ext3_fsblk_t start = rsv->rsv_start;
351 343
352 struct rb_node ** p = &root->rb_node; 344 struct rb_node ** p = &root->rb_node;
353 struct rb_node * parent = NULL; 345 struct rb_node * parent = NULL;
354 struct ext3_reserve_window_node *this; 346 struct ext3_reserve_window_node *this;
355 347
356 trace_ext3_rsv_window_add(sb, rsv); 348 trace_ext3_rsv_window_add(sb, rsv);
357 while (*p) 349 while (*p)
358 { 350 {
359 parent = *p; 351 parent = *p;
360 this = rb_entry(parent, struct ext3_reserve_window_node, rsv_node); 352 this = rb_entry(parent, struct ext3_reserve_window_node, rsv_node);
361 353
362 if (start < this->rsv_start) 354 if (start < this->rsv_start)
363 p = &(*p)->rb_left; 355 p = &(*p)->rb_left;
364 else if (start > this->rsv_end) 356 else if (start > this->rsv_end)
365 p = &(*p)->rb_right; 357 p = &(*p)->rb_right;
366 else { 358 else {
367 rsv_window_dump(root, 1); 359 rsv_window_dump(root, 1);
368 BUG(); 360 BUG();
369 } 361 }
370 } 362 }
371 363
372 rb_link_node(node, parent, p); 364 rb_link_node(node, parent, p);
373 rb_insert_color(node, root); 365 rb_insert_color(node, root);
374 } 366 }
375 367
376 /** 368 /**
377 * ext3_rsv_window_remove() -- unlink a window from the reservation rb tree 369 * ext3_rsv_window_remove() -- unlink a window from the reservation rb tree
378 * @sb: super block 370 * @sb: super block
379 * @rsv: reservation window to remove 371 * @rsv: reservation window to remove
380 * 372 *
381 * Mark the block reservation window as not allocated, and unlink it 373 * Mark the block reservation window as not allocated, and unlink it
382 * from the filesystem reservation window rb tree. Must be called with 374 * from the filesystem reservation window rb tree. Must be called with
383 * rsv_lock hold. 375 * rsv_lock hold.
384 */ 376 */
385 static void rsv_window_remove(struct super_block *sb, 377 static void rsv_window_remove(struct super_block *sb,
386 struct ext3_reserve_window_node *rsv) 378 struct ext3_reserve_window_node *rsv)
387 { 379 {
388 rsv->rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; 380 rsv->rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
389 rsv->rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; 381 rsv->rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
390 rsv->rsv_alloc_hit = 0; 382 rsv->rsv_alloc_hit = 0;
391 rb_erase(&rsv->rsv_node, &EXT3_SB(sb)->s_rsv_window_root); 383 rb_erase(&rsv->rsv_node, &EXT3_SB(sb)->s_rsv_window_root);
392 } 384 }
393 385
394 /* 386 /*
395 * rsv_is_empty() -- Check if the reservation window is allocated. 387 * rsv_is_empty() -- Check if the reservation window is allocated.
396 * @rsv: given reservation window to check 388 * @rsv: given reservation window to check
397 * 389 *
398 * returns 1 if the end block is EXT3_RESERVE_WINDOW_NOT_ALLOCATED. 390 * returns 1 if the end block is EXT3_RESERVE_WINDOW_NOT_ALLOCATED.
399 */ 391 */
400 static inline int rsv_is_empty(struct ext3_reserve_window *rsv) 392 static inline int rsv_is_empty(struct ext3_reserve_window *rsv)
401 { 393 {
402 /* a valid reservation end block could not be 0 */ 394 /* a valid reservation end block could not be 0 */
403 return rsv->_rsv_end == EXT3_RESERVE_WINDOW_NOT_ALLOCATED; 395 return rsv->_rsv_end == EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
404 } 396 }
405 397
406 /** 398 /**
407 * ext3_init_block_alloc_info() 399 * ext3_init_block_alloc_info()
408 * @inode: file inode structure 400 * @inode: file inode structure
409 * 401 *
410 * Allocate and initialize the reservation window structure, and 402 * Allocate and initialize the reservation window structure, and
411 * link the window to the ext3 inode structure at last 403 * link the window to the ext3 inode structure at last
412 * 404 *
413 * The reservation window structure is only dynamically allocated 405 * The reservation window structure is only dynamically allocated
414 * and linked to ext3 inode the first time the open file 406 * and linked to ext3 inode the first time the open file
415 * needs a new block. So, before every ext3_new_block(s) call, for 407 * needs a new block. So, before every ext3_new_block(s) call, for
416 * regular files, we should check whether the reservation window 408 * regular files, we should check whether the reservation window
417 * structure exists or not. In the latter case, this function is called. 409 * structure exists or not. In the latter case, this function is called.
418 * Fail to do so will result in block reservation being turned off for that 410 * Fail to do so will result in block reservation being turned off for that
419 * open file. 411 * open file.
420 * 412 *
421 * This function is called from ext3_get_blocks_handle(), also called 413 * This function is called from ext3_get_blocks_handle(), also called
422 * when setting the reservation window size through ioctl before the file 414 * when setting the reservation window size through ioctl before the file
423 * is open for write (needs block allocation). 415 * is open for write (needs block allocation).
424 * 416 *
425 * Needs truncate_mutex protection prior to call this function. 417 * Needs truncate_mutex protection prior to call this function.
426 */ 418 */
427 void ext3_init_block_alloc_info(struct inode *inode) 419 void ext3_init_block_alloc_info(struct inode *inode)
428 { 420 {
429 struct ext3_inode_info *ei = EXT3_I(inode); 421 struct ext3_inode_info *ei = EXT3_I(inode);
430 struct ext3_block_alloc_info *block_i; 422 struct ext3_block_alloc_info *block_i;
431 struct super_block *sb = inode->i_sb; 423 struct super_block *sb = inode->i_sb;
432 424
433 block_i = kmalloc(sizeof(*block_i), GFP_NOFS); 425 block_i = kmalloc(sizeof(*block_i), GFP_NOFS);
434 if (block_i) { 426 if (block_i) {
435 struct ext3_reserve_window_node *rsv = &block_i->rsv_window_node; 427 struct ext3_reserve_window_node *rsv = &block_i->rsv_window_node;
436 428
437 rsv->rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; 429 rsv->rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
438 rsv->rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; 430 rsv->rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
439 431
440 /* 432 /*
441 * if filesystem is mounted with NORESERVATION, the goal 433 * if filesystem is mounted with NORESERVATION, the goal
442 * reservation window size is set to zero to indicate 434 * reservation window size is set to zero to indicate
443 * block reservation is off 435 * block reservation is off
444 */ 436 */
445 if (!test_opt(sb, RESERVATION)) 437 if (!test_opt(sb, RESERVATION))
446 rsv->rsv_goal_size = 0; 438 rsv->rsv_goal_size = 0;
447 else 439 else
448 rsv->rsv_goal_size = EXT3_DEFAULT_RESERVE_BLOCKS; 440 rsv->rsv_goal_size = EXT3_DEFAULT_RESERVE_BLOCKS;
449 rsv->rsv_alloc_hit = 0; 441 rsv->rsv_alloc_hit = 0;
450 block_i->last_alloc_logical_block = 0; 442 block_i->last_alloc_logical_block = 0;
451 block_i->last_alloc_physical_block = 0; 443 block_i->last_alloc_physical_block = 0;
452 } 444 }
453 ei->i_block_alloc_info = block_i; 445 ei->i_block_alloc_info = block_i;
454 } 446 }
455 447
456 /** 448 /**
457 * ext3_discard_reservation() 449 * ext3_discard_reservation()
458 * @inode: inode 450 * @inode: inode
459 * 451 *
460 * Discard(free) block reservation window on last file close, or truncate 452 * Discard(free) block reservation window on last file close, or truncate
461 * or at last iput(). 453 * or at last iput().
462 * 454 *
463 * It is being called in three cases: 455 * It is being called in three cases:
464 * ext3_release_file(): last writer close the file 456 * ext3_release_file(): last writer close the file
465 * ext3_clear_inode(): last iput(), when nobody link to this file. 457 * ext3_clear_inode(): last iput(), when nobody link to this file.
466 * ext3_truncate(): when the block indirect map is about to change. 458 * ext3_truncate(): when the block indirect map is about to change.
467 * 459 *
468 */ 460 */
469 void ext3_discard_reservation(struct inode *inode) 461 void ext3_discard_reservation(struct inode *inode)
470 { 462 {
471 struct ext3_inode_info *ei = EXT3_I(inode); 463 struct ext3_inode_info *ei = EXT3_I(inode);
472 struct ext3_block_alloc_info *block_i = ei->i_block_alloc_info; 464 struct ext3_block_alloc_info *block_i = ei->i_block_alloc_info;
473 struct ext3_reserve_window_node *rsv; 465 struct ext3_reserve_window_node *rsv;
474 spinlock_t *rsv_lock = &EXT3_SB(inode->i_sb)->s_rsv_window_lock; 466 spinlock_t *rsv_lock = &EXT3_SB(inode->i_sb)->s_rsv_window_lock;
475 467
476 if (!block_i) 468 if (!block_i)
477 return; 469 return;
478 470
479 rsv = &block_i->rsv_window_node; 471 rsv = &block_i->rsv_window_node;
480 if (!rsv_is_empty(&rsv->rsv_window)) { 472 if (!rsv_is_empty(&rsv->rsv_window)) {
481 spin_lock(rsv_lock); 473 spin_lock(rsv_lock);
482 if (!rsv_is_empty(&rsv->rsv_window)) { 474 if (!rsv_is_empty(&rsv->rsv_window)) {
483 trace_ext3_discard_reservation(inode, rsv); 475 trace_ext3_discard_reservation(inode, rsv);
484 rsv_window_remove(inode->i_sb, rsv); 476 rsv_window_remove(inode->i_sb, rsv);
485 } 477 }
486 spin_unlock(rsv_lock); 478 spin_unlock(rsv_lock);
487 } 479 }
488 } 480 }
489 481
490 /** 482 /**
491 * ext3_free_blocks_sb() -- Free given blocks and update quota 483 * ext3_free_blocks_sb() -- Free given blocks and update quota
492 * @handle: handle to this transaction 484 * @handle: handle to this transaction
493 * @sb: super block 485 * @sb: super block
494 * @block: start physcial block to free 486 * @block: start physcial block to free
495 * @count: number of blocks to free 487 * @count: number of blocks to free
496 * @pdquot_freed_blocks: pointer to quota 488 * @pdquot_freed_blocks: pointer to quota
497 */ 489 */
498 void ext3_free_blocks_sb(handle_t *handle, struct super_block *sb, 490 void ext3_free_blocks_sb(handle_t *handle, struct super_block *sb,
499 ext3_fsblk_t block, unsigned long count, 491 ext3_fsblk_t block, unsigned long count,
500 unsigned long *pdquot_freed_blocks) 492 unsigned long *pdquot_freed_blocks)
501 { 493 {
502 struct buffer_head *bitmap_bh = NULL; 494 struct buffer_head *bitmap_bh = NULL;
503 struct buffer_head *gd_bh; 495 struct buffer_head *gd_bh;
504 unsigned long block_group; 496 unsigned long block_group;
505 ext3_grpblk_t bit; 497 ext3_grpblk_t bit;
506 unsigned long i; 498 unsigned long i;
507 unsigned long overflow; 499 unsigned long overflow;
508 struct ext3_group_desc * desc; 500 struct ext3_group_desc * desc;
509 struct ext3_super_block * es; 501 struct ext3_super_block * es;
510 struct ext3_sb_info *sbi; 502 struct ext3_sb_info *sbi;
511 int err = 0, ret; 503 int err = 0, ret;
512 ext3_grpblk_t group_freed; 504 ext3_grpblk_t group_freed;
513 505
514 *pdquot_freed_blocks = 0; 506 *pdquot_freed_blocks = 0;
515 sbi = EXT3_SB(sb); 507 sbi = EXT3_SB(sb);
516 es = sbi->s_es; 508 es = sbi->s_es;
517 if (block < le32_to_cpu(es->s_first_data_block) || 509 if (block < le32_to_cpu(es->s_first_data_block) ||
518 block + count < block || 510 block + count < block ||
519 block + count > le32_to_cpu(es->s_blocks_count)) { 511 block + count > le32_to_cpu(es->s_blocks_count)) {
520 ext3_error (sb, "ext3_free_blocks", 512 ext3_error (sb, "ext3_free_blocks",
521 "Freeing blocks not in datazone - " 513 "Freeing blocks not in datazone - "
522 "block = "E3FSBLK", count = %lu", block, count); 514 "block = "E3FSBLK", count = %lu", block, count);
523 goto error_return; 515 goto error_return;
524 } 516 }
525 517
526 ext3_debug ("freeing block(s) %lu-%lu\n", block, block + count - 1); 518 ext3_debug ("freeing block(s) %lu-%lu\n", block, block + count - 1);
527 519
528 do_more: 520 do_more:
529 overflow = 0; 521 overflow = 0;
530 block_group = (block - le32_to_cpu(es->s_first_data_block)) / 522 block_group = (block - le32_to_cpu(es->s_first_data_block)) /
531 EXT3_BLOCKS_PER_GROUP(sb); 523 EXT3_BLOCKS_PER_GROUP(sb);
532 bit = (block - le32_to_cpu(es->s_first_data_block)) % 524 bit = (block - le32_to_cpu(es->s_first_data_block)) %
533 EXT3_BLOCKS_PER_GROUP(sb); 525 EXT3_BLOCKS_PER_GROUP(sb);
534 /* 526 /*
535 * Check to see if we are freeing blocks across a group 527 * Check to see if we are freeing blocks across a group
536 * boundary. 528 * boundary.
537 */ 529 */
538 if (bit + count > EXT3_BLOCKS_PER_GROUP(sb)) { 530 if (bit + count > EXT3_BLOCKS_PER_GROUP(sb)) {
539 overflow = bit + count - EXT3_BLOCKS_PER_GROUP(sb); 531 overflow = bit + count - EXT3_BLOCKS_PER_GROUP(sb);
540 count -= overflow; 532 count -= overflow;
541 } 533 }
542 brelse(bitmap_bh); 534 brelse(bitmap_bh);
543 bitmap_bh = read_block_bitmap(sb, block_group); 535 bitmap_bh = read_block_bitmap(sb, block_group);
544 if (!bitmap_bh) 536 if (!bitmap_bh)
545 goto error_return; 537 goto error_return;
546 desc = ext3_get_group_desc (sb, block_group, &gd_bh); 538 desc = ext3_get_group_desc (sb, block_group, &gd_bh);
547 if (!desc) 539 if (!desc)
548 goto error_return; 540 goto error_return;
549 541
550 if (in_range (le32_to_cpu(desc->bg_block_bitmap), block, count) || 542 if (in_range (le32_to_cpu(desc->bg_block_bitmap), block, count) ||
551 in_range (le32_to_cpu(desc->bg_inode_bitmap), block, count) || 543 in_range (le32_to_cpu(desc->bg_inode_bitmap), block, count) ||
552 in_range (block, le32_to_cpu(desc->bg_inode_table), 544 in_range (block, le32_to_cpu(desc->bg_inode_table),
553 sbi->s_itb_per_group) || 545 sbi->s_itb_per_group) ||
554 in_range (block + count - 1, le32_to_cpu(desc->bg_inode_table), 546 in_range (block + count - 1, le32_to_cpu(desc->bg_inode_table),
555 sbi->s_itb_per_group)) { 547 sbi->s_itb_per_group)) {
556 ext3_error (sb, "ext3_free_blocks", 548 ext3_error (sb, "ext3_free_blocks",
557 "Freeing blocks in system zones - " 549 "Freeing blocks in system zones - "
558 "Block = "E3FSBLK", count = %lu", 550 "Block = "E3FSBLK", count = %lu",
559 block, count); 551 block, count);
560 goto error_return; 552 goto error_return;
561 } 553 }
562 554
563 /* 555 /*
564 * We are about to start releasing blocks in the bitmap, 556 * We are about to start releasing blocks in the bitmap,
565 * so we need undo access. 557 * so we need undo access.
566 */ 558 */
567 /* @@@ check errors */ 559 /* @@@ check errors */
568 BUFFER_TRACE(bitmap_bh, "getting undo access"); 560 BUFFER_TRACE(bitmap_bh, "getting undo access");
569 err = ext3_journal_get_undo_access(handle, bitmap_bh); 561 err = ext3_journal_get_undo_access(handle, bitmap_bh);
570 if (err) 562 if (err)
571 goto error_return; 563 goto error_return;
572 564
573 /* 565 /*
574 * We are about to modify some metadata. Call the journal APIs 566 * We are about to modify some metadata. Call the journal APIs
575 * to unshare ->b_data if a currently-committing transaction is 567 * to unshare ->b_data if a currently-committing transaction is
576 * using it 568 * using it
577 */ 569 */
578 BUFFER_TRACE(gd_bh, "get_write_access"); 570 BUFFER_TRACE(gd_bh, "get_write_access");
579 err = ext3_journal_get_write_access(handle, gd_bh); 571 err = ext3_journal_get_write_access(handle, gd_bh);
580 if (err) 572 if (err)
581 goto error_return; 573 goto error_return;
582 574
583 jbd_lock_bh_state(bitmap_bh); 575 jbd_lock_bh_state(bitmap_bh);
584 576
585 for (i = 0, group_freed = 0; i < count; i++) { 577 for (i = 0, group_freed = 0; i < count; i++) {
586 /* 578 /*
587 * An HJ special. This is expensive... 579 * An HJ special. This is expensive...
588 */ 580 */
589 #ifdef CONFIG_JBD_DEBUG 581 #ifdef CONFIG_JBD_DEBUG
590 jbd_unlock_bh_state(bitmap_bh); 582 jbd_unlock_bh_state(bitmap_bh);
591 { 583 {
592 struct buffer_head *debug_bh; 584 struct buffer_head *debug_bh;
593 debug_bh = sb_find_get_block(sb, block + i); 585 debug_bh = sb_find_get_block(sb, block + i);
594 if (debug_bh) { 586 if (debug_bh) {
595 BUFFER_TRACE(debug_bh, "Deleted!"); 587 BUFFER_TRACE(debug_bh, "Deleted!");
596 if (!bh2jh(bitmap_bh)->b_committed_data) 588 if (!bh2jh(bitmap_bh)->b_committed_data)
597 BUFFER_TRACE(debug_bh, 589 BUFFER_TRACE(debug_bh,
598 "No committed data in bitmap"); 590 "No committed data in bitmap");
599 BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap"); 591 BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap");
600 __brelse(debug_bh); 592 __brelse(debug_bh);
601 } 593 }
602 } 594 }
603 jbd_lock_bh_state(bitmap_bh); 595 jbd_lock_bh_state(bitmap_bh);
604 #endif 596 #endif
605 if (need_resched()) { 597 if (need_resched()) {
606 jbd_unlock_bh_state(bitmap_bh); 598 jbd_unlock_bh_state(bitmap_bh);
607 cond_resched(); 599 cond_resched();
608 jbd_lock_bh_state(bitmap_bh); 600 jbd_lock_bh_state(bitmap_bh);
609 } 601 }
610 /* @@@ This prevents newly-allocated data from being 602 /* @@@ This prevents newly-allocated data from being
611 * freed and then reallocated within the same 603 * freed and then reallocated within the same
612 * transaction. 604 * transaction.
613 * 605 *
614 * Ideally we would want to allow that to happen, but to 606 * Ideally we would want to allow that to happen, but to
615 * do so requires making journal_forget() capable of 607 * do so requires making journal_forget() capable of
616 * revoking the queued write of a data block, which 608 * revoking the queued write of a data block, which
617 * implies blocking on the journal lock. *forget() 609 * implies blocking on the journal lock. *forget()
618 * cannot block due to truncate races. 610 * cannot block due to truncate races.
619 * 611 *
620 * Eventually we can fix this by making journal_forget() 612 * Eventually we can fix this by making journal_forget()
621 * return a status indicating whether or not it was able 613 * return a status indicating whether or not it was able
622 * to revoke the buffer. On successful revoke, it is 614 * to revoke the buffer. On successful revoke, it is
623 * safe not to set the allocation bit in the committed 615 * safe not to set the allocation bit in the committed
624 * bitmap, because we know that there is no outstanding 616 * bitmap, because we know that there is no outstanding
625 * activity on the buffer any more and so it is safe to 617 * activity on the buffer any more and so it is safe to
626 * reallocate it. 618 * reallocate it.
627 */ 619 */
628 BUFFER_TRACE(bitmap_bh, "set in b_committed_data"); 620 BUFFER_TRACE(bitmap_bh, "set in b_committed_data");
629 J_ASSERT_BH(bitmap_bh, 621 J_ASSERT_BH(bitmap_bh,
630 bh2jh(bitmap_bh)->b_committed_data != NULL); 622 bh2jh(bitmap_bh)->b_committed_data != NULL);
631 ext3_set_bit_atomic(sb_bgl_lock(sbi, block_group), bit + i, 623 ext3_set_bit_atomic(sb_bgl_lock(sbi, block_group), bit + i,
632 bh2jh(bitmap_bh)->b_committed_data); 624 bh2jh(bitmap_bh)->b_committed_data);
633 625
634 /* 626 /*
635 * We clear the bit in the bitmap after setting the committed 627 * We clear the bit in the bitmap after setting the committed
636 * data bit, because this is the reverse order to that which 628 * data bit, because this is the reverse order to that which
637 * the allocator uses. 629 * the allocator uses.
638 */ 630 */
639 BUFFER_TRACE(bitmap_bh, "clear bit"); 631 BUFFER_TRACE(bitmap_bh, "clear bit");
640 if (!ext3_clear_bit_atomic(sb_bgl_lock(sbi, block_group), 632 if (!ext3_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
641 bit + i, bitmap_bh->b_data)) { 633 bit + i, bitmap_bh->b_data)) {
642 jbd_unlock_bh_state(bitmap_bh); 634 jbd_unlock_bh_state(bitmap_bh);
643 ext3_error(sb, __func__, 635 ext3_error(sb, __func__,
644 "bit already cleared for block "E3FSBLK, 636 "bit already cleared for block "E3FSBLK,
645 block + i); 637 block + i);
646 jbd_lock_bh_state(bitmap_bh); 638 jbd_lock_bh_state(bitmap_bh);
647 BUFFER_TRACE(bitmap_bh, "bit already cleared"); 639 BUFFER_TRACE(bitmap_bh, "bit already cleared");
648 } else { 640 } else {
649 group_freed++; 641 group_freed++;
650 } 642 }
651 } 643 }
652 jbd_unlock_bh_state(bitmap_bh); 644 jbd_unlock_bh_state(bitmap_bh);
653 645
654 spin_lock(sb_bgl_lock(sbi, block_group)); 646 spin_lock(sb_bgl_lock(sbi, block_group));
655 le16_add_cpu(&desc->bg_free_blocks_count, group_freed); 647 le16_add_cpu(&desc->bg_free_blocks_count, group_freed);
656 spin_unlock(sb_bgl_lock(sbi, block_group)); 648 spin_unlock(sb_bgl_lock(sbi, block_group));
657 percpu_counter_add(&sbi->s_freeblocks_counter, count); 649 percpu_counter_add(&sbi->s_freeblocks_counter, count);
658 650
659 /* We dirtied the bitmap block */ 651 /* We dirtied the bitmap block */
660 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); 652 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
661 err = ext3_journal_dirty_metadata(handle, bitmap_bh); 653 err = ext3_journal_dirty_metadata(handle, bitmap_bh);
662 654
663 /* And the group descriptor block */ 655 /* And the group descriptor block */
664 BUFFER_TRACE(gd_bh, "dirtied group descriptor block"); 656 BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
665 ret = ext3_journal_dirty_metadata(handle, gd_bh); 657 ret = ext3_journal_dirty_metadata(handle, gd_bh);
666 if (!err) err = ret; 658 if (!err) err = ret;
667 *pdquot_freed_blocks += group_freed; 659 *pdquot_freed_blocks += group_freed;
668 660
669 if (overflow && !err) { 661 if (overflow && !err) {
670 block += count; 662 block += count;
671 count = overflow; 663 count = overflow;
672 goto do_more; 664 goto do_more;
673 } 665 }
674 666
675 error_return: 667 error_return:
676 brelse(bitmap_bh); 668 brelse(bitmap_bh);
677 ext3_std_error(sb, err); 669 ext3_std_error(sb, err);
678 return; 670 return;
679 } 671 }
680 672
681 /** 673 /**
682 * ext3_free_blocks() -- Free given blocks and update quota 674 * ext3_free_blocks() -- Free given blocks and update quota
683 * @handle: handle for this transaction 675 * @handle: handle for this transaction
684 * @inode: inode 676 * @inode: inode
685 * @block: start physical block to free 677 * @block: start physical block to free
686 * @count: number of blocks to count 678 * @count: number of blocks to count
687 */ 679 */
688 void ext3_free_blocks(handle_t *handle, struct inode *inode, 680 void ext3_free_blocks(handle_t *handle, struct inode *inode,
689 ext3_fsblk_t block, unsigned long count) 681 ext3_fsblk_t block, unsigned long count)
690 { 682 {
691 struct super_block *sb = inode->i_sb; 683 struct super_block *sb = inode->i_sb;
692 unsigned long dquot_freed_blocks; 684 unsigned long dquot_freed_blocks;
693 685
694 trace_ext3_free_blocks(inode, block, count); 686 trace_ext3_free_blocks(inode, block, count);
695 ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks); 687 ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
696 if (dquot_freed_blocks) 688 if (dquot_freed_blocks)
697 dquot_free_block(inode, dquot_freed_blocks); 689 dquot_free_block(inode, dquot_freed_blocks);
698 return; 690 return;
699 } 691 }
700 692
701 /** 693 /**
702 * ext3_test_allocatable() 694 * ext3_test_allocatable()
703 * @nr: given allocation block group 695 * @nr: given allocation block group
704 * @bh: bufferhead contains the bitmap of the given block group 696 * @bh: bufferhead contains the bitmap of the given block group
705 * 697 *
706 * For ext3 allocations, we must not reuse any blocks which are 698 * For ext3 allocations, we must not reuse any blocks which are
707 * allocated in the bitmap buffer's "last committed data" copy. This 699 * allocated in the bitmap buffer's "last committed data" copy. This
708 * prevents deletes from freeing up the page for reuse until we have 700 * prevents deletes from freeing up the page for reuse until we have
709 * committed the delete transaction. 701 * committed the delete transaction.
710 * 702 *
711 * If we didn't do this, then deleting something and reallocating it as 703 * If we didn't do this, then deleting something and reallocating it as
712 * data would allow the old block to be overwritten before the 704 * data would allow the old block to be overwritten before the
713 * transaction committed (because we force data to disk before commit). 705 * transaction committed (because we force data to disk before commit).
714 * This would lead to corruption if we crashed between overwriting the 706 * This would lead to corruption if we crashed between overwriting the
715 * data and committing the delete. 707 * data and committing the delete.
716 * 708 *
717 * @@@ We may want to make this allocation behaviour conditional on 709 * @@@ We may want to make this allocation behaviour conditional on
718 * data-writes at some point, and disable it for metadata allocations or 710 * data-writes at some point, and disable it for metadata allocations or
719 * sync-data inodes. 711 * sync-data inodes.
720 */ 712 */
721 static int ext3_test_allocatable(ext3_grpblk_t nr, struct buffer_head *bh) 713 static int ext3_test_allocatable(ext3_grpblk_t nr, struct buffer_head *bh)
722 { 714 {
723 int ret; 715 int ret;
724 struct journal_head *jh = bh2jh(bh); 716 struct journal_head *jh = bh2jh(bh);
725 717
726 if (ext3_test_bit(nr, bh->b_data)) 718 if (ext3_test_bit(nr, bh->b_data))
727 return 0; 719 return 0;
728 720
729 jbd_lock_bh_state(bh); 721 jbd_lock_bh_state(bh);
730 if (!jh->b_committed_data) 722 if (!jh->b_committed_data)
731 ret = 1; 723 ret = 1;
732 else 724 else
733 ret = !ext3_test_bit(nr, jh->b_committed_data); 725 ret = !ext3_test_bit(nr, jh->b_committed_data);
734 jbd_unlock_bh_state(bh); 726 jbd_unlock_bh_state(bh);
735 return ret; 727 return ret;
736 } 728 }
737 729
738 /** 730 /**
739 * bitmap_search_next_usable_block() 731 * bitmap_search_next_usable_block()
740 * @start: the starting block (group relative) of the search 732 * @start: the starting block (group relative) of the search
741 * @bh: bufferhead contains the block group bitmap 733 * @bh: bufferhead contains the block group bitmap
742 * @maxblocks: the ending block (group relative) of the reservation 734 * @maxblocks: the ending block (group relative) of the reservation
743 * 735 *
744 * The bitmap search --- search forward alternately through the actual 736 * The bitmap search --- search forward alternately through the actual
745 * bitmap on disk and the last-committed copy in journal, until we find a 737 * bitmap on disk and the last-committed copy in journal, until we find a
746 * bit free in both bitmaps. 738 * bit free in both bitmaps.
747 */ 739 */
748 static ext3_grpblk_t 740 static ext3_grpblk_t
749 bitmap_search_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh, 741 bitmap_search_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh,
750 ext3_grpblk_t maxblocks) 742 ext3_grpblk_t maxblocks)
751 { 743 {
752 ext3_grpblk_t next; 744 ext3_grpblk_t next;
753 struct journal_head *jh = bh2jh(bh); 745 struct journal_head *jh = bh2jh(bh);
754 746
755 while (start < maxblocks) { 747 while (start < maxblocks) {
756 next = ext3_find_next_zero_bit(bh->b_data, maxblocks, start); 748 next = ext3_find_next_zero_bit(bh->b_data, maxblocks, start);
757 if (next >= maxblocks) 749 if (next >= maxblocks)
758 return -1; 750 return -1;
759 if (ext3_test_allocatable(next, bh)) 751 if (ext3_test_allocatable(next, bh))
760 return next; 752 return next;
761 jbd_lock_bh_state(bh); 753 jbd_lock_bh_state(bh);
762 if (jh->b_committed_data) 754 if (jh->b_committed_data)
763 start = ext3_find_next_zero_bit(jh->b_committed_data, 755 start = ext3_find_next_zero_bit(jh->b_committed_data,
764 maxblocks, next); 756 maxblocks, next);
765 jbd_unlock_bh_state(bh); 757 jbd_unlock_bh_state(bh);
766 } 758 }
767 return -1; 759 return -1;
768 } 760 }
769 761
770 /** 762 /**
771 * find_next_usable_block() 763 * find_next_usable_block()
772 * @start: the starting block (group relative) to find next 764 * @start: the starting block (group relative) to find next
773 * allocatable block in bitmap. 765 * allocatable block in bitmap.
774 * @bh: bufferhead contains the block group bitmap 766 * @bh: bufferhead contains the block group bitmap
775 * @maxblocks: the ending block (group relative) for the search 767 * @maxblocks: the ending block (group relative) for the search
776 * 768 *
777 * Find an allocatable block in a bitmap. We honor both the bitmap and 769 * Find an allocatable block in a bitmap. We honor both the bitmap and
778 * its last-committed copy (if that exists), and perform the "most 770 * its last-committed copy (if that exists), and perform the "most
779 * appropriate allocation" algorithm of looking for a free block near 771 * appropriate allocation" algorithm of looking for a free block near
780 * the initial goal; then for a free byte somewhere in the bitmap; then 772 * the initial goal; then for a free byte somewhere in the bitmap; then
781 * for any free bit in the bitmap. 773 * for any free bit in the bitmap.
782 */ 774 */
783 static ext3_grpblk_t 775 static ext3_grpblk_t
784 find_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh, 776 find_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh,
785 ext3_grpblk_t maxblocks) 777 ext3_grpblk_t maxblocks)
786 { 778 {
787 ext3_grpblk_t here, next; 779 ext3_grpblk_t here, next;
788 char *p, *r; 780 char *p, *r;
789 781
790 if (start > 0) { 782 if (start > 0) {
791 /* 783 /*
792 * The goal was occupied; search forward for a free 784 * The goal was occupied; search forward for a free
793 * block within the next XX blocks. 785 * block within the next XX blocks.
794 * 786 *
795 * end_goal is more or less random, but it has to be 787 * end_goal is more or less random, but it has to be
796 * less than EXT3_BLOCKS_PER_GROUP. Aligning up to the 788 * less than EXT3_BLOCKS_PER_GROUP. Aligning up to the
797 * next 64-bit boundary is simple.. 789 * next 64-bit boundary is simple..
798 */ 790 */
799 ext3_grpblk_t end_goal = (start + 63) & ~63; 791 ext3_grpblk_t end_goal = (start + 63) & ~63;
800 if (end_goal > maxblocks) 792 if (end_goal > maxblocks)
801 end_goal = maxblocks; 793 end_goal = maxblocks;
802 here = ext3_find_next_zero_bit(bh->b_data, end_goal, start); 794 here = ext3_find_next_zero_bit(bh->b_data, end_goal, start);
803 if (here < end_goal && ext3_test_allocatable(here, bh)) 795 if (here < end_goal && ext3_test_allocatable(here, bh))
804 return here; 796 return here;
805 ext3_debug("Bit not found near goal\n"); 797 ext3_debug("Bit not found near goal\n");
806 } 798 }
807 799
808 here = start; 800 here = start;
809 if (here < 0) 801 if (here < 0)
810 here = 0; 802 here = 0;
811 803
812 p = bh->b_data + (here >> 3); 804 p = bh->b_data + (here >> 3);
813 r = memscan(p, 0, ((maxblocks + 7) >> 3) - (here >> 3)); 805 r = memscan(p, 0, ((maxblocks + 7) >> 3) - (here >> 3));
814 next = (r - bh->b_data) << 3; 806 next = (r - bh->b_data) << 3;
815 807
816 if (next < maxblocks && next >= start && ext3_test_allocatable(next, bh)) 808 if (next < maxblocks && next >= start && ext3_test_allocatable(next, bh))
817 return next; 809 return next;
818 810
819 /* 811 /*
820 * The bitmap search --- search forward alternately through the actual 812 * The bitmap search --- search forward alternately through the actual
821 * bitmap and the last-committed copy until we find a bit free in 813 * bitmap and the last-committed copy until we find a bit free in
822 * both 814 * both
823 */ 815 */
824 here = bitmap_search_next_usable_block(here, bh, maxblocks); 816 here = bitmap_search_next_usable_block(here, bh, maxblocks);
825 return here; 817 return here;
826 } 818 }
827 819
828 /** 820 /**
829 * claim_block() 821 * claim_block()
830 * @lock: the spin lock for this block group 822 * @lock: the spin lock for this block group
831 * @block: the free block (group relative) to allocate 823 * @block: the free block (group relative) to allocate
832 * @bh: the buffer_head contains the block group bitmap 824 * @bh: the buffer_head contains the block group bitmap
833 * 825 *
834 * We think we can allocate this block in this bitmap. Try to set the bit. 826 * We think we can allocate this block in this bitmap. Try to set the bit.
835 * If that succeeds then check that nobody has allocated and then freed the 827 * If that succeeds then check that nobody has allocated and then freed the
836 * block since we saw that is was not marked in b_committed_data. If it _was_ 828 * block since we saw that is was not marked in b_committed_data. If it _was_
837 * allocated and freed then clear the bit in the bitmap again and return 829 * allocated and freed then clear the bit in the bitmap again and return
838 * zero (failure). 830 * zero (failure).
839 */ 831 */
840 static inline int 832 static inline int
841 claim_block(spinlock_t *lock, ext3_grpblk_t block, struct buffer_head *bh) 833 claim_block(spinlock_t *lock, ext3_grpblk_t block, struct buffer_head *bh)
842 { 834 {
843 struct journal_head *jh = bh2jh(bh); 835 struct journal_head *jh = bh2jh(bh);
844 int ret; 836 int ret;
845 837
846 if (ext3_set_bit_atomic(lock, block, bh->b_data)) 838 if (ext3_set_bit_atomic(lock, block, bh->b_data))
847 return 0; 839 return 0;
848 jbd_lock_bh_state(bh); 840 jbd_lock_bh_state(bh);
849 if (jh->b_committed_data && ext3_test_bit(block,jh->b_committed_data)) { 841 if (jh->b_committed_data && ext3_test_bit(block,jh->b_committed_data)) {
850 ext3_clear_bit_atomic(lock, block, bh->b_data); 842 ext3_clear_bit_atomic(lock, block, bh->b_data);
851 ret = 0; 843 ret = 0;
852 } else { 844 } else {
853 ret = 1; 845 ret = 1;
854 } 846 }
855 jbd_unlock_bh_state(bh); 847 jbd_unlock_bh_state(bh);
856 return ret; 848 return ret;
857 } 849 }
858 850
/**
 * ext3_try_to_allocate()
 * @sb:			superblock
 * @handle:		handle to this transaction
 * @group:		given allocation block group
 * @bitmap_bh:		bufferhead holds the block bitmap
 * @grp_goal:		given target block within the group
 * @count:		target number of blocks to allocate; updated on
 *			return to the number actually allocated
 * @my_rsv:		reservation window
 *
 * Attempt to allocate blocks within a give range. Set the range of allocation
 * first, then find the first free bit(s) from the bitmap (within the range),
 * and at last, allocate the blocks by claiming the found free bit as allocated.
 *
 * To set the range of this allocation:
 *	if there is a reservation window, only try to allocate block(s) from the
 *	file's own reservation window;
 *	Otherwise, the allocation range starts from the give goal block, ends at
 *	the block group's last block.
 *
 * If we failed to allocate the desired block then we may end up crossing to a
 * new bitmap.  In that case we must release write access to the old one via
 * ext3_journal_release_buffer(), else we'll run out of credits.
 *
 * Returns the group-relative block number of the first allocated block,
 * or -1 on failure (with *count set to the blocks claimed so far).
 */
static ext3_grpblk_t
ext3_try_to_allocate(struct super_block *sb, handle_t *handle, int group,
			struct buffer_head *bitmap_bh, ext3_grpblk_t grp_goal,
			unsigned long *count, struct ext3_reserve_window *my_rsv)
{
	ext3_fsblk_t group_first_block;
	ext3_grpblk_t start, end;
	unsigned long num = 0;

	/* we do allocation within the reservation window if we have a window */
	if (my_rsv) {
		group_first_block = ext3_group_first_block_no(sb, group);
		if (my_rsv->_rsv_start >= group_first_block)
			start = my_rsv->_rsv_start - group_first_block;
		else
			/* reservation window cross group boundary */
			start = 0;
		end = my_rsv->_rsv_end - group_first_block + 1;
		if (end > EXT3_BLOCKS_PER_GROUP(sb))
			/* reservation window crosses group boundary */
			end = EXT3_BLOCKS_PER_GROUP(sb);
		/* honour the goal only if it falls inside the window */
		if ((start <= grp_goal) && (grp_goal < end))
			start = grp_goal;
		else
			grp_goal = -1;
	} else {
		if (grp_goal > 0)
			start = grp_goal;
		else
			start = 0;
		end = EXT3_BLOCKS_PER_GROUP(sb);
	}

	BUG_ON(start > EXT3_BLOCKS_PER_GROUP(sb));

repeat:
	if (grp_goal < 0 || !ext3_test_allocatable(grp_goal, bitmap_bh)) {
		grp_goal = find_next_usable_block(start, bitmap_bh, end);
		if (grp_goal < 0)
			goto fail_access;
		if (!my_rsv) {
			int i;

			/*
			 * No reservation window: back up over up to 7
			 * preceding free blocks so the allocation
			 * begins at the start of a free run.
			 */
			for (i = 0; i < 7 && grp_goal > start &&
					ext3_test_allocatable(grp_goal - 1,
								bitmap_bh);
					i++, grp_goal--)
				;
		}
	}
	start = grp_goal;

	if (!claim_block(sb_bgl_lock(EXT3_SB(sb), group),
		grp_goal, bitmap_bh)) {
		/*
		 * The block was allocated by another thread, or it was
		 * allocated and then freed by another thread
		 */
		start++;
		grp_goal++;
		if (start >= end)
			goto fail_access;
		goto repeat;
	}
	num++;
	grp_goal++;
	/* greedily claim following free blocks, up to *count in total */
	while (num < *count && grp_goal < end
		&& ext3_test_allocatable(grp_goal, bitmap_bh)
		&& claim_block(sb_bgl_lock(EXT3_SB(sb), group),
				grp_goal, bitmap_bh)) {
		num++;
		grp_goal++;
	}
	*count = num;
	return grp_goal - num;
fail_access:
	*count = num;
	return -1;
}
962 954
/**
 * find_next_reservable_window():
 *	find a reservable space within the given range.
 *	It does not allocate the reservation window for now:
 *	alloc_new_reservation() will do the work later.
 *
 *	@search_head: the head of the searching list;
 *		This is not necessarily the list head of the whole filesystem
 *
 *		We have both head and start_block to assist the search
 *		for the reservable space. The list starts from head,
 *		but we will shift to the place where start_block is,
 *		then start from there, when looking for a reservable space.
 *
 *	@my_rsv: the reservation window
 *
 *	@sb: the super block
 *
 *	@start_block: the first block we consider to start
 *			the real search from
 *
 *	@last_block:
 *		the maximum block number that our goal reservable space
 *		could start from. This is normally the last block in this
 *		group. The search will end when we found the start of next
 *		possible reservable space is out of this boundary.
 *		This could handle the cross boundary reservation window
 *		request.
 *
 *	basically we search from the given range, rather than the whole
 *	reservation double linked list, (start_block, last_block)
 *	to find a free region that is of my size and has not
 *	been reserved.
 *
 *	Returns 0 on success (my_rsv updated and linked into the tree),
 *	-1 on failure.
 */
static int find_next_reservable_window(
				struct ext3_reserve_window_node *search_head,
				struct ext3_reserve_window_node *my_rsv,
				struct super_block * sb,
				ext3_fsblk_t start_block,
				ext3_fsblk_t last_block)
{
	struct rb_node *next;
	struct ext3_reserve_window_node *rsv, *prev;
	ext3_fsblk_t cur;
	int size = my_rsv->rsv_goal_size;

	/* TODO: make the start of the reservation window byte-aligned */
	/* cur = *start_block & ~7;*/
	cur = start_block;
	rsv = search_head;
	if (!rsv)
		return -1;

	/* walk the per-fs rb-tree of windows looking for a gap >= size */
	while (1) {
		/* never start inside an existing window */
		if (cur <= rsv->rsv_end)
			cur = rsv->rsv_end + 1;

		/* TODO?
		 * in the case we could not find a reservable space
		 * that is what is expected, during the re-search, we could
		 * remember what's the largest reservable space we could have
		 * and return that one.
		 *
		 * For now it will fail if we could not find the reservable
		 * space with expected-size (or more)...
		 */
		if (cur > last_block)
			return -1;		/* fail */

		prev = rsv;
		next = rb_next(&rsv->rsv_node);
		/* NB: rb_entry() is just pointer arithmetic; rsv is only
		 * dereferenced after the !next check below. */
		rsv = rb_entry(next,struct ext3_reserve_window_node,rsv_node);

		/*
		 * Reached the last reservation, we can just append to the
		 * previous one.
		 */
		if (!next)
			break;

		if (cur + size <= rsv->rsv_start) {
			/*
			 * Found a reserveable space big enough.  We could
			 * have a reservation across the group boundary here
			 */
			break;
		}
	}
	/*
	 * we come here either :
	 * when we reach the end of the whole list,
	 * and there is empty reservable space after last entry in the list.
	 * append it to the end of the list.
	 *
	 * or we found one reservable space in the middle of the list,
	 * return the reservation window that we could append to.
	 * succeed.
	 */

	/* the window moves: drop the stale node before re-inserting */
	if ((prev != my_rsv) && (!rsv_is_empty(&my_rsv->rsv_window)))
		rsv_window_remove(sb, my_rsv);

	/*
	 * Let's book the whole available window for now.  We will check the
	 * disk bitmap later and then, if there are free blocks then we adjust
	 * the window size if it's larger than requested.
	 * Otherwise, we will remove this node from the tree next time
	 * call find_next_reservable_window.
	 */
	my_rsv->rsv_start = cur;
	my_rsv->rsv_end = cur + size - 1;
	my_rsv->rsv_alloc_hit = 0;

	if (prev != my_rsv)
		ext3_rsv_window_add(sb, my_rsv);

	return 0;
}
1082 1074
1083 /** 1075 /**
1084 * alloc_new_reservation()--allocate a new reservation window 1076 * alloc_new_reservation()--allocate a new reservation window
1085 * 1077 *
1086 * To make a new reservation, we search part of the filesystem 1078 * To make a new reservation, we search part of the filesystem
1087 * reservation list (the list that inside the group). We try to 1079 * reservation list (the list that inside the group). We try to
1088 * allocate a new reservation window near the allocation goal, 1080 * allocate a new reservation window near the allocation goal,
1089 * or the beginning of the group, if there is no goal. 1081 * or the beginning of the group, if there is no goal.
1090 * 1082 *
1091 * We first find a reservable space after the goal, then from 1083 * We first find a reservable space after the goal, then from
1092 * there, we check the bitmap for the first free block after 1084 * there, we check the bitmap for the first free block after
1093 * it. If there is no free block until the end of group, then the 1085 * it. If there is no free block until the end of group, then the
1094 * whole group is full, we failed. Otherwise, check if the free 1086 * whole group is full, we failed. Otherwise, check if the free
1095 * block is inside the expected reservable space, if so, we 1087 * block is inside the expected reservable space, if so, we
1096 * succeed. 1088 * succeed.
1097 * If the first free block is outside the reservable space, then 1089 * If the first free block is outside the reservable space, then
1098 * start from the first free block, we search for next available 1090 * start from the first free block, we search for next available
1099 * space, and go on. 1091 * space, and go on.
1100 * 1092 *
1101 * on succeed, a new reservation will be found and inserted into the list 1093 * on succeed, a new reservation will be found and inserted into the list
1102 * It contains at least one free block, and it does not overlap with other 1094 * It contains at least one free block, and it does not overlap with other
1103 * reservation windows. 1095 * reservation windows.
1104 * 1096 *
1105 * failed: we failed to find a reservation window in this group 1097 * failed: we failed to find a reservation window in this group
1106 * 1098 *
1107 * @my_rsv: the reservation window 1099 * @my_rsv: the reservation window
1108 * 1100 *
1109 * @grp_goal: The goal (group-relative). It is where the search for a 1101 * @grp_goal: The goal (group-relative). It is where the search for a
1110 * free reservable space should start from. 1102 * free reservable space should start from.
1111 * if we have a grp_goal(grp_goal >0 ), then start from there, 1103 * if we have a grp_goal(grp_goal >0 ), then start from there,
1112 * no grp_goal(grp_goal = -1), we start from the first block 1104 * no grp_goal(grp_goal = -1), we start from the first block
1113 * of the group. 1105 * of the group.
1114 * 1106 *
1115 * @sb: the super block 1107 * @sb: the super block
1116 * @group: the group we are trying to allocate in 1108 * @group: the group we are trying to allocate in
1117 * @bitmap_bh: the block group block bitmap 1109 * @bitmap_bh: the block group block bitmap
1118 * 1110 *
1119 */ 1111 */
static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
		ext3_grpblk_t grp_goal, struct super_block *sb,
		unsigned int group, struct buffer_head *bitmap_bh)
{
	struct ext3_reserve_window_node *search_head;
	ext3_fsblk_t group_first_block, group_end_block, start_block;
	ext3_grpblk_t first_free_block;
	struct rb_root *fs_rsv_root = &EXT3_SB(sb)->s_rsv_window_root;
	unsigned long size;
	int ret;
	spinlock_t *rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock;

	group_first_block = ext3_group_first_block_no(sb, group);
	group_end_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1);

	/*
	 * A negative grp_goal means "no preferred block": begin the search
	 * at the first block of this group.  Otherwise translate the
	 * group-relative goal to a filesystem-wide block number.
	 */
	if (grp_goal < 0)
		start_block = group_first_block;
	else
		start_block = grp_goal + group_first_block;

	trace_ext3_alloc_new_reservation(sb, start_block);
	size = my_rsv->rsv_goal_size;

	if (!rsv_is_empty(&my_rsv->rsv_window)) {
		/*
		 * if the old reservation is cross group boundary
		 * and if the goal is inside the old reservation window,
		 * we will come here when we just failed to allocate from
		 * the first part of the window. We still have another part
		 * that belongs to the next group. In this case, there is no
		 * point to discard our window and try to allocate a new one
		 * in this group(which will fail). we should
		 * keep the reservation window, just simply move on.
		 *
		 * Maybe we could shift the start block of the reservation
		 * window to the first block of next group.
		 */

		if ((my_rsv->rsv_start <= group_end_block) &&
			(my_rsv->rsv_end > group_end_block) &&
			(start_block >= my_rsv->rsv_start))
			return -1;

		if ((my_rsv->rsv_alloc_hit >
		     (my_rsv->rsv_end - my_rsv->rsv_start + 1) / 2)) {
			/*
			 * if the previously allocation hit ratio is
			 * greater than 1/2, then we double the size of
			 * the reservation window the next time,
			 * otherwise we keep the same size window
			 */
			size = size * 2;
			if (size > EXT3_MAX_RESERVE_BLOCKS)
				size = EXT3_MAX_RESERVE_BLOCKS;
			my_rsv->rsv_goal_size= size;
		}
	}

	spin_lock(rsv_lock);
	/*
	 * shift the search start to the window near the goal block
	 */
	search_head = search_reserve_window(fs_rsv_root, start_block);

	/*
	 * find_next_reservable_window() simply finds a reservable window
	 * inside the given range(start_block, group_end_block).
	 *
	 * To make sure the reservation window has a free bit inside it, we
	 * need to check the bitmap after we found a reservable window.
	 */
retry:
	ret = find_next_reservable_window(search_head, my_rsv, sb,
						start_block, group_end_block);

	if (ret == -1) {
		/* No reservable space anywhere in range: give up and make
		 * sure we do not leave a stale window in the rb tree. */
		if (!rsv_is_empty(&my_rsv->rsv_window))
			rsv_window_remove(sb, my_rsv);
		spin_unlock(rsv_lock);
		return -1;
	}

	/*
	 * On success, find_next_reservable_window() returns the
	 * reservation window where there is a reservable space after it.
	 * Before we reserve this reservable space, we need
	 * to make sure there is at least a free block inside this region.
	 *
	 * searching the first free bit on the block bitmap and copy of
	 * last committed bitmap alternatively, until we found a allocatable
	 * block. Search start from the start block of the reservable space
	 * we just found.
	 *
	 * NOTE: rsv_lock is deliberately dropped across the bitmap scan;
	 * the window itself is already linked into the tree, so other CPUs
	 * will not reserve the same range meanwhile.
	 */
	spin_unlock(rsv_lock);
	first_free_block = bitmap_search_next_usable_block(
			my_rsv->rsv_start - group_first_block,
			bitmap_bh, group_end_block - group_first_block + 1);

	if (first_free_block < 0) {
		/*
		 * no free block left on the bitmap, no point
		 * to reserve the space. return failed.
		 */
		spin_lock(rsv_lock);
		if (!rsv_is_empty(&my_rsv->rsv_window))
			rsv_window_remove(sb, my_rsv);
		spin_unlock(rsv_lock);
		return -1;		/* failed */
	}

	start_block = first_free_block + group_first_block;
	/*
	 * check if the first free block is within the
	 * free space we just reserved
	 */
	if (start_block >= my_rsv->rsv_start &&
	    start_block <= my_rsv->rsv_end) {
		trace_ext3_reserved(sb, start_block, my_rsv);
		return 0;		/* success */
	}
	/*
	 * if the first free bit we found is out of the reservable space
	 * continue search for next reservable space,
	 * start from where the free block is,
	 * we also shift the list head to where we stopped last time
	 */
	search_head = my_rsv;
	spin_lock(rsv_lock);
	goto retry;
}
1250 1242
1251 /** 1243 /**
1252 * try_to_extend_reservation() 1244 * try_to_extend_reservation()
1253 * @my_rsv: given reservation window 1245 * @my_rsv: given reservation window
1254 * @sb: super block 1246 * @sb: super block
1255 * @size: the delta to extend 1247 * @size: the delta to extend
1256 * 1248 *
1257 * Attempt to expand the reservation window large enough to have 1249 * Attempt to expand the reservation window large enough to have
1258 * required number of free blocks 1250 * required number of free blocks
1259 * 1251 *
1260 * Since ext3_try_to_allocate() will always allocate blocks within 1252 * Since ext3_try_to_allocate() will always allocate blocks within
1261 * the reservation window range, if the window size is too small, 1253 * the reservation window range, if the window size is too small,
1262 * multiple blocks allocation has to stop at the end of the reservation 1254 * multiple blocks allocation has to stop at the end of the reservation
1263 * window. To make this more efficient, given the total number of 1255 * window. To make this more efficient, given the total number of
1264 * blocks needed and the current size of the window, we try to 1256 * blocks needed and the current size of the window, we try to
1265 * expand the reservation window size if necessary on a best-effort 1257 * expand the reservation window size if necessary on a best-effort
1266 * basis before ext3_new_blocks() tries to allocate blocks, 1258 * basis before ext3_new_blocks() tries to allocate blocks,
1267 */ 1259 */
1268 static void try_to_extend_reservation(struct ext3_reserve_window_node *my_rsv, 1260 static void try_to_extend_reservation(struct ext3_reserve_window_node *my_rsv,
1269 struct super_block *sb, int size) 1261 struct super_block *sb, int size)
1270 { 1262 {
1271 struct ext3_reserve_window_node *next_rsv; 1263 struct ext3_reserve_window_node *next_rsv;
1272 struct rb_node *next; 1264 struct rb_node *next;
1273 spinlock_t *rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock; 1265 spinlock_t *rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock;
1274 1266
1275 if (!spin_trylock(rsv_lock)) 1267 if (!spin_trylock(rsv_lock))
1276 return; 1268 return;
1277 1269
1278 next = rb_next(&my_rsv->rsv_node); 1270 next = rb_next(&my_rsv->rsv_node);
1279 1271
1280 if (!next) 1272 if (!next)
1281 my_rsv->rsv_end += size; 1273 my_rsv->rsv_end += size;
1282 else { 1274 else {
1283 next_rsv = rb_entry(next, struct ext3_reserve_window_node, rsv_node); 1275 next_rsv = rb_entry(next, struct ext3_reserve_window_node, rsv_node);
1284 1276
1285 if ((next_rsv->rsv_start - my_rsv->rsv_end - 1) >= size) 1277 if ((next_rsv->rsv_start - my_rsv->rsv_end - 1) >= size)
1286 my_rsv->rsv_end += size; 1278 my_rsv->rsv_end += size;
1287 else 1279 else
1288 my_rsv->rsv_end = next_rsv->rsv_start - 1; 1280 my_rsv->rsv_end = next_rsv->rsv_start - 1;
1289 } 1281 }
1290 spin_unlock(rsv_lock); 1282 spin_unlock(rsv_lock);
1291 } 1283 }
1292 1284
/**
 * ext3_try_to_allocate_with_rsv()
 * @sb:			superblock
 * @handle:		handle to this transaction
 * @group:		given allocation block group
 * @bitmap_bh:		bufferhead holds the block bitmap
 * @grp_goal:		given target block within the group
 * @my_rsv:		reservation window
 * @count:		target number of blocks to allocate
 * @errp:		pointer to store the error code
 *
 * This is the main function used to allocate a new block and its reservation
 * window.
 *
 * Each time when a new block allocation is need, first try to allocate from
 * its own reservation. If it does not have a reservation window, instead of
 * looking for a free bit on bitmap first, then look up the reservation list to
 * see if it is inside somebody else's reservation window, we try to allocate a
 * reservation window for it starting from the goal first. Then do the block
 * allocation within the reservation window.
 *
 * This will avoid keeping on searching the reservation list again and
 * again when somebody is looking for a free block (without
 * reservation), and there are lots of free blocks, but they are all
 * being reserved.
 *
 * We use a red-black tree for the per-filesystem reservation list.
 *
 * Returns the group-relative block number of the first allocated block,
 * or -1 on failure (with *errp set on fatal journal errors, and *count
 * updated to the number of blocks actually allocated on success).
 */
static ext3_grpblk_t
ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
			unsigned int group, struct buffer_head *bitmap_bh,
			ext3_grpblk_t grp_goal,
			struct ext3_reserve_window_node * my_rsv,
			unsigned long *count, int *errp)
{
	ext3_fsblk_t group_first_block, group_last_block;
	ext3_grpblk_t ret = 0;
	int fatal;
	unsigned long num = *count;

	*errp = 0;

	/*
	 * Make sure we use undo access for the bitmap, because it is critical
	 * that we do the frozen_data COW on bitmap buffers in all cases even
	 * if the buffer is in BJ_Forget state in the committing transaction.
	 */
	BUFFER_TRACE(bitmap_bh, "get undo access for new block");
	fatal = ext3_journal_get_undo_access(handle, bitmap_bh);
	if (fatal) {
		*errp = fatal;
		return -1;
	}

	/*
	 * we don't deal with reservation when
	 * filesystem is mounted without reservation
	 * or the file is not a regular file
	 * or last attempt to allocate a block with reservation turned on failed
	 */
	if (my_rsv == NULL ) {
		ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh,
						grp_goal, count, NULL);
		goto out;
	}
	/*
	 * grp_goal is a group relative block number (if there is a goal)
	 * 0 <= grp_goal < EXT3_BLOCKS_PER_GROUP(sb)
	 * first block is a filesystem wide block number
	 * first block is the block number of the first block in this group
	 */
	group_first_block = ext3_group_first_block_no(sb, group);
	group_last_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1);

	/*
	 * Basically we will allocate a new block from inode's reservation
	 * window.
	 *
	 * We need to allocate a new reservation window, if:
	 * a) inode does not have a reservation window; or
	 * b) last attempt to allocate a block from existing reservation
	 *    failed; or
	 * c) we come here with a goal and with a reservation window
	 *
	 * We do not need to allocate a new reservation window if we come here
	 * at the beginning with a goal and the goal is inside the window, or
	 * we don't have a goal but already have a reservation window.
	 * then we could go to allocate from the reservation window directly.
	 */
	while (1) {
		if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) ||
			!goal_in_my_reservation(&my_rsv->rsv_window,
						grp_goal, group, sb)) {
			/* Grow the target window at least to the number of
			 * blocks this allocation wants. */
			if (my_rsv->rsv_goal_size < *count)
				my_rsv->rsv_goal_size = *count;
			ret = alloc_new_reservation(my_rsv, grp_goal, sb,
							group, bitmap_bh);
			if (ret < 0)
				break;			/* failed */

			/* The new window may not cover the goal; if so,
			 * fall back to goal-less allocation. */
			if (!goal_in_my_reservation(&my_rsv->rsv_window,
							grp_goal, group, sb))
				grp_goal = -1;
		} else if (grp_goal >= 0) {
			/* Window already covers the goal: try to stretch it
			 * so the whole multi-block request fits. */
			int curr = my_rsv->rsv_end -
				   (grp_goal + group_first_block) + 1;

			if (curr < *count)
				try_to_extend_reservation(my_rsv, sb,
							*count - curr);
		}

		/* The window must intersect this group at this point;
		 * anything else indicates reservation-tree corruption. */
		if ((my_rsv->rsv_start > group_last_block) ||
				(my_rsv->rsv_end < group_first_block)) {
			rsv_window_dump(&EXT3_SB(sb)->s_rsv_window_root, 1);
			BUG();
		}
		ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh,
					   grp_goal, &num,
					   &my_rsv->rsv_window);
		if (ret >= 0) {
			my_rsv->rsv_alloc_hit += num;
			*count = num;
			break;				/* succeed */
		}
		num = *count;
	}
out:
	if (ret >= 0) {
		BUFFER_TRACE(bitmap_bh, "journal_dirty_metadata for "
					"bitmap block");
		fatal = ext3_journal_dirty_metadata(handle, bitmap_bh);
		if (fatal) {
			*errp = fatal;
			return -1;
		}
		return ret;
	}

	/* Nothing allocated: give the undo-access credit back to jbd. */
	BUFFER_TRACE(bitmap_bh, "journal_release_buffer");
	ext3_journal_release_buffer(handle, bitmap_bh);
	return ret;
}
1436 1428
1437 /** 1429 /**
1438 * ext3_has_free_blocks() 1430 * ext3_has_free_blocks()
1439 * @sbi: in-core super block structure. 1431 * @sbi: in-core super block structure.
1440 * 1432 *
1441 * Check if filesystem has at least 1 free block available for allocation. 1433 * Check if filesystem has at least 1 free block available for allocation.
1442 */ 1434 */
1443 static int ext3_has_free_blocks(struct ext3_sb_info *sbi, int use_reservation) 1435 static int ext3_has_free_blocks(struct ext3_sb_info *sbi, int use_reservation)
1444 { 1436 {
1445 ext3_fsblk_t free_blocks, root_blocks; 1437 ext3_fsblk_t free_blocks, root_blocks;
1446 1438
1447 free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); 1439 free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
1448 root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count); 1440 root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count);
1449 if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) && 1441 if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) &&
1450 !use_reservation && sbi->s_resuid != current_fsuid() && 1442 !use_reservation && sbi->s_resuid != current_fsuid() &&
1451 (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) { 1443 (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) {
1452 return 0; 1444 return 0;
1453 } 1445 }
1454 return 1; 1446 return 1;
1455 } 1447 }
1456 1448
1457 /** 1449 /**
1458 * ext3_should_retry_alloc() 1450 * ext3_should_retry_alloc()
1459 * @sb: super block 1451 * @sb: super block
1460 * @retries number of attemps has been made 1452 * @retries number of attemps has been made
1461 * 1453 *
1462 * ext3_should_retry_alloc() is called when ENOSPC is returned, and if 1454 * ext3_should_retry_alloc() is called when ENOSPC is returned, and if
1463 * it is profitable to retry the operation, this function will wait 1455 * it is profitable to retry the operation, this function will wait
1464 * for the current or committing transaction to complete, and then 1456 * for the current or committing transaction to complete, and then
1465 * return TRUE. 1457 * return TRUE.
1466 * 1458 *
1467 * if the total number of retries exceed three times, return FALSE. 1459 * if the total number of retries exceed three times, return FALSE.
1468 */ 1460 */
1469 int ext3_should_retry_alloc(struct super_block *sb, int *retries) 1461 int ext3_should_retry_alloc(struct super_block *sb, int *retries)
1470 { 1462 {
1471 if (!ext3_has_free_blocks(EXT3_SB(sb), 0) || (*retries)++ > 3) 1463 if (!ext3_has_free_blocks(EXT3_SB(sb), 0) || (*retries)++ > 3)
1472 return 0; 1464 return 0;
1473 1465
1474 jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id); 1466 jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);
1475 1467
1476 return journal_force_commit_nested(EXT3_SB(sb)->s_journal); 1468 return journal_force_commit_nested(EXT3_SB(sb)->s_journal);
1477 } 1469 }
1478 1470
1479 /** 1471 /**
1480 * ext3_new_blocks() -- core block(s) allocation function 1472 * ext3_new_blocks() -- core block(s) allocation function
1481 * @handle: handle to this transaction 1473 * @handle: handle to this transaction
1482 * @inode: file inode 1474 * @inode: file inode
1483 * @goal: given target block(filesystem wide) 1475 * @goal: given target block(filesystem wide)
1484 * @count: target number of blocks to allocate 1476 * @count: target number of blocks to allocate
1485 * @errp: error code 1477 * @errp: error code
1486 * 1478 *
1487 * ext3_new_blocks uses a goal block to assist allocation. It tries to 1479 * ext3_new_blocks uses a goal block to assist allocation. It tries to
1488 * allocate block(s) from the block group contains the goal block first. If that 1480 * allocate block(s) from the block group contains the goal block first. If that
1489 * fails, it will try to allocate block(s) from other block groups without 1481 * fails, it will try to allocate block(s) from other block groups without
1490 * any specific goal block. 1482 * any specific goal block.
1491 * 1483 *
1492 */ 1484 */
1493 ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode, 1485 ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
1494 ext3_fsblk_t goal, unsigned long *count, int *errp) 1486 ext3_fsblk_t goal, unsigned long *count, int *errp)
1495 { 1487 {
1496 struct buffer_head *bitmap_bh = NULL; 1488 struct buffer_head *bitmap_bh = NULL;
1497 struct buffer_head *gdp_bh; 1489 struct buffer_head *gdp_bh;
1498 int group_no; 1490 int group_no;
1499 int goal_group; 1491 int goal_group;
1500 ext3_grpblk_t grp_target_blk; /* blockgroup relative goal block */ 1492 ext3_grpblk_t grp_target_blk; /* blockgroup relative goal block */
1501 ext3_grpblk_t grp_alloc_blk; /* blockgroup-relative allocated block*/ 1493 ext3_grpblk_t grp_alloc_blk; /* blockgroup-relative allocated block*/
1502 ext3_fsblk_t ret_block; /* filesyetem-wide allocated block */ 1494 ext3_fsblk_t ret_block; /* filesyetem-wide allocated block */
1503 int bgi; /* blockgroup iteration index */ 1495 int bgi; /* blockgroup iteration index */
1504 int fatal = 0, err; 1496 int fatal = 0, err;
1505 int performed_allocation = 0; 1497 int performed_allocation = 0;
1506 ext3_grpblk_t free_blocks; /* number of free blocks in a group */ 1498 ext3_grpblk_t free_blocks; /* number of free blocks in a group */
1507 struct super_block *sb; 1499 struct super_block *sb;
1508 struct ext3_group_desc *gdp; 1500 struct ext3_group_desc *gdp;
1509 struct ext3_super_block *es; 1501 struct ext3_super_block *es;
1510 struct ext3_sb_info *sbi; 1502 struct ext3_sb_info *sbi;
1511 struct ext3_reserve_window_node *my_rsv = NULL; 1503 struct ext3_reserve_window_node *my_rsv = NULL;
1512 struct ext3_block_alloc_info *block_i; 1504 struct ext3_block_alloc_info *block_i;
1513 unsigned short windowsz = 0; 1505 unsigned short windowsz = 0;
1514 #ifdef EXT3FS_DEBUG 1506 #ifdef EXT3FS_DEBUG
1515 static int goal_hits, goal_attempts; 1507 static int goal_hits, goal_attempts;
1516 #endif 1508 #endif
1517 unsigned long ngroups; 1509 unsigned long ngroups;
1518 unsigned long num = *count; 1510 unsigned long num = *count;
1519 1511
1520 *errp = -ENOSPC; 1512 *errp = -ENOSPC;
1521 sb = inode->i_sb; 1513 sb = inode->i_sb;
1522 1514
1523 /* 1515 /*
1524 * Check quota for allocation of this block. 1516 * Check quota for allocation of this block.
1525 */ 1517 */
1526 err = dquot_alloc_block(inode, num); 1518 err = dquot_alloc_block(inode, num);
1527 if (err) { 1519 if (err) {
1528 *errp = err; 1520 *errp = err;
1529 return 0; 1521 return 0;
1530 } 1522 }
1531 1523
1532 trace_ext3_request_blocks(inode, goal, num); 1524 trace_ext3_request_blocks(inode, goal, num);
1533 1525
1534 sbi = EXT3_SB(sb); 1526 sbi = EXT3_SB(sb);
1535 es = sbi->s_es; 1527 es = sbi->s_es;
1536 ext3_debug("goal=%lu.\n", goal); 1528 ext3_debug("goal=%lu.\n", goal);
1537 /* 1529 /*
1538 * Allocate a block from reservation only when 1530 * Allocate a block from reservation only when
1539 * filesystem is mounted with reservation(default,-o reservation), and 1531 * filesystem is mounted with reservation(default,-o reservation), and
1540 * it's a regular file, and 1532 * it's a regular file, and
1541 * the desired window size is greater than 0 (One could use ioctl 1533 * the desired window size is greater than 0 (One could use ioctl
1542 * command EXT3_IOC_SETRSVSZ to set the window size to 0 to turn off 1534 * command EXT3_IOC_SETRSVSZ to set the window size to 0 to turn off
1543 * reservation on that particular file) 1535 * reservation on that particular file)
1544 */ 1536 */
1545 block_i = EXT3_I(inode)->i_block_alloc_info; 1537 block_i = EXT3_I(inode)->i_block_alloc_info;
1546 if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0)) 1538 if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0))
1547 my_rsv = &block_i->rsv_window_node; 1539 my_rsv = &block_i->rsv_window_node;
1548 1540
1549 if (!ext3_has_free_blocks(sbi, IS_NOQUOTA(inode))) { 1541 if (!ext3_has_free_blocks(sbi, IS_NOQUOTA(inode))) {
1550 *errp = -ENOSPC; 1542 *errp = -ENOSPC;
1551 goto out; 1543 goto out;
1552 } 1544 }
1553 1545
1554 /* 1546 /*
1555 * First, test whether the goal block is free. 1547 * First, test whether the goal block is free.
1556 */ 1548 */
1557 if (goal < le32_to_cpu(es->s_first_data_block) || 1549 if (goal < le32_to_cpu(es->s_first_data_block) ||
1558 goal >= le32_to_cpu(es->s_blocks_count)) 1550 goal >= le32_to_cpu(es->s_blocks_count))
1559 goal = le32_to_cpu(es->s_first_data_block); 1551 goal = le32_to_cpu(es->s_first_data_block);
1560 group_no = (goal - le32_to_cpu(es->s_first_data_block)) / 1552 group_no = (goal - le32_to_cpu(es->s_first_data_block)) /
1561 EXT3_BLOCKS_PER_GROUP(sb); 1553 EXT3_BLOCKS_PER_GROUP(sb);
1562 goal_group = group_no; 1554 goal_group = group_no;
1563 retry_alloc: 1555 retry_alloc:
1564 gdp = ext3_get_group_desc(sb, group_no, &gdp_bh); 1556 gdp = ext3_get_group_desc(sb, group_no, &gdp_bh);
1565 if (!gdp) 1557 if (!gdp)
1566 goto io_error; 1558 goto io_error;
1567 1559
1568 free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); 1560 free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
1569 /* 1561 /*
1570 * if there is not enough free blocks to make a new resevation 1562 * if there is not enough free blocks to make a new resevation
1571 * turn off reservation for this allocation 1563 * turn off reservation for this allocation
1572 */ 1564 */
1573 if (my_rsv && (free_blocks < windowsz) 1565 if (my_rsv && (free_blocks < windowsz)
1574 && (free_blocks > 0) 1566 && (free_blocks > 0)
1575 && (rsv_is_empty(&my_rsv->rsv_window))) 1567 && (rsv_is_empty(&my_rsv->rsv_window)))
1576 my_rsv = NULL; 1568 my_rsv = NULL;
1577 1569
1578 if (free_blocks > 0) { 1570 if (free_blocks > 0) {
1579 grp_target_blk = ((goal - le32_to_cpu(es->s_first_data_block)) % 1571 grp_target_blk = ((goal - le32_to_cpu(es->s_first_data_block)) %
1580 EXT3_BLOCKS_PER_GROUP(sb)); 1572 EXT3_BLOCKS_PER_GROUP(sb));
1581 bitmap_bh = read_block_bitmap(sb, group_no); 1573 bitmap_bh = read_block_bitmap(sb, group_no);
1582 if (!bitmap_bh) 1574 if (!bitmap_bh)
1583 goto io_error; 1575 goto io_error;
1584 grp_alloc_blk = ext3_try_to_allocate_with_rsv(sb, handle, 1576 grp_alloc_blk = ext3_try_to_allocate_with_rsv(sb, handle,
1585 group_no, bitmap_bh, grp_target_blk, 1577 group_no, bitmap_bh, grp_target_blk,
1586 my_rsv, &num, &fatal); 1578 my_rsv, &num, &fatal);
1587 if (fatal) 1579 if (fatal)
1588 goto out; 1580 goto out;
1589 if (grp_alloc_blk >= 0) 1581 if (grp_alloc_blk >= 0)
1590 goto allocated; 1582 goto allocated;
1591 } 1583 }
1592 1584
1593 ngroups = EXT3_SB(sb)->s_groups_count; 1585 ngroups = EXT3_SB(sb)->s_groups_count;
1594 smp_rmb(); 1586 smp_rmb();
1595 1587
1596 /* 1588 /*
1597 * Now search the rest of the groups. We assume that 1589 * Now search the rest of the groups. We assume that
1598 * group_no and gdp correctly point to the last group visited. 1590 * group_no and gdp correctly point to the last group visited.
1599 */ 1591 */
1600 for (bgi = 0; bgi < ngroups; bgi++) { 1592 for (bgi = 0; bgi < ngroups; bgi++) {
1601 group_no++; 1593 group_no++;
1602 if (group_no >= ngroups) 1594 if (group_no >= ngroups)
1603 group_no = 0; 1595 group_no = 0;
1604 gdp = ext3_get_group_desc(sb, group_no, &gdp_bh); 1596 gdp = ext3_get_group_desc(sb, group_no, &gdp_bh);
1605 if (!gdp) 1597 if (!gdp)
1606 goto io_error; 1598 goto io_error;
1607 free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); 1599 free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
1608 /* 1600 /*
1609 * skip this group (and avoid loading bitmap) if there 1601 * skip this group (and avoid loading bitmap) if there
1610 * are no free blocks 1602 * are no free blocks
1611 */ 1603 */
1612 if (!free_blocks) 1604 if (!free_blocks)
1613 continue; 1605 continue;
1614 /* 1606 /*
1615 * skip this group if the number of 1607 * skip this group if the number of
1616 * free blocks is less than half of the reservation 1608 * free blocks is less than half of the reservation
1617 * window size. 1609 * window size.
1618 */ 1610 */
1619 if (my_rsv && (free_blocks <= (windowsz/2))) 1611 if (my_rsv && (free_blocks <= (windowsz/2)))
1620 continue; 1612 continue;
1621 1613
1622 brelse(bitmap_bh); 1614 brelse(bitmap_bh);
1623 bitmap_bh = read_block_bitmap(sb, group_no); 1615 bitmap_bh = read_block_bitmap(sb, group_no);
1624 if (!bitmap_bh) 1616 if (!bitmap_bh)
1625 goto io_error; 1617 goto io_error;
1626 /* 1618 /*
1627 * try to allocate block(s) from this group, without a goal(-1). 1619 * try to allocate block(s) from this group, without a goal(-1).
1628 */ 1620 */
1629 grp_alloc_blk = ext3_try_to_allocate_with_rsv(sb, handle, 1621 grp_alloc_blk = ext3_try_to_allocate_with_rsv(sb, handle,
1630 group_no, bitmap_bh, -1, my_rsv, 1622 group_no, bitmap_bh, -1, my_rsv,
1631 &num, &fatal); 1623 &num, &fatal);
1632 if (fatal) 1624 if (fatal)
1633 goto out; 1625 goto out;
1634 if (grp_alloc_blk >= 0) 1626 if (grp_alloc_blk >= 0)
1635 goto allocated; 1627 goto allocated;
1636 } 1628 }
1637 /* 1629 /*
1638 * We may end up a bogus earlier ENOSPC error due to 1630 * We may end up a bogus earlier ENOSPC error due to
1639 * filesystem is "full" of reservations, but 1631 * filesystem is "full" of reservations, but
1640 * there maybe indeed free blocks available on disk 1632 * there maybe indeed free blocks available on disk
1641 * In this case, we just forget about the reservations 1633 * In this case, we just forget about the reservations
1642 * just do block allocation as without reservations. 1634 * just do block allocation as without reservations.
1643 */ 1635 */
1644 if (my_rsv) { 1636 if (my_rsv) {
1645 my_rsv = NULL; 1637 my_rsv = NULL;
1646 windowsz = 0; 1638 windowsz = 0;
1647 group_no = goal_group; 1639 group_no = goal_group;
1648 goto retry_alloc; 1640 goto retry_alloc;
1649 } 1641 }
1650 /* No space left on the device */ 1642 /* No space left on the device */
1651 *errp = -ENOSPC; 1643 *errp = -ENOSPC;
1652 goto out; 1644 goto out;
1653 1645
1654 allocated: 1646 allocated:
1655 1647
1656 ext3_debug("using block group %d(%d)\n", 1648 ext3_debug("using block group %d(%d)\n",
1657 group_no, gdp->bg_free_blocks_count); 1649 group_no, gdp->bg_free_blocks_count);
1658 1650
1659 BUFFER_TRACE(gdp_bh, "get_write_access"); 1651 BUFFER_TRACE(gdp_bh, "get_write_access");
1660 fatal = ext3_journal_get_write_access(handle, gdp_bh); 1652 fatal = ext3_journal_get_write_access(handle, gdp_bh);
1661 if (fatal) 1653 if (fatal)
1662 goto out; 1654 goto out;
1663 1655
1664 ret_block = grp_alloc_blk + ext3_group_first_block_no(sb, group_no); 1656 ret_block = grp_alloc_blk + ext3_group_first_block_no(sb, group_no);
1665 1657
1666 if (in_range(le32_to_cpu(gdp->bg_block_bitmap), ret_block, num) || 1658 if (in_range(le32_to_cpu(gdp->bg_block_bitmap), ret_block, num) ||
1667 in_range(le32_to_cpu(gdp->bg_inode_bitmap), ret_block, num) || 1659 in_range(le32_to_cpu(gdp->bg_inode_bitmap), ret_block, num) ||
1668 in_range(ret_block, le32_to_cpu(gdp->bg_inode_table), 1660 in_range(ret_block, le32_to_cpu(gdp->bg_inode_table),
1669 EXT3_SB(sb)->s_itb_per_group) || 1661 EXT3_SB(sb)->s_itb_per_group) ||
1670 in_range(ret_block + num - 1, le32_to_cpu(gdp->bg_inode_table), 1662 in_range(ret_block + num - 1, le32_to_cpu(gdp->bg_inode_table),
1671 EXT3_SB(sb)->s_itb_per_group)) { 1663 EXT3_SB(sb)->s_itb_per_group)) {
1672 ext3_error(sb, "ext3_new_block", 1664 ext3_error(sb, "ext3_new_block",
1673 "Allocating block in system zone - " 1665 "Allocating block in system zone - "
1674 "blocks from "E3FSBLK", length %lu", 1666 "blocks from "E3FSBLK", length %lu",
1675 ret_block, num); 1667 ret_block, num);
1676 /* 1668 /*
1677 * claim_block() marked the blocks we allocated as in use. So we 1669 * claim_block() marked the blocks we allocated as in use. So we
1678 * may want to selectively mark some of the blocks as free. 1670 * may want to selectively mark some of the blocks as free.
1679 */ 1671 */
1680 goto retry_alloc; 1672 goto retry_alloc;
1681 } 1673 }
1682 1674
1683 performed_allocation = 1; 1675 performed_allocation = 1;
1684 1676
1685 #ifdef CONFIG_JBD_DEBUG 1677 #ifdef CONFIG_JBD_DEBUG
1686 { 1678 {
1687 struct buffer_head *debug_bh; 1679 struct buffer_head *debug_bh;
1688 1680
1689 /* Record bitmap buffer state in the newly allocated block */ 1681 /* Record bitmap buffer state in the newly allocated block */
1690 debug_bh = sb_find_get_block(sb, ret_block); 1682 debug_bh = sb_find_get_block(sb, ret_block);
1691 if (debug_bh) { 1683 if (debug_bh) {
1692 BUFFER_TRACE(debug_bh, "state when allocated"); 1684 BUFFER_TRACE(debug_bh, "state when allocated");
1693 BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap state"); 1685 BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap state");
1694 brelse(debug_bh); 1686 brelse(debug_bh);
1695 } 1687 }
1696 } 1688 }
1697 jbd_lock_bh_state(bitmap_bh); 1689 jbd_lock_bh_state(bitmap_bh);
1698 spin_lock(sb_bgl_lock(sbi, group_no)); 1690 spin_lock(sb_bgl_lock(sbi, group_no));
1699 if (buffer_jbd(bitmap_bh) && bh2jh(bitmap_bh)->b_committed_data) { 1691 if (buffer_jbd(bitmap_bh) && bh2jh(bitmap_bh)->b_committed_data) {
1700 int i; 1692 int i;
1701 1693
1702 for (i = 0; i < num; i++) { 1694 for (i = 0; i < num; i++) {
1703 if (ext3_test_bit(grp_alloc_blk+i, 1695 if (ext3_test_bit(grp_alloc_blk+i,
1704 bh2jh(bitmap_bh)->b_committed_data)) { 1696 bh2jh(bitmap_bh)->b_committed_data)) {
1705 printk("%s: block was unexpectedly set in " 1697 printk("%s: block was unexpectedly set in "
1706 "b_committed_data\n", __func__); 1698 "b_committed_data\n", __func__);
1707 } 1699 }
1708 } 1700 }
1709 } 1701 }
1710 ext3_debug("found bit %d\n", grp_alloc_blk); 1702 ext3_debug("found bit %d\n", grp_alloc_blk);
1711 spin_unlock(sb_bgl_lock(sbi, group_no)); 1703 spin_unlock(sb_bgl_lock(sbi, group_no));
1712 jbd_unlock_bh_state(bitmap_bh); 1704 jbd_unlock_bh_state(bitmap_bh);
1713 #endif 1705 #endif
1714 1706
1715 if (ret_block + num - 1 >= le32_to_cpu(es->s_blocks_count)) { 1707 if (ret_block + num - 1 >= le32_to_cpu(es->s_blocks_count)) {
1716 ext3_error(sb, "ext3_new_block", 1708 ext3_error(sb, "ext3_new_block",
1717 "block("E3FSBLK") >= blocks count(%d) - " 1709 "block("E3FSBLK") >= blocks count(%d) - "
1718 "block_group = %d, es == %p ", ret_block, 1710 "block_group = %d, es == %p ", ret_block,
1719 le32_to_cpu(es->s_blocks_count), group_no, es); 1711 le32_to_cpu(es->s_blocks_count), group_no, es);
1720 goto out; 1712 goto out;
1721 } 1713 }
1722 1714
1723 /* 1715 /*
1724 * It is up to the caller to add the new buffer to a journal 1716 * It is up to the caller to add the new buffer to a journal
1725 * list of some description. We don't know in advance whether 1717 * list of some description. We don't know in advance whether
1726 * the caller wants to use it as metadata or data. 1718 * the caller wants to use it as metadata or data.
1727 */ 1719 */
1728 ext3_debug("allocating block %lu. Goal hits %d of %d.\n", 1720 ext3_debug("allocating block %lu. Goal hits %d of %d.\n",
1729 ret_block, goal_hits, goal_attempts); 1721 ret_block, goal_hits, goal_attempts);
1730 1722
1731 spin_lock(sb_bgl_lock(sbi, group_no)); 1723 spin_lock(sb_bgl_lock(sbi, group_no));
1732 le16_add_cpu(&gdp->bg_free_blocks_count, -num); 1724 le16_add_cpu(&gdp->bg_free_blocks_count, -num);
1733 spin_unlock(sb_bgl_lock(sbi, group_no)); 1725 spin_unlock(sb_bgl_lock(sbi, group_no));
1734 percpu_counter_sub(&sbi->s_freeblocks_counter, num); 1726 percpu_counter_sub(&sbi->s_freeblocks_counter, num);
1735 1727
1736 BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor"); 1728 BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor");
1737 err = ext3_journal_dirty_metadata(handle, gdp_bh); 1729 err = ext3_journal_dirty_metadata(handle, gdp_bh);
1738 if (!fatal) 1730 if (!fatal)
1739 fatal = err; 1731 fatal = err;
1740 1732
1741 if (fatal) 1733 if (fatal)
1742 goto out; 1734 goto out;
1743 1735
1744 *errp = 0; 1736 *errp = 0;
1745 brelse(bitmap_bh); 1737 brelse(bitmap_bh);
1746 1738
1747 if (num < *count) { 1739 if (num < *count) {
1748 dquot_free_block(inode, *count-num); 1740 dquot_free_block(inode, *count-num);
1749 *count = num; 1741 *count = num;
1750 } 1742 }
1751 1743
1752 trace_ext3_allocate_blocks(inode, goal, num, 1744 trace_ext3_allocate_blocks(inode, goal, num,
1753 (unsigned long long)ret_block); 1745 (unsigned long long)ret_block);
1754 1746
1755 return ret_block; 1747 return ret_block;
1756 1748
1757 io_error: 1749 io_error:
1758 *errp = -EIO; 1750 *errp = -EIO;
1759 out: 1751 out:
1760 if (fatal) { 1752 if (fatal) {
1761 *errp = fatal; 1753 *errp = fatal;
1762 ext3_std_error(sb, fatal); 1754 ext3_std_error(sb, fatal);
1763 } 1755 }
1764 /* 1756 /*
1765 * Undo the block allocation 1757 * Undo the block allocation
1766 */ 1758 */
1767 if (!performed_allocation) 1759 if (!performed_allocation)
1768 dquot_free_block(inode, *count); 1760 dquot_free_block(inode, *count);
1769 brelse(bitmap_bh); 1761 brelse(bitmap_bh);
1770 return 0; 1762 return 0;
1771 } 1763 }
1772 1764
1773 ext3_fsblk_t ext3_new_block(handle_t *handle, struct inode *inode, 1765 ext3_fsblk_t ext3_new_block(handle_t *handle, struct inode *inode,
1774 ext3_fsblk_t goal, int *errp) 1766 ext3_fsblk_t goal, int *errp)
1775 { 1767 {
1776 unsigned long count = 1; 1768 unsigned long count = 1;
1777 1769
1778 return ext3_new_blocks(handle, inode, goal, &count, errp); 1770 return ext3_new_blocks(handle, inode, goal, &count, errp);
1779 } 1771 }
1780 1772
1781 /** 1773 /**
1782 * ext3_count_free_blocks() -- count filesystem free blocks 1774 * ext3_count_free_blocks() -- count filesystem free blocks
1783 * @sb: superblock 1775 * @sb: superblock
1784 * 1776 *
1785 * Adds up the number of free blocks from each block group. 1777 * Adds up the number of free blocks from each block group.
1786 */ 1778 */
1787 ext3_fsblk_t ext3_count_free_blocks(struct super_block *sb) 1779 ext3_fsblk_t ext3_count_free_blocks(struct super_block *sb)
1788 { 1780 {
1789 ext3_fsblk_t desc_count; 1781 ext3_fsblk_t desc_count;
1790 struct ext3_group_desc *gdp; 1782 struct ext3_group_desc *gdp;
1791 int i; 1783 int i;
1792 unsigned long ngroups = EXT3_SB(sb)->s_groups_count; 1784 unsigned long ngroups = EXT3_SB(sb)->s_groups_count;
1793 #ifdef EXT3FS_DEBUG 1785 #ifdef EXT3FS_DEBUG
1794 struct ext3_super_block *es; 1786 struct ext3_super_block *es;
1795 ext3_fsblk_t bitmap_count; 1787 ext3_fsblk_t bitmap_count;
1796 unsigned long x; 1788 unsigned long x;
1797 struct buffer_head *bitmap_bh = NULL; 1789 struct buffer_head *bitmap_bh = NULL;
1798 1790
1799 es = EXT3_SB(sb)->s_es; 1791 es = EXT3_SB(sb)->s_es;
1800 desc_count = 0; 1792 desc_count = 0;
1801 bitmap_count = 0; 1793 bitmap_count = 0;
1802 gdp = NULL; 1794 gdp = NULL;
1803 1795
1804 smp_rmb(); 1796 smp_rmb();
1805 for (i = 0; i < ngroups; i++) { 1797 for (i = 0; i < ngroups; i++) {
1806 gdp = ext3_get_group_desc(sb, i, NULL); 1798 gdp = ext3_get_group_desc(sb, i, NULL);
1807 if (!gdp) 1799 if (!gdp)
1808 continue; 1800 continue;
1809 desc_count += le16_to_cpu(gdp->bg_free_blocks_count); 1801 desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
1810 brelse(bitmap_bh); 1802 brelse(bitmap_bh);
1811 bitmap_bh = read_block_bitmap(sb, i); 1803 bitmap_bh = read_block_bitmap(sb, i);
1812 if (bitmap_bh == NULL) 1804 if (bitmap_bh == NULL)
1813 continue; 1805 continue;
1814 1806
1815 x = ext3_count_free(bitmap_bh, sb->s_blocksize); 1807 x = ext3_count_free(bitmap_bh, sb->s_blocksize);
1816 printk("group %d: stored = %d, counted = %lu\n", 1808 printk("group %d: stored = %d, counted = %lu\n",
1817 i, le16_to_cpu(gdp->bg_free_blocks_count), x); 1809 i, le16_to_cpu(gdp->bg_free_blocks_count), x);
1818 bitmap_count += x; 1810 bitmap_count += x;
1819 } 1811 }
1820 brelse(bitmap_bh); 1812 brelse(bitmap_bh);
1821 printk("ext3_count_free_blocks: stored = "E3FSBLK 1813 printk("ext3_count_free_blocks: stored = "E3FSBLK
1822 ", computed = "E3FSBLK", "E3FSBLK"\n", 1814 ", computed = "E3FSBLK", "E3FSBLK"\n",
1823 le32_to_cpu(es->s_free_blocks_count), 1815 le32_to_cpu(es->s_free_blocks_count),
1824 desc_count, bitmap_count); 1816 desc_count, bitmap_count);
1825 return bitmap_count; 1817 return bitmap_count;
1826 #else 1818 #else
1827 desc_count = 0; 1819 desc_count = 0;
1828 smp_rmb(); 1820 smp_rmb();
1829 for (i = 0; i < ngroups; i++) { 1821 for (i = 0; i < ngroups; i++) {
1830 gdp = ext3_get_group_desc(sb, i, NULL); 1822 gdp = ext3_get_group_desc(sb, i, NULL);
1831 if (!gdp) 1823 if (!gdp)
1832 continue; 1824 continue;
1833 desc_count += le16_to_cpu(gdp->bg_free_blocks_count); 1825 desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
1834 } 1826 }
1835 1827
1836 return desc_count; 1828 return desc_count;
1837 #endif 1829 #endif
1838 } 1830 }
1839 1831
1840 static inline int test_root(int a, int b) 1832 static inline int test_root(int a, int b)
1841 { 1833 {
1842 int num = b; 1834 int num = b;
1843 1835
1844 while (a > num) 1836 while (a > num)
1845 num *= b; 1837 num *= b;
1846 return num == a; 1838 return num == a;
1847 } 1839 }
1848 1840
1849 static int ext3_group_sparse(int group) 1841 static int ext3_group_sparse(int group)
1850 { 1842 {
1851 if (group <= 1) 1843 if (group <= 1)
1852 return 1; 1844 return 1;
1853 if (!(group & 1)) 1845 if (!(group & 1))
1854 return 0; 1846 return 0;
1855 return (test_root(group, 7) || test_root(group, 5) || 1847 return (test_root(group, 7) || test_root(group, 5) ||
1856 test_root(group, 3)); 1848 test_root(group, 3));
1857 } 1849 }
1858 1850
1859 /** 1851 /**
1860 * ext3_bg_has_super - number of blocks used by the superblock in group 1852 * ext3_bg_has_super - number of blocks used by the superblock in group
1861 * @sb: superblock for filesystem 1853 * @sb: superblock for filesystem
1862 * @group: group number to check 1854 * @group: group number to check
1863 * 1855 *
1864 * Return the number of blocks used by the superblock (primary or backup) 1856 * Return the number of blocks used by the superblock (primary or backup)
1865 * in this group. Currently this will be only 0 or 1. 1857 * in this group. Currently this will be only 0 or 1.
1866 */ 1858 */
1867 int ext3_bg_has_super(struct super_block *sb, int group) 1859 int ext3_bg_has_super(struct super_block *sb, int group)
1868 { 1860 {
1869 if (EXT3_HAS_RO_COMPAT_FEATURE(sb, 1861 if (EXT3_HAS_RO_COMPAT_FEATURE(sb,
1870 EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER) && 1862 EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER) &&
1871 !ext3_group_sparse(group)) 1863 !ext3_group_sparse(group))
1872 return 0; 1864 return 0;
1873 return 1; 1865 return 1;
1874 } 1866 }
1875 1867
1876 static unsigned long ext3_bg_num_gdb_meta(struct super_block *sb, int group) 1868 static unsigned long ext3_bg_num_gdb_meta(struct super_block *sb, int group)
1877 { 1869 {
1878 unsigned long metagroup = group / EXT3_DESC_PER_BLOCK(sb); 1870 unsigned long metagroup = group / EXT3_DESC_PER_BLOCK(sb);
1879 unsigned long first = metagroup * EXT3_DESC_PER_BLOCK(sb); 1871 unsigned long first = metagroup * EXT3_DESC_PER_BLOCK(sb);
1880 unsigned long last = first + EXT3_DESC_PER_BLOCK(sb) - 1; 1872 unsigned long last = first + EXT3_DESC_PER_BLOCK(sb) - 1;
1881 1873
1882 if (group == first || group == first + 1 || group == last) 1874 if (group == first || group == first + 1 || group == last)
1883 return 1; 1875 return 1;
1884 return 0; 1876 return 0;
1885 } 1877 }
1886 1878
1887 static unsigned long ext3_bg_num_gdb_nometa(struct super_block *sb, int group) 1879 static unsigned long ext3_bg_num_gdb_nometa(struct super_block *sb, int group)
1888 { 1880 {
1889 return ext3_bg_has_super(sb, group) ? EXT3_SB(sb)->s_gdb_count : 0; 1881 return ext3_bg_has_super(sb, group) ? EXT3_SB(sb)->s_gdb_count : 0;
1890 } 1882 }
1891 1883
1892 /** 1884 /**
1893 * ext3_bg_num_gdb - number of blocks used by the group table in group 1885 * ext3_bg_num_gdb - number of blocks used by the group table in group
1894 * @sb: superblock for filesystem 1886 * @sb: superblock for filesystem
1895 * @group: group number to check 1887 * @group: group number to check
1896 * 1888 *
1897 * Return the number of blocks used by the group descriptor table 1889 * Return the number of blocks used by the group descriptor table
1898 * (primary or backup) in this group. In the future there may be a 1890 * (primary or backup) in this group. In the future there may be a
1899 * different number of descriptor blocks in each group. 1891 * different number of descriptor blocks in each group.
1900 */ 1892 */
1901 unsigned long ext3_bg_num_gdb(struct super_block *sb, int group) 1893 unsigned long ext3_bg_num_gdb(struct super_block *sb, int group)
1902 { 1894 {
1903 unsigned long first_meta_bg = 1895 unsigned long first_meta_bg =
1904 le32_to_cpu(EXT3_SB(sb)->s_es->s_first_meta_bg); 1896 le32_to_cpu(EXT3_SB(sb)->s_es->s_first_meta_bg);
1905 unsigned long metagroup = group / EXT3_DESC_PER_BLOCK(sb); 1897 unsigned long metagroup = group / EXT3_DESC_PER_BLOCK(sb);
1906 1898
1907 if (!EXT3_HAS_INCOMPAT_FEATURE(sb,EXT3_FEATURE_INCOMPAT_META_BG) || 1899 if (!EXT3_HAS_INCOMPAT_FEATURE(sb,EXT3_FEATURE_INCOMPAT_META_BG) ||
1908 metagroup < first_meta_bg) 1900 metagroup < first_meta_bg)
1909 return ext3_bg_num_gdb_nometa(sb,group); 1901 return ext3_bg_num_gdb_nometa(sb,group);
1910 1902
1911 return ext3_bg_num_gdb_meta(sb,group); 1903 return ext3_bg_num_gdb_meta(sb,group);
1912 1904
1913 } 1905 }
1914 1906
1915 /** 1907 /**
1916 * ext3_trim_all_free -- function to trim all free space in alloc. group 1908 * ext3_trim_all_free -- function to trim all free space in alloc. group
1917 * @sb: super block for file system 1909 * @sb: super block for file system
1918 * @group: allocation group to trim 1910 * @group: allocation group to trim
1919 * @start: first group block to examine 1911 * @start: first group block to examine
1920 * @max: last group block to examine 1912 * @max: last group block to examine
1921 * @gdp: allocation group description structure 1913 * @gdp: allocation group description structure
1922 * @minblocks: minimum extent block count 1914 * @minblocks: minimum extent block count
1923 * 1915 *
1924 * ext3_trim_all_free walks through group's block bitmap searching for free 1916 * ext3_trim_all_free walks through group's block bitmap searching for free
1925 * blocks. When the free block is found, it tries to allocate this block and 1917 * blocks. When the free block is found, it tries to allocate this block and
1926 * consequent free block to get the biggest free extent possible, until it 1918 * consequent free block to get the biggest free extent possible, until it
1927 * reaches any used block. Then issue a TRIM command on this extent and free 1919 * reaches any used block. Then issue a TRIM command on this extent and free
1928 * the extent in the block bitmap. This is done until whole group is scanned. 1920 * the extent in the block bitmap. This is done until whole group is scanned.
1929 */ 1921 */
1930 static ext3_grpblk_t ext3_trim_all_free(struct super_block *sb, 1922 static ext3_grpblk_t ext3_trim_all_free(struct super_block *sb,
1931 unsigned int group, 1923 unsigned int group,
1932 ext3_grpblk_t start, ext3_grpblk_t max, 1924 ext3_grpblk_t start, ext3_grpblk_t max,
1933 ext3_grpblk_t minblocks) 1925 ext3_grpblk_t minblocks)
1934 { 1926 {
1935 handle_t *handle; 1927 handle_t *handle;
1936 ext3_grpblk_t next, free_blocks, bit, freed, count = 0; 1928 ext3_grpblk_t next, free_blocks, bit, freed, count = 0;
1937 ext3_fsblk_t discard_block; 1929 ext3_fsblk_t discard_block;
1938 struct ext3_sb_info *sbi; 1930 struct ext3_sb_info *sbi;
1939 struct buffer_head *gdp_bh, *bitmap_bh = NULL; 1931 struct buffer_head *gdp_bh, *bitmap_bh = NULL;
1940 struct ext3_group_desc *gdp; 1932 struct ext3_group_desc *gdp;
1941 int err = 0, ret = 0; 1933 int err = 0, ret = 0;
1942 1934
1943 /* 1935 /*
1944 * We will update one block bitmap, and one group descriptor 1936 * We will update one block bitmap, and one group descriptor
1945 */ 1937 */
1946 handle = ext3_journal_start_sb(sb, 2); 1938 handle = ext3_journal_start_sb(sb, 2);
1947 if (IS_ERR(handle)) 1939 if (IS_ERR(handle))
1948 return PTR_ERR(handle); 1940 return PTR_ERR(handle);
1949 1941
1950 bitmap_bh = read_block_bitmap(sb, group); 1942 bitmap_bh = read_block_bitmap(sb, group);
1951 if (!bitmap_bh) { 1943 if (!bitmap_bh) {
1952 err = -EIO; 1944 err = -EIO;
1953 goto err_out; 1945 goto err_out;
1954 } 1946 }
1955 1947
1956 BUFFER_TRACE(bitmap_bh, "getting undo access"); 1948 BUFFER_TRACE(bitmap_bh, "getting undo access");
1957 err = ext3_journal_get_undo_access(handle, bitmap_bh); 1949 err = ext3_journal_get_undo_access(handle, bitmap_bh);
1958 if (err) 1950 if (err)
1959 goto err_out; 1951 goto err_out;
1960 1952
1961 gdp = ext3_get_group_desc(sb, group, &gdp_bh); 1953 gdp = ext3_get_group_desc(sb, group, &gdp_bh);
1962 if (!gdp) { 1954 if (!gdp) {
1963 err = -EIO; 1955 err = -EIO;
1964 goto err_out; 1956 goto err_out;
1965 } 1957 }
1966 1958
1967 BUFFER_TRACE(gdp_bh, "get_write_access"); 1959 BUFFER_TRACE(gdp_bh, "get_write_access");
1968 err = ext3_journal_get_write_access(handle, gdp_bh); 1960 err = ext3_journal_get_write_access(handle, gdp_bh);
1969 if (err) 1961 if (err)
1970 goto err_out; 1962 goto err_out;
1971 1963
1972 free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); 1964 free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
1973 sbi = EXT3_SB(sb); 1965 sbi = EXT3_SB(sb);
1974 1966
1975 /* Walk through the whole group */ 1967 /* Walk through the whole group */
1976 while (start <= max) { 1968 while (start <= max) {
1977 start = bitmap_search_next_usable_block(start, bitmap_bh, max); 1969 start = bitmap_search_next_usable_block(start, bitmap_bh, max);
1978 if (start < 0) 1970 if (start < 0)
1979 break; 1971 break;
1980 next = start; 1972 next = start;
1981 1973
1982 /* 1974 /*
1983 * Allocate contiguous free extents by setting bits in the 1975 * Allocate contiguous free extents by setting bits in the
1984 * block bitmap 1976 * block bitmap
1985 */ 1977 */
1986 while (next <= max 1978 while (next <= max
1987 && claim_block(sb_bgl_lock(sbi, group), 1979 && claim_block(sb_bgl_lock(sbi, group),
1988 next, bitmap_bh)) { 1980 next, bitmap_bh)) {
1989 next++; 1981 next++;
1990 } 1982 }
1991 1983
1992 /* We did not claim any blocks */ 1984 /* We did not claim any blocks */
1993 if (next == start) 1985 if (next == start)
1994 continue; 1986 continue;
1995 1987
1996 discard_block = (ext3_fsblk_t)start + 1988 discard_block = (ext3_fsblk_t)start +
1997 ext3_group_first_block_no(sb, group); 1989 ext3_group_first_block_no(sb, group);
1998 1990
1999 /* Update counters */ 1991 /* Update counters */
2000 spin_lock(sb_bgl_lock(sbi, group)); 1992 spin_lock(sb_bgl_lock(sbi, group));
2001 le16_add_cpu(&gdp->bg_free_blocks_count, start - next); 1993 le16_add_cpu(&gdp->bg_free_blocks_count, start - next);
2002 spin_unlock(sb_bgl_lock(sbi, group)); 1994 spin_unlock(sb_bgl_lock(sbi, group));
2003 percpu_counter_sub(&sbi->s_freeblocks_counter, next - start); 1995 percpu_counter_sub(&sbi->s_freeblocks_counter, next - start);
2004 1996
2005 free_blocks -= next - start; 1997 free_blocks -= next - start;
2006 /* Do not issue a TRIM on extents smaller than minblocks */ 1998 /* Do not issue a TRIM on extents smaller than minblocks */
2007 if ((next - start) < minblocks) 1999 if ((next - start) < minblocks)
2008 goto free_extent; 2000 goto free_extent;
2009 2001
2010 trace_ext3_discard_blocks(sb, discard_block, next - start); 2002 trace_ext3_discard_blocks(sb, discard_block, next - start);
2011 /* Send the TRIM command down to the device */ 2003 /* Send the TRIM command down to the device */
2012 err = sb_issue_discard(sb, discard_block, next - start, 2004 err = sb_issue_discard(sb, discard_block, next - start,
2013 GFP_NOFS, 0); 2005 GFP_NOFS, 0);
2014 count += (next - start); 2006 count += (next - start);
2015 free_extent: 2007 free_extent:
2016 freed = 0; 2008 freed = 0;
2017 2009
2018 /* 2010 /*
2019 * Clear bits in the bitmap 2011 * Clear bits in the bitmap
2020 */ 2012 */
2021 for (bit = start; bit < next; bit++) { 2013 for (bit = start; bit < next; bit++) {
2022 BUFFER_TRACE(bitmap_bh, "clear bit"); 2014 BUFFER_TRACE(bitmap_bh, "clear bit");
2023 if (!ext3_clear_bit_atomic(sb_bgl_lock(sbi, group), 2015 if (!ext3_clear_bit_atomic(sb_bgl_lock(sbi, group),
2024 bit, bitmap_bh->b_data)) { 2016 bit, bitmap_bh->b_data)) {
2025 ext3_error(sb, __func__, 2017 ext3_error(sb, __func__,
2026 "bit already cleared for block "E3FSBLK, 2018 "bit already cleared for block "E3FSBLK,
2027 (unsigned long)bit); 2019 (unsigned long)bit);
2028 BUFFER_TRACE(bitmap_bh, "bit already cleared"); 2020 BUFFER_TRACE(bitmap_bh, "bit already cleared");
2029 } else { 2021 } else {
2030 freed++; 2022 freed++;
2031 } 2023 }
2032 } 2024 }
2033 2025
2034 /* Update couters */ 2026 /* Update couters */
2035 spin_lock(sb_bgl_lock(sbi, group)); 2027 spin_lock(sb_bgl_lock(sbi, group));
2036 le16_add_cpu(&gdp->bg_free_blocks_count, freed); 2028 le16_add_cpu(&gdp->bg_free_blocks_count, freed);
2037 spin_unlock(sb_bgl_lock(sbi, group)); 2029 spin_unlock(sb_bgl_lock(sbi, group));
2038 percpu_counter_add(&sbi->s_freeblocks_counter, freed); 2030 percpu_counter_add(&sbi->s_freeblocks_counter, freed);
2039 2031
2040 start = next; 2032 start = next;
2041 if (err < 0) { 2033 if (err < 0) {
2042 if (err != -EOPNOTSUPP) 2034 if (err != -EOPNOTSUPP)
2043 ext3_warning(sb, __func__, "Discard command " 2035 ext3_warning(sb, __func__, "Discard command "
2044 "returned error %d\n", err); 2036 "returned error %d\n", err);
2045 break; 2037 break;
2046 } 2038 }
2047 2039
2048 if (fatal_signal_pending(current)) { 2040 if (fatal_signal_pending(current)) {
2049 err = -ERESTARTSYS; 2041 err = -ERESTARTSYS;
2050 break; 2042 break;
2051 } 2043 }
2052 2044
2053 cond_resched(); 2045 cond_resched();
2054 2046
2055 /* No more suitable extents */ 2047 /* No more suitable extents */
2056 if (free_blocks < minblocks) 2048 if (free_blocks < minblocks)
2057 break; 2049 break;
2058 } 2050 }
2059 2051
2060 /* We dirtied the bitmap block */ 2052 /* We dirtied the bitmap block */
2061 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); 2053 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
2062 ret = ext3_journal_dirty_metadata(handle, bitmap_bh); 2054 ret = ext3_journal_dirty_metadata(handle, bitmap_bh);
2063 if (!err) 2055 if (!err)
2064 err = ret; 2056 err = ret;
2065 2057
2066 /* And the group descriptor block */ 2058 /* And the group descriptor block */
2067 BUFFER_TRACE(gdp_bh, "dirtied group descriptor block"); 2059 BUFFER_TRACE(gdp_bh, "dirtied group descriptor block");
2068 ret = ext3_journal_dirty_metadata(handle, gdp_bh); 2060 ret = ext3_journal_dirty_metadata(handle, gdp_bh);
2069 if (!err) 2061 if (!err)
2070 err = ret; 2062 err = ret;
2071 2063
2072 ext3_debug("trimmed %d blocks in the group %d\n", 2064 ext3_debug("trimmed %d blocks in the group %d\n",
2073 count, group); 2065 count, group);
2074 2066
2075 err_out: 2067 err_out:
2076 if (err) 2068 if (err)
2077 count = err; 2069 count = err;
2078 ext3_journal_stop(handle); 2070 ext3_journal_stop(handle);
2079 brelse(bitmap_bh); 2071 brelse(bitmap_bh);
2080 2072
2081 return count; 2073 return count;
2082 } 2074 }
2083 2075
2084 /** 2076 /**
2085 * ext3_trim_fs() -- trim ioctl handle function 2077 * ext3_trim_fs() -- trim ioctl handle function
2086 * @sb: superblock for filesystem 2078 * @sb: superblock for filesystem
2087 * @start: First Byte to trim 2079 * @start: First Byte to trim
2088 * @len: number of Bytes to trim from start 2080 * @len: number of Bytes to trim from start
2089 * @minlen: minimum extent length in Bytes 2081 * @minlen: minimum extent length in Bytes
2090 * 2082 *
2091 * ext3_trim_fs goes through all allocation groups containing Bytes from 2083 * ext3_trim_fs goes through all allocation groups containing Bytes from
2092 * start to start+len. For each such a group ext3_trim_all_free function 2084 * start to start+len. For each such a group ext3_trim_all_free function
2093 * is invoked to trim all free space. 2085 * is invoked to trim all free space.
2094 */ 2086 */
2095 int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range) 2087 int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range)
2096 { 2088 {
2097 ext3_grpblk_t last_block, first_block; 2089 ext3_grpblk_t last_block, first_block;
2098 unsigned long group, first_group, last_group; 2090 unsigned long group, first_group, last_group;
2099 struct ext3_group_desc *gdp; 2091 struct ext3_group_desc *gdp;
2100 struct ext3_super_block *es = EXT3_SB(sb)->s_es; 2092 struct ext3_super_block *es = EXT3_SB(sb)->s_es;
2101 uint64_t start, minlen, end, trimmed = 0; 2093 uint64_t start, minlen, end, trimmed = 0;
2102 ext3_fsblk_t first_data_blk = 2094 ext3_fsblk_t first_data_blk =
2103 le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block); 2095 le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block);
2104 ext3_fsblk_t max_blks = le32_to_cpu(es->s_blocks_count); 2096 ext3_fsblk_t max_blks = le32_to_cpu(es->s_blocks_count);
2105 int ret = 0; 2097 int ret = 0;
2106 2098
2107 start = range->start >> sb->s_blocksize_bits; 2099 start = range->start >> sb->s_blocksize_bits;
2108 end = start + (range->len >> sb->s_blocksize_bits) - 1; 2100 end = start + (range->len >> sb->s_blocksize_bits) - 1;
2109 minlen = range->minlen >> sb->s_blocksize_bits; 2101 minlen = range->minlen >> sb->s_blocksize_bits;
2110 2102
2111 if (unlikely(minlen > EXT3_BLOCKS_PER_GROUP(sb)) || 2103 if (unlikely(minlen > EXT3_BLOCKS_PER_GROUP(sb)) ||
2112 unlikely(start >= max_blks)) 2104 unlikely(start >= max_blks))
2113 return -EINVAL; 2105 return -EINVAL;
2114 if (end >= max_blks) 2106 if (end >= max_blks)
2115 end = max_blks - 1; 2107 end = max_blks - 1;
2116 if (end <= first_data_blk) 2108 if (end <= first_data_blk)
2117 goto out; 2109 goto out;
2118 if (start < first_data_blk) 2110 if (start < first_data_blk)
2119 start = first_data_blk; 2111 start = first_data_blk;
2120 2112
2121 smp_rmb(); 2113 smp_rmb();
2122 2114
2123 /* Determine first and last group to examine based on start and len */ 2115 /* Determine first and last group to examine based on start and len */
2124 ext3_get_group_no_and_offset(sb, (ext3_fsblk_t) start, 2116 ext3_get_group_no_and_offset(sb, (ext3_fsblk_t) start,
2125 &first_group, &first_block); 2117 &first_group, &first_block);
2126 ext3_get_group_no_and_offset(sb, (ext3_fsblk_t) end, 2118 ext3_get_group_no_and_offset(sb, (ext3_fsblk_t) end,
2127 &last_group, &last_block); 2119 &last_group, &last_block);
2128 2120
2129 /* end now represents the last block to discard in this group */ 2121 /* end now represents the last block to discard in this group */
2130 end = EXT3_BLOCKS_PER_GROUP(sb) - 1; 2122 end = EXT3_BLOCKS_PER_GROUP(sb) - 1;
2131 2123
2132 for (group = first_group; group <= last_group; group++) { 2124 for (group = first_group; group <= last_group; group++) {
2133 gdp = ext3_get_group_desc(sb, group, NULL); 2125 gdp = ext3_get_group_desc(sb, group, NULL);
2134 if (!gdp) 2126 if (!gdp)
2135 break; 2127 break;
2136 2128
2137 /* 2129 /*
2138 * For all the groups except the last one, last block will 2130 * For all the groups except the last one, last block will
2139 * always be EXT3_BLOCKS_PER_GROUP(sb)-1, so we only need to 2131 * always be EXT3_BLOCKS_PER_GROUP(sb)-1, so we only need to
2140 * change it for the last group, note that last_block is 2132 * change it for the last group, note that last_block is
2141 * already computed earlier by ext3_get_group_no_and_offset() 2133 * already computed earlier by ext3_get_group_no_and_offset()
2142 */ 2134 */
2143 if (group == last_group) 2135 if (group == last_group)
2144 end = last_block; 2136 end = last_block;
2145 2137
2146 if (le16_to_cpu(gdp->bg_free_blocks_count) >= minlen) { 2138 if (le16_to_cpu(gdp->bg_free_blocks_count) >= minlen) {
2147 ret = ext3_trim_all_free(sb, group, first_block, 2139 ret = ext3_trim_all_free(sb, group, first_block,
2148 end, minlen); 2140 end, minlen);
2149 if (ret < 0) 2141 if (ret < 0)
2150 break; 2142 break;
2151 trimmed += ret; 2143 trimmed += ret;
2152 } 2144 }
2153 2145
2154 /* 2146 /*
2155 * For every group except the first one, we are sure 2147 * For every group except the first one, we are sure
2156 * that the first block to discard will be block #0. 2148 * that the first block to discard will be block #0.
2157 */ 2149 */
2158 first_block = 0; 2150 first_block = 0;
2159 } 2151 }
2160 2152
2161 if (ret > 0) 2153 if (ret > 0)
2162 ret = 0; 2154 ret = 0;
2163 2155
2164 out: 2156 out:
2165 range->len = trimmed * sb->s_blocksize; 2157 range->len = trimmed * sb->s_blocksize;
2166 return ret; 2158 return ret;
2167 } 2159 }
2168 2160
1 /* 1 /*
2 * linux/fs/ext3/bitmap.c 2 * linux/fs/ext3/bitmap.c
3 * 3 *
4 * Copyright (C) 1992, 1993, 1994, 1995 4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr) 5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal 6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI) 7 * Universite Pierre et Marie Curie (Paris VI)
8 */ 8 */
9 9
10 #include <linux/buffer_head.h> 10 #include "ext3.h"
11 #include <linux/jbd.h>
12 #include <linux/ext3_fs.h>
13 11
14 #ifdef EXT3FS_DEBUG 12 #ifdef EXT3FS_DEBUG
15 13
16 static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; 14 static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0};
17 15
18 unsigned long ext3_count_free (struct buffer_head * map, unsigned int numchars) 16 unsigned long ext3_count_free (struct buffer_head * map, unsigned int numchars)
19 { 17 {
20 unsigned int i; 18 unsigned int i;
21 unsigned long sum = 0; 19 unsigned long sum = 0;
22 20
23 if (!map) 21 if (!map)
24 return (0); 22 return (0);
25 for (i = 0; i < numchars; i++) 23 for (i = 0; i < numchars; i++)
26 sum += nibblemap[map->b_data[i] & 0xf] + 24 sum += nibblemap[map->b_data[i] & 0xf] +
27 nibblemap[(map->b_data[i] >> 4) & 0xf]; 25 nibblemap[(map->b_data[i] >> 4) & 0xf];
28 return (sum); 26 return (sum);
29 } 27 }
30 28
31 #endif /* EXT3FS_DEBUG */ 29 #endif /* EXT3FS_DEBUG */
32 30
33 31
1 /* 1 /*
2 * linux/fs/ext3/dir.c 2 * linux/fs/ext3/dir.c
3 * 3 *
4 * Copyright (C) 1992, 1993, 1994, 1995 4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr) 5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal 6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI) 7 * Universite Pierre et Marie Curie (Paris VI)
8 * 8 *
9 * from 9 * from
10 * 10 *
11 * linux/fs/minix/dir.c 11 * linux/fs/minix/dir.c
12 * 12 *
13 * Copyright (C) 1991, 1992 Linus Torvalds 13 * Copyright (C) 1991, 1992 Linus Torvalds
14 * 14 *
15 * ext3 directory handling functions 15 * ext3 directory handling functions
16 * 16 *
17 * Big-endian to little-endian byte-swapping/bitmaps by 17 * Big-endian to little-endian byte-swapping/bitmaps by
18 * David S. Miller (davem@caip.rutgers.edu), 1995 18 * David S. Miller (davem@caip.rutgers.edu), 1995
19 * 19 *
20 * Hash Tree Directory indexing (c) 2001 Daniel Phillips 20 * Hash Tree Directory indexing (c) 2001 Daniel Phillips
21 * 21 *
22 */ 22 */
23 23
24 #include <linux/fs.h> 24 #include "ext3.h"
25 #include <linux/jbd.h>
26 #include <linux/ext3_fs.h>
27 #include <linux/buffer_head.h>
28 #include <linux/slab.h>
29 #include <linux/rbtree.h>
30 25
31 static unsigned char ext3_filetype_table[] = { 26 static unsigned char ext3_filetype_table[] = {
32 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK 27 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
33 }; 28 };
34 29
/* readdir implementations defined later in this file. */
static int ext3_readdir(struct file *, void *, filldir_t);
static int ext3_dx_readdir(struct file * filp,
			   void * dirent, filldir_t filldir);
static int ext3_release_dir (struct inode * inode,
			     struct file * filp);

/*
 * File operations installed by the VFS for ext3 directory inodes.
 * ext3_readdir dispatches to ext3_dx_readdir for hash-indexed
 * directories; ext3_release_dir frees the cached htree readdir state.
 */
const struct file_operations ext3_dir_operations = {
	.llseek		= generic_file_llseek,
	.read		= generic_read_dir,
	.readdir	= ext3_readdir,		/* we take BKL. needed?*/
	.unlocked_ioctl = ext3_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= ext3_compat_ioctl,
#endif
	.fsync		= ext3_sync_file,	/* BKL held */
	.release	= ext3_release_dir,
};
52 47
53 48
54 static unsigned char get_dtype(struct super_block *sb, int filetype) 49 static unsigned char get_dtype(struct super_block *sb, int filetype)
55 { 50 {
56 if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_FILETYPE) || 51 if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_FILETYPE) ||
57 (filetype >= EXT3_FT_MAX)) 52 (filetype >= EXT3_FT_MAX))
58 return DT_UNKNOWN; 53 return DT_UNKNOWN;
59 54
60 return (ext3_filetype_table[filetype]); 55 return (ext3_filetype_table[filetype]);
61 } 56 }
62 57
63 58
/*
 * Validate a single on-disk directory entry before trusting it.
 *
 * The checks, in order: rec_len at least the minimal entry size,
 * rec_len 4-byte aligned, rec_len large enough for the stored name,
 * entry not extending past the end of the directory block, and inode
 * number within the filesystem's inode count.
 *
 * On failure, ext3_error() is logged attributed to @function.
 * Returns 1 if the entry looks valid, 0 otherwise.
 */
int ext3_check_dir_entry (const char * function, struct inode * dir,
			  struct ext3_dir_entry_2 * de,
			  struct buffer_head * bh,
			  unsigned long offset)
{
	const char * error_msg = NULL;
	const int rlen = ext3_rec_len_from_disk(de->rec_len);

	if (unlikely(rlen < EXT3_DIR_REC_LEN(1)))
		error_msg = "rec_len is smaller than minimal";
	else if (unlikely(rlen % 4 != 0))
		error_msg = "rec_len % 4 != 0";
	else if (unlikely(rlen < EXT3_DIR_REC_LEN(de->name_len)))
		error_msg = "rec_len is too small for name_len";
	else if (unlikely((((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize)))
		error_msg = "directory entry across blocks";
	else if (unlikely(le32_to_cpu(de->inode) >
			le32_to_cpu(EXT3_SB(dir->i_sb)->s_es->s_inodes_count)))
		error_msg = "inode out of bounds";

	if (unlikely(error_msg != NULL))
		ext3_error (dir->i_sb, function,
			"bad entry in directory #%lu: %s - "
			"offset=%lu, inode=%lu, rec_len=%d, name_len=%d",
			dir->i_ino, error_msg, offset,
			(unsigned long) le32_to_cpu(de->inode),
			rlen, de->name_len);

	return error_msg == NULL ? 1 : 0;
}
94 89
/*
 * readdir for ext3 directories.
 *
 * If the directory carries the htree index flag (or is a single-block
 * directory on a DIR_INDEX filesystem), the work is handed to
 * ext3_dx_readdir(); ERR_BAD_DX_DIR from that path clears the index
 * flag and falls back to the linear scan below.
 *
 * The linear scan walks the directory block by block, revalidating the
 * in-block offset whenever the inode version shows the directory
 * changed under us, and calls filldir for each live (non-zero inode)
 * entry.  I/O errors on directory blocks are reported once and then
 * skipped so users can still recover the readable remainder.
 */
static int ext3_readdir(struct file * filp,
			 void * dirent, filldir_t filldir)
{
	int error = 0;
	unsigned long offset;
	int i, stored;
	struct ext3_dir_entry_2 *de;
	struct super_block *sb;
	int err;
	struct inode *inode = filp->f_path.dentry->d_inode;
	int ret = 0;
	int dir_has_error = 0;	/* report a hole only once per call */

	sb = inode->i_sb;

	if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb,
				    EXT3_FEATURE_COMPAT_DIR_INDEX) &&
	    ((EXT3_I(inode)->i_flags & EXT3_INDEX_FL) ||
	     ((inode->i_size >> sb->s_blocksize_bits) == 1))) {
		err = ext3_dx_readdir(filp, dirent, filldir);
		if (err != ERR_BAD_DX_DIR) {
			ret = err;
			goto out;
		}
		/*
		 * We don't set the inode dirty flag since it's not
		 * critical that it get flushed back to the disk.
		 */
		EXT3_I(filp->f_path.dentry->d_inode)->i_flags &= ~EXT3_INDEX_FL;
	}
	stored = 0;
	offset = filp->f_pos & (sb->s_blocksize - 1);

	while (!error && !stored && filp->f_pos < inode->i_size) {
		unsigned long blk = filp->f_pos >> EXT3_BLOCK_SIZE_BITS(sb);
		struct buffer_head map_bh;
		struct buffer_head *bh = NULL;

		map_bh.b_state = 0;
		err = ext3_get_blocks_handle(NULL, inode, blk, 1, &map_bh, 0);
		if (err > 0) {
			/* kick off readahead before reading the block */
			pgoff_t index = map_bh.b_blocknr >>
					(PAGE_CACHE_SHIFT - inode->i_blkbits);
			if (!ra_has_index(&filp->f_ra, index))
				page_cache_sync_readahead(
					sb->s_bdev->bd_inode->i_mapping,
					&filp->f_ra, filp,
					index, 1);
			filp->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
			bh = ext3_bread(NULL, inode, blk, 0, &err);
		}

		/*
		 * We ignore I/O errors on directories so users have a chance
		 * of recovering data when there's a bad sector
		 */
		if (!bh) {
			if (!dir_has_error) {
				ext3_error(sb, __func__, "directory #%lu "
					"contains a hole at offset %lld",
					inode->i_ino, filp->f_pos);
				dir_has_error = 1;
			}
			/* corrupt size? Maybe no more blocks to read */
			if (filp->f_pos > inode->i_blocks << 9)
				break;
			filp->f_pos += sb->s_blocksize - offset;
			continue;
		}

revalidate:
		/* If the dir block has changed since the last call to
		 * readdir(2), then we might be pointing to an invalid
		 * dirent right now.  Scan from the start of the block
		 * to make sure. */
		if (filp->f_version != inode->i_version) {
			for (i = 0; i < sb->s_blocksize && i < offset; ) {
				de = (struct ext3_dir_entry_2 *)
					(bh->b_data + i);
				/* It's too expensive to do a full
				 * dirent test each time round this
				 * loop, but we do have to test at
				 * least that it is non-zero.  A
				 * failure will be detected in the
				 * dirent test below. */
				if (ext3_rec_len_from_disk(de->rec_len) <
						EXT3_DIR_REC_LEN(1))
					break;
				i += ext3_rec_len_from_disk(de->rec_len);
			}
			offset = i;
			filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1))
				| offset;
			filp->f_version = inode->i_version;
		}

		while (!error && filp->f_pos < inode->i_size
		       && offset < sb->s_blocksize) {
			de = (struct ext3_dir_entry_2 *) (bh->b_data + offset);
			if (!ext3_check_dir_entry ("ext3_readdir", inode, de,
						   bh, offset)) {
				/* On error, skip the f_pos to the
				   next block. */
				filp->f_pos = (filp->f_pos |
						(sb->s_blocksize - 1)) + 1;
				brelse (bh);
				ret = stored;
				goto out;
			}
			offset += ext3_rec_len_from_disk(de->rec_len);
			if (le32_to_cpu(de->inode)) {
				/* We might block in the next section
				 * if the data destination is
				 * currently swapped out.  So, use a
				 * version stamp to detect whether or
				 * not the directory has been modified
				 * during the copy operation.
				 */
				u64 version = filp->f_version;

				error = filldir(dirent, de->name,
						de->name_len,
						filp->f_pos,
						le32_to_cpu(de->inode),
						get_dtype(sb, de->file_type));
				if (error)
					break;
				if (version != filp->f_version)
					goto revalidate;
				stored ++;
			}
			filp->f_pos += ext3_rec_len_from_disk(de->rec_len);
		}
		offset = 0;
		brelse (bh);
	}
out:
	return ret;
}
234 229
235 /* 230 /*
236 * These functions convert from the major/minor hash to an f_pos 231 * These functions convert from the major/minor hash to an f_pos
237 * value. 232 * value.
238 * 233 *
239 * Currently we only use major hash numer. This is unfortunate, but 234 * Currently we only use major hash numer. This is unfortunate, but
240 * on 32-bit machines, the same VFS interface is used for lseek and 235 * on 32-bit machines, the same VFS interface is used for lseek and
241 * llseek, so if we use the 64 bit offset, then the 32-bit versions of 236 * llseek, so if we use the 64 bit offset, then the 32-bit versions of
242 * lseek/telldir/seekdir will blow out spectacularly, and from within 237 * lseek/telldir/seekdir will blow out spectacularly, and from within
243 * the ext2 low-level routine, we don't know if we're being called by 238 * the ext2 low-level routine, we don't know if we're being called by
244 * a 64-bit version of the system call or the 32-bit version of the 239 * a 64-bit version of the system call or the 32-bit version of the
245 * system call. Worse yet, NFSv2 only allows for a 32-bit readdir 240 * system call. Worse yet, NFSv2 only allows for a 32-bit readdir
246 * cookie. Sigh. 241 * cookie. Sigh.
247 */ 242 */
/* Parenthesize macro arguments so expression arguments expand safely. */
#define hash2pos(major, minor)	((major) >> 1)
#define pos2maj_hash(pos)	(((pos) << 1) & 0xffffffff)
#define pos2min_hash(pos)	(0)
251 246
252 /* 247 /*
253 * This structure holds the nodes of the red-black tree used to store 248 * This structure holds the nodes of the red-black tree used to store
254 * the directory entry in hash order. 249 * the directory entry in hash order.
255 */ 250 */
256 struct fname { 251 struct fname {
257 __u32 hash; 252 __u32 hash;
258 __u32 minor_hash; 253 __u32 minor_hash;
259 struct rb_node rb_hash; 254 struct rb_node rb_hash;
260 struct fname *next; 255 struct fname *next;
261 __u32 inode; 256 __u32 inode;
262 __u8 name_len; 257 __u8 name_len;
263 __u8 file_type; 258 __u8 file_type;
264 char name[0]; 259 char name[0];
265 }; 260 };
266 261
/*
 * This function implements a non-recursive way of freeing all of the
 * nodes in the red-black tree: walk down to a leaf, free it (and its
 * hash-collision chain), clear the parent's link to it, then retry
 * from the parent until the root itself has been freed.
 */
static void free_rb_tree_fname(struct rb_root *root)
{
	struct rb_node	*n = root->rb_node;
	struct rb_node	*parent;
	struct fname	*fname;

	while (n) {
		/* Do the node's children first */
		if (n->rb_left) {
			n = n->rb_left;
			continue;
		}
		if (n->rb_right) {
			n = n->rb_right;
			continue;
		}
		/*
		 * The node has no children; free it, and then zero
		 * out parent's link to it.  Finally go to the
		 * beginning of the loop and try to free the parent
		 * node.
		 */
		parent = rb_parent(n);
		fname = rb_entry(n, struct fname, rb_hash);
		/* free every entry on this node's collision chain */
		while (fname) {
			struct fname * old = fname;
			fname = fname->next;
			kfree (old);
		}
		if (!parent)
			*root = RB_ROOT;
		else if (parent->rb_left == n)
			parent->rb_left = NULL;
		else if (parent->rb_right == n)
			parent->rb_right = NULL;
		n = parent;
	}
}
309 304
310 305
311 static struct dir_private_info *ext3_htree_create_dir_info(loff_t pos) 306 static struct dir_private_info *ext3_htree_create_dir_info(loff_t pos)
312 { 307 {
313 struct dir_private_info *p; 308 struct dir_private_info *p;
314 309
315 p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL); 310 p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL);
316 if (!p) 311 if (!p)
317 return NULL; 312 return NULL;
318 p->curr_hash = pos2maj_hash(pos); 313 p->curr_hash = pos2maj_hash(pos);
319 p->curr_minor_hash = pos2min_hash(pos); 314 p->curr_minor_hash = pos2min_hash(pos);
320 return p; 315 return p;
321 } 316 }
322 317
/*
 * Tear down the per-open readdir state: free every cached dirent in
 * the rb tree, then the info structure itself.
 */
void ext3_htree_free_dir_info(struct dir_private_info *p)
{
	free_rb_tree_fname(&p->root);
	kfree(p);
}
328 323
/*
 * Given a directory entry, enter it into the fname rb tree.
 *
 * Nodes are ordered by (hash, minor_hash); an entry whose hashes match
 * an existing node exactly is chained onto that node via ->next rather
 * than inserted into the tree.  Returns 0 on success or -ENOMEM if the
 * node cannot be allocated.
 */
int ext3_htree_store_dirent(struct file *dir_file, __u32 hash,
			     __u32 minor_hash,
			     struct ext3_dir_entry_2 *dirent)
{
	struct rb_node **p, *parent = NULL;
	struct fname * fname, *new_fn;
	struct dir_private_info *info;
	int len;

	info = (struct dir_private_info *) dir_file->private_data;
	p = &info->root.rb_node;

	/* Create and allocate the fname structure */
	len = sizeof(struct fname) + dirent->name_len + 1;
	new_fn = kzalloc(len, GFP_KERNEL);
	if (!new_fn)
		return -ENOMEM;
	new_fn->hash = hash;
	new_fn->minor_hash = minor_hash;
	new_fn->inode = le32_to_cpu(dirent->inode);
	new_fn->name_len = dirent->name_len;
	new_fn->file_type = dirent->file_type;
	memcpy(new_fn->name, dirent->name, dirent->name_len);
	new_fn->name[dirent->name_len] = 0;

	while (*p) {
		parent = *p;
		fname = rb_entry(parent, struct fname, rb_hash);

		/*
		 * If the hash and minor hash match up, then we put
		 * them on a linked list.  This rarely happens...
		 */
		if ((new_fn->hash == fname->hash) &&
		    (new_fn->minor_hash == fname->minor_hash)) {
			new_fn->next = fname->next;
			fname->next = new_fn;
			return 0;
		}

		if (new_fn->hash < fname->hash)
			p = &(*p)->rb_left;
		else if (new_fn->hash > fname->hash)
			p = &(*p)->rb_right;
		else if (new_fn->minor_hash < fname->minor_hash)
			p = &(*p)->rb_left;
		else /* if (new_fn->minor_hash > fname->minor_hash) */
			p = &(*p)->rb_right;
	}

	rb_link_node(&new_fn->rb_hash, parent, p);
	rb_insert_color(&new_fn->rb_hash, &info->root);
	return 0;
}
386 381
387 382
388 383
389 /* 384 /*
390 * This is a helper function for ext3_dx_readdir. It calls filldir 385 * This is a helper function for ext3_dx_readdir. It calls filldir
391 * for all entres on the fname linked list. (Normally there is only 386 * for all entres on the fname linked list. (Normally there is only
392 * one entry on the linked list, unless there are 62 bit hash collisions.) 387 * one entry on the linked list, unless there are 62 bit hash collisions.)
393 */ 388 */
394 static int call_filldir(struct file * filp, void * dirent, 389 static int call_filldir(struct file * filp, void * dirent,
395 filldir_t filldir, struct fname *fname) 390 filldir_t filldir, struct fname *fname)
396 { 391 {
397 struct dir_private_info *info = filp->private_data; 392 struct dir_private_info *info = filp->private_data;
398 loff_t curr_pos; 393 loff_t curr_pos;
399 struct inode *inode = filp->f_path.dentry->d_inode; 394 struct inode *inode = filp->f_path.dentry->d_inode;
400 struct super_block * sb; 395 struct super_block * sb;
401 int error; 396 int error;
402 397
403 sb = inode->i_sb; 398 sb = inode->i_sb;
404 399
405 if (!fname) { 400 if (!fname) {
406 printk("call_filldir: called with null fname?!?\n"); 401 printk("call_filldir: called with null fname?!?\n");
407 return 0; 402 return 0;
408 } 403 }
409 curr_pos = hash2pos(fname->hash, fname->minor_hash); 404 curr_pos = hash2pos(fname->hash, fname->minor_hash);
410 while (fname) { 405 while (fname) {
411 error = filldir(dirent, fname->name, 406 error = filldir(dirent, fname->name,
412 fname->name_len, curr_pos, 407 fname->name_len, curr_pos,
413 fname->inode, 408 fname->inode,
414 get_dtype(sb, fname->file_type)); 409 get_dtype(sb, fname->file_type));
415 if (error) { 410 if (error) {
416 filp->f_pos = curr_pos; 411 filp->f_pos = curr_pos;
417 info->extra_fname = fname; 412 info->extra_fname = fname;
418 return error; 413 return error;
419 } 414 }
420 fname = fname->next; 415 fname = fname->next;
421 } 416 }
422 return 0; 417 return 0;
423 } 418 }
424 419
/*
 * readdir for htree-indexed directories.
 *
 * Entries are returned in hash order: the rb tree cached in
 * filp->private_data is (re)filled one hash range at a time by
 * ext3_htree_fill_tree() and then walked in order, handing each
 * collision chain to call_filldir().  f_pos encodes the current major
 * hash (see hash2pos()), so a seek to an unexpected position discards
 * the cached state and restarts from the new hash.
 */
static int ext3_dx_readdir(struct file * filp,
			 void * dirent, filldir_t filldir)
{
	struct dir_private_info *info = filp->private_data;
	struct inode *inode = filp->f_path.dentry->d_inode;
	struct fname *fname;
	int	ret;

	if (!info) {
		info = ext3_htree_create_dir_info(filp->f_pos);
		if (!info)
			return -ENOMEM;
		filp->private_data = info;
	}

	if (filp->f_pos == EXT3_HTREE_EOF)
		return 0;	/* EOF */

	/* Some one has messed with f_pos; reset the world */
	if (info->last_pos != filp->f_pos) {
		free_rb_tree_fname(&info->root);
		info->curr_node = NULL;
		info->extra_fname = NULL;
		info->curr_hash = pos2maj_hash(filp->f_pos);
		info->curr_minor_hash = pos2min_hash(filp->f_pos);
	}

	/*
	 * If there are any leftover names on the hash collision
	 * chain, return them first.
	 */
	if (info->extra_fname) {
		if (call_filldir(filp, dirent, filldir, info->extra_fname))
			goto finished;
		info->extra_fname = NULL;
		goto next_node;
	} else if (!info->curr_node)
		info->curr_node = rb_first(&info->root);

	while (1) {
		/*
		 * Fill the rbtree if we have no more entries,
		 * or the inode has changed since we last read in the
		 * cached entries.
		 */
		if ((!info->curr_node) ||
		    (filp->f_version != inode->i_version)) {
			info->curr_node = NULL;
			free_rb_tree_fname(&info->root);
			filp->f_version = inode->i_version;
			ret = ext3_htree_fill_tree(filp, info->curr_hash,
						   info->curr_minor_hash,
						   &info->next_hash);
			if (ret < 0)
				return ret;
			if (ret == 0) {
				filp->f_pos = EXT3_HTREE_EOF;
				break;
			}
			info->curr_node = rb_first(&info->root);
		}

		fname = rb_entry(info->curr_node, struct fname, rb_hash);
		info->curr_hash = fname->hash;
		info->curr_minor_hash = fname->minor_hash;
		if (call_filldir(filp, dirent, filldir, fname))
			break;
	next_node:
		info->curr_node = rb_next(info->curr_node);
		if (info->curr_node) {
			fname = rb_entry(info->curr_node, struct fname,
					 rb_hash);
			info->curr_hash = fname->hash;
			info->curr_minor_hash = fname->minor_hash;
		} else {
			/* current hash range exhausted; advance or stop */
			if (info->next_hash == ~0) {
				filp->f_pos = EXT3_HTREE_EOF;
				break;
			}
			info->curr_hash = info->next_hash;
			info->curr_minor_hash = 0;
		}
	}
finished:
	info->last_pos = filp->f_pos;
	return 0;
}
512 507
513 static int ext3_release_dir (struct inode * inode, struct file * filp) 508 static int ext3_release_dir (struct inode * inode, struct file * filp)
514 { 509 {
515 if (filp->private_data) 510 if (filp->private_data)
516 ext3_htree_free_dir_info(filp->private_data); 511 ext3_htree_free_dir_info(filp->private_data);
517 512
518 return 0; 513 return 0;
519 } 514 }
520 515
File was created 1 /*
2 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
3 *
4 * Copyright 1998--1999 Red Hat corp --- All Rights Reserved
5 *
6 * This file is part of the Linux kernel and is made available under
7 * the terms of the GNU General Public License, version 2, or at your
8 * option, any later version, incorporated herein by reference.
9 *
10 * Copyright (C) 1992, 1993, 1994, 1995
11 * Remy Card (card@masi.ibp.fr)
12 * Laboratoire MASI - Institut Blaise Pascal
13 * Universite Pierre et Marie Curie (Paris VI)
14 *
15 * from
16 *
17 * linux/include/linux/minix_fs.h
18 *
19 * Copyright (C) 1991, 1992 Linus Torvalds
20 */
21
22 #include <linux/fs.h>
23 #include <linux/jbd.h>
24 #include <linux/magic.h>
25 #include <linux/bug.h>
26 #include <linux/blockgroup_lock.h>
27
28 /*
29 * The second extended filesystem constants/structures
30 */
31
32 /*
33 * Define EXT3FS_DEBUG to produce debug messages
34 */
35 #undef EXT3FS_DEBUG
36
37 /*
38 * Define EXT3_RESERVATION to reserve data blocks for expanding files
39 */
40 #define EXT3_DEFAULT_RESERVE_BLOCKS 8
41 /*max window size: 1024(direct blocks) + 3([t,d]indirect blocks) */
42 #define EXT3_MAX_RESERVE_BLOCKS 1027
43 #define EXT3_RESERVE_WINDOW_NOT_ALLOCATED 0
44
45 /*
46 * Debug code
47 */
48 #ifdef EXT3FS_DEBUG
49 #define ext3_debug(f, a...) \
50 do { \
51 printk (KERN_DEBUG "EXT3-fs DEBUG (%s, %d): %s:", \
52 __FILE__, __LINE__, __func__); \
53 printk (KERN_DEBUG f, ## a); \
54 } while (0)
55 #else
56 #define ext3_debug(f, a...) do {} while (0)
57 #endif
58
59 /*
60 * Special inodes numbers
61 */
62 #define EXT3_BAD_INO 1 /* Bad blocks inode */
63 #define EXT3_ROOT_INO 2 /* Root inode */
64 #define EXT3_BOOT_LOADER_INO 5 /* Boot loader inode */
65 #define EXT3_UNDEL_DIR_INO 6 /* Undelete directory inode */
66 #define EXT3_RESIZE_INO 7 /* Reserved group descriptors inode */
67 #define EXT3_JOURNAL_INO 8 /* Journal inode */
68
69 /* First non-reserved inode for old ext3 filesystems */
70 #define EXT3_GOOD_OLD_FIRST_INO 11
71
72 /*
73 * Maximal count of links to a file
74 */
75 #define EXT3_LINK_MAX 32000
76
77 /*
78 * Macro-instructions used to manage several block sizes
79 */
80 #define EXT3_MIN_BLOCK_SIZE 1024
81 #define EXT3_MAX_BLOCK_SIZE 65536
82 #define EXT3_MIN_BLOCK_LOG_SIZE 10
83 #define EXT3_BLOCK_SIZE(s) ((s)->s_blocksize)
84 #define EXT3_ADDR_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (__u32))
85 #define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits)
86 #define EXT3_ADDR_PER_BLOCK_BITS(s) (EXT3_SB(s)->s_addr_per_block_bits)
87 #define EXT3_INODE_SIZE(s) (EXT3_SB(s)->s_inode_size)
88 #define EXT3_FIRST_INO(s) (EXT3_SB(s)->s_first_ino)
89
90 /*
91 * Macro-instructions used to manage fragments
92 */
93 #define EXT3_MIN_FRAG_SIZE 1024
94 #define EXT3_MAX_FRAG_SIZE 4096
95 #define EXT3_MIN_FRAG_LOG_SIZE 10
96 #define EXT3_FRAG_SIZE(s) (EXT3_SB(s)->s_frag_size)
97 #define EXT3_FRAGS_PER_BLOCK(s) (EXT3_SB(s)->s_frags_per_block)
98
/*
 * Structure of a blocks group descriptor
 *
 * On-disk, little-endian layout: field order and sizes are part of the
 * ext3 disk format and must not be changed.
 */
struct ext3_group_desc
{
	__le32	bg_block_bitmap;	/* Blocks bitmap block */
	__le32	bg_inode_bitmap;	/* Inodes bitmap block */
	__le32	bg_inode_table;		/* Inodes table block */
	__le16	bg_free_blocks_count;	/* Free blocks count */
	__le16	bg_free_inodes_count;	/* Free inodes count */
	__le16	bg_used_dirs_count;	/* Directories count */
	__u16	bg_pad;
	__le32	bg_reserved[3];
};
113
114 /*
115 * Macro-instructions used to manage group descriptors
116 */
117 #define EXT3_BLOCKS_PER_GROUP(s) (EXT3_SB(s)->s_blocks_per_group)
118 #define EXT3_DESC_PER_BLOCK(s) (EXT3_SB(s)->s_desc_per_block)
119 #define EXT3_INODES_PER_GROUP(s) (EXT3_SB(s)->s_inodes_per_group)
120 #define EXT3_DESC_PER_BLOCK_BITS(s) (EXT3_SB(s)->s_desc_per_block_bits)
121
122 /*
123 * Constants relative to the data blocks
124 */
125 #define EXT3_NDIR_BLOCKS 12
126 #define EXT3_IND_BLOCK EXT3_NDIR_BLOCKS
127 #define EXT3_DIND_BLOCK (EXT3_IND_BLOCK + 1)
128 #define EXT3_TIND_BLOCK (EXT3_DIND_BLOCK + 1)
129 #define EXT3_N_BLOCKS (EXT3_TIND_BLOCK + 1)
130
131 /*
132 * Inode flags
133 */
134 #define EXT3_SECRM_FL 0x00000001 /* Secure deletion */
135 #define EXT3_UNRM_FL 0x00000002 /* Undelete */
136 #define EXT3_COMPR_FL 0x00000004 /* Compress file */
137 #define EXT3_SYNC_FL 0x00000008 /* Synchronous updates */
138 #define EXT3_IMMUTABLE_FL 0x00000010 /* Immutable file */
139 #define EXT3_APPEND_FL 0x00000020 /* writes to file may only append */
140 #define EXT3_NODUMP_FL 0x00000040 /* do not dump file */
141 #define EXT3_NOATIME_FL 0x00000080 /* do not update atime */
142 /* Reserved for compression usage... */
143 #define EXT3_DIRTY_FL 0x00000100
144 #define EXT3_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */
145 #define EXT3_NOCOMPR_FL 0x00000400 /* Don't compress */
146 #define EXT3_ECOMPR_FL 0x00000800 /* Compression error */
147 /* End compression flags --- maybe not all used */
148 #define EXT3_INDEX_FL 0x00001000 /* hash-indexed directory */
149 #define EXT3_IMAGIC_FL 0x00002000 /* AFS directory */
150 #define EXT3_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */
151 #define EXT3_NOTAIL_FL 0x00008000 /* file tail should not be merged */
152 #define EXT3_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */
153 #define EXT3_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
154 #define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */
155
156 #define EXT3_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */
157 #define EXT3_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */
158
159 /* Flags that should be inherited by new inodes from their parent. */
160 #define EXT3_FL_INHERITED (EXT3_SECRM_FL | EXT3_UNRM_FL | EXT3_COMPR_FL |\
161 EXT3_SYNC_FL | EXT3_NODUMP_FL |\
162 EXT3_NOATIME_FL | EXT3_COMPRBLK_FL |\
163 EXT3_NOCOMPR_FL | EXT3_JOURNAL_DATA_FL |\
164 EXT3_NOTAIL_FL | EXT3_DIRSYNC_FL)
165
166 /* Flags that are appropriate for regular files (all but dir-specific ones). */
167 #define EXT3_REG_FLMASK (~(EXT3_DIRSYNC_FL | EXT3_TOPDIR_FL))
168
169 /* Flags that are appropriate for non-directories/regular files. */
170 #define EXT3_OTHER_FLMASK (EXT3_NODUMP_FL | EXT3_NOATIME_FL)
171
172 /* Mask out flags that are inappropriate for the given type of inode. */
173 static inline __u32 ext3_mask_flags(umode_t mode, __u32 flags)
174 {
175 if (S_ISDIR(mode))
176 return flags;
177 else if (S_ISREG(mode))
178 return flags & EXT3_REG_FLMASK;
179 else
180 return flags & EXT3_OTHER_FLMASK;
181 }
182
/*
 * Used to pass group descriptor data when online resize is done.
 * This is the userspace-visible argument of EXT3_IOC_GROUP_ADD, so its
 * layout is ABI and must not change.
 */
struct ext3_new_group_input {
	__u32 group;            /* Group number for this data */
	__u32 block_bitmap;     /* Absolute block number of block bitmap */
	__u32 inode_bitmap;     /* Absolute block number of inode bitmap */
	__u32 inode_table;      /* Absolute block number of inode table start */
	__u32 blocks_count;     /* Total number of blocks in this group */
	__u16 reserved_blocks;  /* Number of reserved blocks in this group */
	__u16 unused;
};
193
/*
 * The struct ext3_new_group_input in kernel space, with free_blocks_count
 * appended (kernel-internal only; not part of the ioctl ABI).
 */
struct ext3_new_group_data {
	__u32 group;
	__u32 block_bitmap;
	__u32 inode_bitmap;
	__u32 inode_table;
	__u32 blocks_count;
	__u16 reserved_blocks;
	__u16 unused;
	__u32 free_blocks_count;
};
205
206
207 /*
208 * ioctl commands
209 */
210 #define EXT3_IOC_GETFLAGS FS_IOC_GETFLAGS
211 #define EXT3_IOC_SETFLAGS FS_IOC_SETFLAGS
212 #define EXT3_IOC_GETVERSION _IOR('f', 3, long)
213 #define EXT3_IOC_SETVERSION _IOW('f', 4, long)
214 #define EXT3_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long)
215 #define EXT3_IOC_GROUP_ADD _IOW('f', 8,struct ext3_new_group_input)
216 #define EXT3_IOC_GETVERSION_OLD FS_IOC_GETVERSION
217 #define EXT3_IOC_SETVERSION_OLD FS_IOC_SETVERSION
218 #ifdef CONFIG_JBD_DEBUG
219 #define EXT3_IOC_WAIT_FOR_READONLY _IOR('f', 99, long)
220 #endif
221 #define EXT3_IOC_GETRSVSZ _IOR('f', 5, long)
222 #define EXT3_IOC_SETRSVSZ _IOW('f', 6, long)
223
224 /*
225 * ioctl commands in 32 bit emulation
226 */
227 #define EXT3_IOC32_GETFLAGS FS_IOC32_GETFLAGS
228 #define EXT3_IOC32_SETFLAGS FS_IOC32_SETFLAGS
229 #define EXT3_IOC32_GETVERSION _IOR('f', 3, int)
230 #define EXT3_IOC32_SETVERSION _IOW('f', 4, int)
231 #define EXT3_IOC32_GETRSVSZ _IOR('f', 5, int)
232 #define EXT3_IOC32_SETRSVSZ _IOW('f', 6, int)
233 #define EXT3_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int)
234 #ifdef CONFIG_JBD_DEBUG
235 #define EXT3_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int)
236 #endif
237 #define EXT3_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION
238 #define EXT3_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION
239
240
/*
 * Mount options
 *
 * In-memory snapshot of the mount-time options; presumably used to
 * restore state when a remount fails -- TODO confirm against super.c.
 */
struct ext3_mount_options {
	unsigned long s_mount_opt;
	uid_t s_resuid;
	gid_t s_resgid;
	unsigned long s_commit_interval;
#ifdef CONFIG_QUOTA
	int s_jquota_fmt;
	char *s_qf_names[MAXQUOTAS];
#endif
};
254
/*
 * Structure of an inode on the disk
 *
 * On-disk, little-endian layout: field order and sizes are part of the
 * ext3 disk format and must not be changed.  The osd1/osd2 unions carry
 * the OS-specific interpretations of the same bytes.
 */
struct ext3_inode {
	__le16	i_mode;		/* File mode */
	__le16	i_uid;		/* Low 16 bits of Owner Uid */
	__le32	i_size;		/* Size in bytes */
	__le32	i_atime;	/* Access time */
	__le32	i_ctime;	/* Creation time (NOTE(review): in practice
				 * this holds the inode change time, ctime --
				 * verify against inode.c) */
	__le32	i_mtime;	/* Modification time */
	__le32	i_dtime;	/* Deletion Time */
	__le16	i_gid;		/* Low 16 bits of Group Id */
	__le16	i_links_count;	/* Links count */
	__le32	i_blocks;	/* Blocks count */
	__le32	i_flags;	/* File flags */
	union {
		struct {
			__u32  l_i_reserved1;
		} linux1;
		struct {
			__u32  h_i_translator;
		} hurd1;
		struct {
			__u32  m_i_reserved1;
		} masix1;
	} osd1;				/* OS dependent 1 */
	__le32	i_block[EXT3_N_BLOCKS];/* Pointers to blocks */
	__le32	i_generation;	/* File version (for NFS) */
	__le32	i_file_acl;	/* File ACL */
	__le32	i_dir_acl;	/* Directory ACL */
	__le32	i_faddr;	/* Fragment address */
	union {
		struct {
			__u8	l_i_frag;	/* Fragment number */
			__u8	l_i_fsize;	/* Fragment size */
			__u16	i_pad1;
			__le16	l_i_uid_high;	/* these 2 fields    */
			__le16	l_i_gid_high;	/* were reserved2[0] */
			__u32	l_i_reserved2;
		} linux2;
		struct {
			__u8	h_i_frag;	/* Fragment number */
			__u8	h_i_fsize;	/* Fragment size */
			__u16	h_i_mode_high;
			__u16	h_i_uid_high;
			__u16	h_i_gid_high;
			__u32	h_i_author;
		} hurd2;
		struct {
			__u8	m_i_frag;	/* Fragment number */
			__u8	m_i_fsize;	/* Fragment size */
			__u16	m_pad1;
			__u32	m_i_reserved2[2];
		} masix2;
	} osd2;				/* OS dependent 2 */
	__le16	i_extra_isize;
	__le16	i_pad1;
};
313
314 #define i_size_high i_dir_acl
315
316 #define i_reserved1 osd1.linux1.l_i_reserved1
317 #define i_frag osd2.linux2.l_i_frag
318 #define i_fsize osd2.linux2.l_i_fsize
319 #define i_uid_low i_uid
320 #define i_gid_low i_gid
321 #define i_uid_high osd2.linux2.l_i_uid_high
322 #define i_gid_high osd2.linux2.l_i_gid_high
323 #define i_reserved2 osd2.linux2.l_i_reserved2
324
325 /*
326 * File system states
327 */
328 #define EXT3_VALID_FS 0x0001 /* Unmounted cleanly */
329 #define EXT3_ERROR_FS 0x0002 /* Errors detected */
330 #define EXT3_ORPHAN_FS 0x0004 /* Orphans being recovered */
331
332 /*
333 * Misc. filesystem flags
334 */
335 #define EXT2_FLAGS_SIGNED_HASH 0x0001 /* Signed dirhash in use */
336 #define EXT2_FLAGS_UNSIGNED_HASH 0x0002 /* Unsigned dirhash in use */
337 #define EXT2_FLAGS_TEST_FILESYS 0x0004 /* to test development code */
338
339 /*
340 * Mount flags
341 */
342 #define EXT3_MOUNT_CHECK 0x00001 /* Do mount-time checks */
343 /* EXT3_MOUNT_OLDALLOC was there */
344 #define EXT3_MOUNT_GRPID 0x00004 /* Create files with directory's group */
345 #define EXT3_MOUNT_DEBUG 0x00008 /* Some debugging messages */
346 #define EXT3_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */
347 #define EXT3_MOUNT_ERRORS_RO 0x00020 /* Remount fs ro on errors */
348 #define EXT3_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */
349 #define EXT3_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */
350 #define EXT3_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/
351 #define EXT3_MOUNT_ABORT 0x00200 /* Fatal error detected */
352 #define EXT3_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */
353 #define EXT3_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */
354 #define EXT3_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */
355 #define EXT3_MOUNT_WRITEBACK_DATA 0x00C00 /* No data ordering */
356 #define EXT3_MOUNT_UPDATE_JOURNAL 0x01000 /* Update the journal format */
357 #define EXT3_MOUNT_NO_UID32 0x02000 /* Disable 32-bit UIDs */
358 #define EXT3_MOUNT_XATTR_USER 0x04000 /* Extended user attributes */
359 #define EXT3_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */
360 #define EXT3_MOUNT_RESERVATION 0x10000 /* Preallocation */
361 #define EXT3_MOUNT_BARRIER 0x20000 /* Use block barriers */
362 #define EXT3_MOUNT_QUOTA 0x80000 /* Some quota option set */
363 #define EXT3_MOUNT_USRQUOTA 0x100000 /* "old" user quota */
364 #define EXT3_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */
365 #define EXT3_MOUNT_DATA_ERR_ABORT 0x400000 /* Abort on file data write
366 * error in ordered mode */
367
368 /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
369 #ifndef _LINUX_EXT2_FS_H
370 #define clear_opt(o, opt) o &= ~EXT3_MOUNT_##opt
371 #define set_opt(o, opt) o |= EXT3_MOUNT_##opt
372 #define test_opt(sb, opt) (EXT3_SB(sb)->s_mount_opt & \
373 EXT3_MOUNT_##opt)
374 #else
375 #define EXT2_MOUNT_NOLOAD EXT3_MOUNT_NOLOAD
376 #define EXT2_MOUNT_ABORT EXT3_MOUNT_ABORT
377 #define EXT2_MOUNT_DATA_FLAGS EXT3_MOUNT_DATA_FLAGS
378 #endif
379
380 #define ext3_set_bit __set_bit_le
381 #define ext3_set_bit_atomic ext2_set_bit_atomic
382 #define ext3_clear_bit __clear_bit_le
383 #define ext3_clear_bit_atomic ext2_clear_bit_atomic
384 #define ext3_test_bit test_bit_le
385 #define ext3_find_next_zero_bit find_next_zero_bit_le
386
387 /*
388 * Maximal mount counts between two filesystem checks
389 */
390 #define EXT3_DFL_MAX_MNT_COUNT 20 /* Allow 20 mounts */
391 #define EXT3_DFL_CHECKINTERVAL 0 /* Don't use interval check */
392
393 /*
394 * Behaviour when detecting errors
395 */
396 #define EXT3_ERRORS_CONTINUE 1 /* Continue execution */
397 #define EXT3_ERRORS_RO 2 /* Remount fs read-only */
398 #define EXT3_ERRORS_PANIC 3 /* Panic */
399 #define EXT3_ERRORS_DEFAULT EXT3_ERRORS_CONTINUE
400
/*
 * Structure of the super block
 *
 * On-disk, little-endian layout; must not be reordered or resized.
 * The hex markers (00, 10, 20, ...) give the byte offset of the field
 * that follows them.
 */
struct ext3_super_block {
/*00*/	__le32	s_inodes_count;		/* Inodes count */
	__le32	s_blocks_count;		/* Blocks count */
	__le32	s_r_blocks_count;	/* Reserved blocks count */
	__le32	s_free_blocks_count;	/* Free blocks count */
/*10*/	__le32	s_free_inodes_count;	/* Free inodes count */
	__le32	s_first_data_block;	/* First Data Block */
	__le32	s_log_block_size;	/* Block size */
	__le32	s_log_frag_size;	/* Fragment size */
/*20*/	__le32	s_blocks_per_group;	/* # Blocks per group */
	__le32	s_frags_per_group;	/* # Fragments per group */
	__le32	s_inodes_per_group;	/* # Inodes per group */
	__le32	s_mtime;		/* Mount time */
/*30*/	__le32	s_wtime;		/* Write time */
	__le16	s_mnt_count;		/* Mount count */
	__le16	s_max_mnt_count;	/* Maximal mount count */
	__le16	s_magic;		/* Magic signature */
	__le16	s_state;		/* File system state */
	__le16	s_errors;		/* Behaviour when detecting errors */
	__le16	s_minor_rev_level;	/* minor revision level */
/*40*/	__le32	s_lastcheck;		/* time of last check */
	__le32	s_checkinterval;	/* max. time between checks */
	__le32	s_creator_os;		/* OS */
	__le32	s_rev_level;		/* Revision level */
/*50*/	__le16	s_def_resuid;		/* Default uid for reserved blocks */
	__le16	s_def_resgid;		/* Default gid for reserved blocks */
	/*
	 * These fields are for EXT3_DYNAMIC_REV superblocks only.
	 *
	 * Note: the difference between the compatible feature set and
	 * the incompatible feature set is that if there is a bit set
	 * in the incompatible feature set that the kernel doesn't
	 * know about, it should refuse to mount the filesystem.
	 *
	 * e2fsck's requirements are more strict; if it doesn't know
	 * about a feature in either the compatible or incompatible
	 * feature set, it must abort and not try to meddle with
	 * things it doesn't understand...
	 */
	__le32	s_first_ino;		/* First non-reserved inode */
	__le16	s_inode_size;		/* size of inode structure */
	__le16	s_block_group_nr;	/* block group # of this superblock */
	__le32	s_feature_compat;	/* compatible feature set */
/*60*/	__le32	s_feature_incompat;	/* incompatible feature set */
	__le32	s_feature_ro_compat;	/* readonly-compatible feature set */
/*68*/	__u8	s_uuid[16];		/* 128-bit uuid for volume */
/*78*/	char	s_volume_name[16];	/* volume name */
/*88*/	char	s_last_mounted[64];	/* directory where last mounted */
/*C8*/	__le32	s_algorithm_usage_bitmap; /* For compression */
	/*
	 * Performance hints.  Directory preallocation should only
	 * happen if the EXT3_FEATURE_COMPAT_DIR_PREALLOC flag is on.
	 */
	__u8	s_prealloc_blocks;	/* Nr of blocks to try to preallocate*/
	__u8	s_prealloc_dir_blocks;	/* Nr to preallocate for dirs */
	__le16	s_reserved_gdt_blocks;	/* Per group desc for online growth */
	/*
	 * Journaling support valid if EXT3_FEATURE_COMPAT_HAS_JOURNAL set.
	 */
/*D0*/	__u8	s_journal_uuid[16];	/* uuid of journal superblock */
/*E0*/	__le32	s_journal_inum;		/* inode number of journal file */
	__le32	s_journal_dev;		/* device number of journal file */
	__le32	s_last_orphan;		/* start of list of inodes to delete */
	__le32	s_hash_seed[4];		/* HTREE hash seed */
	__u8	s_def_hash_version;	/* Default hash version to use */
	__u8	s_reserved_char_pad;
	__u16	s_reserved_word_pad;
	__le32	s_default_mount_opts;
	__le32	s_first_meta_bg;	/* First metablock block group */
	__le32	s_mkfs_time;		/* When the filesystem was created */
	__le32	s_jnl_blocks[17];	/* Backup of the journal inode */
	/* 64bit support valid if EXT4_FEATURE_COMPAT_64BIT */
/*150*/	__le32	s_blocks_count_hi;	/* Blocks count */
	__le32	s_r_blocks_count_hi;	/* Reserved blocks count */
	__le32	s_free_blocks_count_hi;	/* Free blocks count */
	__le16	s_min_extra_isize;	/* All inodes have at least # bytes */
	__le16	s_want_extra_isize;	/* New inodes should reserve # bytes */
	__le32	s_flags;		/* Miscellaneous flags */
	__le16	s_raid_stride;		/* RAID stride */
	__le16	s_mmp_interval;		/* # seconds to wait in MMP checking */
	__le64	s_mmp_block;		/* Block for multi-mount protection */
	__le32	s_raid_stripe_width;	/* blocks on all data disks (N*stride)*/
	__u8	s_log_groups_per_flex;	/* FLEX_BG group size */
	__u8	s_reserved_char_pad2;
	__le16	s_reserved_pad;
	__u32	s_reserved[162];	/* Padding to the end of the block */
};
491
/* data type for block offset of block group */
typedef int ext3_grpblk_t;

/* data type for filesystem-wide blocks number */
typedef unsigned long ext3_fsblk_t;

/* printf/printk format specifier matching ext3_fsblk_t */
#define E3FSBLK "%lu"

/* An inclusive range of blocks reserved for one file's future growth. */
struct ext3_reserve_window {
	ext3_fsblk_t	_rsv_start;	/* First byte reserved */
	ext3_fsblk_t	_rsv_end;	/* Last byte reserved or 0 */
};

/* A reservation window linked into the per-fs rb-tree (s_rsv_window_root). */
struct ext3_reserve_window_node {
	struct rb_node		rsv_node;
	__u32			rsv_goal_size;	/* desired window size; see
						 * EXT3_DEFAULT_RESERVE_BLOCKS */
	__u32			rsv_alloc_hit;	/* presumably a hit counter for
						 * allocations served from this
						 * window -- verify in balloc.c */
	struct ext3_reserve_window	rsv_window;
};
511
/* Per-inode block-allocation state (reservation window + last-alloc hints). */
struct ext3_block_alloc_info {
	/* information about reservation window */
	struct ext3_reserve_window_node	rsv_window_node;
	/*
	 * was i_next_alloc_block in ext3_inode_info
	 * is the logical (file-relative) number of the
	 * most-recently-allocated block in this file.
	 * We use this for detecting linearly ascending allocation requests.
	 */
	__u32			last_alloc_logical_block;
	/*
	 * Was i_next_alloc_goal in ext3_inode_info
	 * is the *physical* companion to i_next_alloc_block.
	 * it is the physical block number of the block which was most-recently
	 * allocated to this file. This gives us the goal (target) for the next
	 * allocation when we detect linearly ascending requests.
	 */
	ext3_fsblk_t		last_alloc_physical_block;
};
531
532 #define rsv_start rsv_window._rsv_start
533 #define rsv_end rsv_window._rsv_end
534
/*
 * third extended file system inode data in memory
 *
 * The VFS inode is embedded as ->vfs_inode; EXT3_I() recovers this
 * structure from a struct inode pointer via container_of().
 */
struct ext3_inode_info {
	__le32	i_data[15];	/* unconverted */
	__u32	i_flags;
#ifdef EXT3_FRAGMENTS
	__u32	i_faddr;
	__u8	i_frag_no;
	__u8	i_frag_size;
#endif
	ext3_fsblk_t	i_file_acl;
	__u32	i_dir_acl;
	__u32	i_dtime;

	/*
	 * i_block_group is the number of the block group which contains
	 * this file's inode.  Constant across the lifetime of the inode,
	 * it is used for making block allocation decisions - we try to
	 * place a file's data blocks near its inode block, and new inodes
	 * near to their parent directory's inode.
	 */
	__u32	i_block_group;
	unsigned long	i_state_flags;	/* Dynamic state flags for ext3 */

	/* block reservation info */
	struct ext3_block_alloc_info *i_block_alloc_info;

	__u32	i_dir_start_lookup;
#ifdef CONFIG_EXT3_FS_XATTR
	/*
	 * Extended attributes can be read independently of the main file
	 * data. Taking i_mutex even when reading would cause contention
	 * between readers of EAs and writers of regular file data, so
	 * instead we synchronize on xattr_sem when reading or changing
	 * EAs.
	 */
	struct rw_semaphore xattr_sem;
#endif

	struct list_head i_orphan;	/* unlinked but open inodes */

	/*
	 * i_disksize keeps track of what the inode size is ON DISK, not
	 * in memory.  During truncate, i_size is set to the new size by
	 * the VFS prior to calling ext3_truncate(), but the filesystem won't
	 * set i_disksize to 0 until the truncate is actually under way.
	 *
	 * The intent is that i_disksize always represents the blocks which
	 * are used by this file.  This allows recovery to restart truncate
	 * on orphans if we crash during truncate.  We actually write i_disksize
	 * into the on-disk inode when writing inodes out, instead of i_size.
	 *
	 * The only time when i_disksize and i_size may be different is when
	 * a truncate is in progress.  The only things which change i_disksize
	 * are ext3_get_block (growth) and ext3_truncate (shrinkth).
	 */
	loff_t	i_disksize;

	/* on-disk additional length */
	__u16 i_extra_isize;

	/*
	 * truncate_mutex is for serialising ext3_truncate() against
	 * ext3_getblock().  In the 2.4 ext2 design, great chunks of inode's
	 * data tree are chopped off during truncate. We can't do that in
	 * ext3 because whenever we perform intermediate commits during
	 * truncate, the inode and all the metadata blocks *must* be in a
	 * consistent state which allows truncation of the orphans to restart
	 * during recovery.  Hence we must fix the get_block-vs-truncate race
	 * by other means, so we have truncate_mutex.
	 */
	struct mutex truncate_mutex;

	/*
	 * Transactions that contain inode's metadata needed to complete
	 * fsync and fdatasync, respectively.
	 */
	atomic_t i_sync_tid;
	atomic_t i_datasync_tid;

	struct inode vfs_inode;
};
618
/*
 * third extended-fs super-block data in memory
 *
 * Hung off the VFS super_block's s_fs_info; recovered via EXT3_SB().
 */
struct ext3_sb_info {
	unsigned long s_frag_size;	/* Size of a fragment in bytes */
	unsigned long s_frags_per_block;/* Number of fragments per block */
	unsigned long s_inodes_per_block;/* Number of inodes per block */
	unsigned long s_frags_per_group;/* Number of fragments in a group */
	unsigned long s_blocks_per_group;/* Number of blocks in a group */
	unsigned long s_inodes_per_group;/* Number of inodes in a group */
	unsigned long s_itb_per_group;	/* Number of inode table blocks per group */
	unsigned long s_gdb_count;	/* Number of group descriptor blocks */
	unsigned long s_desc_per_block;	/* Number of group descriptors per block */
	unsigned long s_groups_count;	/* Number of groups in the fs */
	unsigned long s_overhead_last;  /* Last calculated overhead */
	unsigned long s_blocks_last;    /* Last seen block count */
	struct buffer_head * s_sbh;	/* Buffer containing the super block */
	struct ext3_super_block * s_es;	/* Pointer to the super block in the buffer */
	struct buffer_head ** s_group_desc;
	unsigned long  s_mount_opt;
	ext3_fsblk_t s_sb_block;
	uid_t s_resuid;
	gid_t s_resgid;
	unsigned short s_mount_state;
	unsigned short s_pad;
	int s_addr_per_block_bits;
	int s_desc_per_block_bits;
	int s_inode_size;
	int s_first_ino;
	spinlock_t s_next_gen_lock;
	u32 s_next_generation;
	u32 s_hash_seed[4];
	int s_def_hash_version;
	int s_hash_unsigned;	/* 3 if hash should be unsigned, 0 if not
				 * (NOTE(review): historical comment said
				 * "signed", contradicting the field name;
				 * verify against fill_super) */
	struct percpu_counter s_freeblocks_counter;
	struct percpu_counter s_freeinodes_counter;
	struct percpu_counter s_dirs_counter;
	struct blockgroup_lock *s_blockgroup_lock;

	/* root of the per fs reservation window tree */
	spinlock_t s_rsv_window_lock;
	struct rb_root s_rsv_window_root;
	struct ext3_reserve_window_node s_rsv_window_head;

	/* Journaling */
	struct inode * s_journal_inode;
	struct journal_s * s_journal;
	struct list_head s_orphan;
	struct mutex s_orphan_lock;
	struct mutex s_resize_lock;
	unsigned long s_commit_interval;
	struct block_device *journal_bdev;
#ifdef CONFIG_QUOTA
	char *s_qf_names[MAXQUOTAS];	/* Names of quota files with journalled quota */
	int s_jquota_fmt;		/* Format of quota to use */
#endif
};
676
/*
 * Return the spinlock guarding @block_group's metadata, taken from the
 * superblock's shared blockgroup-lock array.
 */
static inline spinlock_t *
sb_bgl_lock(struct ext3_sb_info *sbi, unsigned int block_group)
{
	return bgl_lock_ptr(sbi->s_blockgroup_lock, block_group);
}
682
/* Map a VFS super_block to its ext3-private in-memory info. */
static inline struct ext3_sb_info * EXT3_SB(struct super_block *sb)
{
	return sb->s_fs_info;
}
/* Map a VFS inode to its enclosing ext3_inode_info (vfs_inode is embedded). */
static inline struct ext3_inode_info *EXT3_I(struct inode *inode)
{
	return container_of(inode, struct ext3_inode_info, vfs_inode);
}
691
692 static inline int ext3_valid_inum(struct super_block *sb, unsigned long ino)
693 {
694 return ino == EXT3_ROOT_INO ||
695 ino == EXT3_JOURNAL_INO ||
696 ino == EXT3_RESIZE_INO ||
697 (ino >= EXT3_FIRST_INO(sb) &&
698 ino <= le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count));
699 }
700
/*
 * Inode dynamic state flags
 *
 * Bit numbers within EXT3_I(inode)->i_state_flags; manipulated with
 * the ext3_{test,set,clear}_inode_state() helpers.
 */
enum {
	EXT3_STATE_JDATA,		/* journaled data exists */
	EXT3_STATE_NEW,			/* inode is newly created */
	EXT3_STATE_XATTR,		/* has in-inode xattrs */
	EXT3_STATE_FLUSH_ON_CLOSE,	/* flush dirty pages on close */
};
710
711 static inline int ext3_test_inode_state(struct inode *inode, int bit)
712 {
713 return test_bit(bit, &EXT3_I(inode)->i_state_flags);
714 }
715
716 static inline void ext3_set_inode_state(struct inode *inode, int bit)
717 {
718 set_bit(bit, &EXT3_I(inode)->i_state_flags);
719 }
720
721 static inline void ext3_clear_inode_state(struct inode *inode, int bit)
722 {
723 clear_bit(bit, &EXT3_I(inode)->i_state_flags);
724 }
725
726 #define NEXT_ORPHAN(inode) EXT3_I(inode)->i_dtime
727
728 /*
729 * Codes for operating systems
730 */
731 #define EXT3_OS_LINUX 0
732 #define EXT3_OS_HURD 1
733 #define EXT3_OS_MASIX 2
734 #define EXT3_OS_FREEBSD 3
735 #define EXT3_OS_LITES 4
736
737 /*
738 * Revision levels
739 */
740 #define EXT3_GOOD_OLD_REV 0 /* The good old (original) format */
741 #define EXT3_DYNAMIC_REV 1 /* V2 format w/ dynamic inode sizes */
742
743 #define EXT3_CURRENT_REV EXT3_GOOD_OLD_REV
744 #define EXT3_MAX_SUPP_REV EXT3_DYNAMIC_REV
745
746 #define EXT3_GOOD_OLD_INODE_SIZE 128
747
748 /*
749 * Feature set definitions
750 */
751
752 #define EXT3_HAS_COMPAT_FEATURE(sb,mask) \
753 ( EXT3_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask) )
754 #define EXT3_HAS_RO_COMPAT_FEATURE(sb,mask) \
755 ( EXT3_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask) )
756 #define EXT3_HAS_INCOMPAT_FEATURE(sb,mask) \
757 ( EXT3_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask) )
758 #define EXT3_SET_COMPAT_FEATURE(sb,mask) \
759 EXT3_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask)
760 #define EXT3_SET_RO_COMPAT_FEATURE(sb,mask) \
761 EXT3_SB(sb)->s_es->s_feature_ro_compat |= cpu_to_le32(mask)
762 #define EXT3_SET_INCOMPAT_FEATURE(sb,mask) \
763 EXT3_SB(sb)->s_es->s_feature_incompat |= cpu_to_le32(mask)
764 #define EXT3_CLEAR_COMPAT_FEATURE(sb,mask) \
765 EXT3_SB(sb)->s_es->s_feature_compat &= ~cpu_to_le32(mask)
766 #define EXT3_CLEAR_RO_COMPAT_FEATURE(sb,mask) \
767 EXT3_SB(sb)->s_es->s_feature_ro_compat &= ~cpu_to_le32(mask)
768 #define EXT3_CLEAR_INCOMPAT_FEATURE(sb,mask) \
769 EXT3_SB(sb)->s_es->s_feature_incompat &= ~cpu_to_le32(mask)
770
771 #define EXT3_FEATURE_COMPAT_DIR_PREALLOC 0x0001
772 #define EXT3_FEATURE_COMPAT_IMAGIC_INODES 0x0002
773 #define EXT3_FEATURE_COMPAT_HAS_JOURNAL 0x0004
774 #define EXT3_FEATURE_COMPAT_EXT_ATTR 0x0008
775 #define EXT3_FEATURE_COMPAT_RESIZE_INODE 0x0010
776 #define EXT3_FEATURE_COMPAT_DIR_INDEX 0x0020
777
778 #define EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001
779 #define EXT3_FEATURE_RO_COMPAT_LARGE_FILE 0x0002
780 #define EXT3_FEATURE_RO_COMPAT_BTREE_DIR 0x0004
781
782 #define EXT3_FEATURE_INCOMPAT_COMPRESSION 0x0001
783 #define EXT3_FEATURE_INCOMPAT_FILETYPE 0x0002
784 #define EXT3_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */
785 #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */
786 #define EXT3_FEATURE_INCOMPAT_META_BG 0x0010
787
788 #define EXT3_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR
789 #define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \
790 EXT3_FEATURE_INCOMPAT_RECOVER| \
791 EXT3_FEATURE_INCOMPAT_META_BG)
792 #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \
793 EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \
794 EXT3_FEATURE_RO_COMPAT_BTREE_DIR)
795
796 /*
797 * Default values for user and/or group using reserved blocks
798 */
799 #define EXT3_DEF_RESUID 0
800 #define EXT3_DEF_RESGID 0
801
802 /*
803 * Default mount options
804 */
805 #define EXT3_DEFM_DEBUG 0x0001
806 #define EXT3_DEFM_BSDGROUPS 0x0002
807 #define EXT3_DEFM_XATTR_USER 0x0004
808 #define EXT3_DEFM_ACL 0x0008
809 #define EXT3_DEFM_UID16 0x0010
810 #define EXT3_DEFM_JMODE 0x0060
811 #define EXT3_DEFM_JMODE_DATA 0x0020
812 #define EXT3_DEFM_JMODE_ORDERED 0x0040
813 #define EXT3_DEFM_JMODE_WBACK 0x0060
814
/*
 * Structure of a directory entry (on-disk, legacy 16-bit name_len form)
 */
#define EXT3_NAME_LEN 255

struct ext3_dir_entry {
	__le32	inode;			/* Inode number */
	__le16	rec_len;		/* Directory entry length */
	__le16	name_len;		/* Name length */
	char	name[EXT3_NAME_LEN];	/* File name */
};
826
/*
 * The new version of the directory entry.  Since EXT3 structures are
 * stored in intel byte order, and the name_len field could never be
 * bigger than 255 chars, it's safe to reclaim the extra byte for the
 * file_type field (see the EXT3_FT_* constants below).
 */
struct ext3_dir_entry_2 {
	__le32	inode;			/* Inode number */
	__le16	rec_len;		/* Directory entry length */
	__u8	name_len;		/* Name length */
	__u8	file_type;
	char	name[EXT3_NAME_LEN];	/* File name */
};
840
841 /*
842 * Ext3 directory file types. Only the low 3 bits are used. The
843 * other bits are reserved for now.
844 */
845 #define EXT3_FT_UNKNOWN 0
846 #define EXT3_FT_REG_FILE 1
847 #define EXT3_FT_DIR 2
848 #define EXT3_FT_CHRDEV 3
849 #define EXT3_FT_BLKDEV 4
850 #define EXT3_FT_FIFO 5
851 #define EXT3_FT_SOCK 6
852 #define EXT3_FT_SYMLINK 7
853
854 #define EXT3_FT_MAX 8
855
856 /*
857 * EXT3_DIR_PAD defines the directory entries boundaries
858 *
859 * NOTE: It must be a multiple of 4
860 */
861 #define EXT3_DIR_PAD 4
862 #define EXT3_DIR_ROUND (EXT3_DIR_PAD - 1)
863 #define EXT3_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT3_DIR_ROUND) & \
864 ~EXT3_DIR_ROUND)
865 #define EXT3_MAX_REC_LEN ((1<<16)-1)
866
867 /*
868 * Tests against MAX_REC_LEN etc were put in place for 64k block
869 * sizes; if that is not possible on this arch, we can skip
870 * those tests and speed things up.
871 */
872 static inline unsigned ext3_rec_len_from_disk(__le16 dlen)
873 {
874 unsigned len = le16_to_cpu(dlen);
875
876 #if (PAGE_CACHE_SIZE >= 65536)
877 if (len == EXT3_MAX_REC_LEN)
878 return 1 << 16;
879 #endif
880 return len;
881 }
882
883 static inline __le16 ext3_rec_len_to_disk(unsigned len)
884 {
885 #if (PAGE_CACHE_SIZE >= 65536)
886 if (len == (1 << 16))
887 return cpu_to_le16(EXT3_MAX_REC_LEN);
888 else if (len > (1 << 16))
889 BUG();
890 #endif
891 return cpu_to_le16(len);
892 }
893
894 /*
895 * Hash Tree Directory indexing
896 * (c) Daniel Phillips, 2001
897 */
898
899 #define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \
900 EXT3_FEATURE_COMPAT_DIR_INDEX) && \
901 (EXT3_I(dir)->i_flags & EXT3_INDEX_FL))
902 #define EXT3_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT3_LINK_MAX)
903 #define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1)
904
905 /* Legal values for the dx_root hash_version field: */
906
907 #define DX_HASH_LEGACY 0
908 #define DX_HASH_HALF_MD4 1
909 #define DX_HASH_TEA 2
910 #define DX_HASH_LEGACY_UNSIGNED 3
911 #define DX_HASH_HALF_MD4_UNSIGNED 4
912 #define DX_HASH_TEA_UNSIGNED 5
913
914 /* hash info structure used by the directory hash */
struct dx_hash_info
{
	u32		hash;		/* computed major hash value */
	u32		minor_hash;	/* computed minor hash value */
	int		hash_version;	/* DX_HASH_* algorithm selector */
	u32		*seed;		/* seed words for the hash, if any
					 * (NOTE: semantics defined in hash.c) */
};
922
923 #define EXT3_HTREE_EOF 0x7fffffff
924
925 /*
926 * Control parameters used by ext3_htree_next_block
927 */
928 #define HASH_NB_ALWAYS 1
929
930
931 /*
932 * Describe an inode's exact location on disk and in memory
933 */
struct ext3_iloc
{
	struct buffer_head *bh;		/* buffer holding the raw inode */
	unsigned long offset;		/* byte offset of the inode in bh->b_data
					 * (see ext3_raw_inode()) */
	unsigned long block_group;	/* block group containing the inode */
};
940
/* Map an ext3_iloc to the raw on-disk inode inside its buffer_head. */
static inline struct ext3_inode *ext3_raw_inode(struct ext3_iloc *iloc)
{
	return (struct ext3_inode *) (iloc->bh->b_data + iloc->offset);
}
945
946 /*
947 * This structure is stuffed into the struct file's private_data field
948 * for directories. It is where we put information so that we can do
949 * readdir operations in hash tree order.
950 */
struct dir_private_info {
	struct rb_root	root;		/* rb-tree of cached directory entries */
	struct rb_node	*curr_node;	/* current position in the rb-tree walk */
	struct fname	*extra_fname;	/* pending entry, presumably from a hash
					 * collision — see users in dir.c */
	loff_t		last_pos;	/* file position of the previous readdir */
	__u32		curr_hash;	/* major hash of the current position */
	__u32		curr_minor_hash; /* minor hash of the current position */
	__u32		next_hash;	/* hash to continue filling the tree from */
};
960
961 /* calculate the first block number of the group */
962 static inline ext3_fsblk_t
963 ext3_group_first_block_no(struct super_block *sb, unsigned long group_no)
964 {
965 return group_no * (ext3_fsblk_t)EXT3_BLOCKS_PER_GROUP(sb) +
966 le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block);
967 }
968
969 /*
970 * Special error return code only used by dx_probe() and its callers.
971 */
972 #define ERR_BAD_DX_DIR -75000
973
974 /*
975 * Function prototypes
976 */
977
978 /*
979 * Ok, these declarations are also in <linux/kernel.h> but none of the
980 * ext3 source programs needs to include it so they are duplicated here.
981 */
982 # define NORET_TYPE /**/
983 # define ATTRIB_NORET __attribute__((noreturn))
984 # define NORET_AND noreturn,
985
986 /* balloc.c */
987 extern int ext3_bg_has_super(struct super_block *sb, int group);
988 extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group);
989 extern ext3_fsblk_t ext3_new_block (handle_t *handle, struct inode *inode,
990 ext3_fsblk_t goal, int *errp);
991 extern ext3_fsblk_t ext3_new_blocks (handle_t *handle, struct inode *inode,
992 ext3_fsblk_t goal, unsigned long *count, int *errp);
993 extern void ext3_free_blocks (handle_t *handle, struct inode *inode,
994 ext3_fsblk_t block, unsigned long count);
995 extern void ext3_free_blocks_sb (handle_t *handle, struct super_block *sb,
996 ext3_fsblk_t block, unsigned long count,
997 unsigned long *pdquot_freed_blocks);
998 extern ext3_fsblk_t ext3_count_free_blocks (struct super_block *);
999 extern void ext3_check_blocks_bitmap (struct super_block *);
1000 extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
1001 unsigned int block_group,
1002 struct buffer_head ** bh);
1003 extern int ext3_should_retry_alloc(struct super_block *sb, int *retries);
1004 extern void ext3_init_block_alloc_info(struct inode *);
1005 extern void ext3_rsv_window_add(struct super_block *sb, struct ext3_reserve_window_node *rsv);
1006 extern int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range);
1007
1008 /* dir.c */
1009 extern int ext3_check_dir_entry(const char *, struct inode *,
1010 struct ext3_dir_entry_2 *,
1011 struct buffer_head *, unsigned long);
1012 extern int ext3_htree_store_dirent(struct file *dir_file, __u32 hash,
1013 __u32 minor_hash,
1014 struct ext3_dir_entry_2 *dirent);
1015 extern void ext3_htree_free_dir_info(struct dir_private_info *p);
1016
1017 /* fsync.c */
1018 extern int ext3_sync_file(struct file *, loff_t, loff_t, int);
1019
1020 /* hash.c */
1021 extern int ext3fs_dirhash(const char *name, int len, struct
1022 dx_hash_info *hinfo);
1023
1024 /* ialloc.c */
1025 extern struct inode * ext3_new_inode (handle_t *, struct inode *,
1026 const struct qstr *, umode_t);
1027 extern void ext3_free_inode (handle_t *, struct inode *);
1028 extern struct inode * ext3_orphan_get (struct super_block *, unsigned long);
1029 extern unsigned long ext3_count_free_inodes (struct super_block *);
1030 extern unsigned long ext3_count_dirs (struct super_block *);
1031 extern void ext3_check_inodes_bitmap (struct super_block *);
1032 extern unsigned long ext3_count_free (struct buffer_head *, unsigned);
1033
1034
1035 /* inode.c */
1036 int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode,
1037 struct buffer_head *bh, ext3_fsblk_t blocknr);
1038 struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
1039 struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
1040 int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
1041 sector_t iblock, unsigned long maxblocks, struct buffer_head *bh_result,
1042 int create);
1043
1044 extern struct inode *ext3_iget(struct super_block *, unsigned long);
1045 extern int ext3_write_inode (struct inode *, struct writeback_control *);
1046 extern int ext3_setattr (struct dentry *, struct iattr *);
1047 extern void ext3_evict_inode (struct inode *);
1048 extern int ext3_sync_inode (handle_t *, struct inode *);
1049 extern void ext3_discard_reservation (struct inode *);
1050 extern void ext3_dirty_inode(struct inode *, int);
1051 extern int ext3_change_inode_journal_flag(struct inode *, int);
1052 extern int ext3_get_inode_loc(struct inode *, struct ext3_iloc *);
1053 extern int ext3_can_truncate(struct inode *inode);
1054 extern void ext3_truncate(struct inode *inode);
1055 extern void ext3_set_inode_flags(struct inode *);
1056 extern void ext3_get_inode_flags(struct ext3_inode_info *);
1057 extern void ext3_set_aops(struct inode *inode);
1058 extern int ext3_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
1059 u64 start, u64 len);
1060
1061 /* ioctl.c */
1062 extern long ext3_ioctl(struct file *, unsigned int, unsigned long);
1063 extern long ext3_compat_ioctl(struct file *, unsigned int, unsigned long);
1064
1065 /* namei.c */
1066 extern int ext3_orphan_add(handle_t *, struct inode *);
1067 extern int ext3_orphan_del(handle_t *, struct inode *);
1068 extern int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
1069 __u32 start_minor_hash, __u32 *next_hash);
1070
1071 /* resize.c */
1072 extern int ext3_group_add(struct super_block *sb,
1073 struct ext3_new_group_data *input);
1074 extern int ext3_group_extend(struct super_block *sb,
1075 struct ext3_super_block *es,
1076 ext3_fsblk_t n_blocks_count);
1077
1078 /* super.c */
1079 extern __printf(3, 4)
1080 void ext3_error(struct super_block *, const char *, const char *, ...);
1081 extern void __ext3_std_error (struct super_block *, const char *, int);
1082 extern __printf(3, 4)
1083 void ext3_abort(struct super_block *, const char *, const char *, ...);
1084 extern __printf(3, 4)
1085 void ext3_warning(struct super_block *, const char *, const char *, ...);
1086 extern __printf(3, 4)
1087 void ext3_msg(struct super_block *, const char *, const char *, ...);
1088 extern void ext3_update_dynamic_rev (struct super_block *sb);
1089
1090 #define ext3_std_error(sb, errno) \
1091 do { \
1092 if ((errno)) \
1093 __ext3_std_error((sb), __func__, (errno)); \
1094 } while (0)
1095
1096 /*
1097 * Inodes and files operations
1098 */
1099
1100 /* dir.c */
1101 extern const struct file_operations ext3_dir_operations;
1102
1103 /* file.c */
1104 extern const struct inode_operations ext3_file_inode_operations;
1105 extern const struct file_operations ext3_file_operations;
1106
1107 /* namei.c */
1108 extern const struct inode_operations ext3_dir_inode_operations;
1109 extern const struct inode_operations ext3_special_inode_operations;
1110
1111 /* symlink.c */
1112 extern const struct inode_operations ext3_symlink_inode_operations;
1113 extern const struct inode_operations ext3_fast_symlink_inode_operations;
1114
1115 #define EXT3_JOURNAL(inode) (EXT3_SB((inode)->i_sb)->s_journal)
1116
1117 /* Define the number of blocks we need to account to a transaction to
1118 * modify one block of data.
1119 *
1120 * We may have to touch one inode, one bitmap buffer, up to three
1121 * indirection blocks, the group and superblock summaries, and the data
1122 * block to complete the transaction. */
1123
1124 #define EXT3_SINGLEDATA_TRANS_BLOCKS 8U
1125
1126 /* Extended attribute operations touch at most two data buffers,
1127 * two bitmap buffers, and two group summaries, in addition to the inode
1128 * and the superblock, which are already accounted for. */
1129
1130 #define EXT3_XATTR_TRANS_BLOCKS 6U
1131
1132 /* Define the minimum size for a transaction which modifies data. This
1133 * needs to take into account the fact that we may end up modifying two
1134 * quota files too (one for the group, one for the user quota). The
1135 * superblock only gets updated once, of course, so don't bother
1136 * counting that again for the quota updates. */
1137
1138 #define EXT3_DATA_TRANS_BLOCKS(sb) (EXT3_SINGLEDATA_TRANS_BLOCKS + \
1139 EXT3_XATTR_TRANS_BLOCKS - 2 + \
1140 EXT3_MAXQUOTAS_TRANS_BLOCKS(sb))
1141
1142 /* Delete operations potentially hit one directory's namespace plus an
1143 * entire inode, plus arbitrary amounts of bitmap/indirection data. Be
1144 * generous. We can grow the delete transaction later if necessary. */
1145
1146 #define EXT3_DELETE_TRANS_BLOCKS(sb) (EXT3_MAXQUOTAS_TRANS_BLOCKS(sb) + 64)
1147
1148 /* Define an arbitrary limit for the amount of data we will anticipate
1149 * writing to any given transaction. For unbounded transactions such as
1150 * write(2) and truncate(2) we can write more than this, but we always
1151 * start off at the maximum transaction size and grow the transaction
1152 * optimistically as we go. */
1153
1154 #define EXT3_MAX_TRANS_DATA 64U
1155
1156 /* We break up a large truncate or write transaction once the handle's
1157 * buffer credits gets this low, we need either to extend the
1158 * transaction or to start a new one. Reserve enough space here for
1159 * inode, bitmap, superblock, group and indirection updates for at least
1160 * one block, plus two quota updates. Quota allocations are not
1161 * needed. */
1162
1163 #define EXT3_RESERVE_TRANS_BLOCKS 12U
1164
1165 #define EXT3_INDEX_EXTRA_TRANS_BLOCKS 8
1166
1167 #ifdef CONFIG_QUOTA
1168 /* Amount of blocks needed for quota update - we know that the structure was
1169 * allocated so we need to update only inode+data */
1170 #define EXT3_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 2 : 0)
1171 /* Amount of blocks needed for quota insert/delete - we do some block writes
1172 * but inode, sb and group updates are done only once */
1173 #define EXT3_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\
1174 (EXT3_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_INIT_REWRITE) : 0)
1175 #define EXT3_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\
1176 (EXT3_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_DEL_REWRITE) : 0)
1177 #else
1178 #define EXT3_QUOTA_TRANS_BLOCKS(sb) 0
1179 #define EXT3_QUOTA_INIT_BLOCKS(sb) 0
1180 #define EXT3_QUOTA_DEL_BLOCKS(sb) 0
1181 #endif
1182 #define EXT3_MAXQUOTAS_TRANS_BLOCKS(sb) (MAXQUOTAS*EXT3_QUOTA_TRANS_BLOCKS(sb))
1183 #define EXT3_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT3_QUOTA_INIT_BLOCKS(sb))
1184 #define EXT3_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT3_QUOTA_DEL_BLOCKS(sb))
1185
1186 int
1187 ext3_mark_iloc_dirty(handle_t *handle,
1188 struct inode *inode,
1189 struct ext3_iloc *iloc);
1190
1191 /*
1192 * On success, We end up with an outstanding reference count against
1193 * iloc->bh. This _must_ be cleaned up later.
1194 */
1195
1196 int ext3_reserve_inode_write(handle_t *handle, struct inode *inode,
1197 struct ext3_iloc *iloc);
1198
1199 int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode);
1200
1201 /*
1202 * Wrapper functions with which ext3 calls into JBD. The intent here is
1203 * to allow these to be turned into appropriate stubs so ext3 can control
1204 * ext2 filesystems, so ext2+ext3 systems only nee one fs. This work hasn't
1205 * been done yet.
1206 */
1207
/* Thin wrapper: hand @bh back to JBD via journal_release_buffer(). */
static inline void ext3_journal_release_buffer(handle_t *handle,
						struct buffer_head *bh)
{
	journal_release_buffer(handle, bh);
}
1213
1214 void ext3_journal_abort_handle(const char *caller, const char *err_fn,
1215 struct buffer_head *bh, handle_t *handle, int err);
1216
1217 int __ext3_journal_get_undo_access(const char *where, handle_t *handle,
1218 struct buffer_head *bh);
1219
1220 int __ext3_journal_get_write_access(const char *where, handle_t *handle,
1221 struct buffer_head *bh);
1222
1223 int __ext3_journal_forget(const char *where, handle_t *handle,
1224 struct buffer_head *bh);
1225
1226 int __ext3_journal_revoke(const char *where, handle_t *handle,
1227 unsigned long blocknr, struct buffer_head *bh);
1228
1229 int __ext3_journal_get_create_access(const char *where,
1230 handle_t *handle, struct buffer_head *bh);
1231
1232 int __ext3_journal_dirty_metadata(const char *where,
1233 handle_t *handle, struct buffer_head *bh);
1234
1235 #define ext3_journal_get_undo_access(handle, bh) \
1236 __ext3_journal_get_undo_access(__func__, (handle), (bh))
1237 #define ext3_journal_get_write_access(handle, bh) \
1238 __ext3_journal_get_write_access(__func__, (handle), (bh))
1239 #define ext3_journal_revoke(handle, blocknr, bh) \
1240 __ext3_journal_revoke(__func__, (handle), (blocknr), (bh))
1241 #define ext3_journal_get_create_access(handle, bh) \
1242 __ext3_journal_get_create_access(__func__, (handle), (bh))
1243 #define ext3_journal_dirty_metadata(handle, bh) \
1244 __ext3_journal_dirty_metadata(__func__, (handle), (bh))
1245 #define ext3_journal_forget(handle, bh) \
1246 __ext3_journal_forget(__func__, (handle), (bh))
1247
1248 int ext3_journal_dirty_data(handle_t *handle, struct buffer_head *bh);
1249
1250 handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks);
1251 int __ext3_journal_stop(const char *where, handle_t *handle);
1252
/* Start a journal handle against the journal of @inode's superblock. */
static inline handle_t *ext3_journal_start(struct inode *inode, int nblocks)
{
	return ext3_journal_start_sb(inode->i_sb, nblocks);
}
1257
1258 #define ext3_journal_stop(handle) \
1259 __ext3_journal_stop(__func__, (handle))
1260
/* Return this task's current journal handle, if any (JBD wrapper). */
static inline handle_t *ext3_journal_current_handle(void)
{
	return journal_current_handle();
}
1265
/* Ask JBD for @nblocks additional buffer credits on @handle. */
static inline int ext3_journal_extend(handle_t *handle, int nblocks)
{
	return journal_extend(handle, nblocks);
}
1270
/* Restart @handle in a new transaction with @nblocks credits. */
static inline int ext3_journal_restart(handle_t *handle, int nblocks)
{
	return journal_restart(handle, nblocks);
}
1275
/* Journal blocks needed to cover one page of @inode (JBD wrapper). */
static inline int ext3_journal_blocks_per_page(struct inode *inode)
{
	return journal_blocks_per_page(inode);
}
1280
/* Force a commit of @journal (delegates to journal_force_commit()). */
static inline int ext3_journal_force_commit(journal_t *journal)
{
	return journal_force_commit(journal);
}
1285
1286 /* super.c */
1287 int ext3_force_commit(struct super_block *sb);
1288
1289 static inline int ext3_should_journal_data(struct inode *inode)
1290 {
1291 if (!S_ISREG(inode->i_mode))
1292 return 1;
1293 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA)
1294 return 1;
1295 if (EXT3_I(inode)->i_flags & EXT3_JOURNAL_DATA_FL)
1296 return 1;
1297 return 0;
1298 }
1299
1300 static inline int ext3_should_order_data(struct inode *inode)
1301 {
1302 if (!S_ISREG(inode->i_mode))
1303 return 0;
1304 if (EXT3_I(inode)->i_flags & EXT3_JOURNAL_DATA_FL)
1305 return 0;
1306 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA)
1307 return 1;
1308 return 0;
1309 }
1310
1311 static inline int ext3_should_writeback_data(struct inode *inode)
1312 {
1313 if (!S_ISREG(inode->i_mode))
1314 return 0;
1315 if (EXT3_I(inode)->i_flags & EXT3_JOURNAL_DATA_FL)
1316 return 0;
1317 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)
1318 return 1;
1319 return 0;
1320 }
1321
1322 #include <trace/events/ext3.h>
1323
1 /* 1 /*
2 * Interface between ext3 and JBD 2 * Interface between ext3 and JBD
3 */ 3 */
4 4
5 #include <linux/ext3_jbd.h> 5 #include "ext3.h"
6 6
7 int __ext3_journal_get_undo_access(const char *where, handle_t *handle, 7 int __ext3_journal_get_undo_access(const char *where, handle_t *handle,
8 struct buffer_head *bh) 8 struct buffer_head *bh)
9 { 9 {
10 int err = journal_get_undo_access(handle, bh); 10 int err = journal_get_undo_access(handle, bh);
11 if (err) 11 if (err)
12 ext3_journal_abort_handle(where, __func__, bh, handle,err); 12 ext3_journal_abort_handle(where, __func__, bh, handle,err);
13 return err; 13 return err;
14 } 14 }
15 15
16 int __ext3_journal_get_write_access(const char *where, handle_t *handle, 16 int __ext3_journal_get_write_access(const char *where, handle_t *handle,
17 struct buffer_head *bh) 17 struct buffer_head *bh)
18 { 18 {
19 int err = journal_get_write_access(handle, bh); 19 int err = journal_get_write_access(handle, bh);
20 if (err) 20 if (err)
21 ext3_journal_abort_handle(where, __func__, bh, handle,err); 21 ext3_journal_abort_handle(where, __func__, bh, handle,err);
22 return err; 22 return err;
23 } 23 }
24 24
25 int __ext3_journal_forget(const char *where, handle_t *handle, 25 int __ext3_journal_forget(const char *where, handle_t *handle,
26 struct buffer_head *bh) 26 struct buffer_head *bh)
27 { 27 {
28 int err = journal_forget(handle, bh); 28 int err = journal_forget(handle, bh);
29 if (err) 29 if (err)
30 ext3_journal_abort_handle(where, __func__, bh, handle,err); 30 ext3_journal_abort_handle(where, __func__, bh, handle,err);
31 return err; 31 return err;
32 } 32 }
33 33
34 int __ext3_journal_revoke(const char *where, handle_t *handle, 34 int __ext3_journal_revoke(const char *where, handle_t *handle,
35 unsigned long blocknr, struct buffer_head *bh) 35 unsigned long blocknr, struct buffer_head *bh)
36 { 36 {
37 int err = journal_revoke(handle, blocknr, bh); 37 int err = journal_revoke(handle, blocknr, bh);
38 if (err) 38 if (err)
39 ext3_journal_abort_handle(where, __func__, bh, handle,err); 39 ext3_journal_abort_handle(where, __func__, bh, handle,err);
40 return err; 40 return err;
41 } 41 }
42 42
43 int __ext3_journal_get_create_access(const char *where, 43 int __ext3_journal_get_create_access(const char *where,
44 handle_t *handle, struct buffer_head *bh) 44 handle_t *handle, struct buffer_head *bh)
45 { 45 {
46 int err = journal_get_create_access(handle, bh); 46 int err = journal_get_create_access(handle, bh);
47 if (err) 47 if (err)
48 ext3_journal_abort_handle(where, __func__, bh, handle,err); 48 ext3_journal_abort_handle(where, __func__, bh, handle,err);
49 return err; 49 return err;
50 } 50 }
51 51
52 int __ext3_journal_dirty_metadata(const char *where, 52 int __ext3_journal_dirty_metadata(const char *where,
53 handle_t *handle, struct buffer_head *bh) 53 handle_t *handle, struct buffer_head *bh)
54 { 54 {
55 int err = journal_dirty_metadata(handle, bh); 55 int err = journal_dirty_metadata(handle, bh);
56 if (err) 56 if (err)
57 ext3_journal_abort_handle(where, __func__, bh, handle,err); 57 ext3_journal_abort_handle(where, __func__, bh, handle,err);
58 return err; 58 return err;
59 } 59 }
60 60
1 /* 1 /*
2 * linux/fs/ext3/file.c 2 * linux/fs/ext3/file.c
3 * 3 *
4 * Copyright (C) 1992, 1993, 1994, 1995 4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr) 5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal 6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI) 7 * Universite Pierre et Marie Curie (Paris VI)
8 * 8 *
9 * from 9 * from
10 * 10 *
11 * linux/fs/minix/file.c 11 * linux/fs/minix/file.c
12 * 12 *
13 * Copyright (C) 1991, 1992 Linus Torvalds 13 * Copyright (C) 1991, 1992 Linus Torvalds
14 * 14 *
15 * ext3 fs regular file handling primitives 15 * ext3 fs regular file handling primitives
16 * 16 *
17 * 64-bit file support on 64-bit platforms by Jakub Jelinek 17 * 64-bit file support on 64-bit platforms by Jakub Jelinek
18 * (jj@sunsite.ms.mff.cuni.cz) 18 * (jj@sunsite.ms.mff.cuni.cz)
19 */ 19 */
20 20
21 #include <linux/time.h>
22 #include <linux/fs.h>
23 #include <linux/jbd.h>
24 #include <linux/quotaops.h> 21 #include <linux/quotaops.h>
25 #include <linux/ext3_fs.h> 22 #include "ext3.h"
26 #include <linux/ext3_jbd.h>
27 #include "xattr.h" 23 #include "xattr.h"
28 #include "acl.h" 24 #include "acl.h"
29 25
/*
 * Called when an inode is released. Note that this is different
 * from ext3_file_open: open gets called at every open, but release
 * gets called only when /all/ the files are closed.
 */
static int ext3_release_file (struct inode * inode, struct file * filp)
{
	/* Flush dirty pages now if the inode was flagged flush-on-close. */
	if (ext3_test_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE)) {
		filemap_flush(inode->i_mapping);
		ext3_clear_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE);
	}
	/* if we are the last writer on the inode, drop the block reservation */
	if ((filp->f_mode & FMODE_WRITE) &&
			(atomic_read(&inode->i_writecount) == 1))
	{
		mutex_lock(&EXT3_I(inode)->truncate_mutex);
		ext3_discard_reservation(inode);
		mutex_unlock(&EXT3_I(inode)->truncate_mutex);
	}
	/* Free htree readdir state hung off private_data, if any. */
	if (is_dx(inode) && filp->private_data)
		ext3_htree_free_dir_info(filp->private_data);

	return 0;
}
54 50
/*
 * File operations for regular ext3 files.  The plain read/write/mmap
 * paths use the generic VFS implementations; ext3 supplies its own
 * ioctl, open (quota initialization), release and fsync hooks.
 */
const struct file_operations ext3_file_operations = {
	.llseek		= generic_file_llseek,
	.read		= do_sync_read,
	.write		= do_sync_write,
	.aio_read	= generic_file_aio_read,
	.aio_write	= generic_file_aio_write,
	.unlocked_ioctl	= ext3_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= ext3_compat_ioctl,
#endif
	.mmap		= generic_file_mmap,
	.open		= dquot_file_open,
	.release	= ext3_release_file,
	.fsync		= ext3_sync_file,
	.splice_read	= generic_file_splice_read,
	.splice_write	= generic_file_splice_write,
};
72 68
/*
 * Inode operations for regular ext3 files: setattr, xattr handlers
 * (only when CONFIG_EXT3_FS_XATTR is enabled), ACL lookup and fiemap.
 */
const struct inode_operations ext3_file_inode_operations = {
	.setattr	= ext3_setattr,
#ifdef CONFIG_EXT3_FS_XATTR
	.setxattr	= generic_setxattr,
	.getxattr	= generic_getxattr,
	.listxattr	= ext3_listxattr,
	.removexattr	= generic_removexattr,
#endif
	.get_acl	= ext3_get_acl,
	.fiemap		= ext3_fiemap,
};
84 80
85 81
1 /* 1 /*
2 * linux/fs/ext3/fsync.c 2 * linux/fs/ext3/fsync.c
3 * 3 *
4 * Copyright (C) 1993 Stephen Tweedie (sct@redhat.com) 4 * Copyright (C) 1993 Stephen Tweedie (sct@redhat.com)
5 * from 5 * from
6 * Copyright (C) 1992 Remy Card (card@masi.ibp.fr) 6 * Copyright (C) 1992 Remy Card (card@masi.ibp.fr)
7 * Laboratoire MASI - Institut Blaise Pascal 7 * Laboratoire MASI - Institut Blaise Pascal
8 * Universite Pierre et Marie Curie (Paris VI) 8 * Universite Pierre et Marie Curie (Paris VI)
9 * from 9 * from
10 * linux/fs/minix/truncate.c Copyright (C) 1991, 1992 Linus Torvalds 10 * linux/fs/minix/truncate.c Copyright (C) 1991, 1992 Linus Torvalds
11 * 11 *
12 * ext3fs fsync primitive 12 * ext3fs fsync primitive
13 * 13 *
14 * Big-endian to little-endian byte-swapping/bitmaps by 14 * Big-endian to little-endian byte-swapping/bitmaps by
15 * David S. Miller (davem@caip.rutgers.edu), 1995 15 * David S. Miller (davem@caip.rutgers.edu), 1995
16 * 16 *
17 * Removed unnecessary code duplication for little endian machines 17 * Removed unnecessary code duplication for little endian machines
18 * and excessive __inline__s. 18 * and excessive __inline__s.
19 * Andi Kleen, 1997 19 * Andi Kleen, 1997
20 * 20 *
21 * Major simplications and cleanup - we only need to do the metadata, because 21 * Major simplications and cleanup - we only need to do the metadata, because
22 * we can depend on generic_block_fdatasync() to sync the data blocks. 22 * we can depend on generic_block_fdatasync() to sync the data blocks.
23 */ 23 */
24 24
25 #include <linux/time.h>
26 #include <linux/blkdev.h> 25 #include <linux/blkdev.h>
27 #include <linux/fs.h>
28 #include <linux/sched.h>
29 #include <linux/writeback.h> 26 #include <linux/writeback.h>
30 #include <linux/jbd.h> 27 #include "ext3.h"
31 #include <linux/ext3_fs.h>
32 #include <linux/ext3_jbd.h>
33 #include <trace/events/ext3.h>
34 28
35 /* 29 /*
36 * akpm: A new design for ext3_sync_file(). 30 * akpm: A new design for ext3_sync_file().
37 * 31 *
38 * This is only called from sys_fsync(), sys_fdatasync() and sys_msync(). 32 * This is only called from sys_fsync(), sys_fdatasync() and sys_msync().
39 * There cannot be a transaction open by this task. 33 * There cannot be a transaction open by this task.
40 * Another task could have dirtied this inode. Its data can be in any 34 * Another task could have dirtied this inode. Its data can be in any
41 * state in the journalling system. 35 * state in the journalling system.
42 * 36 *
43 * What we do is just kick off a commit and wait on it. This will snapshot the 37 * What we do is just kick off a commit and wait on it. This will snapshot the
44 * inode to disk. 38 * inode to disk.
45 */ 39 */
46 40
/*
 * Sync the byte range [@start, @end] of @file to stable storage.
 * @datasync != 0 requests fdatasync() semantics: only the transaction
 * tracked in i_datasync_tid needs to be committed.
 * Returns 0 on success or a negative errno.
 */
int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
	struct inode *inode = file->f_mapping->host;
	struct ext3_inode_info *ei = EXT3_I(inode);
	journal_t *journal = EXT3_SB(inode->i_sb)->s_journal;
	int ret, needs_barrier = 0;
	tid_t commit_tid;

	trace_ext3_sync_file_enter(file, datasync);

	/* Nothing can be dirty on a read-only filesystem. */
	if (inode->i_sb->s_flags & MS_RDONLY)
		return 0;

	ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
	if (ret)
		goto out;

	/* fsync must never be entered with a transaction already open. */
	J_ASSERT(ext3_journal_current_handle() == NULL);

	/*
	 * data=writeback,ordered:
	 * The caller's filemap_fdatawrite()/wait will sync the data.
	 * Metadata is in the journal, we wait for a proper transaction
	 * to commit here.
	 *
	 * data=journal:
	 * filemap_fdatawrite won't do anything (the buffers are clean).
	 * ext3_force_commit will write the file data into the journal and
	 * will wait on that.
	 * filemap_fdatawait() will encounter a ton of newly-dirtied pages
	 * (they were dirtied by commit). But that's OK - the blocks are
	 * safe in-journal, which is all fsync() needs to ensure.
	 */
	if (ext3_should_journal_data(inode)) {
		ret = ext3_force_commit(inode->i_sb);
		goto out;
	}

	/* Pick the transaction we must wait for: data-relevant changes
	 * only for datasync, any inode change otherwise. */
	if (datasync)
		commit_tid = atomic_read(&ei->i_datasync_tid);
	else
		commit_tid = atomic_read(&ei->i_sync_tid);

	/* If the journal commit won't issue a barrier for this tid, we
	 * must flush the disk cache ourselves after waiting. */
	if (test_opt(inode->i_sb, BARRIER) &&
	    !journal_trans_will_send_data_barrier(journal, commit_tid))
		needs_barrier = 1;
	log_start_commit(journal, commit_tid);
	ret = log_wait_commit(journal, commit_tid);

	/*
	 * In case we didn't commit a transaction, we have to flush
	 * disk caches manually so that data really is on persistent
	 * storage
	 */
	if (needs_barrier)
		blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
out:
	trace_ext3_sync_file_exit(inode, ret);
	return ret;
}
107 101
1 /* 1 /*
2 * linux/fs/ext3/hash.c 2 * linux/fs/ext3/hash.c
3 * 3 *
4 * Copyright (C) 2002 by Theodore Ts'o 4 * Copyright (C) 2002 by Theodore Ts'o
5 * 5 *
6 * This file is released under the GPL v2. 6 * This file is released under the GPL v2.
7 * 7 *
8 * This file may be redistributed under the terms of the GNU Public 8 * This file may be redistributed under the terms of the GNU Public
9 * License. 9 * License.
10 */ 10 */
11 11
12 #include <linux/fs.h> 12 #include "ext3.h"
13 #include <linux/jbd.h>
14 #include <linux/ext3_fs.h>
15 #include <linux/cryptohash.h> 13 #include <linux/cryptohash.h>
16 14
17 #define DELTA 0x9E3779B9 15 #define DELTA 0x9E3779B9
18 16
19 static void TEA_transform(__u32 buf[4], __u32 const in[]) 17 static void TEA_transform(__u32 buf[4], __u32 const in[])
20 { 18 {
21 __u32 sum = 0; 19 __u32 sum = 0;
22 __u32 b0 = buf[0], b1 = buf[1]; 20 __u32 b0 = buf[0], b1 = buf[1];
23 __u32 a = in[0], b = in[1], c = in[2], d = in[3]; 21 __u32 a = in[0], b = in[1], c = in[2], d = in[3];
24 int n = 16; 22 int n = 16;
25 23
26 do { 24 do {
27 sum += DELTA; 25 sum += DELTA;
28 b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); 26 b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b);
29 b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); 27 b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d);
30 } while(--n); 28 } while(--n);
31 29
32 buf[0] += b0; 30 buf[0] += b0;
33 buf[1] += b1; 31 buf[1] += b1;
34 } 32 }
35 33
36 34
37 /* The old legacy hash */ 35 /* The old legacy hash */
38 static __u32 dx_hack_hash_unsigned(const char *name, int len) 36 static __u32 dx_hack_hash_unsigned(const char *name, int len)
39 { 37 {
40 __u32 hash, hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9; 38 __u32 hash, hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
41 const unsigned char *ucp = (const unsigned char *) name; 39 const unsigned char *ucp = (const unsigned char *) name;
42 40
43 while (len--) { 41 while (len--) {
44 hash = hash1 + (hash0 ^ (((int) *ucp++) * 7152373)); 42 hash = hash1 + (hash0 ^ (((int) *ucp++) * 7152373));
45 43
46 if (hash & 0x80000000) 44 if (hash & 0x80000000)
47 hash -= 0x7fffffff; 45 hash -= 0x7fffffff;
48 hash1 = hash0; 46 hash1 = hash0;
49 hash0 = hash; 47 hash0 = hash;
50 } 48 }
51 return hash0 << 1; 49 return hash0 << 1;
52 } 50 }
53 51
54 static __u32 dx_hack_hash_signed(const char *name, int len) 52 static __u32 dx_hack_hash_signed(const char *name, int len)
55 { 53 {
56 __u32 hash, hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9; 54 __u32 hash, hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
57 const signed char *scp = (const signed char *) name; 55 const signed char *scp = (const signed char *) name;
58 56
59 while (len--) { 57 while (len--) {
60 hash = hash1 + (hash0 ^ (((int) *scp++) * 7152373)); 58 hash = hash1 + (hash0 ^ (((int) *scp++) * 7152373));
61 59
62 if (hash & 0x80000000) 60 if (hash & 0x80000000)
63 hash -= 0x7fffffff; 61 hash -= 0x7fffffff;
64 hash1 = hash0; 62 hash1 = hash0;
65 hash0 = hash; 63 hash0 = hash;
66 } 64 }
67 return hash0 << 1; 65 return hash0 << 1;
68 } 66 }
69 67
/*
 * Pack up to @num 32-bit words of hash input from @msg into @buf,
 * accumulating the bytes as *signed* chars (the historic behaviour on
 * platforms where plain char is signed; preserved for on-disk htree
 * compatibility).
 *
 * Bytes are packed four per word, most-significant first.  Space beyond
 * the message is filled with a pad pattern built from @len, so short
 * names still yield a full-width input block for the transform.
 */
static void str2hashbuf_signed(const char *msg, int len, __u32 *buf, int num)
{
	__u32 pad, val;
	int i;
	const signed char *scp = (const signed char *) msg;

	/* Pad word: the low 8 bits of len replicated into all 4 bytes. */
	pad = (__u32)len | ((__u32)len << 8);
	pad |= pad << 16;

	val = pad;
	/* Never consume more bytes than the output buffer can hold. */
	if (len > num*4)
		len = num * 4;
	for (i = 0; i < len; i++) {
		if ((i % 4) == 0)
			val = pad;
		val = ((int) scp[i]) + (val << 8);
		if ((i % 4) == 3) {
			*buf++ = val;
			val = pad;
			num--;
		}
	}
	/* Flush the final (possibly partial, pad-seeded) word, then fill
	 * any remaining output words with pure pad. */
	if (--num >= 0)
		*buf++ = val;
	while (--num >= 0)
		*buf++ = pad;
}
97 95
98 static void str2hashbuf_unsigned(const char *msg, int len, __u32 *buf, int num) 96 static void str2hashbuf_unsigned(const char *msg, int len, __u32 *buf, int num)
99 { 97 {
100 __u32 pad, val; 98 __u32 pad, val;
101 int i; 99 int i;
102 const unsigned char *ucp = (const unsigned char *) msg; 100 const unsigned char *ucp = (const unsigned char *) msg;
103 101
104 pad = (__u32)len | ((__u32)len << 8); 102 pad = (__u32)len | ((__u32)len << 8);
105 pad |= pad << 16; 103 pad |= pad << 16;
106 104
107 val = pad; 105 val = pad;
108 if (len > num*4) 106 if (len > num*4)
109 len = num * 4; 107 len = num * 4;
110 for (i=0; i < len; i++) { 108 for (i=0; i < len; i++) {
111 if ((i % 4) == 0) 109 if ((i % 4) == 0)
112 val = pad; 110 val = pad;
113 val = ((int) ucp[i]) + (val << 8); 111 val = ((int) ucp[i]) + (val << 8);
114 if ((i % 4) == 3) { 112 if ((i % 4) == 3) {
115 *buf++ = val; 113 *buf++ = val;
116 val = pad; 114 val = pad;
117 num--; 115 num--;
118 } 116 }
119 } 117 }
120 if (--num >= 0) 118 if (--num >= 0)
121 *buf++ = val; 119 *buf++ = val;
122 while (--num >= 0) 120 while (--num >= 0)
123 *buf++ = pad; 121 *buf++ = pad;
124 } 122 }
125 123
126 /* 124 /*
127 * Returns the hash of a filename. If len is 0 and name is NULL, then 125 * Returns the hash of a filename. If len is 0 and name is NULL, then
128 * this function can be used to test whether or not a hash version is 126 * this function can be used to test whether or not a hash version is
129 * supported. 127 * supported.
130 * 128 *
131 * The seed is an 4 longword (32 bits) "secret" which can be used to 129 * The seed is an 4 longword (32 bits) "secret" which can be used to
132 * uniquify a hash. If the seed is all zero's, then some default seed 130 * uniquify a hash. If the seed is all zero's, then some default seed
133 * may be used. 131 * may be used.
134 * 132 *
135 * A particular hash version specifies whether or not the seed is 133 * A particular hash version specifies whether or not the seed is
136 * represented, and whether or not the returned hash is 32 bits or 64 134 * represented, and whether or not the returned hash is 32 bits or 64
137 * bits. 32 bit hashes will return 0 for the minor hash. 135 * bits. 32 bit hashes will return 0 for the minor hash.
138 */ 136 */
/*
 * Compute hinfo->hash (and, for 64-bit hash versions, hinfo->minor_hash)
 * for @name/@len according to hinfo->hash_version.  Returns 0 on success
 * or -1 for an unknown hash version.  See the comment block above for
 * the seed and width semantics.
 */
int ext3fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
{
	__u32 hash;
	__u32 minor_hash = 0;
	const char *p;
	int i;
	__u32 in[8], buf[4];
	/* Default to the signed string-to-buffer packer; the *_UNSIGNED
	 * hash versions switch this before falling through below. */
	void (*str2hashbuf)(const char *, int, __u32 *, int) =
		str2hashbuf_signed;

	/* Initialize the default seed for the hash checksum functions */
	buf[0] = 0x67452301;
	buf[1] = 0xefcdab89;
	buf[2] = 0x98badcfe;
	buf[3] = 0x10325476;

	/* Check to see if the seed is all zero's */
	if (hinfo->seed) {
		for (i=0; i < 4; i++) {
			if (hinfo->seed[i])
				break;
		}
		/* Only a non-zero seed overrides the default above. */
		if (i < 4)
			memcpy(buf, hinfo->seed, sizeof(buf));
	}

	switch (hinfo->hash_version) {
	case DX_HASH_LEGACY_UNSIGNED:
		hash = dx_hack_hash_unsigned(name, len);
		break;
	case DX_HASH_LEGACY:
		hash = dx_hack_hash_signed(name, len);
		break;
	case DX_HASH_HALF_MD4_UNSIGNED:
		str2hashbuf = str2hashbuf_unsigned;
		/* fall through: shares the half-MD4 loop below */
	case DX_HASH_HALF_MD4:
		/* Feed the name through half_md4_transform() in 32-byte
		 * chunks; str2hashbuf pads the final short chunk. */
		p = name;
		while (len > 0) {
			(*str2hashbuf)(p, len, in, 8);
			half_md4_transform(buf, in);
			len -= 32;
			p += 32;
		}
		minor_hash = buf[2];
		hash = buf[1];
		break;
	case DX_HASH_TEA_UNSIGNED:
		str2hashbuf = str2hashbuf_unsigned;
		/* fall through: shares the TEA loop below */
	case DX_HASH_TEA:
		/* Feed the name through TEA_transform() in 16-byte chunks. */
		p = name;
		while (len > 0) {
			(*str2hashbuf)(p, len, in, 4);
			TEA_transform(buf, in);
			len -= 16;
			p += 16;
		}
		hash = buf[0];
		minor_hash = buf[1];
		break;
	default:
		hinfo->hash = 0;
		return -1;
	}
	/* Bit 0 of the hash is reserved, so clear it; also avoid the
	 * EXT3_HTREE_EOF sentinel value by stepping down one slot. */
	hash = hash & ~1;
	if (hash == (EXT3_HTREE_EOF << 1))
		hash = (EXT3_HTREE_EOF-1) << 1;
	hinfo->hash = hash;
	hinfo->minor_hash = minor_hash;
	return 0;
}
209 207
1 /* 1 /*
2 * linux/fs/ext3/ialloc.c 2 * linux/fs/ext3/ialloc.c
3 * 3 *
4 * Copyright (C) 1992, 1993, 1994, 1995 4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr) 5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal 6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI) 7 * Universite Pierre et Marie Curie (Paris VI)
8 * 8 *
9 * BSD ufs-inspired inode and directory allocation by 9 * BSD ufs-inspired inode and directory allocation by
10 * Stephen Tweedie (sct@redhat.com), 1993 10 * Stephen Tweedie (sct@redhat.com), 1993
11 * Big-endian to little-endian byte-swapping/bitmaps by 11 * Big-endian to little-endian byte-swapping/bitmaps by
12 * David S. Miller (davem@caip.rutgers.edu), 1995 12 * David S. Miller (davem@caip.rutgers.edu), 1995
13 */ 13 */
14 14
15 #include <linux/time.h>
16 #include <linux/fs.h>
17 #include <linux/jbd.h>
18 #include <linux/ext3_fs.h>
19 #include <linux/ext3_jbd.h>
20 #include <linux/stat.h>
21 #include <linux/string.h>
22 #include <linux/quotaops.h> 15 #include <linux/quotaops.h>
23 #include <linux/buffer_head.h>
24 #include <linux/random.h> 16 #include <linux/random.h>
25 #include <linux/bitops.h>
26 #include <trace/events/ext3.h>
27 17
28 #include <asm/byteorder.h> 18 #include "ext3.h"
29
30 #include "xattr.h" 19 #include "xattr.h"
31 #include "acl.h" 20 #include "acl.h"
32 21
33 /* 22 /*
34 * ialloc.c contains the inodes allocation and deallocation routines 23 * ialloc.c contains the inodes allocation and deallocation routines
35 */ 24 */
36 25
37 /* 26 /*
38 * The free inodes are managed by bitmaps. A file system contains several 27 * The free inodes are managed by bitmaps. A file system contains several
39 * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap 28 * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap
40 * block for inodes, N blocks for the inode table and data blocks. 29 * block for inodes, N blocks for the inode table and data blocks.
41 * 30 *
42 * The file system contains group descriptors which are located after the 31 * The file system contains group descriptors which are located after the
43 * super block. Each descriptor contains the number of the bitmap block and 32 * super block. Each descriptor contains the number of the bitmap block and
44 * the free blocks count in the block. 33 * the free blocks count in the block.
45 */ 34 */
46 35
47 36
48 /* 37 /*
49 * Read the inode allocation bitmap for a given block_group, reading 38 * Read the inode allocation bitmap for a given block_group, reading
50 * into the specified slot in the superblock's bitmap cache. 39 * into the specified slot in the superblock's bitmap cache.
51 * 40 *
52 * Return buffer_head of bitmap on success or NULL. 41 * Return buffer_head of bitmap on success or NULL.
53 */ 42 */
static struct buffer_head *
read_inode_bitmap(struct super_block * sb, unsigned long block_group)
{
	struct ext3_group_desc *desc;
	struct buffer_head *bh = NULL;

	/* The group descriptor records where this group's inode bitmap
	 * block lives; without it the bitmap cannot be located. */
	desc = ext3_get_group_desc(sb, block_group, NULL);
	if (!desc)
		goto error_out;

	/* sb_bread() returns NULL on I/O failure, in which case we log
	 * the error and return NULL (bh is still NULL here). */
	bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap));
	if (!bh)
		ext3_error(sb, "read_inode_bitmap",
			    "Cannot read inode bitmap - "
			    "block_group = %lu, inode_bitmap = %u",
			    block_group, le32_to_cpu(desc->bg_inode_bitmap));
error_out:
	return bh;
}
73 62
74 /* 63 /*
75 * NOTE! When we get the inode, we're the only people 64 * NOTE! When we get the inode, we're the only people
76 * that have access to it, and as such there are no 65 * that have access to it, and as such there are no
77 * race conditions we have to worry about. The inode 66 * race conditions we have to worry about. The inode
78 * is not on the hash-lists, and it cannot be reached 67 * is not on the hash-lists, and it cannot be reached
79 * through the filesystem because the directory entry 68 * through the filesystem because the directory entry
80 * has been deleted earlier. 69 * has been deleted earlier.
81 * 70 *
82 * HOWEVER: we must make sure that we get no aliases, 71 * HOWEVER: we must make sure that we get no aliases,
83 * which means that we have to call "clear_inode()" 72 * which means that we have to call "clear_inode()"
84 * _before_ we mark the inode not in use in the inode 73 * _before_ we mark the inode not in use in the inode
85 * bitmaps. Otherwise a newly created file might use 74 * bitmaps. Otherwise a newly created file might use
86 * the same inode number (not actually the same pointer 75 * the same inode number (not actually the same pointer
87 * though), and then we'd have two inodes sharing the 76 * though), and then we'd have two inodes sharing the
88 * same inode number and space on the harddisk. 77 * same inode number and space on the harddisk.
89 */ 78 */
/*
 * Release @inode's slot in the on-disk inode bitmap and update the
 * per-group and per-filesystem free-inode (and used-dirs) accounting,
 * all under the journal handle @handle.  Errors are reported via
 * ext3_error()/ext3_std_error(); the function returns void.
 */
void ext3_free_inode (handle_t *handle, struct inode * inode)
{
	struct super_block * sb = inode->i_sb;
	int is_directory;
	unsigned long ino;
	struct buffer_head *bitmap_bh = NULL;
	struct buffer_head *bh2;
	unsigned long block_group;
	unsigned long bit;
	struct ext3_group_desc * gdp;
	struct ext3_super_block * es;
	struct ext3_sb_info *sbi;
	int fatal = 0, err;

	/* Sanity checks: the inode must be unreferenced, unlinked and
	 * attached to a superblock before its bitmap bit may be freed. */
	if (atomic_read(&inode->i_count) > 1) {
		printk ("ext3_free_inode: inode has count=%d\n",
					atomic_read(&inode->i_count));
		return;
	}
	if (inode->i_nlink) {
		printk ("ext3_free_inode: inode has nlink=%d\n",
			inode->i_nlink);
		return;
	}
	if (!sb) {
		printk("ext3_free_inode: inode on nonexistent device\n");
		return;
	}
	sbi = EXT3_SB(sb);

	ino = inode->i_ino;
	ext3_debug ("freeing inode %lu\n", ino);
	trace_ext3_free_inode(inode);

	is_directory = S_ISDIR(inode->i_mode);

	/* Refuse reserved inodes and numbers beyond the fs inode count. */
	es = EXT3_SB(sb)->s_es;
	if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
		ext3_error (sb, "ext3_free_inode",
			    "reserved or nonexistent inode %lu", ino);
		goto error_return;
	}
	/* Map the inode number to its group and bit within that group's
	 * bitmap (inode numbers are 1-based, hence the -1). */
	block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb);
	bit = (ino - 1) % EXT3_INODES_PER_GROUP(sb);
	bitmap_bh = read_inode_bitmap(sb, block_group);
	if (!bitmap_bh)
		goto error_return;

	/* Journal ordering: get write access to a buffer before
	 * modifying it, then mark it dirty afterwards. */
	BUFFER_TRACE(bitmap_bh, "get_write_access");
	fatal = ext3_journal_get_write_access(handle, bitmap_bh);
	if (fatal)
		goto error_return;

	/* Ok, now we can actually update the inode bitmaps.. */
	if (!ext3_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
					bit, bitmap_bh->b_data))
		/* Bit was already clear: double free / corruption. */
		ext3_error (sb, "ext3_free_inode",
			      "bit already cleared for inode %lu", ino);
	else {
		gdp = ext3_get_group_desc (sb, block_group, &bh2);

		BUFFER_TRACE(bh2, "get_write_access");
		fatal = ext3_journal_get_write_access(handle, bh2);
		if (fatal) goto error_return;

		if (gdp) {
			/* Group counters are protected by the per-group
			 * spinlock; the fs-wide counters are per-cpu. */
			spin_lock(sb_bgl_lock(sbi, block_group));
			le16_add_cpu(&gdp->bg_free_inodes_count, 1);
			if (is_directory)
				le16_add_cpu(&gdp->bg_used_dirs_count, -1);
			spin_unlock(sb_bgl_lock(sbi, block_group));
			percpu_counter_inc(&sbi->s_freeinodes_counter);
			if (is_directory)
				percpu_counter_dec(&sbi->s_dirs_counter);

		}
		BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata");
		err = ext3_journal_dirty_metadata(handle, bh2);
		/* Preserve the first fatal error seen. */
		if (!fatal) fatal = err;
	}
	BUFFER_TRACE(bitmap_bh, "call ext3_journal_dirty_metadata");
	err = ext3_journal_dirty_metadata(handle, bitmap_bh);
	if (!fatal)
		fatal = err;

error_return:
	brelse(bitmap_bh);
	ext3_std_error(sb, fatal);
}
179 168
180 /* 169 /*
181 * Orlov's allocator for directories. 170 * Orlov's allocator for directories.
182 * 171 *
183 * We always try to spread first-level directories. 172 * We always try to spread first-level directories.
184 * 173 *
185 * If there are blockgroups with both free inodes and free blocks counts 174 * If there are blockgroups with both free inodes and free blocks counts
186 * not worse than average we return one with smallest directory count. 175 * not worse than average we return one with smallest directory count.
187 * Otherwise we simply return a random group. 176 * Otherwise we simply return a random group.
188 * 177 *
189 * For the rest rules look so: 178 * For the rest rules look so:
190 * 179 *
191 * It's OK to put directory into a group unless 180 * It's OK to put directory into a group unless
192 * it has too many directories already (max_dirs) or 181 * it has too many directories already (max_dirs) or
193 * it has too few free inodes left (min_inodes) or 182 * it has too few free inodes left (min_inodes) or
194 * it has too few free blocks left (min_blocks) or 183 * it has too few free blocks left (min_blocks) or
195 * it's already running too large debt (max_debt). 184 * it's already running too large debt (max_debt).
196 * Parent's group is preferred, if it doesn't satisfy these 185 * Parent's group is preferred, if it doesn't satisfy these
197 * conditions we search cyclically through the rest. If none 186 * conditions we search cyclically through the rest. If none
198 * of the groups look good we just look for a group with more 187 * of the groups look good we just look for a group with more
199 * free inodes than average (starting at parent's group). 188 * free inodes than average (starting at parent's group).
200 * 189 *
201 * Debt is incremented each time we allocate a directory and decremented 190 * Debt is incremented each time we allocate a directory and decremented
202 * when we allocate an inode, within 0--255. 191 * when we allocate an inode, within 0--255.
203 */ 192 */
204 193
205 #define INODE_COST 64 194 #define INODE_COST 64
206 #define BLOCK_COST 256 195 #define BLOCK_COST 256
207 196
/*
 * Pick a block group for a new directory inode using the Orlov policy
 * described in the comment block above.  Returns a group number, or -1
 * if no usable group was found.
 */
static int find_group_orlov(struct super_block *sb, struct inode *parent)
{
	int parent_group = EXT3_I(parent)->i_block_group;
	struct ext3_sb_info *sbi = EXT3_SB(sb);
	struct ext3_super_block *es = sbi->s_es;
	int ngroups = sbi->s_groups_count;
	int inodes_per_group = EXT3_INODES_PER_GROUP(sb);
	unsigned int freei, avefreei;
	ext3_fsblk_t freeb, avefreeb;
	ext3_fsblk_t blocks_per_dir;
	unsigned int ndirs;
	int max_debt, max_dirs, min_inodes;
	ext3_grpblk_t min_blocks;
	int group = -1, i;
	struct ext3_group_desc *desc;

	/* Filesystem-wide averages used by the heuristics below; the
	 * per-cpu counters are approximate but cheap to read. */
	freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter);
	avefreei = freei / ngroups;
	freeb = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
	avefreeb = freeb / ngroups;
	ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);

	/* Top-level directories (children of the root, or of a TOPDIR
	 * directory) are spread out: start from a random group and take
	 * the not-worse-than-average group with the fewest directories. */
	if ((parent == sb->s_root->d_inode) ||
	    (EXT3_I(parent)->i_flags & EXT3_TOPDIR_FL)) {
		int best_ndir = inodes_per_group;
		int best_group = -1;

		get_random_bytes(&group, sizeof(group));
		parent_group = (unsigned)group % ngroups;
		for (i = 0; i < ngroups; i++) {
			group = (parent_group + i) % ngroups;
			desc = ext3_get_group_desc (sb, group, NULL);
			if (!desc || !desc->bg_free_inodes_count)
				continue;
			if (le16_to_cpu(desc->bg_used_dirs_count) >= best_ndir)
				continue;
			if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
				continue;
			if (le16_to_cpu(desc->bg_free_blocks_count) < avefreeb)
				continue;
			best_group = group;
			best_ndir = le16_to_cpu(desc->bg_used_dirs_count);
		}
		if (best_group >= 0)
			return best_group;
		goto fallback;
	}

	/* Average number of blocks consumed per existing directory. */
	blocks_per_dir = (le32_to_cpu(es->s_blocks_count) - freeb) / ndirs;

	/* Acceptance thresholds for an ordinary (non-top-level) dir. */
	max_dirs = ndirs / ngroups + inodes_per_group / 16;
	min_inodes = avefreei - inodes_per_group / 4;
	min_blocks = avefreeb - EXT3_BLOCKS_PER_GROUP(sb) / 4;

	/* Clamp the debt limit to [1, 255] (see INODE_COST/BLOCK_COST). */
	max_debt = EXT3_BLOCKS_PER_GROUP(sb) / max(blocks_per_dir, (ext3_fsblk_t)BLOCK_COST);
	if (max_debt * INODE_COST > inodes_per_group)
		max_debt = inodes_per_group / INODE_COST;
	if (max_debt > 255)
		max_debt = 255;
	if (max_debt == 0)
		max_debt = 1;

	/* Cyclic search from the parent's group for a group that passes
	 * all of the thresholds computed above. */
	for (i = 0; i < ngroups; i++) {
		group = (parent_group + i) % ngroups;
		desc = ext3_get_group_desc (sb, group, NULL);
		if (!desc || !desc->bg_free_inodes_count)
			continue;
		if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs)
			continue;
		if (le16_to_cpu(desc->bg_free_inodes_count) < min_inodes)
			continue;
		if (le16_to_cpu(desc->bg_free_blocks_count) < min_blocks)
			continue;
		return group;
	}

fallback:
	/* Relaxed pass: any group with at least the average free inodes. */
	for (i = 0; i < ngroups; i++) {
		group = (parent_group + i) % ngroups;
		desc = ext3_get_group_desc (sb, group, NULL);
		if (!desc || !desc->bg_free_inodes_count)
			continue;
		if (le16_to_cpu(desc->bg_free_inodes_count) >= avefreei)
			return group;
	}

	if (avefreei) {
		/*
		 * The free-inodes counter is approximate, and for really small
		 * filesystems the above test can fail to find any blockgroups
		 */
		avefreei = 0;
		goto fallback;
	}

	return -1;
}
305 294
306 static int find_group_other(struct super_block *sb, struct inode *parent) 295 static int find_group_other(struct super_block *sb, struct inode *parent)
307 { 296 {
308 int parent_group = EXT3_I(parent)->i_block_group; 297 int parent_group = EXT3_I(parent)->i_block_group;
309 int ngroups = EXT3_SB(sb)->s_groups_count; 298 int ngroups = EXT3_SB(sb)->s_groups_count;
310 struct ext3_group_desc *desc; 299 struct ext3_group_desc *desc;
311 int group, i; 300 int group, i;
312 301
313 /* 302 /*
314 * Try to place the inode in its parent directory 303 * Try to place the inode in its parent directory
315 */ 304 */
316 group = parent_group; 305 group = parent_group;
317 desc = ext3_get_group_desc (sb, group, NULL); 306 desc = ext3_get_group_desc (sb, group, NULL);
318 if (desc && le16_to_cpu(desc->bg_free_inodes_count) && 307 if (desc && le16_to_cpu(desc->bg_free_inodes_count) &&
319 le16_to_cpu(desc->bg_free_blocks_count)) 308 le16_to_cpu(desc->bg_free_blocks_count))
320 return group; 309 return group;
321 310
322 /* 311 /*
323 * We're going to place this inode in a different blockgroup from its 312 * We're going to place this inode in a different blockgroup from its
324 * parent. We want to cause files in a common directory to all land in 313 * parent. We want to cause files in a common directory to all land in
325 * the same blockgroup. But we want files which are in a different 314 * the same blockgroup. But we want files which are in a different
326 * directory which shares a blockgroup with our parent to land in a 315 * directory which shares a blockgroup with our parent to land in a
327 * different blockgroup. 316 * different blockgroup.
328 * 317 *
329 * So add our directory's i_ino into the starting point for the hash. 318 * So add our directory's i_ino into the starting point for the hash.
330 */ 319 */
331 group = (group + parent->i_ino) % ngroups; 320 group = (group + parent->i_ino) % ngroups;
332 321
333 /* 322 /*
334 * Use a quadratic hash to find a group with a free inode and some free 323 * Use a quadratic hash to find a group with a free inode and some free
335 * blocks. 324 * blocks.
336 */ 325 */
337 for (i = 1; i < ngroups; i <<= 1) { 326 for (i = 1; i < ngroups; i <<= 1) {
338 group += i; 327 group += i;
339 if (group >= ngroups) 328 if (group >= ngroups)
340 group -= ngroups; 329 group -= ngroups;
341 desc = ext3_get_group_desc (sb, group, NULL); 330 desc = ext3_get_group_desc (sb, group, NULL);
342 if (desc && le16_to_cpu(desc->bg_free_inodes_count) && 331 if (desc && le16_to_cpu(desc->bg_free_inodes_count) &&
343 le16_to_cpu(desc->bg_free_blocks_count)) 332 le16_to_cpu(desc->bg_free_blocks_count))
344 return group; 333 return group;
345 } 334 }
346 335
347 /* 336 /*
348 * That failed: try linear search for a free inode, even if that group 337 * That failed: try linear search for a free inode, even if that group
349 * has no free blocks. 338 * has no free blocks.
350 */ 339 */
351 group = parent_group; 340 group = parent_group;
352 for (i = 0; i < ngroups; i++) { 341 for (i = 0; i < ngroups; i++) {
353 if (++group >= ngroups) 342 if (++group >= ngroups)
354 group = 0; 343 group = 0;
355 desc = ext3_get_group_desc (sb, group, NULL); 344 desc = ext3_get_group_desc (sb, group, NULL);
356 if (desc && le16_to_cpu(desc->bg_free_inodes_count)) 345 if (desc && le16_to_cpu(desc->bg_free_inodes_count))
357 return group; 346 return group;
358 } 347 }
359 348
360 return -1; 349 return -1;
361 } 350 }
362 351
363 /* 352 /*
364 * There are two policies for allocating an inode. If the new inode is 353 * There are two policies for allocating an inode. If the new inode is
365 * a directory, then a forward search is made for a block group with both 354 * a directory, then a forward search is made for a block group with both
366 * free space and a low directory-to-inode ratio; if that fails, then of 355 * free space and a low directory-to-inode ratio; if that fails, then of
367 * the groups with above-average free space, that group with the fewest 356 * the groups with above-average free space, that group with the fewest
368 * directories already is chosen. 357 * directories already is chosen.
369 * 358 *
370 * For other inodes, search forward from the parent directory's block 359 * For other inodes, search forward from the parent directory's block
371 * group to find a free inode. 360 * group to find a free inode.
372 */ 361 */
373 struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, 362 struct inode *ext3_new_inode(handle_t *handle, struct inode * dir,
374 const struct qstr *qstr, umode_t mode) 363 const struct qstr *qstr, umode_t mode)
375 { 364 {
376 struct super_block *sb; 365 struct super_block *sb;
377 struct buffer_head *bitmap_bh = NULL; 366 struct buffer_head *bitmap_bh = NULL;
378 struct buffer_head *bh2; 367 struct buffer_head *bh2;
379 int group; 368 int group;
380 unsigned long ino = 0; 369 unsigned long ino = 0;
381 struct inode * inode; 370 struct inode * inode;
382 struct ext3_group_desc * gdp = NULL; 371 struct ext3_group_desc * gdp = NULL;
383 struct ext3_super_block * es; 372 struct ext3_super_block * es;
384 struct ext3_inode_info *ei; 373 struct ext3_inode_info *ei;
385 struct ext3_sb_info *sbi; 374 struct ext3_sb_info *sbi;
386 int err = 0; 375 int err = 0;
387 struct inode *ret; 376 struct inode *ret;
388 int i; 377 int i;
389 378
390 /* Cannot create files in a deleted directory */ 379 /* Cannot create files in a deleted directory */
391 if (!dir || !dir->i_nlink) 380 if (!dir || !dir->i_nlink)
392 return ERR_PTR(-EPERM); 381 return ERR_PTR(-EPERM);
393 382
394 sb = dir->i_sb; 383 sb = dir->i_sb;
395 trace_ext3_request_inode(dir, mode); 384 trace_ext3_request_inode(dir, mode);
396 inode = new_inode(sb); 385 inode = new_inode(sb);
397 if (!inode) 386 if (!inode)
398 return ERR_PTR(-ENOMEM); 387 return ERR_PTR(-ENOMEM);
399 ei = EXT3_I(inode); 388 ei = EXT3_I(inode);
400 389
401 sbi = EXT3_SB(sb); 390 sbi = EXT3_SB(sb);
402 es = sbi->s_es; 391 es = sbi->s_es;
403 if (S_ISDIR(mode)) 392 if (S_ISDIR(mode))
404 group = find_group_orlov(sb, dir); 393 group = find_group_orlov(sb, dir);
405 else 394 else
406 group = find_group_other(sb, dir); 395 group = find_group_other(sb, dir);
407 396
408 err = -ENOSPC; 397 err = -ENOSPC;
409 if (group == -1) 398 if (group == -1)
410 goto out; 399 goto out;
411 400
412 for (i = 0; i < sbi->s_groups_count; i++) { 401 for (i = 0; i < sbi->s_groups_count; i++) {
413 err = -EIO; 402 err = -EIO;
414 403
415 gdp = ext3_get_group_desc(sb, group, &bh2); 404 gdp = ext3_get_group_desc(sb, group, &bh2);
416 if (!gdp) 405 if (!gdp)
417 goto fail; 406 goto fail;
418 407
419 brelse(bitmap_bh); 408 brelse(bitmap_bh);
420 bitmap_bh = read_inode_bitmap(sb, group); 409 bitmap_bh = read_inode_bitmap(sb, group);
421 if (!bitmap_bh) 410 if (!bitmap_bh)
422 goto fail; 411 goto fail;
423 412
424 ino = 0; 413 ino = 0;
425 414
426 repeat_in_this_group: 415 repeat_in_this_group:
427 ino = ext3_find_next_zero_bit((unsigned long *) 416 ino = ext3_find_next_zero_bit((unsigned long *)
428 bitmap_bh->b_data, EXT3_INODES_PER_GROUP(sb), ino); 417 bitmap_bh->b_data, EXT3_INODES_PER_GROUP(sb), ino);
429 if (ino < EXT3_INODES_PER_GROUP(sb)) { 418 if (ino < EXT3_INODES_PER_GROUP(sb)) {
430 419
431 BUFFER_TRACE(bitmap_bh, "get_write_access"); 420 BUFFER_TRACE(bitmap_bh, "get_write_access");
432 err = ext3_journal_get_write_access(handle, bitmap_bh); 421 err = ext3_journal_get_write_access(handle, bitmap_bh);
433 if (err) 422 if (err)
434 goto fail; 423 goto fail;
435 424
436 if (!ext3_set_bit_atomic(sb_bgl_lock(sbi, group), 425 if (!ext3_set_bit_atomic(sb_bgl_lock(sbi, group),
437 ino, bitmap_bh->b_data)) { 426 ino, bitmap_bh->b_data)) {
438 /* we won it */ 427 /* we won it */
439 BUFFER_TRACE(bitmap_bh, 428 BUFFER_TRACE(bitmap_bh,
440 "call ext3_journal_dirty_metadata"); 429 "call ext3_journal_dirty_metadata");
441 err = ext3_journal_dirty_metadata(handle, 430 err = ext3_journal_dirty_metadata(handle,
442 bitmap_bh); 431 bitmap_bh);
443 if (err) 432 if (err)
444 goto fail; 433 goto fail;
445 goto got; 434 goto got;
446 } 435 }
447 /* we lost it */ 436 /* we lost it */
448 journal_release_buffer(handle, bitmap_bh); 437 journal_release_buffer(handle, bitmap_bh);
449 438
450 if (++ino < EXT3_INODES_PER_GROUP(sb)) 439 if (++ino < EXT3_INODES_PER_GROUP(sb))
451 goto repeat_in_this_group; 440 goto repeat_in_this_group;
452 } 441 }
453 442
454 /* 443 /*
455 * This case is possible in concurrent environment. It is very 444 * This case is possible in concurrent environment. It is very
456 * rare. We cannot repeat the find_group_xxx() call because 445 * rare. We cannot repeat the find_group_xxx() call because
457 * that will simply return the same blockgroup, because the 446 * that will simply return the same blockgroup, because the
458 * group descriptor metadata has not yet been updated. 447 * group descriptor metadata has not yet been updated.
459 * So we just go onto the next blockgroup. 448 * So we just go onto the next blockgroup.
460 */ 449 */
461 if (++group == sbi->s_groups_count) 450 if (++group == sbi->s_groups_count)
462 group = 0; 451 group = 0;
463 } 452 }
464 err = -ENOSPC; 453 err = -ENOSPC;
465 goto out; 454 goto out;
466 455
467 got: 456 got:
468 ino += group * EXT3_INODES_PER_GROUP(sb) + 1; 457 ino += group * EXT3_INODES_PER_GROUP(sb) + 1;
469 if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { 458 if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
470 ext3_error (sb, "ext3_new_inode", 459 ext3_error (sb, "ext3_new_inode",
471 "reserved inode or inode > inodes count - " 460 "reserved inode or inode > inodes count - "
472 "block_group = %d, inode=%lu", group, ino); 461 "block_group = %d, inode=%lu", group, ino);
473 err = -EIO; 462 err = -EIO;
474 goto fail; 463 goto fail;
475 } 464 }
476 465
477 BUFFER_TRACE(bh2, "get_write_access"); 466 BUFFER_TRACE(bh2, "get_write_access");
478 err = ext3_journal_get_write_access(handle, bh2); 467 err = ext3_journal_get_write_access(handle, bh2);
479 if (err) goto fail; 468 if (err) goto fail;
480 spin_lock(sb_bgl_lock(sbi, group)); 469 spin_lock(sb_bgl_lock(sbi, group));
481 le16_add_cpu(&gdp->bg_free_inodes_count, -1); 470 le16_add_cpu(&gdp->bg_free_inodes_count, -1);
482 if (S_ISDIR(mode)) { 471 if (S_ISDIR(mode)) {
483 le16_add_cpu(&gdp->bg_used_dirs_count, 1); 472 le16_add_cpu(&gdp->bg_used_dirs_count, 1);
484 } 473 }
485 spin_unlock(sb_bgl_lock(sbi, group)); 474 spin_unlock(sb_bgl_lock(sbi, group));
486 BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata"); 475 BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata");
487 err = ext3_journal_dirty_metadata(handle, bh2); 476 err = ext3_journal_dirty_metadata(handle, bh2);
488 if (err) goto fail; 477 if (err) goto fail;
489 478
490 percpu_counter_dec(&sbi->s_freeinodes_counter); 479 percpu_counter_dec(&sbi->s_freeinodes_counter);
491 if (S_ISDIR(mode)) 480 if (S_ISDIR(mode))
492 percpu_counter_inc(&sbi->s_dirs_counter); 481 percpu_counter_inc(&sbi->s_dirs_counter);
493 482
494 483
495 if (test_opt(sb, GRPID)) { 484 if (test_opt(sb, GRPID)) {
496 inode->i_mode = mode; 485 inode->i_mode = mode;
497 inode->i_uid = current_fsuid(); 486 inode->i_uid = current_fsuid();
498 inode->i_gid = dir->i_gid; 487 inode->i_gid = dir->i_gid;
499 } else 488 } else
500 inode_init_owner(inode, dir, mode); 489 inode_init_owner(inode, dir, mode);
501 490
502 inode->i_ino = ino; 491 inode->i_ino = ino;
503 /* This is the optimal IO size (for stat), not the fs block size */ 492 /* This is the optimal IO size (for stat), not the fs block size */
504 inode->i_blocks = 0; 493 inode->i_blocks = 0;
505 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; 494 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
506 495
507 memset(ei->i_data, 0, sizeof(ei->i_data)); 496 memset(ei->i_data, 0, sizeof(ei->i_data));
508 ei->i_dir_start_lookup = 0; 497 ei->i_dir_start_lookup = 0;
509 ei->i_disksize = 0; 498 ei->i_disksize = 0;
510 499
511 ei->i_flags = 500 ei->i_flags =
512 ext3_mask_flags(mode, EXT3_I(dir)->i_flags & EXT3_FL_INHERITED); 501 ext3_mask_flags(mode, EXT3_I(dir)->i_flags & EXT3_FL_INHERITED);
513 #ifdef EXT3_FRAGMENTS 502 #ifdef EXT3_FRAGMENTS
514 ei->i_faddr = 0; 503 ei->i_faddr = 0;
515 ei->i_frag_no = 0; 504 ei->i_frag_no = 0;
516 ei->i_frag_size = 0; 505 ei->i_frag_size = 0;
517 #endif 506 #endif
518 ei->i_file_acl = 0; 507 ei->i_file_acl = 0;
519 ei->i_dir_acl = 0; 508 ei->i_dir_acl = 0;
520 ei->i_dtime = 0; 509 ei->i_dtime = 0;
521 ei->i_block_alloc_info = NULL; 510 ei->i_block_alloc_info = NULL;
522 ei->i_block_group = group; 511 ei->i_block_group = group;
523 512
524 ext3_set_inode_flags(inode); 513 ext3_set_inode_flags(inode);
525 if (IS_DIRSYNC(inode)) 514 if (IS_DIRSYNC(inode))
526 handle->h_sync = 1; 515 handle->h_sync = 1;
527 if (insert_inode_locked(inode) < 0) { 516 if (insert_inode_locked(inode) < 0) {
528 /* 517 /*
529 * Likely a bitmap corruption causing inode to be allocated 518 * Likely a bitmap corruption causing inode to be allocated
530 * twice. 519 * twice.
531 */ 520 */
532 err = -EIO; 521 err = -EIO;
533 goto fail; 522 goto fail;
534 } 523 }
535 spin_lock(&sbi->s_next_gen_lock); 524 spin_lock(&sbi->s_next_gen_lock);
536 inode->i_generation = sbi->s_next_generation++; 525 inode->i_generation = sbi->s_next_generation++;
537 spin_unlock(&sbi->s_next_gen_lock); 526 spin_unlock(&sbi->s_next_gen_lock);
538 527
539 ei->i_state_flags = 0; 528 ei->i_state_flags = 0;
540 ext3_set_inode_state(inode, EXT3_STATE_NEW); 529 ext3_set_inode_state(inode, EXT3_STATE_NEW);
541 530
542 /* See comment in ext3_iget for explanation */ 531 /* See comment in ext3_iget for explanation */
543 if (ino >= EXT3_FIRST_INO(sb) + 1 && 532 if (ino >= EXT3_FIRST_INO(sb) + 1 &&
544 EXT3_INODE_SIZE(sb) > EXT3_GOOD_OLD_INODE_SIZE) { 533 EXT3_INODE_SIZE(sb) > EXT3_GOOD_OLD_INODE_SIZE) {
545 ei->i_extra_isize = 534 ei->i_extra_isize =
546 sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE; 535 sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE;
547 } else { 536 } else {
548 ei->i_extra_isize = 0; 537 ei->i_extra_isize = 0;
549 } 538 }
550 539
551 ret = inode; 540 ret = inode;
552 dquot_initialize(inode); 541 dquot_initialize(inode);
553 err = dquot_alloc_inode(inode); 542 err = dquot_alloc_inode(inode);
554 if (err) 543 if (err)
555 goto fail_drop; 544 goto fail_drop;
556 545
557 err = ext3_init_acl(handle, inode, dir); 546 err = ext3_init_acl(handle, inode, dir);
558 if (err) 547 if (err)
559 goto fail_free_drop; 548 goto fail_free_drop;
560 549
561 err = ext3_init_security(handle, inode, dir, qstr); 550 err = ext3_init_security(handle, inode, dir, qstr);
562 if (err) 551 if (err)
563 goto fail_free_drop; 552 goto fail_free_drop;
564 553
565 err = ext3_mark_inode_dirty(handle, inode); 554 err = ext3_mark_inode_dirty(handle, inode);
566 if (err) { 555 if (err) {
567 ext3_std_error(sb, err); 556 ext3_std_error(sb, err);
568 goto fail_free_drop; 557 goto fail_free_drop;
569 } 558 }
570 559
571 ext3_debug("allocating inode %lu\n", inode->i_ino); 560 ext3_debug("allocating inode %lu\n", inode->i_ino);
572 trace_ext3_allocate_inode(inode, dir, mode); 561 trace_ext3_allocate_inode(inode, dir, mode);
573 goto really_out; 562 goto really_out;
574 fail: 563 fail:
575 ext3_std_error(sb, err); 564 ext3_std_error(sb, err);
576 out: 565 out:
577 iput(inode); 566 iput(inode);
578 ret = ERR_PTR(err); 567 ret = ERR_PTR(err);
579 really_out: 568 really_out:
580 brelse(bitmap_bh); 569 brelse(bitmap_bh);
581 return ret; 570 return ret;
582 571
583 fail_free_drop: 572 fail_free_drop:
584 dquot_free_inode(inode); 573 dquot_free_inode(inode);
585 574
586 fail_drop: 575 fail_drop:
587 dquot_drop(inode); 576 dquot_drop(inode);
588 inode->i_flags |= S_NOQUOTA; 577 inode->i_flags |= S_NOQUOTA;
589 clear_nlink(inode); 578 clear_nlink(inode);
590 unlock_new_inode(inode); 579 unlock_new_inode(inode);
591 iput(inode); 580 iput(inode);
592 brelse(bitmap_bh); 581 brelse(bitmap_bh);
593 return ERR_PTR(err); 582 return ERR_PTR(err);
594 } 583 }
595 584
596 /* Verify that we are loading a valid orphan from disk */ 585 /* Verify that we are loading a valid orphan from disk */
597 struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino) 586 struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
598 { 587 {
599 unsigned long max_ino = le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count); 588 unsigned long max_ino = le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count);
600 unsigned long block_group; 589 unsigned long block_group;
601 int bit; 590 int bit;
602 struct buffer_head *bitmap_bh; 591 struct buffer_head *bitmap_bh;
603 struct inode *inode = NULL; 592 struct inode *inode = NULL;
604 long err = -EIO; 593 long err = -EIO;
605 594
606 /* Error cases - e2fsck has already cleaned up for us */ 595 /* Error cases - e2fsck has already cleaned up for us */
607 if (ino > max_ino) { 596 if (ino > max_ino) {
608 ext3_warning(sb, __func__, 597 ext3_warning(sb, __func__,
609 "bad orphan ino %lu! e2fsck was run?", ino); 598 "bad orphan ino %lu! e2fsck was run?", ino);
610 goto error; 599 goto error;
611 } 600 }
612 601
613 block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb); 602 block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb);
614 bit = (ino - 1) % EXT3_INODES_PER_GROUP(sb); 603 bit = (ino - 1) % EXT3_INODES_PER_GROUP(sb);
615 bitmap_bh = read_inode_bitmap(sb, block_group); 604 bitmap_bh = read_inode_bitmap(sb, block_group);
616 if (!bitmap_bh) { 605 if (!bitmap_bh) {
617 ext3_warning(sb, __func__, 606 ext3_warning(sb, __func__,
618 "inode bitmap error for orphan %lu", ino); 607 "inode bitmap error for orphan %lu", ino);
619 goto error; 608 goto error;
620 } 609 }
621 610
622 /* Having the inode bit set should be a 100% indicator that this 611 /* Having the inode bit set should be a 100% indicator that this
623 * is a valid orphan (no e2fsck run on fs). Orphans also include 612 * is a valid orphan (no e2fsck run on fs). Orphans also include
624 * inodes that were being truncated, so we can't check i_nlink==0. 613 * inodes that were being truncated, so we can't check i_nlink==0.
625 */ 614 */
626 if (!ext3_test_bit(bit, bitmap_bh->b_data)) 615 if (!ext3_test_bit(bit, bitmap_bh->b_data))
627 goto bad_orphan; 616 goto bad_orphan;
628 617
629 inode = ext3_iget(sb, ino); 618 inode = ext3_iget(sb, ino);
630 if (IS_ERR(inode)) 619 if (IS_ERR(inode))
631 goto iget_failed; 620 goto iget_failed;
632 621
633 /* 622 /*
634 * If the orphans has i_nlinks > 0 then it should be able to be 623 * If the orphans has i_nlinks > 0 then it should be able to be
635 * truncated, otherwise it won't be removed from the orphan list 624 * truncated, otherwise it won't be removed from the orphan list
636 * during processing and an infinite loop will result. 625 * during processing and an infinite loop will result.
637 */ 626 */
638 if (inode->i_nlink && !ext3_can_truncate(inode)) 627 if (inode->i_nlink && !ext3_can_truncate(inode))
639 goto bad_orphan; 628 goto bad_orphan;
640 629
641 if (NEXT_ORPHAN(inode) > max_ino) 630 if (NEXT_ORPHAN(inode) > max_ino)
642 goto bad_orphan; 631 goto bad_orphan;
643 brelse(bitmap_bh); 632 brelse(bitmap_bh);
644 return inode; 633 return inode;
645 634
646 iget_failed: 635 iget_failed:
647 err = PTR_ERR(inode); 636 err = PTR_ERR(inode);
648 inode = NULL; 637 inode = NULL;
649 bad_orphan: 638 bad_orphan:
650 ext3_warning(sb, __func__, 639 ext3_warning(sb, __func__,
651 "bad orphan inode %lu! e2fsck was run?", ino); 640 "bad orphan inode %lu! e2fsck was run?", ino);
652 printk(KERN_NOTICE "ext3_test_bit(bit=%d, block=%llu) = %d\n", 641 printk(KERN_NOTICE "ext3_test_bit(bit=%d, block=%llu) = %d\n",
653 bit, (unsigned long long)bitmap_bh->b_blocknr, 642 bit, (unsigned long long)bitmap_bh->b_blocknr,
654 ext3_test_bit(bit, bitmap_bh->b_data)); 643 ext3_test_bit(bit, bitmap_bh->b_data));
655 printk(KERN_NOTICE "inode=%p\n", inode); 644 printk(KERN_NOTICE "inode=%p\n", inode);
656 if (inode) { 645 if (inode) {
657 printk(KERN_NOTICE "is_bad_inode(inode)=%d\n", 646 printk(KERN_NOTICE "is_bad_inode(inode)=%d\n",
658 is_bad_inode(inode)); 647 is_bad_inode(inode));
659 printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n", 648 printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n",
660 NEXT_ORPHAN(inode)); 649 NEXT_ORPHAN(inode));
661 printk(KERN_NOTICE "max_ino=%lu\n", max_ino); 650 printk(KERN_NOTICE "max_ino=%lu\n", max_ino);
662 printk(KERN_NOTICE "i_nlink=%u\n", inode->i_nlink); 651 printk(KERN_NOTICE "i_nlink=%u\n", inode->i_nlink);
663 /* Avoid freeing blocks if we got a bad deleted inode */ 652 /* Avoid freeing blocks if we got a bad deleted inode */
664 if (inode->i_nlink == 0) 653 if (inode->i_nlink == 0)
665 inode->i_blocks = 0; 654 inode->i_blocks = 0;
666 iput(inode); 655 iput(inode);
667 } 656 }
668 brelse(bitmap_bh); 657 brelse(bitmap_bh);
669 error: 658 error:
670 return ERR_PTR(err); 659 return ERR_PTR(err);
671 } 660 }
672 661
673 unsigned long ext3_count_free_inodes (struct super_block * sb) 662 unsigned long ext3_count_free_inodes (struct super_block * sb)
674 { 663 {
675 unsigned long desc_count; 664 unsigned long desc_count;
676 struct ext3_group_desc *gdp; 665 struct ext3_group_desc *gdp;
677 int i; 666 int i;
678 #ifdef EXT3FS_DEBUG 667 #ifdef EXT3FS_DEBUG
679 struct ext3_super_block *es; 668 struct ext3_super_block *es;
680 unsigned long bitmap_count, x; 669 unsigned long bitmap_count, x;
681 struct buffer_head *bitmap_bh = NULL; 670 struct buffer_head *bitmap_bh = NULL;
682 671
683 es = EXT3_SB(sb)->s_es; 672 es = EXT3_SB(sb)->s_es;
684 desc_count = 0; 673 desc_count = 0;
685 bitmap_count = 0; 674 bitmap_count = 0;
686 gdp = NULL; 675 gdp = NULL;
687 for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) { 676 for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
688 gdp = ext3_get_group_desc (sb, i, NULL); 677 gdp = ext3_get_group_desc (sb, i, NULL);
689 if (!gdp) 678 if (!gdp)
690 continue; 679 continue;
691 desc_count += le16_to_cpu(gdp->bg_free_inodes_count); 680 desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
692 brelse(bitmap_bh); 681 brelse(bitmap_bh);
693 bitmap_bh = read_inode_bitmap(sb, i); 682 bitmap_bh = read_inode_bitmap(sb, i);
694 if (!bitmap_bh) 683 if (!bitmap_bh)
695 continue; 684 continue;
696 685
697 x = ext3_count_free(bitmap_bh, EXT3_INODES_PER_GROUP(sb) / 8); 686 x = ext3_count_free(bitmap_bh, EXT3_INODES_PER_GROUP(sb) / 8);
698 printk("group %d: stored = %d, counted = %lu\n", 687 printk("group %d: stored = %d, counted = %lu\n",
699 i, le16_to_cpu(gdp->bg_free_inodes_count), x); 688 i, le16_to_cpu(gdp->bg_free_inodes_count), x);
700 bitmap_count += x; 689 bitmap_count += x;
701 } 690 }
702 brelse(bitmap_bh); 691 brelse(bitmap_bh);
703 printk("ext3_count_free_inodes: stored = %u, computed = %lu, %lu\n", 692 printk("ext3_count_free_inodes: stored = %u, computed = %lu, %lu\n",
704 le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count); 693 le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count);
705 return desc_count; 694 return desc_count;
706 #else 695 #else
707 desc_count = 0; 696 desc_count = 0;
708 for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) { 697 for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
709 gdp = ext3_get_group_desc (sb, i, NULL); 698 gdp = ext3_get_group_desc (sb, i, NULL);
710 if (!gdp) 699 if (!gdp)
711 continue; 700 continue;
712 desc_count += le16_to_cpu(gdp->bg_free_inodes_count); 701 desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
713 cond_resched(); 702 cond_resched();
714 } 703 }
715 return desc_count; 704 return desc_count;
716 #endif 705 #endif
717 } 706 }
718 707
719 /* Called at mount-time, super-block is locked */ 708 /* Called at mount-time, super-block is locked */
720 unsigned long ext3_count_dirs (struct super_block * sb) 709 unsigned long ext3_count_dirs (struct super_block * sb)
721 { 710 {
722 unsigned long count = 0; 711 unsigned long count = 0;
723 int i; 712 int i;
724 713
725 for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) { 714 for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
726 struct ext3_group_desc *gdp = ext3_get_group_desc (sb, i, NULL); 715 struct ext3_group_desc *gdp = ext3_get_group_desc (sb, i, NULL);
727 if (!gdp) 716 if (!gdp)
728 continue; 717 continue;
729 count += le16_to_cpu(gdp->bg_used_dirs_count); 718 count += le16_to_cpu(gdp->bg_used_dirs_count);
730 } 719 }
731 return count; 720 return count;
732 } 721 }
733 722
734 723
1 /* 1 /*
2 * linux/fs/ext3/inode.c 2 * linux/fs/ext3/inode.c
3 * 3 *
4 * Copyright (C) 1992, 1993, 1994, 1995 4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr) 5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal 6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI) 7 * Universite Pierre et Marie Curie (Paris VI)
8 * 8 *
9 * from 9 * from
10 * 10 *
11 * linux/fs/minix/inode.c 11 * linux/fs/minix/inode.c
12 * 12 *
13 * Copyright (C) 1991, 1992 Linus Torvalds 13 * Copyright (C) 1991, 1992 Linus Torvalds
14 * 14 *
15 * Goal-directed block allocation by Stephen Tweedie 15 * Goal-directed block allocation by Stephen Tweedie
16 * (sct@redhat.com), 1993, 1998 16 * (sct@redhat.com), 1993, 1998
17 * Big-endian to little-endian byte-swapping/bitmaps by 17 * Big-endian to little-endian byte-swapping/bitmaps by
18 * David S. Miller (davem@caip.rutgers.edu), 1995 18 * David S. Miller (davem@caip.rutgers.edu), 1995
19 * 64-bit file support on 64-bit platforms by Jakub Jelinek 19 * 64-bit file support on 64-bit platforms by Jakub Jelinek
20 * (jj@sunsite.ms.mff.cuni.cz) 20 * (jj@sunsite.ms.mff.cuni.cz)
21 * 21 *
22 * Assorted race fixes, rewrite of ext3_get_block() by Al Viro, 2000 22 * Assorted race fixes, rewrite of ext3_get_block() by Al Viro, 2000
23 */ 23 */
24 24
25 #include <linux/fs.h>
26 #include <linux/time.h>
27 #include <linux/ext3_jbd.h>
28 #include <linux/jbd.h>
29 #include <linux/highuid.h> 25 #include <linux/highuid.h>
30 #include <linux/pagemap.h>
31 #include <linux/quotaops.h> 26 #include <linux/quotaops.h>
32 #include <linux/string.h>
33 #include <linux/buffer_head.h>
34 #include <linux/writeback.h> 27 #include <linux/writeback.h>
35 #include <linux/mpage.h> 28 #include <linux/mpage.h>
36 #include <linux/uio.h>
37 #include <linux/bio.h>
38 #include <linux/fiemap.h>
39 #include <linux/namei.h> 29 #include <linux/namei.h>
40 #include <trace/events/ext3.h> 30 #include "ext3.h"
41 #include "xattr.h" 31 #include "xattr.h"
42 #include "acl.h" 32 #include "acl.h"
43 33
44 static int ext3_writepage_trans_blocks(struct inode *inode); 34 static int ext3_writepage_trans_blocks(struct inode *inode);
45 static int ext3_block_truncate_page(struct inode *inode, loff_t from); 35 static int ext3_block_truncate_page(struct inode *inode, loff_t from);
46 36
47 /* 37 /*
48 * Test whether an inode is a fast symlink. 38 * Test whether an inode is a fast symlink.
49 */ 39 */
50 static int ext3_inode_is_fast_symlink(struct inode *inode) 40 static int ext3_inode_is_fast_symlink(struct inode *inode)
51 { 41 {
52 int ea_blocks = EXT3_I(inode)->i_file_acl ? 42 int ea_blocks = EXT3_I(inode)->i_file_acl ?
53 (inode->i_sb->s_blocksize >> 9) : 0; 43 (inode->i_sb->s_blocksize >> 9) : 0;
54 44
55 return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0); 45 return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0);
56 } 46 }
57 47
58 /* 48 /*
59 * The ext3 forget function must perform a revoke if we are freeing data 49 * The ext3 forget function must perform a revoke if we are freeing data
60 * which has been journaled. Metadata (eg. indirect blocks) must be 50 * which has been journaled. Metadata (eg. indirect blocks) must be
61 * revoked in all cases. 51 * revoked in all cases.
62 * 52 *
63 * "bh" may be NULL: a metadata block may have been freed from memory 53 * "bh" may be NULL: a metadata block may have been freed from memory
64 * but there may still be a record of it in the journal, and that record 54 * but there may still be a record of it in the journal, and that record
65 * still needs to be revoked. 55 * still needs to be revoked.
66 */ 56 */
67 int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode, 57 int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode,
68 struct buffer_head *bh, ext3_fsblk_t blocknr) 58 struct buffer_head *bh, ext3_fsblk_t blocknr)
69 { 59 {
70 int err; 60 int err;
71 61
72 might_sleep(); 62 might_sleep();
73 63
74 trace_ext3_forget(inode, is_metadata, blocknr); 64 trace_ext3_forget(inode, is_metadata, blocknr);
75 BUFFER_TRACE(bh, "enter"); 65 BUFFER_TRACE(bh, "enter");
76 66
77 jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, " 67 jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, "
78 "data mode %lx\n", 68 "data mode %lx\n",
79 bh, is_metadata, inode->i_mode, 69 bh, is_metadata, inode->i_mode,
80 test_opt(inode->i_sb, DATA_FLAGS)); 70 test_opt(inode->i_sb, DATA_FLAGS));
81 71
82 /* Never use the revoke function if we are doing full data 72 /* Never use the revoke function if we are doing full data
83 * journaling: there is no need to, and a V1 superblock won't 73 * journaling: there is no need to, and a V1 superblock won't
84 * support it. Otherwise, only skip the revoke on un-journaled 74 * support it. Otherwise, only skip the revoke on un-journaled
85 * data blocks. */ 75 * data blocks. */
86 76
87 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA || 77 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ||
88 (!is_metadata && !ext3_should_journal_data(inode))) { 78 (!is_metadata && !ext3_should_journal_data(inode))) {
89 if (bh) { 79 if (bh) {
90 BUFFER_TRACE(bh, "call journal_forget"); 80 BUFFER_TRACE(bh, "call journal_forget");
91 return ext3_journal_forget(handle, bh); 81 return ext3_journal_forget(handle, bh);
92 } 82 }
93 return 0; 83 return 0;
94 } 84 }
95 85
96 /* 86 /*
97 * data!=journal && (is_metadata || should_journal_data(inode)) 87 * data!=journal && (is_metadata || should_journal_data(inode))
98 */ 88 */
99 BUFFER_TRACE(bh, "call ext3_journal_revoke"); 89 BUFFER_TRACE(bh, "call ext3_journal_revoke");
100 err = ext3_journal_revoke(handle, blocknr, bh); 90 err = ext3_journal_revoke(handle, blocknr, bh);
101 if (err) 91 if (err)
102 ext3_abort(inode->i_sb, __func__, 92 ext3_abort(inode->i_sb, __func__,
103 "error %d when attempting revoke", err); 93 "error %d when attempting revoke", err);
104 BUFFER_TRACE(bh, "exit"); 94 BUFFER_TRACE(bh, "exit");
105 return err; 95 return err;
106 } 96 }
107 97
108 /* 98 /*
109 * Work out how many blocks we need to proceed with the next chunk of a 99 * Work out how many blocks we need to proceed with the next chunk of a
110 * truncate transaction. 100 * truncate transaction.
111 */ 101 */
112 static unsigned long blocks_for_truncate(struct inode *inode) 102 static unsigned long blocks_for_truncate(struct inode *inode)
113 { 103 {
114 unsigned long needed; 104 unsigned long needed;
115 105
116 needed = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9); 106 needed = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9);
117 107
118 /* Give ourselves just enough room to cope with inodes in which 108 /* Give ourselves just enough room to cope with inodes in which
119 * i_blocks is corrupt: we've seen disk corruptions in the past 109 * i_blocks is corrupt: we've seen disk corruptions in the past
120 * which resulted in random data in an inode which looked enough 110 * which resulted in random data in an inode which looked enough
121 * like a regular file for ext3 to try to delete it. Things 111 * like a regular file for ext3 to try to delete it. Things
122 * will go a bit crazy if that happens, but at least we should 112 * will go a bit crazy if that happens, but at least we should
123 * try not to panic the whole kernel. */ 113 * try not to panic the whole kernel. */
124 if (needed < 2) 114 if (needed < 2)
125 needed = 2; 115 needed = 2;
126 116
127 /* But we need to bound the transaction so we don't overflow the 117 /* But we need to bound the transaction so we don't overflow the
128 * journal. */ 118 * journal. */
129 if (needed > EXT3_MAX_TRANS_DATA) 119 if (needed > EXT3_MAX_TRANS_DATA)
130 needed = EXT3_MAX_TRANS_DATA; 120 needed = EXT3_MAX_TRANS_DATA;
131 121
132 return EXT3_DATA_TRANS_BLOCKS(inode->i_sb) + needed; 122 return EXT3_DATA_TRANS_BLOCKS(inode->i_sb) + needed;
133 } 123 }
134 124
135 /* 125 /*
136 * Truncate transactions can be complex and absolutely huge. So we need to 126 * Truncate transactions can be complex and absolutely huge. So we need to
137 * be able to restart the transaction at a conventient checkpoint to make 127 * be able to restart the transaction at a conventient checkpoint to make
138 * sure we don't overflow the journal. 128 * sure we don't overflow the journal.
139 * 129 *
140 * start_transaction gets us a new handle for a truncate transaction, 130 * start_transaction gets us a new handle for a truncate transaction,
141 * and extend_transaction tries to extend the existing one a bit. If 131 * and extend_transaction tries to extend the existing one a bit. If
142 * extend fails, we need to propagate the failure up and restart the 132 * extend fails, we need to propagate the failure up and restart the
143 * transaction in the top-level truncate loop. --sct 133 * transaction in the top-level truncate loop. --sct
144 */ 134 */
145 static handle_t *start_transaction(struct inode *inode) 135 static handle_t *start_transaction(struct inode *inode)
146 { 136 {
147 handle_t *result; 137 handle_t *result;
148 138
149 result = ext3_journal_start(inode, blocks_for_truncate(inode)); 139 result = ext3_journal_start(inode, blocks_for_truncate(inode));
150 if (!IS_ERR(result)) 140 if (!IS_ERR(result))
151 return result; 141 return result;
152 142
153 ext3_std_error(inode->i_sb, PTR_ERR(result)); 143 ext3_std_error(inode->i_sb, PTR_ERR(result));
154 return result; 144 return result;
155 } 145 }
156 146
157 /* 147 /*
158 * Try to extend this transaction for the purposes of truncation. 148 * Try to extend this transaction for the purposes of truncation.
159 * 149 *
160 * Returns 0 if we managed to create more room. If we can't create more 150 * Returns 0 if we managed to create more room. If we can't create more
161 * room, and the transaction must be restarted we return 1. 151 * room, and the transaction must be restarted we return 1.
162 */ 152 */
163 static int try_to_extend_transaction(handle_t *handle, struct inode *inode) 153 static int try_to_extend_transaction(handle_t *handle, struct inode *inode)
164 { 154 {
165 if (handle->h_buffer_credits > EXT3_RESERVE_TRANS_BLOCKS) 155 if (handle->h_buffer_credits > EXT3_RESERVE_TRANS_BLOCKS)
166 return 0; 156 return 0;
167 if (!ext3_journal_extend(handle, blocks_for_truncate(inode))) 157 if (!ext3_journal_extend(handle, blocks_for_truncate(inode)))
168 return 0; 158 return 0;
169 return 1; 159 return 1;
170 } 160 }
171 161
172 /* 162 /*
173 * Restart the transaction associated with *handle. This does a commit, 163 * Restart the transaction associated with *handle. This does a commit,
174 * so before we call here everything must be consistently dirtied against 164 * so before we call here everything must be consistently dirtied against
175 * this transaction. 165 * this transaction.
176 */ 166 */
177 static int truncate_restart_transaction(handle_t *handle, struct inode *inode) 167 static int truncate_restart_transaction(handle_t *handle, struct inode *inode)
178 { 168 {
179 int ret; 169 int ret;
180 170
181 jbd_debug(2, "restarting handle %p\n", handle); 171 jbd_debug(2, "restarting handle %p\n", handle);
182 /* 172 /*
183 * Drop truncate_mutex to avoid deadlock with ext3_get_blocks_handle 173 * Drop truncate_mutex to avoid deadlock with ext3_get_blocks_handle
184 * At this moment, get_block can be called only for blocks inside 174 * At this moment, get_block can be called only for blocks inside
185 * i_size since page cache has been already dropped and writes are 175 * i_size since page cache has been already dropped and writes are
186 * blocked by i_mutex. So we can safely drop the truncate_mutex. 176 * blocked by i_mutex. So we can safely drop the truncate_mutex.
187 */ 177 */
188 mutex_unlock(&EXT3_I(inode)->truncate_mutex); 178 mutex_unlock(&EXT3_I(inode)->truncate_mutex);
189 ret = ext3_journal_restart(handle, blocks_for_truncate(inode)); 179 ret = ext3_journal_restart(handle, blocks_for_truncate(inode));
190 mutex_lock(&EXT3_I(inode)->truncate_mutex); 180 mutex_lock(&EXT3_I(inode)->truncate_mutex);
191 return ret; 181 return ret;
192 } 182 }
193 183
194 /* 184 /*
195 * Called at inode eviction from icache 185 * Called at inode eviction from icache
196 */ 186 */
197 void ext3_evict_inode (struct inode *inode) 187 void ext3_evict_inode (struct inode *inode)
198 { 188 {
199 struct ext3_inode_info *ei = EXT3_I(inode); 189 struct ext3_inode_info *ei = EXT3_I(inode);
200 struct ext3_block_alloc_info *rsv; 190 struct ext3_block_alloc_info *rsv;
201 handle_t *handle; 191 handle_t *handle;
202 int want_delete = 0; 192 int want_delete = 0;
203 193
204 trace_ext3_evict_inode(inode); 194 trace_ext3_evict_inode(inode);
205 if (!inode->i_nlink && !is_bad_inode(inode)) { 195 if (!inode->i_nlink && !is_bad_inode(inode)) {
206 dquot_initialize(inode); 196 dquot_initialize(inode);
207 want_delete = 1; 197 want_delete = 1;
208 } 198 }
209 199
210 /* 200 /*
211 * When journalling data dirty buffers are tracked only in the journal. 201 * When journalling data dirty buffers are tracked only in the journal.
212 * So although mm thinks everything is clean and ready for reaping the 202 * So although mm thinks everything is clean and ready for reaping the
213 * inode might still have some pages to write in the running 203 * inode might still have some pages to write in the running
214 * transaction or waiting to be checkpointed. Thus calling 204 * transaction or waiting to be checkpointed. Thus calling
215 * journal_invalidatepage() (via truncate_inode_pages()) to discard 205 * journal_invalidatepage() (via truncate_inode_pages()) to discard
216 * these buffers can cause data loss. Also even if we did not discard 206 * these buffers can cause data loss. Also even if we did not discard
217 * these buffers, we would have no way to find them after the inode 207 * these buffers, we would have no way to find them after the inode
218 * is reaped and thus user could see stale data if he tries to read 208 * is reaped and thus user could see stale data if he tries to read
219 * them before the transaction is checkpointed. So be careful and 209 * them before the transaction is checkpointed. So be careful and
220 * force everything to disk here... We use ei->i_datasync_tid to 210 * force everything to disk here... We use ei->i_datasync_tid to
221 * store the newest transaction containing inode's data. 211 * store the newest transaction containing inode's data.
222 * 212 *
223 * Note that directories do not have this problem because they don't 213 * Note that directories do not have this problem because they don't
224 * use page cache. 214 * use page cache.
225 * 215 *
226 * The s_journal check handles the case when ext3_get_journal() fails 216 * The s_journal check handles the case when ext3_get_journal() fails
227 * and puts the journal inode. 217 * and puts the journal inode.
228 */ 218 */
229 if (inode->i_nlink && ext3_should_journal_data(inode) && 219 if (inode->i_nlink && ext3_should_journal_data(inode) &&
230 EXT3_SB(inode->i_sb)->s_journal && 220 EXT3_SB(inode->i_sb)->s_journal &&
231 (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) { 221 (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) {
232 tid_t commit_tid = atomic_read(&ei->i_datasync_tid); 222 tid_t commit_tid = atomic_read(&ei->i_datasync_tid);
233 journal_t *journal = EXT3_SB(inode->i_sb)->s_journal; 223 journal_t *journal = EXT3_SB(inode->i_sb)->s_journal;
234 224
235 log_start_commit(journal, commit_tid); 225 log_start_commit(journal, commit_tid);
236 log_wait_commit(journal, commit_tid); 226 log_wait_commit(journal, commit_tid);
237 filemap_write_and_wait(&inode->i_data); 227 filemap_write_and_wait(&inode->i_data);
238 } 228 }
239 truncate_inode_pages(&inode->i_data, 0); 229 truncate_inode_pages(&inode->i_data, 0);
240 230
241 ext3_discard_reservation(inode); 231 ext3_discard_reservation(inode);
242 rsv = ei->i_block_alloc_info; 232 rsv = ei->i_block_alloc_info;
243 ei->i_block_alloc_info = NULL; 233 ei->i_block_alloc_info = NULL;
244 if (unlikely(rsv)) 234 if (unlikely(rsv))
245 kfree(rsv); 235 kfree(rsv);
246 236
247 if (!want_delete) 237 if (!want_delete)
248 goto no_delete; 238 goto no_delete;
249 239
250 handle = start_transaction(inode); 240 handle = start_transaction(inode);
251 if (IS_ERR(handle)) { 241 if (IS_ERR(handle)) {
252 /* 242 /*
253 * If we're going to skip the normal cleanup, we still need to 243 * If we're going to skip the normal cleanup, we still need to
254 * make sure that the in-core orphan linked list is properly 244 * make sure that the in-core orphan linked list is properly
255 * cleaned up. 245 * cleaned up.
256 */ 246 */
257 ext3_orphan_del(NULL, inode); 247 ext3_orphan_del(NULL, inode);
258 goto no_delete; 248 goto no_delete;
259 } 249 }
260 250
261 if (IS_SYNC(inode)) 251 if (IS_SYNC(inode))
262 handle->h_sync = 1; 252 handle->h_sync = 1;
263 inode->i_size = 0; 253 inode->i_size = 0;
264 if (inode->i_blocks) 254 if (inode->i_blocks)
265 ext3_truncate(inode); 255 ext3_truncate(inode);
266 /* 256 /*
267 * Kill off the orphan record created when the inode lost the last 257 * Kill off the orphan record created when the inode lost the last
268 * link. Note that ext3_orphan_del() has to be able to cope with the 258 * link. Note that ext3_orphan_del() has to be able to cope with the
269 * deletion of a non-existent orphan - ext3_truncate() could 259 * deletion of a non-existent orphan - ext3_truncate() could
270 * have removed the record. 260 * have removed the record.
271 */ 261 */
272 ext3_orphan_del(handle, inode); 262 ext3_orphan_del(handle, inode);
273 ei->i_dtime = get_seconds(); 263 ei->i_dtime = get_seconds();
274 264
275 /* 265 /*
276 * One subtle ordering requirement: if anything has gone wrong 266 * One subtle ordering requirement: if anything has gone wrong
277 * (transaction abort, IO errors, whatever), then we can still 267 * (transaction abort, IO errors, whatever), then we can still
278 * do these next steps (the fs will already have been marked as 268 * do these next steps (the fs will already have been marked as
279 * having errors), but we can't free the inode if the mark_dirty 269 * having errors), but we can't free the inode if the mark_dirty
280 * fails. 270 * fails.
281 */ 271 */
282 if (ext3_mark_inode_dirty(handle, inode)) { 272 if (ext3_mark_inode_dirty(handle, inode)) {
283 /* If that failed, just dquot_drop() and be done with that */ 273 /* If that failed, just dquot_drop() and be done with that */
284 dquot_drop(inode); 274 dquot_drop(inode);
285 end_writeback(inode); 275 end_writeback(inode);
286 } else { 276 } else {
287 ext3_xattr_delete_inode(handle, inode); 277 ext3_xattr_delete_inode(handle, inode);
288 dquot_free_inode(inode); 278 dquot_free_inode(inode);
289 dquot_drop(inode); 279 dquot_drop(inode);
290 end_writeback(inode); 280 end_writeback(inode);
291 ext3_free_inode(handle, inode); 281 ext3_free_inode(handle, inode);
292 } 282 }
293 ext3_journal_stop(handle); 283 ext3_journal_stop(handle);
294 return; 284 return;
295 no_delete: 285 no_delete:
296 end_writeback(inode); 286 end_writeback(inode);
297 dquot_drop(inode); 287 dquot_drop(inode);
298 } 288 }
299 289
/*
 * One step in a chain of indirect-block lookups:
 * @p:   address of the on-disk block-number slot (in the inode's i_data
 *       for the first step, inside an indirect block's data otherwise)
 * @key: cached value read from *@p (little-endian block number)
 * @bh:  buffer_head hosting the slot, or NULL when it lives in the inode
 */
typedef struct {
	__le32	*p;
	__le32	key;
	struct buffer_head *bh;
} Indirect;
305 295
306 static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v) 296 static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v)
307 { 297 {
308 p->key = *(p->p = v); 298 p->key = *(p->p = v);
309 p->bh = bh; 299 p->bh = bh;
310 } 300 }
311 301
312 static int verify_chain(Indirect *from, Indirect *to) 302 static int verify_chain(Indirect *from, Indirect *to)
313 { 303 {
314 while (from <= to && from->key == *from->p) 304 while (from <= to && from->key == *from->p)
315 from++; 305 from++;
316 return (from > to); 306 return (from > to);
317 } 307 }
318 308
319 /** 309 /**
320 * ext3_block_to_path - parse the block number into array of offsets 310 * ext3_block_to_path - parse the block number into array of offsets
321 * @inode: inode in question (we are only interested in its superblock) 311 * @inode: inode in question (we are only interested in its superblock)
322 * @i_block: block number to be parsed 312 * @i_block: block number to be parsed
323 * @offsets: array to store the offsets in 313 * @offsets: array to store the offsets in
324 * @boundary: set this non-zero if the referred-to block is likely to be 314 * @boundary: set this non-zero if the referred-to block is likely to be
325 * followed (on disk) by an indirect block. 315 * followed (on disk) by an indirect block.
326 * 316 *
327 * To store the locations of file's data ext3 uses a data structure common 317 * To store the locations of file's data ext3 uses a data structure common
328 * for UNIX filesystems - tree of pointers anchored in the inode, with 318 * for UNIX filesystems - tree of pointers anchored in the inode, with
329 * data blocks at leaves and indirect blocks in intermediate nodes. 319 * data blocks at leaves and indirect blocks in intermediate nodes.
330 * This function translates the block number into path in that tree - 320 * This function translates the block number into path in that tree -
331 * return value is the path length and @offsets[n] is the offset of 321 * return value is the path length and @offsets[n] is the offset of
332 * pointer to (n+1)th node in the nth one. If @block is out of range 322 * pointer to (n+1)th node in the nth one. If @block is out of range
333 * (negative or too large) warning is printed and zero returned. 323 * (negative or too large) warning is printed and zero returned.
334 * 324 *
335 * Note: function doesn't find node addresses, so no IO is needed. All 325 * Note: function doesn't find node addresses, so no IO is needed. All
336 * we need to know is the capacity of indirect blocks (taken from the 326 * we need to know is the capacity of indirect blocks (taken from the
337 * inode->i_sb). 327 * inode->i_sb).
338 */ 328 */
339 329
340 /* 330 /*
341 * Portability note: the last comparison (check that we fit into triple 331 * Portability note: the last comparison (check that we fit into triple
342 * indirect block) is spelled differently, because otherwise on an 332 * indirect block) is spelled differently, because otherwise on an
343 * architecture with 32-bit longs and 8Kb pages we might get into trouble 333 * architecture with 32-bit longs and 8Kb pages we might get into trouble
344 * if our filesystem had 8Kb blocks. We might use long long, but that would 334 * if our filesystem had 8Kb blocks. We might use long long, but that would
345 * kill us on x86. Oh, well, at least the sign propagation does not matter - 335 * kill us on x86. Oh, well, at least the sign propagation does not matter -
346 * i_block would have to be negative in the very beginning, so we would not 336 * i_block would have to be negative in the very beginning, so we would not
347 * get there at all. 337 * get there at all.
348 */ 338 */
349 339
350 static int ext3_block_to_path(struct inode *inode, 340 static int ext3_block_to_path(struct inode *inode,
351 long i_block, int offsets[4], int *boundary) 341 long i_block, int offsets[4], int *boundary)
352 { 342 {
353 int ptrs = EXT3_ADDR_PER_BLOCK(inode->i_sb); 343 int ptrs = EXT3_ADDR_PER_BLOCK(inode->i_sb);
354 int ptrs_bits = EXT3_ADDR_PER_BLOCK_BITS(inode->i_sb); 344 int ptrs_bits = EXT3_ADDR_PER_BLOCK_BITS(inode->i_sb);
355 const long direct_blocks = EXT3_NDIR_BLOCKS, 345 const long direct_blocks = EXT3_NDIR_BLOCKS,
356 indirect_blocks = ptrs, 346 indirect_blocks = ptrs,
357 double_blocks = (1 << (ptrs_bits * 2)); 347 double_blocks = (1 << (ptrs_bits * 2));
358 int n = 0; 348 int n = 0;
359 int final = 0; 349 int final = 0;
360 350
361 if (i_block < 0) { 351 if (i_block < 0) {
362 ext3_warning (inode->i_sb, "ext3_block_to_path", "block < 0"); 352 ext3_warning (inode->i_sb, "ext3_block_to_path", "block < 0");
363 } else if (i_block < direct_blocks) { 353 } else if (i_block < direct_blocks) {
364 offsets[n++] = i_block; 354 offsets[n++] = i_block;
365 final = direct_blocks; 355 final = direct_blocks;
366 } else if ( (i_block -= direct_blocks) < indirect_blocks) { 356 } else if ( (i_block -= direct_blocks) < indirect_blocks) {
367 offsets[n++] = EXT3_IND_BLOCK; 357 offsets[n++] = EXT3_IND_BLOCK;
368 offsets[n++] = i_block; 358 offsets[n++] = i_block;
369 final = ptrs; 359 final = ptrs;
370 } else if ((i_block -= indirect_blocks) < double_blocks) { 360 } else if ((i_block -= indirect_blocks) < double_blocks) {
371 offsets[n++] = EXT3_DIND_BLOCK; 361 offsets[n++] = EXT3_DIND_BLOCK;
372 offsets[n++] = i_block >> ptrs_bits; 362 offsets[n++] = i_block >> ptrs_bits;
373 offsets[n++] = i_block & (ptrs - 1); 363 offsets[n++] = i_block & (ptrs - 1);
374 final = ptrs; 364 final = ptrs;
375 } else if (((i_block -= double_blocks) >> (ptrs_bits * 2)) < ptrs) { 365 } else if (((i_block -= double_blocks) >> (ptrs_bits * 2)) < ptrs) {
376 offsets[n++] = EXT3_TIND_BLOCK; 366 offsets[n++] = EXT3_TIND_BLOCK;
377 offsets[n++] = i_block >> (ptrs_bits * 2); 367 offsets[n++] = i_block >> (ptrs_bits * 2);
378 offsets[n++] = (i_block >> ptrs_bits) & (ptrs - 1); 368 offsets[n++] = (i_block >> ptrs_bits) & (ptrs - 1);
379 offsets[n++] = i_block & (ptrs - 1); 369 offsets[n++] = i_block & (ptrs - 1);
380 final = ptrs; 370 final = ptrs;
381 } else { 371 } else {
382 ext3_warning(inode->i_sb, "ext3_block_to_path", "block > big"); 372 ext3_warning(inode->i_sb, "ext3_block_to_path", "block > big");
383 } 373 }
384 if (boundary) 374 if (boundary)
385 *boundary = final - 1 - (i_block & (ptrs - 1)); 375 *boundary = final - 1 - (i_block & (ptrs - 1));
386 return n; 376 return n;
387 } 377 }
388 378
389 /** 379 /**
390 * ext3_get_branch - read the chain of indirect blocks leading to data 380 * ext3_get_branch - read the chain of indirect blocks leading to data
391 * @inode: inode in question 381 * @inode: inode in question
392 * @depth: depth of the chain (1 - direct pointer, etc.) 382 * @depth: depth of the chain (1 - direct pointer, etc.)
393 * @offsets: offsets of pointers in inode/indirect blocks 383 * @offsets: offsets of pointers in inode/indirect blocks
394 * @chain: place to store the result 384 * @chain: place to store the result
395 * @err: here we store the error value 385 * @err: here we store the error value
396 * 386 *
397 * Function fills the array of triples <key, p, bh> and returns %NULL 387 * Function fills the array of triples <key, p, bh> and returns %NULL
398 * if everything went OK or the pointer to the last filled triple 388 * if everything went OK or the pointer to the last filled triple
399 * (incomplete one) otherwise. Upon the return chain[i].key contains 389 * (incomplete one) otherwise. Upon the return chain[i].key contains
400 * the number of (i+1)-th block in the chain (as it is stored in memory, 390 * the number of (i+1)-th block in the chain (as it is stored in memory,
401 * i.e. little-endian 32-bit), chain[i].p contains the address of that 391 * i.e. little-endian 32-bit), chain[i].p contains the address of that
402 * number (it points into struct inode for i==0 and into the bh->b_data 392 * number (it points into struct inode for i==0 and into the bh->b_data
403 * for i>0) and chain[i].bh points to the buffer_head of i-th indirect 393 * for i>0) and chain[i].bh points to the buffer_head of i-th indirect
404 * block for i>0 and NULL for i==0. In other words, it holds the block 394 * block for i>0 and NULL for i==0. In other words, it holds the block
405 * numbers of the chain, addresses they were taken from (and where we can 395 * numbers of the chain, addresses they were taken from (and where we can
406 * verify that chain did not change) and buffer_heads hosting these 396 * verify that chain did not change) and buffer_heads hosting these
407 * numbers. 397 * numbers.
408 * 398 *
409 * Function stops when it stumbles upon zero pointer (absent block) 399 * Function stops when it stumbles upon zero pointer (absent block)
410 * (pointer to last triple returned, *@err == 0) 400 * (pointer to last triple returned, *@err == 0)
411 * or when it gets an IO error reading an indirect block 401 * or when it gets an IO error reading an indirect block
412 * (ditto, *@err == -EIO) 402 * (ditto, *@err == -EIO)
413 * or when it notices that chain had been changed while it was reading 403 * or when it notices that chain had been changed while it was reading
414 * (ditto, *@err == -EAGAIN) 404 * (ditto, *@err == -EAGAIN)
415 * or when it reads all @depth-1 indirect blocks successfully and finds 405 * or when it reads all @depth-1 indirect blocks successfully and finds
416 * the whole chain, all way to the data (returns %NULL, *err == 0). 406 * the whole chain, all way to the data (returns %NULL, *err == 0).
417 */ 407 */
418 static Indirect *ext3_get_branch(struct inode *inode, int depth, int *offsets, 408 static Indirect *ext3_get_branch(struct inode *inode, int depth, int *offsets,
419 Indirect chain[4], int *err) 409 Indirect chain[4], int *err)
420 { 410 {
421 struct super_block *sb = inode->i_sb; 411 struct super_block *sb = inode->i_sb;
422 Indirect *p = chain; 412 Indirect *p = chain;
423 struct buffer_head *bh; 413 struct buffer_head *bh;
424 414
425 *err = 0; 415 *err = 0;
426 /* i_data is not going away, no lock needed */ 416 /* i_data is not going away, no lock needed */
427 add_chain (chain, NULL, EXT3_I(inode)->i_data + *offsets); 417 add_chain (chain, NULL, EXT3_I(inode)->i_data + *offsets);
428 if (!p->key) 418 if (!p->key)
429 goto no_block; 419 goto no_block;
430 while (--depth) { 420 while (--depth) {
431 bh = sb_bread(sb, le32_to_cpu(p->key)); 421 bh = sb_bread(sb, le32_to_cpu(p->key));
432 if (!bh) 422 if (!bh)
433 goto failure; 423 goto failure;
434 /* Reader: pointers */ 424 /* Reader: pointers */
435 if (!verify_chain(chain, p)) 425 if (!verify_chain(chain, p))
436 goto changed; 426 goto changed;
437 add_chain(++p, bh, (__le32*)bh->b_data + *++offsets); 427 add_chain(++p, bh, (__le32*)bh->b_data + *++offsets);
438 /* Reader: end */ 428 /* Reader: end */
439 if (!p->key) 429 if (!p->key)
440 goto no_block; 430 goto no_block;
441 } 431 }
442 return NULL; 432 return NULL;
443 433
444 changed: 434 changed:
445 brelse(bh); 435 brelse(bh);
446 *err = -EAGAIN; 436 *err = -EAGAIN;
447 goto no_block; 437 goto no_block;
448 failure: 438 failure:
449 *err = -EIO; 439 *err = -EIO;
450 no_block: 440 no_block:
451 return p; 441 return p;
452 } 442 }
453 443
454 /** 444 /**
455 * ext3_find_near - find a place for allocation with sufficient locality 445 * ext3_find_near - find a place for allocation with sufficient locality
456 * @inode: owner 446 * @inode: owner
457 * @ind: descriptor of indirect block. 447 * @ind: descriptor of indirect block.
458 * 448 *
459 * This function returns the preferred place for block allocation. 449 * This function returns the preferred place for block allocation.
460 * It is used when heuristic for sequential allocation fails. 450 * It is used when heuristic for sequential allocation fails.
461 * Rules are: 451 * Rules are:
462 * + if there is a block to the left of our position - allocate near it. 452 * + if there is a block to the left of our position - allocate near it.
463 * + if pointer will live in indirect block - allocate near that block. 453 * + if pointer will live in indirect block - allocate near that block.
464 * + if pointer will live in inode - allocate in the same 454 * + if pointer will live in inode - allocate in the same
465 * cylinder group. 455 * cylinder group.
466 * 456 *
467 * In the latter case we colour the starting block by the callers PID to 457 * In the latter case we colour the starting block by the callers PID to
468 * prevent it from clashing with concurrent allocations for a different inode 458 * prevent it from clashing with concurrent allocations for a different inode
469 * in the same block group. The PID is used here so that functionally related 459 * in the same block group. The PID is used here so that functionally related
470 * files will be close-by on-disk. 460 * files will be close-by on-disk.
471 * 461 *
472 * Caller must make sure that @ind is valid and will stay that way. 462 * Caller must make sure that @ind is valid and will stay that way.
473 */ 463 */
474 static ext3_fsblk_t ext3_find_near(struct inode *inode, Indirect *ind) 464 static ext3_fsblk_t ext3_find_near(struct inode *inode, Indirect *ind)
475 { 465 {
476 struct ext3_inode_info *ei = EXT3_I(inode); 466 struct ext3_inode_info *ei = EXT3_I(inode);
477 __le32 *start = ind->bh ? (__le32*) ind->bh->b_data : ei->i_data; 467 __le32 *start = ind->bh ? (__le32*) ind->bh->b_data : ei->i_data;
478 __le32 *p; 468 __le32 *p;
479 ext3_fsblk_t bg_start; 469 ext3_fsblk_t bg_start;
480 ext3_grpblk_t colour; 470 ext3_grpblk_t colour;
481 471
482 /* Try to find previous block */ 472 /* Try to find previous block */
483 for (p = ind->p - 1; p >= start; p--) { 473 for (p = ind->p - 1; p >= start; p--) {
484 if (*p) 474 if (*p)
485 return le32_to_cpu(*p); 475 return le32_to_cpu(*p);
486 } 476 }
487 477
488 /* No such thing, so let's try location of indirect block */ 478 /* No such thing, so let's try location of indirect block */
489 if (ind->bh) 479 if (ind->bh)
490 return ind->bh->b_blocknr; 480 return ind->bh->b_blocknr;
491 481
492 /* 482 /*
493 * It is going to be referred to from the inode itself? OK, just put it 483 * It is going to be referred to from the inode itself? OK, just put it
494 * into the same cylinder group then. 484 * into the same cylinder group then.
495 */ 485 */
496 bg_start = ext3_group_first_block_no(inode->i_sb, ei->i_block_group); 486 bg_start = ext3_group_first_block_no(inode->i_sb, ei->i_block_group);
497 colour = (current->pid % 16) * 487 colour = (current->pid % 16) *
498 (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); 488 (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16);
499 return bg_start + colour; 489 return bg_start + colour;
500 } 490 }
501 491
502 /** 492 /**
503 * ext3_find_goal - find a preferred place for allocation. 493 * ext3_find_goal - find a preferred place for allocation.
504 * @inode: owner 494 * @inode: owner
505 * @block: block we want 495 * @block: block we want
506 * @partial: pointer to the last triple within a chain 496 * @partial: pointer to the last triple within a chain
507 * 497 *
508 * Normally this function find the preferred place for block allocation, 498 * Normally this function find the preferred place for block allocation,
509 * returns it. 499 * returns it.
510 */ 500 */
511 501
512 static ext3_fsblk_t ext3_find_goal(struct inode *inode, long block, 502 static ext3_fsblk_t ext3_find_goal(struct inode *inode, long block,
513 Indirect *partial) 503 Indirect *partial)
514 { 504 {
515 struct ext3_block_alloc_info *block_i; 505 struct ext3_block_alloc_info *block_i;
516 506
517 block_i = EXT3_I(inode)->i_block_alloc_info; 507 block_i = EXT3_I(inode)->i_block_alloc_info;
518 508
519 /* 509 /*
520 * try the heuristic for sequential allocation, 510 * try the heuristic for sequential allocation,
521 * failing that at least try to get decent locality. 511 * failing that at least try to get decent locality.
522 */ 512 */
523 if (block_i && (block == block_i->last_alloc_logical_block + 1) 513 if (block_i && (block == block_i->last_alloc_logical_block + 1)
524 && (block_i->last_alloc_physical_block != 0)) { 514 && (block_i->last_alloc_physical_block != 0)) {
525 return block_i->last_alloc_physical_block + 1; 515 return block_i->last_alloc_physical_block + 1;
526 } 516 }
527 517
528 return ext3_find_near(inode, partial); 518 return ext3_find_near(inode, partial);
529 } 519 }
530 520
531 /** 521 /**
532 * ext3_blks_to_allocate - Look up the block map and count the number 522 * ext3_blks_to_allocate - Look up the block map and count the number
533 * of direct blocks need to be allocated for the given branch. 523 * of direct blocks need to be allocated for the given branch.
534 * 524 *
535 * @branch: chain of indirect blocks 525 * @branch: chain of indirect blocks
536 * @k: number of blocks need for indirect blocks 526 * @k: number of blocks need for indirect blocks
537 * @blks: number of data blocks to be mapped. 527 * @blks: number of data blocks to be mapped.
538 * @blocks_to_boundary: the offset in the indirect block 528 * @blocks_to_boundary: the offset in the indirect block
539 * 529 *
540 * return the total number of blocks to be allocate, including the 530 * return the total number of blocks to be allocate, including the
541 * direct and indirect blocks. 531 * direct and indirect blocks.
542 */ 532 */
543 static int ext3_blks_to_allocate(Indirect *branch, int k, unsigned long blks, 533 static int ext3_blks_to_allocate(Indirect *branch, int k, unsigned long blks,
544 int blocks_to_boundary) 534 int blocks_to_boundary)
545 { 535 {
546 unsigned long count = 0; 536 unsigned long count = 0;
547 537
548 /* 538 /*
549 * Simple case, [t,d]Indirect block(s) has not allocated yet 539 * Simple case, [t,d]Indirect block(s) has not allocated yet
550 * then it's clear blocks on that path have not allocated 540 * then it's clear blocks on that path have not allocated
551 */ 541 */
552 if (k > 0) { 542 if (k > 0) {
553 /* right now we don't handle cross boundary allocation */ 543 /* right now we don't handle cross boundary allocation */
554 if (blks < blocks_to_boundary + 1) 544 if (blks < blocks_to_boundary + 1)
555 count += blks; 545 count += blks;
556 else 546 else
557 count += blocks_to_boundary + 1; 547 count += blocks_to_boundary + 1;
558 return count; 548 return count;
559 } 549 }
560 550
561 count++; 551 count++;
562 while (count < blks && count <= blocks_to_boundary && 552 while (count < blks && count <= blocks_to_boundary &&
563 le32_to_cpu(*(branch[0].p + count)) == 0) { 553 le32_to_cpu(*(branch[0].p + count)) == 0) {
564 count++; 554 count++;
565 } 555 }
566 return count; 556 return count;
567 } 557 }
568 558
569 /** 559 /**
570 * ext3_alloc_blocks - multiple allocate blocks needed for a branch 560 * ext3_alloc_blocks - multiple allocate blocks needed for a branch
571 * @handle: handle for this transaction 561 * @handle: handle for this transaction
572 * @inode: owner 562 * @inode: owner
573 * @goal: preferred place for allocation 563 * @goal: preferred place for allocation
574 * @indirect_blks: the number of blocks need to allocate for indirect 564 * @indirect_blks: the number of blocks need to allocate for indirect
575 * blocks 565 * blocks
576 * @blks: number of blocks need to allocated for direct blocks 566 * @blks: number of blocks need to allocated for direct blocks
577 * @new_blocks: on return it will store the new block numbers for 567 * @new_blocks: on return it will store the new block numbers for
578 * the indirect blocks(if needed) and the first direct block, 568 * the indirect blocks(if needed) and the first direct block,
579 * @err: here we store the error value 569 * @err: here we store the error value
580 * 570 *
581 * return the number of direct blocks allocated 571 * return the number of direct blocks allocated
582 */ 572 */
583 static int ext3_alloc_blocks(handle_t *handle, struct inode *inode, 573 static int ext3_alloc_blocks(handle_t *handle, struct inode *inode,
584 ext3_fsblk_t goal, int indirect_blks, int blks, 574 ext3_fsblk_t goal, int indirect_blks, int blks,
585 ext3_fsblk_t new_blocks[4], int *err) 575 ext3_fsblk_t new_blocks[4], int *err)
586 { 576 {
587 int target, i; 577 int target, i;
588 unsigned long count = 0; 578 unsigned long count = 0;
589 int index = 0; 579 int index = 0;
590 ext3_fsblk_t current_block = 0; 580 ext3_fsblk_t current_block = 0;
591 int ret = 0; 581 int ret = 0;
592 582
593 /* 583 /*
594 * Here we try to allocate the requested multiple blocks at once, 584 * Here we try to allocate the requested multiple blocks at once,
595 * on a best-effort basis. 585 * on a best-effort basis.
596 * To build a branch, we should allocate blocks for 586 * To build a branch, we should allocate blocks for
597 * the indirect blocks(if not allocated yet), and at least 587 * the indirect blocks(if not allocated yet), and at least
598 * the first direct block of this branch. That's the 588 * the first direct block of this branch. That's the
599 * minimum number of blocks need to allocate(required) 589 * minimum number of blocks need to allocate(required)
600 */ 590 */
601 target = blks + indirect_blks; 591 target = blks + indirect_blks;
602 592
603 while (1) { 593 while (1) {
604 count = target; 594 count = target;
605 /* allocating blocks for indirect blocks and direct blocks */ 595 /* allocating blocks for indirect blocks and direct blocks */
606 current_block = ext3_new_blocks(handle,inode,goal,&count,err); 596 current_block = ext3_new_blocks(handle,inode,goal,&count,err);
607 if (*err) 597 if (*err)
608 goto failed_out; 598 goto failed_out;
609 599
610 target -= count; 600 target -= count;
611 /* allocate blocks for indirect blocks */ 601 /* allocate blocks for indirect blocks */
612 while (index < indirect_blks && count) { 602 while (index < indirect_blks && count) {
613 new_blocks[index++] = current_block++; 603 new_blocks[index++] = current_block++;
614 count--; 604 count--;
615 } 605 }
616 606
617 if (count > 0) 607 if (count > 0)
618 break; 608 break;
619 } 609 }
620 610
621 /* save the new block number for the first direct block */ 611 /* save the new block number for the first direct block */
622 new_blocks[index] = current_block; 612 new_blocks[index] = current_block;
623 613
624 /* total number of blocks allocated for direct blocks */ 614 /* total number of blocks allocated for direct blocks */
625 ret = count; 615 ret = count;
626 *err = 0; 616 *err = 0;
627 return ret; 617 return ret;
628 failed_out: 618 failed_out:
629 for (i = 0; i <index; i++) 619 for (i = 0; i <index; i++)
630 ext3_free_blocks(handle, inode, new_blocks[i], 1); 620 ext3_free_blocks(handle, inode, new_blocks[i], 1);
631 return ret; 621 return ret;
632 } 622 }
633 623
634 /** 624 /**
635 * ext3_alloc_branch - allocate and set up a chain of blocks. 625 * ext3_alloc_branch - allocate and set up a chain of blocks.
636 * @handle: handle for this transaction 626 * @handle: handle for this transaction
637 * @inode: owner 627 * @inode: owner
638 * @indirect_blks: number of allocated indirect blocks 628 * @indirect_blks: number of allocated indirect blocks
639 * @blks: number of allocated direct blocks 629 * @blks: number of allocated direct blocks
640 * @goal: preferred place for allocation 630 * @goal: preferred place for allocation
641 * @offsets: offsets (in the blocks) to store the pointers to next. 631 * @offsets: offsets (in the blocks) to store the pointers to next.
642 * @branch: place to store the chain in. 632 * @branch: place to store the chain in.
643 * 633 *
644 * This function allocates blocks, zeroes out all but the last one, 634 * This function allocates blocks, zeroes out all but the last one,
645 * links them into chain and (if we are synchronous) writes them to disk. 635 * links them into chain and (if we are synchronous) writes them to disk.
646 * In other words, it prepares a branch that can be spliced onto the 636 * In other words, it prepares a branch that can be spliced onto the
647 * inode. It stores the information about that chain in the branch[], in 637 * inode. It stores the information about that chain in the branch[], in
648 * the same format as ext3_get_branch() would do. We are calling it after 638 * the same format as ext3_get_branch() would do. We are calling it after
649 * we had read the existing part of chain and partial points to the last 639 * we had read the existing part of chain and partial points to the last
650 * triple of that (one with zero ->key). Upon the exit we have the same 640 * triple of that (one with zero ->key). Upon the exit we have the same
651 * picture as after the successful ext3_get_block(), except that in one 641 * picture as after the successful ext3_get_block(), except that in one
652 * place chain is disconnected - *branch->p is still zero (we did not 642 * place chain is disconnected - *branch->p is still zero (we did not
653 * set the last link), but branch->key contains the number that should 643 * set the last link), but branch->key contains the number that should
654 * be placed into *branch->p to fill that gap. 644 * be placed into *branch->p to fill that gap.
655 * 645 *
656 * If allocation fails we free all blocks we've allocated (and forget 646 * If allocation fails we free all blocks we've allocated (and forget
657 * their buffer_heads) and return the error value the from failed 647 * their buffer_heads) and return the error value the from failed
658 * ext3_alloc_block() (normally -ENOSPC). Otherwise we set the chain 648 * ext3_alloc_block() (normally -ENOSPC). Otherwise we set the chain
659 * as described above and return 0. 649 * as described above and return 0.
660 */ 650 */
661 static int ext3_alloc_branch(handle_t *handle, struct inode *inode, 651 static int ext3_alloc_branch(handle_t *handle, struct inode *inode,
662 int indirect_blks, int *blks, ext3_fsblk_t goal, 652 int indirect_blks, int *blks, ext3_fsblk_t goal,
663 int *offsets, Indirect *branch) 653 int *offsets, Indirect *branch)
664 { 654 {
665 int blocksize = inode->i_sb->s_blocksize; 655 int blocksize = inode->i_sb->s_blocksize;
666 int i, n = 0; 656 int i, n = 0;
667 int err = 0; 657 int err = 0;
668 struct buffer_head *bh; 658 struct buffer_head *bh;
669 int num; 659 int num;
670 ext3_fsblk_t new_blocks[4]; 660 ext3_fsblk_t new_blocks[4];
671 ext3_fsblk_t current_block; 661 ext3_fsblk_t current_block;
672 662
673 num = ext3_alloc_blocks(handle, inode, goal, indirect_blks, 663 num = ext3_alloc_blocks(handle, inode, goal, indirect_blks,
674 *blks, new_blocks, &err); 664 *blks, new_blocks, &err);
675 if (err) 665 if (err)
676 return err; 666 return err;
677 667
678 branch[0].key = cpu_to_le32(new_blocks[0]); 668 branch[0].key = cpu_to_le32(new_blocks[0]);
679 /* 669 /*
680 * metadata blocks and data blocks are allocated. 670 * metadata blocks and data blocks are allocated.
681 */ 671 */
682 for (n = 1; n <= indirect_blks; n++) { 672 for (n = 1; n <= indirect_blks; n++) {
683 /* 673 /*
684 * Get buffer_head for parent block, zero it out 674 * Get buffer_head for parent block, zero it out
685 * and set the pointer to new one, then send 675 * and set the pointer to new one, then send
686 * parent to disk. 676 * parent to disk.
687 */ 677 */
688 bh = sb_getblk(inode->i_sb, new_blocks[n-1]); 678 bh = sb_getblk(inode->i_sb, new_blocks[n-1]);
689 branch[n].bh = bh; 679 branch[n].bh = bh;
690 lock_buffer(bh); 680 lock_buffer(bh);
691 BUFFER_TRACE(bh, "call get_create_access"); 681 BUFFER_TRACE(bh, "call get_create_access");
692 err = ext3_journal_get_create_access(handle, bh); 682 err = ext3_journal_get_create_access(handle, bh);
693 if (err) { 683 if (err) {
694 unlock_buffer(bh); 684 unlock_buffer(bh);
695 brelse(bh); 685 brelse(bh);
696 goto failed; 686 goto failed;
697 } 687 }
698 688
699 memset(bh->b_data, 0, blocksize); 689 memset(bh->b_data, 0, blocksize);
700 branch[n].p = (__le32 *) bh->b_data + offsets[n]; 690 branch[n].p = (__le32 *) bh->b_data + offsets[n];
701 branch[n].key = cpu_to_le32(new_blocks[n]); 691 branch[n].key = cpu_to_le32(new_blocks[n]);
702 *branch[n].p = branch[n].key; 692 *branch[n].p = branch[n].key;
703 if ( n == indirect_blks) { 693 if ( n == indirect_blks) {
704 current_block = new_blocks[n]; 694 current_block = new_blocks[n];
705 /* 695 /*
706 * End of chain, update the last new metablock of 696 * End of chain, update the last new metablock of
707 * the chain to point to the new allocated 697 * the chain to point to the new allocated
708 * data blocks numbers 698 * data blocks numbers
709 */ 699 */
710 for (i=1; i < num; i++) 700 for (i=1; i < num; i++)
711 *(branch[n].p + i) = cpu_to_le32(++current_block); 701 *(branch[n].p + i) = cpu_to_le32(++current_block);
712 } 702 }
713 BUFFER_TRACE(bh, "marking uptodate"); 703 BUFFER_TRACE(bh, "marking uptodate");
714 set_buffer_uptodate(bh); 704 set_buffer_uptodate(bh);
715 unlock_buffer(bh); 705 unlock_buffer(bh);
716 706
717 BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); 707 BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
718 err = ext3_journal_dirty_metadata(handle, bh); 708 err = ext3_journal_dirty_metadata(handle, bh);
719 if (err) 709 if (err)
720 goto failed; 710 goto failed;
721 } 711 }
722 *blks = num; 712 *blks = num;
723 return err; 713 return err;
724 failed: 714 failed:
725 /* Allocation failed, free what we already allocated */ 715 /* Allocation failed, free what we already allocated */
726 for (i = 1; i <= n ; i++) { 716 for (i = 1; i <= n ; i++) {
727 BUFFER_TRACE(branch[i].bh, "call journal_forget"); 717 BUFFER_TRACE(branch[i].bh, "call journal_forget");
728 ext3_journal_forget(handle, branch[i].bh); 718 ext3_journal_forget(handle, branch[i].bh);
729 } 719 }
730 for (i = 0; i <indirect_blks; i++) 720 for (i = 0; i <indirect_blks; i++)
731 ext3_free_blocks(handle, inode, new_blocks[i], 1); 721 ext3_free_blocks(handle, inode, new_blocks[i], 1);
732 722
733 ext3_free_blocks(handle, inode, new_blocks[i], num); 723 ext3_free_blocks(handle, inode, new_blocks[i], num);
734 724
735 return err; 725 return err;
736 } 726 }
737 727
738 /** 728 /**
739 * ext3_splice_branch - splice the allocated branch onto inode. 729 * ext3_splice_branch - splice the allocated branch onto inode.
740 * @handle: handle for this transaction 730 * @handle: handle for this transaction
741 * @inode: owner 731 * @inode: owner
742 * @block: (logical) number of block we are adding 732 * @block: (logical) number of block we are adding
743 * @where: location of missing link 733 * @where: location of missing link
744 * @num: number of indirect blocks we are adding 734 * @num: number of indirect blocks we are adding
745 * @blks: number of direct blocks we are adding 735 * @blks: number of direct blocks we are adding
746 * 736 *
747 * This function fills the missing link and does all housekeeping needed in 737 * This function fills the missing link and does all housekeeping needed in
748 * inode (->i_blocks, etc.). In case of success we end up with the full 738 * inode (->i_blocks, etc.). In case of success we end up with the full
749 * chain to new block and return 0. 739 * chain to new block and return 0.
750 */ 740 */
751 static int ext3_splice_branch(handle_t *handle, struct inode *inode, 741 static int ext3_splice_branch(handle_t *handle, struct inode *inode,
752 long block, Indirect *where, int num, int blks) 742 long block, Indirect *where, int num, int blks)
753 { 743 {
754 int i; 744 int i;
755 int err = 0; 745 int err = 0;
756 struct ext3_block_alloc_info *block_i; 746 struct ext3_block_alloc_info *block_i;
757 ext3_fsblk_t current_block; 747 ext3_fsblk_t current_block;
758 struct ext3_inode_info *ei = EXT3_I(inode); 748 struct ext3_inode_info *ei = EXT3_I(inode);
759 struct timespec now; 749 struct timespec now;
760 750
761 block_i = ei->i_block_alloc_info; 751 block_i = ei->i_block_alloc_info;
762 /* 752 /*
763 * If we're splicing into a [td]indirect block (as opposed to the 753 * If we're splicing into a [td]indirect block (as opposed to the
764 * inode) then we need to get write access to the [td]indirect block 754 * inode) then we need to get write access to the [td]indirect block
765 * before the splice. 755 * before the splice.
766 */ 756 */
767 if (where->bh) { 757 if (where->bh) {
768 BUFFER_TRACE(where->bh, "get_write_access"); 758 BUFFER_TRACE(where->bh, "get_write_access");
769 err = ext3_journal_get_write_access(handle, where->bh); 759 err = ext3_journal_get_write_access(handle, where->bh);
770 if (err) 760 if (err)
771 goto err_out; 761 goto err_out;
772 } 762 }
773 /* That's it */ 763 /* That's it */
774 764
775 *where->p = where->key; 765 *where->p = where->key;
776 766
777 /* 767 /*
778 * Update the host buffer_head or inode to point to more just allocated 768 * Update the host buffer_head or inode to point to more just allocated
779 * direct blocks blocks 769 * direct blocks blocks
780 */ 770 */
781 if (num == 0 && blks > 1) { 771 if (num == 0 && blks > 1) {
782 current_block = le32_to_cpu(where->key) + 1; 772 current_block = le32_to_cpu(where->key) + 1;
783 for (i = 1; i < blks; i++) 773 for (i = 1; i < blks; i++)
784 *(where->p + i ) = cpu_to_le32(current_block++); 774 *(where->p + i ) = cpu_to_le32(current_block++);
785 } 775 }
786 776
787 /* 777 /*
788 * update the most recently allocated logical & physical block 778 * update the most recently allocated logical & physical block
789 * in i_block_alloc_info, to assist find the proper goal block for next 779 * in i_block_alloc_info, to assist find the proper goal block for next
790 * allocation 780 * allocation
791 */ 781 */
792 if (block_i) { 782 if (block_i) {
793 block_i->last_alloc_logical_block = block + blks - 1; 783 block_i->last_alloc_logical_block = block + blks - 1;
794 block_i->last_alloc_physical_block = 784 block_i->last_alloc_physical_block =
795 le32_to_cpu(where[num].key) + blks - 1; 785 le32_to_cpu(where[num].key) + blks - 1;
796 } 786 }
797 787
798 /* We are done with atomic stuff, now do the rest of housekeeping */ 788 /* We are done with atomic stuff, now do the rest of housekeeping */
799 now = CURRENT_TIME_SEC; 789 now = CURRENT_TIME_SEC;
800 if (!timespec_equal(&inode->i_ctime, &now) || !where->bh) { 790 if (!timespec_equal(&inode->i_ctime, &now) || !where->bh) {
801 inode->i_ctime = now; 791 inode->i_ctime = now;
802 ext3_mark_inode_dirty(handle, inode); 792 ext3_mark_inode_dirty(handle, inode);
803 } 793 }
804 /* ext3_mark_inode_dirty already updated i_sync_tid */ 794 /* ext3_mark_inode_dirty already updated i_sync_tid */
805 atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid); 795 atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid);
806 796
807 /* had we spliced it onto indirect block? */ 797 /* had we spliced it onto indirect block? */
808 if (where->bh) { 798 if (where->bh) {
809 /* 799 /*
810 * If we spliced it onto an indirect block, we haven't 800 * If we spliced it onto an indirect block, we haven't
811 * altered the inode. Note however that if it is being spliced 801 * altered the inode. Note however that if it is being spliced
812 * onto an indirect block at the very end of the file (the 802 * onto an indirect block at the very end of the file (the
813 * file is growing) then we *will* alter the inode to reflect 803 * file is growing) then we *will* alter the inode to reflect
814 * the new i_size. But that is not done here - it is done in 804 * the new i_size. But that is not done here - it is done in
815 * generic_commit_write->__mark_inode_dirty->ext3_dirty_inode. 805 * generic_commit_write->__mark_inode_dirty->ext3_dirty_inode.
816 */ 806 */
817 jbd_debug(5, "splicing indirect only\n"); 807 jbd_debug(5, "splicing indirect only\n");
818 BUFFER_TRACE(where->bh, "call ext3_journal_dirty_metadata"); 808 BUFFER_TRACE(where->bh, "call ext3_journal_dirty_metadata");
819 err = ext3_journal_dirty_metadata(handle, where->bh); 809 err = ext3_journal_dirty_metadata(handle, where->bh);
820 if (err) 810 if (err)
821 goto err_out; 811 goto err_out;
822 } else { 812 } else {
823 /* 813 /*
824 * OK, we spliced it into the inode itself on a direct block. 814 * OK, we spliced it into the inode itself on a direct block.
825 * Inode was dirtied above. 815 * Inode was dirtied above.
826 */ 816 */
827 jbd_debug(5, "splicing direct\n"); 817 jbd_debug(5, "splicing direct\n");
828 } 818 }
829 return err; 819 return err;
830 820
831 err_out: 821 err_out:
832 for (i = 1; i <= num; i++) { 822 for (i = 1; i <= num; i++) {
833 BUFFER_TRACE(where[i].bh, "call journal_forget"); 823 BUFFER_TRACE(where[i].bh, "call journal_forget");
834 ext3_journal_forget(handle, where[i].bh); 824 ext3_journal_forget(handle, where[i].bh);
835 ext3_free_blocks(handle,inode,le32_to_cpu(where[i-1].key),1); 825 ext3_free_blocks(handle,inode,le32_to_cpu(where[i-1].key),1);
836 } 826 }
837 ext3_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks); 827 ext3_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks);
838 828
839 return err; 829 return err;
840 } 830 }
841 831
842 /* 832 /*
843 * Allocation strategy is simple: if we have to allocate something, we will 833 * Allocation strategy is simple: if we have to allocate something, we will
844 * have to go the whole way to leaf. So let's do it before attaching anything 834 * have to go the whole way to leaf. So let's do it before attaching anything
845 * to tree, set linkage between the newborn blocks, write them if sync is 835 * to tree, set linkage between the newborn blocks, write them if sync is
846 * required, recheck the path, free and repeat if check fails, otherwise 836 * required, recheck the path, free and repeat if check fails, otherwise
847 * set the last missing link (that will protect us from any truncate-generated 837 * set the last missing link (that will protect us from any truncate-generated
848 * removals - all blocks on the path are immune now) and possibly force the 838 * removals - all blocks on the path are immune now) and possibly force the
849 * write on the parent block. 839 * write on the parent block.
850 * That has a nice additional property: no special recovery from the failed 840 * That has a nice additional property: no special recovery from the failed
851 * allocations is needed - we simply release blocks and do not touch anything 841 * allocations is needed - we simply release blocks and do not touch anything
852 * reachable from inode. 842 * reachable from inode.
853 * 843 *
854 * `handle' can be NULL if create == 0. 844 * `handle' can be NULL if create == 0.
855 * 845 *
856 * The BKL may not be held on entry here. Be sure to take it early. 846 * The BKL may not be held on entry here. Be sure to take it early.
857 * return > 0, # of blocks mapped or allocated. 847 * return > 0, # of blocks mapped or allocated.
858 * return = 0, if plain lookup failed. 848 * return = 0, if plain lookup failed.
859 * return < 0, error case. 849 * return < 0, error case.
860 */ 850 */
861 int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, 851 int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
862 sector_t iblock, unsigned long maxblocks, 852 sector_t iblock, unsigned long maxblocks,
863 struct buffer_head *bh_result, 853 struct buffer_head *bh_result,
864 int create) 854 int create)
865 { 855 {
866 int err = -EIO; 856 int err = -EIO;
867 int offsets[4]; 857 int offsets[4];
868 Indirect chain[4]; 858 Indirect chain[4];
869 Indirect *partial; 859 Indirect *partial;
870 ext3_fsblk_t goal; 860 ext3_fsblk_t goal;
871 int indirect_blks; 861 int indirect_blks;
872 int blocks_to_boundary = 0; 862 int blocks_to_boundary = 0;
873 int depth; 863 int depth;
874 struct ext3_inode_info *ei = EXT3_I(inode); 864 struct ext3_inode_info *ei = EXT3_I(inode);
875 int count = 0; 865 int count = 0;
876 ext3_fsblk_t first_block = 0; 866 ext3_fsblk_t first_block = 0;
877 867
878 868
879 trace_ext3_get_blocks_enter(inode, iblock, maxblocks, create); 869 trace_ext3_get_blocks_enter(inode, iblock, maxblocks, create);
880 J_ASSERT(handle != NULL || create == 0); 870 J_ASSERT(handle != NULL || create == 0);
881 depth = ext3_block_to_path(inode,iblock,offsets,&blocks_to_boundary); 871 depth = ext3_block_to_path(inode,iblock,offsets,&blocks_to_boundary);
882 872
883 if (depth == 0) 873 if (depth == 0)
884 goto out; 874 goto out;
885 875
886 partial = ext3_get_branch(inode, depth, offsets, chain, &err); 876 partial = ext3_get_branch(inode, depth, offsets, chain, &err);
887 877
888 /* Simplest case - block found, no allocation needed */ 878 /* Simplest case - block found, no allocation needed */
889 if (!partial) { 879 if (!partial) {
890 first_block = le32_to_cpu(chain[depth - 1].key); 880 first_block = le32_to_cpu(chain[depth - 1].key);
891 clear_buffer_new(bh_result); 881 clear_buffer_new(bh_result);
892 count++; 882 count++;
893 /*map more blocks*/ 883 /*map more blocks*/
894 while (count < maxblocks && count <= blocks_to_boundary) { 884 while (count < maxblocks && count <= blocks_to_boundary) {
895 ext3_fsblk_t blk; 885 ext3_fsblk_t blk;
896 886
897 if (!verify_chain(chain, chain + depth - 1)) { 887 if (!verify_chain(chain, chain + depth - 1)) {
898 /* 888 /*
899 * Indirect block might be removed by 889 * Indirect block might be removed by
900 * truncate while we were reading it. 890 * truncate while we were reading it.
901 * Handling of that case: forget what we've 891 * Handling of that case: forget what we've
902 * got now. Flag the err as EAGAIN, so it 892 * got now. Flag the err as EAGAIN, so it
903 * will reread. 893 * will reread.
904 */ 894 */
905 err = -EAGAIN; 895 err = -EAGAIN;
906 count = 0; 896 count = 0;
907 break; 897 break;
908 } 898 }
909 blk = le32_to_cpu(*(chain[depth-1].p + count)); 899 blk = le32_to_cpu(*(chain[depth-1].p + count));
910 900
911 if (blk == first_block + count) 901 if (blk == first_block + count)
912 count++; 902 count++;
913 else 903 else
914 break; 904 break;
915 } 905 }
916 if (err != -EAGAIN) 906 if (err != -EAGAIN)
917 goto got_it; 907 goto got_it;
918 } 908 }
919 909
920 /* Next simple case - plain lookup or failed read of indirect block */ 910 /* Next simple case - plain lookup or failed read of indirect block */
921 if (!create || err == -EIO) 911 if (!create || err == -EIO)
922 goto cleanup; 912 goto cleanup;
923 913
924 /* 914 /*
925 * Block out ext3_truncate while we alter the tree 915 * Block out ext3_truncate while we alter the tree
926 */ 916 */
927 mutex_lock(&ei->truncate_mutex); 917 mutex_lock(&ei->truncate_mutex);
928 918
929 /* 919 /*
930 * If the indirect block is missing while we are reading 920 * If the indirect block is missing while we are reading
931 * the chain(ext3_get_branch() returns -EAGAIN err), or 921 * the chain(ext3_get_branch() returns -EAGAIN err), or
932 * if the chain has been changed after we grab the semaphore, 922 * if the chain has been changed after we grab the semaphore,
933 * (either because another process truncated this branch, or 923 * (either because another process truncated this branch, or
934 * another get_block allocated this branch) re-grab the chain to see if 924 * another get_block allocated this branch) re-grab the chain to see if
935 * the request block has been allocated or not. 925 * the request block has been allocated or not.
936 * 926 *
937 * Since we already block the truncate/other get_block 927 * Since we already block the truncate/other get_block
938 * at this point, we will have the current copy of the chain when we 928 * at this point, we will have the current copy of the chain when we
939 * splice the branch into the tree. 929 * splice the branch into the tree.
940 */ 930 */
941 if (err == -EAGAIN || !verify_chain(chain, partial)) { 931 if (err == -EAGAIN || !verify_chain(chain, partial)) {
942 while (partial > chain) { 932 while (partial > chain) {
943 brelse(partial->bh); 933 brelse(partial->bh);
944 partial--; 934 partial--;
945 } 935 }
946 partial = ext3_get_branch(inode, depth, offsets, chain, &err); 936 partial = ext3_get_branch(inode, depth, offsets, chain, &err);
947 if (!partial) { 937 if (!partial) {
948 count++; 938 count++;
949 mutex_unlock(&ei->truncate_mutex); 939 mutex_unlock(&ei->truncate_mutex);
950 if (err) 940 if (err)
951 goto cleanup; 941 goto cleanup;
952 clear_buffer_new(bh_result); 942 clear_buffer_new(bh_result);
953 goto got_it; 943 goto got_it;
954 } 944 }
955 } 945 }
956 946
957 /* 947 /*
958 * Okay, we need to do block allocation. Lazily initialize the block 948 * Okay, we need to do block allocation. Lazily initialize the block
959 * allocation info here if necessary 949 * allocation info here if necessary
960 */ 950 */
961 if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info)) 951 if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info))
962 ext3_init_block_alloc_info(inode); 952 ext3_init_block_alloc_info(inode);
963 953
964 goal = ext3_find_goal(inode, iblock, partial); 954 goal = ext3_find_goal(inode, iblock, partial);
965 955
966 /* the number of blocks need to allocate for [d,t]indirect blocks */ 956 /* the number of blocks need to allocate for [d,t]indirect blocks */
967 indirect_blks = (chain + depth) - partial - 1; 957 indirect_blks = (chain + depth) - partial - 1;
968 958
969 /* 959 /*
970 * Next look up the indirect map to count the totoal number of 960 * Next look up the indirect map to count the totoal number of
971 * direct blocks to allocate for this branch. 961 * direct blocks to allocate for this branch.
972 */ 962 */
973 count = ext3_blks_to_allocate(partial, indirect_blks, 963 count = ext3_blks_to_allocate(partial, indirect_blks,
974 maxblocks, blocks_to_boundary); 964 maxblocks, blocks_to_boundary);
975 err = ext3_alloc_branch(handle, inode, indirect_blks, &count, goal, 965 err = ext3_alloc_branch(handle, inode, indirect_blks, &count, goal,
976 offsets + (partial - chain), partial); 966 offsets + (partial - chain), partial);
977 967
978 /* 968 /*
979 * The ext3_splice_branch call will free and forget any buffers 969 * The ext3_splice_branch call will free and forget any buffers
980 * on the new chain if there is a failure, but that risks using 970 * on the new chain if there is a failure, but that risks using
981 * up transaction credits, especially for bitmaps where the 971 * up transaction credits, especially for bitmaps where the
982 * credits cannot be returned. Can we handle this somehow? We 972 * credits cannot be returned. Can we handle this somehow? We
983 * may need to return -EAGAIN upwards in the worst case. --sct 973 * may need to return -EAGAIN upwards in the worst case. --sct
984 */ 974 */
985 if (!err) 975 if (!err)
986 err = ext3_splice_branch(handle, inode, iblock, 976 err = ext3_splice_branch(handle, inode, iblock,
987 partial, indirect_blks, count); 977 partial, indirect_blks, count);
988 mutex_unlock(&ei->truncate_mutex); 978 mutex_unlock(&ei->truncate_mutex);
989 if (err) 979 if (err)
990 goto cleanup; 980 goto cleanup;
991 981
992 set_buffer_new(bh_result); 982 set_buffer_new(bh_result);
993 got_it: 983 got_it:
994 map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); 984 map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
995 if (count > blocks_to_boundary) 985 if (count > blocks_to_boundary)
996 set_buffer_boundary(bh_result); 986 set_buffer_boundary(bh_result);
997 err = count; 987 err = count;
998 /* Clean up and exit */ 988 /* Clean up and exit */
999 partial = chain + depth - 1; /* the whole chain */ 989 partial = chain + depth - 1; /* the whole chain */
1000 cleanup: 990 cleanup:
1001 while (partial > chain) { 991 while (partial > chain) {
1002 BUFFER_TRACE(partial->bh, "call brelse"); 992 BUFFER_TRACE(partial->bh, "call brelse");
1003 brelse(partial->bh); 993 brelse(partial->bh);
1004 partial--; 994 partial--;
1005 } 995 }
1006 BUFFER_TRACE(bh_result, "returned"); 996 BUFFER_TRACE(bh_result, "returned");
1007 out: 997 out:
1008 trace_ext3_get_blocks_exit(inode, iblock, 998 trace_ext3_get_blocks_exit(inode, iblock,
1009 depth ? le32_to_cpu(chain[depth-1].key) : 0, 999 depth ? le32_to_cpu(chain[depth-1].key) : 0,
1010 count, err); 1000 count, err);
1011 return err; 1001 return err;
1012 } 1002 }
1013 1003
1014 /* Maximum number of blocks we map for direct IO at once. */ 1004 /* Maximum number of blocks we map for direct IO at once. */
1015 #define DIO_MAX_BLOCKS 4096 1005 #define DIO_MAX_BLOCKS 4096
1016 /* 1006 /*
1017 * Number of credits we need for writing DIO_MAX_BLOCKS: 1007 * Number of credits we need for writing DIO_MAX_BLOCKS:
1018 * We need sb + group descriptor + bitmap + inode -> 4 1008 * We need sb + group descriptor + bitmap + inode -> 4
1019 * For B blocks with A block pointers per block we need: 1009 * For B blocks with A block pointers per block we need:
1020 * 1 (triple ind.) + (B/A/A + 2) (doubly ind.) + (B/A + 2) (indirect). 1010 * 1 (triple ind.) + (B/A/A + 2) (doubly ind.) + (B/A + 2) (indirect).
1021 * If we plug in 4096 for B and 256 for A (for 1KB block size), we get 25. 1011 * If we plug in 4096 for B and 256 for A (for 1KB block size), we get 25.
1022 */ 1012 */
1023 #define DIO_CREDITS 25 1013 #define DIO_CREDITS 25
1024 1014
1025 static int ext3_get_block(struct inode *inode, sector_t iblock, 1015 static int ext3_get_block(struct inode *inode, sector_t iblock,
1026 struct buffer_head *bh_result, int create) 1016 struct buffer_head *bh_result, int create)
1027 { 1017 {
1028 handle_t *handle = ext3_journal_current_handle(); 1018 handle_t *handle = ext3_journal_current_handle();
1029 int ret = 0, started = 0; 1019 int ret = 0, started = 0;
1030 unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; 1020 unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
1031 1021
1032 if (create && !handle) { /* Direct IO write... */ 1022 if (create && !handle) { /* Direct IO write... */
1033 if (max_blocks > DIO_MAX_BLOCKS) 1023 if (max_blocks > DIO_MAX_BLOCKS)
1034 max_blocks = DIO_MAX_BLOCKS; 1024 max_blocks = DIO_MAX_BLOCKS;
1035 handle = ext3_journal_start(inode, DIO_CREDITS + 1025 handle = ext3_journal_start(inode, DIO_CREDITS +
1036 EXT3_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb)); 1026 EXT3_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb));
1037 if (IS_ERR(handle)) { 1027 if (IS_ERR(handle)) {
1038 ret = PTR_ERR(handle); 1028 ret = PTR_ERR(handle);
1039 goto out; 1029 goto out;
1040 } 1030 }
1041 started = 1; 1031 started = 1;
1042 } 1032 }
1043 1033
1044 ret = ext3_get_blocks_handle(handle, inode, iblock, 1034 ret = ext3_get_blocks_handle(handle, inode, iblock,
1045 max_blocks, bh_result, create); 1035 max_blocks, bh_result, create);
1046 if (ret > 0) { 1036 if (ret > 0) {
1047 bh_result->b_size = (ret << inode->i_blkbits); 1037 bh_result->b_size = (ret << inode->i_blkbits);
1048 ret = 0; 1038 ret = 0;
1049 } 1039 }
1050 if (started) 1040 if (started)
1051 ext3_journal_stop(handle); 1041 ext3_journal_stop(handle);
1052 out: 1042 out:
1053 return ret; 1043 return ret;
1054 } 1044 }
1055 1045
1056 int ext3_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 1046 int ext3_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
1057 u64 start, u64 len) 1047 u64 start, u64 len)
1058 { 1048 {
1059 return generic_block_fiemap(inode, fieinfo, start, len, 1049 return generic_block_fiemap(inode, fieinfo, start, len,
1060 ext3_get_block); 1050 ext3_get_block);
1061 } 1051 }
1062 1052
1063 /* 1053 /*
1064 * `handle' can be NULL if create is zero 1054 * `handle' can be NULL if create is zero
1065 */ 1055 */
1066 struct buffer_head *ext3_getblk(handle_t *handle, struct inode *inode, 1056 struct buffer_head *ext3_getblk(handle_t *handle, struct inode *inode,
1067 long block, int create, int *errp) 1057 long block, int create, int *errp)
1068 { 1058 {
1069 struct buffer_head dummy; 1059 struct buffer_head dummy;
1070 int fatal = 0, err; 1060 int fatal = 0, err;
1071 1061
1072 J_ASSERT(handle != NULL || create == 0); 1062 J_ASSERT(handle != NULL || create == 0);
1073 1063
1074 dummy.b_state = 0; 1064 dummy.b_state = 0;
1075 dummy.b_blocknr = -1000; 1065 dummy.b_blocknr = -1000;
1076 buffer_trace_init(&dummy.b_history); 1066 buffer_trace_init(&dummy.b_history);
1077 err = ext3_get_blocks_handle(handle, inode, block, 1, 1067 err = ext3_get_blocks_handle(handle, inode, block, 1,
1078 &dummy, create); 1068 &dummy, create);
1079 /* 1069 /*
1080 * ext3_get_blocks_handle() returns number of blocks 1070 * ext3_get_blocks_handle() returns number of blocks
1081 * mapped. 0 in case of a HOLE. 1071 * mapped. 0 in case of a HOLE.
1082 */ 1072 */
1083 if (err > 0) { 1073 if (err > 0) {
1084 if (err > 1) 1074 if (err > 1)
1085 WARN_ON(1); 1075 WARN_ON(1);
1086 err = 0; 1076 err = 0;
1087 } 1077 }
1088 *errp = err; 1078 *errp = err;
1089 if (!err && buffer_mapped(&dummy)) { 1079 if (!err && buffer_mapped(&dummy)) {
1090 struct buffer_head *bh; 1080 struct buffer_head *bh;
1091 bh = sb_getblk(inode->i_sb, dummy.b_blocknr); 1081 bh = sb_getblk(inode->i_sb, dummy.b_blocknr);
1092 if (!bh) { 1082 if (!bh) {
1093 *errp = -EIO; 1083 *errp = -EIO;
1094 goto err; 1084 goto err;
1095 } 1085 }
1096 if (buffer_new(&dummy)) { 1086 if (buffer_new(&dummy)) {
1097 J_ASSERT(create != 0); 1087 J_ASSERT(create != 0);
1098 J_ASSERT(handle != NULL); 1088 J_ASSERT(handle != NULL);
1099 1089
1100 /* 1090 /*
1101 * Now that we do not always journal data, we should 1091 * Now that we do not always journal data, we should
1102 * keep in mind whether this should always journal the 1092 * keep in mind whether this should always journal the
1103 * new buffer as metadata. For now, regular file 1093 * new buffer as metadata. For now, regular file
1104 * writes use ext3_get_block instead, so it's not a 1094 * writes use ext3_get_block instead, so it's not a
1105 * problem. 1095 * problem.
1106 */ 1096 */
1107 lock_buffer(bh); 1097 lock_buffer(bh);
1108 BUFFER_TRACE(bh, "call get_create_access"); 1098 BUFFER_TRACE(bh, "call get_create_access");
1109 fatal = ext3_journal_get_create_access(handle, bh); 1099 fatal = ext3_journal_get_create_access(handle, bh);
1110 if (!fatal && !buffer_uptodate(bh)) { 1100 if (!fatal && !buffer_uptodate(bh)) {
1111 memset(bh->b_data,0,inode->i_sb->s_blocksize); 1101 memset(bh->b_data,0,inode->i_sb->s_blocksize);
1112 set_buffer_uptodate(bh); 1102 set_buffer_uptodate(bh);
1113 } 1103 }
1114 unlock_buffer(bh); 1104 unlock_buffer(bh);
1115 BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); 1105 BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
1116 err = ext3_journal_dirty_metadata(handle, bh); 1106 err = ext3_journal_dirty_metadata(handle, bh);
1117 if (!fatal) 1107 if (!fatal)
1118 fatal = err; 1108 fatal = err;
1119 } else { 1109 } else {
1120 BUFFER_TRACE(bh, "not a new buffer"); 1110 BUFFER_TRACE(bh, "not a new buffer");
1121 } 1111 }
1122 if (fatal) { 1112 if (fatal) {
1123 *errp = fatal; 1113 *errp = fatal;
1124 brelse(bh); 1114 brelse(bh);
1125 bh = NULL; 1115 bh = NULL;
1126 } 1116 }
1127 return bh; 1117 return bh;
1128 } 1118 }
1129 err: 1119 err:
1130 return NULL; 1120 return NULL;
1131 } 1121 }
1132 1122
1133 struct buffer_head *ext3_bread(handle_t *handle, struct inode *inode, 1123 struct buffer_head *ext3_bread(handle_t *handle, struct inode *inode,
1134 int block, int create, int *err) 1124 int block, int create, int *err)
1135 { 1125 {
1136 struct buffer_head * bh; 1126 struct buffer_head * bh;
1137 1127
1138 bh = ext3_getblk(handle, inode, block, create, err); 1128 bh = ext3_getblk(handle, inode, block, create, err);
1139 if (!bh) 1129 if (!bh)
1140 return bh; 1130 return bh;
1141 if (bh_uptodate_or_lock(bh)) 1131 if (bh_uptodate_or_lock(bh))
1142 return bh; 1132 return bh;
1143 get_bh(bh); 1133 get_bh(bh);
1144 bh->b_end_io = end_buffer_read_sync; 1134 bh->b_end_io = end_buffer_read_sync;
1145 submit_bh(READ | REQ_META | REQ_PRIO, bh); 1135 submit_bh(READ | REQ_META | REQ_PRIO, bh);
1146 wait_on_buffer(bh); 1136 wait_on_buffer(bh);
1147 if (buffer_uptodate(bh)) 1137 if (buffer_uptodate(bh))
1148 return bh; 1138 return bh;
1149 put_bh(bh); 1139 put_bh(bh);
1150 *err = -EIO; 1140 *err = -EIO;
1151 return NULL; 1141 return NULL;
1152 } 1142 }
1153 1143
1154 static int walk_page_buffers( handle_t *handle, 1144 static int walk_page_buffers( handle_t *handle,
1155 struct buffer_head *head, 1145 struct buffer_head *head,
1156 unsigned from, 1146 unsigned from,
1157 unsigned to, 1147 unsigned to,
1158 int *partial, 1148 int *partial,
1159 int (*fn)( handle_t *handle, 1149 int (*fn)( handle_t *handle,
1160 struct buffer_head *bh)) 1150 struct buffer_head *bh))
1161 { 1151 {
1162 struct buffer_head *bh; 1152 struct buffer_head *bh;
1163 unsigned block_start, block_end; 1153 unsigned block_start, block_end;
1164 unsigned blocksize = head->b_size; 1154 unsigned blocksize = head->b_size;
1165 int err, ret = 0; 1155 int err, ret = 0;
1166 struct buffer_head *next; 1156 struct buffer_head *next;
1167 1157
1168 for ( bh = head, block_start = 0; 1158 for ( bh = head, block_start = 0;
1169 ret == 0 && (bh != head || !block_start); 1159 ret == 0 && (bh != head || !block_start);
1170 block_start = block_end, bh = next) 1160 block_start = block_end, bh = next)
1171 { 1161 {
1172 next = bh->b_this_page; 1162 next = bh->b_this_page;
1173 block_end = block_start + blocksize; 1163 block_end = block_start + blocksize;
1174 if (block_end <= from || block_start >= to) { 1164 if (block_end <= from || block_start >= to) {
1175 if (partial && !buffer_uptodate(bh)) 1165 if (partial && !buffer_uptodate(bh))
1176 *partial = 1; 1166 *partial = 1;
1177 continue; 1167 continue;
1178 } 1168 }
1179 err = (*fn)(handle, bh); 1169 err = (*fn)(handle, bh);
1180 if (!ret) 1170 if (!ret)
1181 ret = err; 1171 ret = err;
1182 } 1172 }
1183 return ret; 1173 return ret;
1184 } 1174 }
1185 1175
1186 /* 1176 /*
1187 * To preserve ordering, it is essential that the hole instantiation and 1177 * To preserve ordering, it is essential that the hole instantiation and
1188 * the data write be encapsulated in a single transaction. We cannot 1178 * the data write be encapsulated in a single transaction. We cannot
1189 * close off a transaction and start a new one between the ext3_get_block() 1179 * close off a transaction and start a new one between the ext3_get_block()
1190 * and the commit_write(). So doing the journal_start at the start of 1180 * and the commit_write(). So doing the journal_start at the start of
1191 * prepare_write() is the right place. 1181 * prepare_write() is the right place.
1192 * 1182 *
1193 * Also, this function can nest inside ext3_writepage() -> 1183 * Also, this function can nest inside ext3_writepage() ->
1194 * block_write_full_page(). In that case, we *know* that ext3_writepage() 1184 * block_write_full_page(). In that case, we *know* that ext3_writepage()
1195 * has generated enough buffer credits to do the whole page. So we won't 1185 * has generated enough buffer credits to do the whole page. So we won't
1196 * block on the journal in that case, which is good, because the caller may 1186 * block on the journal in that case, which is good, because the caller may
1197 * be PF_MEMALLOC. 1187 * be PF_MEMALLOC.
1198 * 1188 *
1199 * By accident, ext3 can be reentered when a transaction is open via 1189 * By accident, ext3 can be reentered when a transaction is open via
1200 * quota file writes. If we were to commit the transaction while thus 1190 * quota file writes. If we were to commit the transaction while thus
1201 * reentered, there can be a deadlock - we would be holding a quota 1191 * reentered, there can be a deadlock - we would be holding a quota
1202 * lock, and the commit would never complete if another thread had a 1192 * lock, and the commit would never complete if another thread had a
1203 * transaction open and was blocking on the quota lock - a ranking 1193 * transaction open and was blocking on the quota lock - a ranking
1204 * violation. 1194 * violation.
1205 * 1195 *
1206 * So what we do is to rely on the fact that journal_stop/journal_start 1196 * So what we do is to rely on the fact that journal_stop/journal_start
1207 * will _not_ run commit under these circumstances because handle->h_ref 1197 * will _not_ run commit under these circumstances because handle->h_ref
1208 * is elevated. We'll still have enough credits for the tiny quotafile 1198 * is elevated. We'll still have enough credits for the tiny quotafile
1209 * write. 1199 * write.
1210 */ 1200 */
1211 static int do_journal_get_write_access(handle_t *handle, 1201 static int do_journal_get_write_access(handle_t *handle,
1212 struct buffer_head *bh) 1202 struct buffer_head *bh)
1213 { 1203 {
1214 int dirty = buffer_dirty(bh); 1204 int dirty = buffer_dirty(bh);
1215 int ret; 1205 int ret;
1216 1206
1217 if (!buffer_mapped(bh) || buffer_freed(bh)) 1207 if (!buffer_mapped(bh) || buffer_freed(bh))
1218 return 0; 1208 return 0;
1219 /* 1209 /*
1220 * __block_prepare_write() could have dirtied some buffers. Clean 1210 * __block_prepare_write() could have dirtied some buffers. Clean
1221 * the dirty bit as jbd2_journal_get_write_access() could complain 1211 * the dirty bit as jbd2_journal_get_write_access() could complain
1222 * otherwise about fs integrity issues. Setting of the dirty bit 1212 * otherwise about fs integrity issues. Setting of the dirty bit
1223 * by __block_prepare_write() isn't a real problem here as we clear 1213 * by __block_prepare_write() isn't a real problem here as we clear
1224 * the bit before releasing a page lock and thus writeback cannot 1214 * the bit before releasing a page lock and thus writeback cannot
1225 * ever write the buffer. 1215 * ever write the buffer.
1226 */ 1216 */
1227 if (dirty) 1217 if (dirty)
1228 clear_buffer_dirty(bh); 1218 clear_buffer_dirty(bh);
1229 ret = ext3_journal_get_write_access(handle, bh); 1219 ret = ext3_journal_get_write_access(handle, bh);
1230 if (!ret && dirty) 1220 if (!ret && dirty)
1231 ret = ext3_journal_dirty_metadata(handle, bh); 1221 ret = ext3_journal_dirty_metadata(handle, bh);
1232 return ret; 1222 return ret;
1233 } 1223 }
1234 1224
1235 /* 1225 /*
1236 * Truncate blocks that were not used by write. We have to truncate the 1226 * Truncate blocks that were not used by write. We have to truncate the
1237 * pagecache as well so that corresponding buffers get properly unmapped. 1227 * pagecache as well so that corresponding buffers get properly unmapped.
1238 */ 1228 */
1239 static void ext3_truncate_failed_write(struct inode *inode) 1229 static void ext3_truncate_failed_write(struct inode *inode)
1240 { 1230 {
1241 truncate_inode_pages(inode->i_mapping, inode->i_size); 1231 truncate_inode_pages(inode->i_mapping, inode->i_size);
1242 ext3_truncate(inode); 1232 ext3_truncate(inode);
1243 } 1233 }
1244 1234
1245 /* 1235 /*
1246 * Truncate blocks that were not used by direct IO write. We have to zero out 1236 * Truncate blocks that were not used by direct IO write. We have to zero out
1247 * the last file block as well because direct IO might have written to it. 1237 * the last file block as well because direct IO might have written to it.
1248 */ 1238 */
1249 static void ext3_truncate_failed_direct_write(struct inode *inode) 1239 static void ext3_truncate_failed_direct_write(struct inode *inode)
1250 { 1240 {
1251 ext3_block_truncate_page(inode, inode->i_size); 1241 ext3_block_truncate_page(inode, inode->i_size);
1252 ext3_truncate(inode); 1242 ext3_truncate(inode);
1253 } 1243 }
1254 1244
1255 static int ext3_write_begin(struct file *file, struct address_space *mapping, 1245 static int ext3_write_begin(struct file *file, struct address_space *mapping,
1256 loff_t pos, unsigned len, unsigned flags, 1246 loff_t pos, unsigned len, unsigned flags,
1257 struct page **pagep, void **fsdata) 1247 struct page **pagep, void **fsdata)
1258 { 1248 {
1259 struct inode *inode = mapping->host; 1249 struct inode *inode = mapping->host;
1260 int ret; 1250 int ret;
1261 handle_t *handle; 1251 handle_t *handle;
1262 int retries = 0; 1252 int retries = 0;
1263 struct page *page; 1253 struct page *page;
1264 pgoff_t index; 1254 pgoff_t index;
1265 unsigned from, to; 1255 unsigned from, to;
1266 /* Reserve one block more for addition to orphan list in case 1256 /* Reserve one block more for addition to orphan list in case
1267 * we allocate blocks but write fails for some reason */ 1257 * we allocate blocks but write fails for some reason */
1268 int needed_blocks = ext3_writepage_trans_blocks(inode) + 1; 1258 int needed_blocks = ext3_writepage_trans_blocks(inode) + 1;
1269 1259
1270 trace_ext3_write_begin(inode, pos, len, flags); 1260 trace_ext3_write_begin(inode, pos, len, flags);
1271 1261
1272 index = pos >> PAGE_CACHE_SHIFT; 1262 index = pos >> PAGE_CACHE_SHIFT;
1273 from = pos & (PAGE_CACHE_SIZE - 1); 1263 from = pos & (PAGE_CACHE_SIZE - 1);
1274 to = from + len; 1264 to = from + len;
1275 1265
1276 retry: 1266 retry:
1277 page = grab_cache_page_write_begin(mapping, index, flags); 1267 page = grab_cache_page_write_begin(mapping, index, flags);
1278 if (!page) 1268 if (!page)
1279 return -ENOMEM; 1269 return -ENOMEM;
1280 *pagep = page; 1270 *pagep = page;
1281 1271
1282 handle = ext3_journal_start(inode, needed_blocks); 1272 handle = ext3_journal_start(inode, needed_blocks);
1283 if (IS_ERR(handle)) { 1273 if (IS_ERR(handle)) {
1284 unlock_page(page); 1274 unlock_page(page);
1285 page_cache_release(page); 1275 page_cache_release(page);
1286 ret = PTR_ERR(handle); 1276 ret = PTR_ERR(handle);
1287 goto out; 1277 goto out;
1288 } 1278 }
1289 ret = __block_write_begin(page, pos, len, ext3_get_block); 1279 ret = __block_write_begin(page, pos, len, ext3_get_block);
1290 if (ret) 1280 if (ret)
1291 goto write_begin_failed; 1281 goto write_begin_failed;
1292 1282
1293 if (ext3_should_journal_data(inode)) { 1283 if (ext3_should_journal_data(inode)) {
1294 ret = walk_page_buffers(handle, page_buffers(page), 1284 ret = walk_page_buffers(handle, page_buffers(page),
1295 from, to, NULL, do_journal_get_write_access); 1285 from, to, NULL, do_journal_get_write_access);
1296 } 1286 }
1297 write_begin_failed: 1287 write_begin_failed:
1298 if (ret) { 1288 if (ret) {
1299 /* 1289 /*
1300 * block_write_begin may have instantiated a few blocks 1290 * block_write_begin may have instantiated a few blocks
1301 * outside i_size. Trim these off again. Don't need 1291 * outside i_size. Trim these off again. Don't need
1302 * i_size_read because we hold i_mutex. 1292 * i_size_read because we hold i_mutex.
1303 * 1293 *
1304 * Add inode to orphan list in case we crash before truncate 1294 * Add inode to orphan list in case we crash before truncate
1305 * finishes. Do this only if ext3_can_truncate() agrees so 1295 * finishes. Do this only if ext3_can_truncate() agrees so
1306 * that orphan processing code is happy. 1296 * that orphan processing code is happy.
1307 */ 1297 */
1308 if (pos + len > inode->i_size && ext3_can_truncate(inode)) 1298 if (pos + len > inode->i_size && ext3_can_truncate(inode))
1309 ext3_orphan_add(handle, inode); 1299 ext3_orphan_add(handle, inode);
1310 ext3_journal_stop(handle); 1300 ext3_journal_stop(handle);
1311 unlock_page(page); 1301 unlock_page(page);
1312 page_cache_release(page); 1302 page_cache_release(page);
1313 if (pos + len > inode->i_size) 1303 if (pos + len > inode->i_size)
1314 ext3_truncate_failed_write(inode); 1304 ext3_truncate_failed_write(inode);
1315 } 1305 }
1316 if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) 1306 if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
1317 goto retry; 1307 goto retry;
1318 out: 1308 out:
1319 return ret; 1309 return ret;
1320 } 1310 }
1321 1311
1322 1312
1323 int ext3_journal_dirty_data(handle_t *handle, struct buffer_head *bh) 1313 int ext3_journal_dirty_data(handle_t *handle, struct buffer_head *bh)
1324 { 1314 {
1325 int err = journal_dirty_data(handle, bh); 1315 int err = journal_dirty_data(handle, bh);
1326 if (err) 1316 if (err)
1327 ext3_journal_abort_handle(__func__, __func__, 1317 ext3_journal_abort_handle(__func__, __func__,
1328 bh, handle, err); 1318 bh, handle, err);
1329 return err; 1319 return err;
1330 } 1320 }
1331 1321
1332 /* For ordered writepage and write_end functions */ 1322 /* For ordered writepage and write_end functions */
1333 static int journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh) 1323 static int journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh)
1334 { 1324 {
1335 /* 1325 /*
1336 * Write could have mapped the buffer but it didn't copy the data in 1326 * Write could have mapped the buffer but it didn't copy the data in
1337 * yet. So avoid filing such buffer into a transaction. 1327 * yet. So avoid filing such buffer into a transaction.
1338 */ 1328 */
1339 if (buffer_mapped(bh) && buffer_uptodate(bh)) 1329 if (buffer_mapped(bh) && buffer_uptodate(bh))
1340 return ext3_journal_dirty_data(handle, bh); 1330 return ext3_journal_dirty_data(handle, bh);
1341 return 0; 1331 return 0;
1342 } 1332 }
1343 1333
1344 /* For write_end() in data=journal mode */ 1334 /* For write_end() in data=journal mode */
1345 static int write_end_fn(handle_t *handle, struct buffer_head *bh) 1335 static int write_end_fn(handle_t *handle, struct buffer_head *bh)
1346 { 1336 {
1347 if (!buffer_mapped(bh) || buffer_freed(bh)) 1337 if (!buffer_mapped(bh) || buffer_freed(bh))
1348 return 0; 1338 return 0;
1349 set_buffer_uptodate(bh); 1339 set_buffer_uptodate(bh);
1350 return ext3_journal_dirty_metadata(handle, bh); 1340 return ext3_journal_dirty_metadata(handle, bh);
1351 } 1341 }
1352 1342
1353 /* 1343 /*
1354 * This is nasty and subtle: ext3_write_begin() could have allocated blocks 1344 * This is nasty and subtle: ext3_write_begin() could have allocated blocks
1355 * for the whole page but later we failed to copy the data in. Update inode 1345 * for the whole page but later we failed to copy the data in. Update inode
1356 * size according to what we managed to copy. The rest is going to be 1346 * size according to what we managed to copy. The rest is going to be
1357 * truncated in write_end function. 1347 * truncated in write_end function.
1358 */ 1348 */
1359 static void update_file_sizes(struct inode *inode, loff_t pos, unsigned copied) 1349 static void update_file_sizes(struct inode *inode, loff_t pos, unsigned copied)
1360 { 1350 {
1361 /* What matters to us is i_disksize. We don't write i_size anywhere */ 1351 /* What matters to us is i_disksize. We don't write i_size anywhere */
1362 if (pos + copied > inode->i_size) 1352 if (pos + copied > inode->i_size)
1363 i_size_write(inode, pos + copied); 1353 i_size_write(inode, pos + copied);
1364 if (pos + copied > EXT3_I(inode)->i_disksize) { 1354 if (pos + copied > EXT3_I(inode)->i_disksize) {
1365 EXT3_I(inode)->i_disksize = pos + copied; 1355 EXT3_I(inode)->i_disksize = pos + copied;
1366 mark_inode_dirty(inode); 1356 mark_inode_dirty(inode);
1367 } 1357 }
1368 } 1358 }
1369 1359
1370 /* 1360 /*
1371 * We need to pick up the new inode size which generic_commit_write gave us 1361 * We need to pick up the new inode size which generic_commit_write gave us
1372 * `file' can be NULL - eg, when called from page_symlink(). 1362 * `file' can be NULL - eg, when called from page_symlink().
1373 * 1363 *
1374 * ext3 never places buffers on inode->i_mapping->private_list. metadata 1364 * ext3 never places buffers on inode->i_mapping->private_list. metadata
1375 * buffers are managed internally. 1365 * buffers are managed internally.
1376 */ 1366 */
1377 static int ext3_ordered_write_end(struct file *file, 1367 static int ext3_ordered_write_end(struct file *file,
1378 struct address_space *mapping, 1368 struct address_space *mapping,
1379 loff_t pos, unsigned len, unsigned copied, 1369 loff_t pos, unsigned len, unsigned copied,
1380 struct page *page, void *fsdata) 1370 struct page *page, void *fsdata)
1381 { 1371 {
1382 handle_t *handle = ext3_journal_current_handle(); 1372 handle_t *handle = ext3_journal_current_handle();
1383 struct inode *inode = file->f_mapping->host; 1373 struct inode *inode = file->f_mapping->host;
1384 unsigned from, to; 1374 unsigned from, to;
1385 int ret = 0, ret2; 1375 int ret = 0, ret2;
1386 1376
1387 trace_ext3_ordered_write_end(inode, pos, len, copied); 1377 trace_ext3_ordered_write_end(inode, pos, len, copied);
1388 copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); 1378 copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
1389 1379
1390 from = pos & (PAGE_CACHE_SIZE - 1); 1380 from = pos & (PAGE_CACHE_SIZE - 1);
1391 to = from + copied; 1381 to = from + copied;
1392 ret = walk_page_buffers(handle, page_buffers(page), 1382 ret = walk_page_buffers(handle, page_buffers(page),
1393 from, to, NULL, journal_dirty_data_fn); 1383 from, to, NULL, journal_dirty_data_fn);
1394 1384
1395 if (ret == 0) 1385 if (ret == 0)
1396 update_file_sizes(inode, pos, copied); 1386 update_file_sizes(inode, pos, copied);
1397 /* 1387 /*
1398 * There may be allocated blocks outside of i_size because 1388 * There may be allocated blocks outside of i_size because
1399 * we failed to copy some data. Prepare for truncate. 1389 * we failed to copy some data. Prepare for truncate.
1400 */ 1390 */
1401 if (pos + len > inode->i_size && ext3_can_truncate(inode)) 1391 if (pos + len > inode->i_size && ext3_can_truncate(inode))
1402 ext3_orphan_add(handle, inode); 1392 ext3_orphan_add(handle, inode);
1403 ret2 = ext3_journal_stop(handle); 1393 ret2 = ext3_journal_stop(handle);
1404 if (!ret) 1394 if (!ret)
1405 ret = ret2; 1395 ret = ret2;
1406 unlock_page(page); 1396 unlock_page(page);
1407 page_cache_release(page); 1397 page_cache_release(page);
1408 1398
1409 if (pos + len > inode->i_size) 1399 if (pos + len > inode->i_size)
1410 ext3_truncate_failed_write(inode); 1400 ext3_truncate_failed_write(inode);
1411 return ret ? ret : copied; 1401 return ret ? ret : copied;
1412 } 1402 }
1413 1403
1414 static int ext3_writeback_write_end(struct file *file, 1404 static int ext3_writeback_write_end(struct file *file,
1415 struct address_space *mapping, 1405 struct address_space *mapping,
1416 loff_t pos, unsigned len, unsigned copied, 1406 loff_t pos, unsigned len, unsigned copied,
1417 struct page *page, void *fsdata) 1407 struct page *page, void *fsdata)
1418 { 1408 {
1419 handle_t *handle = ext3_journal_current_handle(); 1409 handle_t *handle = ext3_journal_current_handle();
1420 struct inode *inode = file->f_mapping->host; 1410 struct inode *inode = file->f_mapping->host;
1421 int ret; 1411 int ret;
1422 1412
1423 trace_ext3_writeback_write_end(inode, pos, len, copied); 1413 trace_ext3_writeback_write_end(inode, pos, len, copied);
1424 copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); 1414 copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
1425 update_file_sizes(inode, pos, copied); 1415 update_file_sizes(inode, pos, copied);
1426 /* 1416 /*
1427 * There may be allocated blocks outside of i_size because 1417 * There may be allocated blocks outside of i_size because
1428 * we failed to copy some data. Prepare for truncate. 1418 * we failed to copy some data. Prepare for truncate.
1429 */ 1419 */
1430 if (pos + len > inode->i_size && ext3_can_truncate(inode)) 1420 if (pos + len > inode->i_size && ext3_can_truncate(inode))
1431 ext3_orphan_add(handle, inode); 1421 ext3_orphan_add(handle, inode);
1432 ret = ext3_journal_stop(handle); 1422 ret = ext3_journal_stop(handle);
1433 unlock_page(page); 1423 unlock_page(page);
1434 page_cache_release(page); 1424 page_cache_release(page);
1435 1425
1436 if (pos + len > inode->i_size) 1426 if (pos + len > inode->i_size)
1437 ext3_truncate_failed_write(inode); 1427 ext3_truncate_failed_write(inode);
1438 return ret ? ret : copied; 1428 return ret ? ret : copied;
1439 } 1429 }
1440 1430
1441 static int ext3_journalled_write_end(struct file *file, 1431 static int ext3_journalled_write_end(struct file *file,
1442 struct address_space *mapping, 1432 struct address_space *mapping,
1443 loff_t pos, unsigned len, unsigned copied, 1433 loff_t pos, unsigned len, unsigned copied,
1444 struct page *page, void *fsdata) 1434 struct page *page, void *fsdata)
1445 { 1435 {
1446 handle_t *handle = ext3_journal_current_handle(); 1436 handle_t *handle = ext3_journal_current_handle();
1447 struct inode *inode = mapping->host; 1437 struct inode *inode = mapping->host;
1448 struct ext3_inode_info *ei = EXT3_I(inode); 1438 struct ext3_inode_info *ei = EXT3_I(inode);
1449 int ret = 0, ret2; 1439 int ret = 0, ret2;
1450 int partial = 0; 1440 int partial = 0;
1451 unsigned from, to; 1441 unsigned from, to;
1452 1442
1453 trace_ext3_journalled_write_end(inode, pos, len, copied); 1443 trace_ext3_journalled_write_end(inode, pos, len, copied);
1454 from = pos & (PAGE_CACHE_SIZE - 1); 1444 from = pos & (PAGE_CACHE_SIZE - 1);
1455 to = from + len; 1445 to = from + len;
1456 1446
1457 if (copied < len) { 1447 if (copied < len) {
1458 if (!PageUptodate(page)) 1448 if (!PageUptodate(page))
1459 copied = 0; 1449 copied = 0;
1460 page_zero_new_buffers(page, from + copied, to); 1450 page_zero_new_buffers(page, from + copied, to);
1461 to = from + copied; 1451 to = from + copied;
1462 } 1452 }
1463 1453
1464 ret = walk_page_buffers(handle, page_buffers(page), from, 1454 ret = walk_page_buffers(handle, page_buffers(page), from,
1465 to, &partial, write_end_fn); 1455 to, &partial, write_end_fn);
1466 if (!partial) 1456 if (!partial)
1467 SetPageUptodate(page); 1457 SetPageUptodate(page);
1468 1458
1469 if (pos + copied > inode->i_size) 1459 if (pos + copied > inode->i_size)
1470 i_size_write(inode, pos + copied); 1460 i_size_write(inode, pos + copied);
1471 /* 1461 /*
1472 * There may be allocated blocks outside of i_size because 1462 * There may be allocated blocks outside of i_size because
1473 * we failed to copy some data. Prepare for truncate. 1463 * we failed to copy some data. Prepare for truncate.
1474 */ 1464 */
1475 if (pos + len > inode->i_size && ext3_can_truncate(inode)) 1465 if (pos + len > inode->i_size && ext3_can_truncate(inode))
1476 ext3_orphan_add(handle, inode); 1466 ext3_orphan_add(handle, inode);
1477 ext3_set_inode_state(inode, EXT3_STATE_JDATA); 1467 ext3_set_inode_state(inode, EXT3_STATE_JDATA);
1478 atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid); 1468 atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid);
1479 if (inode->i_size > ei->i_disksize) { 1469 if (inode->i_size > ei->i_disksize) {
1480 ei->i_disksize = inode->i_size; 1470 ei->i_disksize = inode->i_size;
1481 ret2 = ext3_mark_inode_dirty(handle, inode); 1471 ret2 = ext3_mark_inode_dirty(handle, inode);
1482 if (!ret) 1472 if (!ret)
1483 ret = ret2; 1473 ret = ret2;
1484 } 1474 }
1485 1475
1486 ret2 = ext3_journal_stop(handle); 1476 ret2 = ext3_journal_stop(handle);
1487 if (!ret) 1477 if (!ret)
1488 ret = ret2; 1478 ret = ret2;
1489 unlock_page(page); 1479 unlock_page(page);
1490 page_cache_release(page); 1480 page_cache_release(page);
1491 1481
1492 if (pos + len > inode->i_size) 1482 if (pos + len > inode->i_size)
1493 ext3_truncate_failed_write(inode); 1483 ext3_truncate_failed_write(inode);
1494 return ret ? ret : copied; 1484 return ret ? ret : copied;
1495 } 1485 }
1496 1486
1497 /* 1487 /*
1498 * bmap() is special. It gets used by applications such as lilo and by 1488 * bmap() is special. It gets used by applications such as lilo and by
1499 * the swapper to find the on-disk block of a specific piece of data. 1489 * the swapper to find the on-disk block of a specific piece of data.
1500 * 1490 *
1501 * Naturally, this is dangerous if the block concerned is still in the 1491 * Naturally, this is dangerous if the block concerned is still in the
1502 * journal. If somebody makes a swapfile on an ext3 data-journaling 1492 * journal. If somebody makes a swapfile on an ext3 data-journaling
1503 * filesystem and enables swap, then they may get a nasty shock when the 1493 * filesystem and enables swap, then they may get a nasty shock when the
1504 * data getting swapped to that swapfile suddenly gets overwritten by 1494 * data getting swapped to that swapfile suddenly gets overwritten by
1505 * the original zero's written out previously to the journal and 1495 * the original zero's written out previously to the journal and
1506 * awaiting writeback in the kernel's buffer cache. 1496 * awaiting writeback in the kernel's buffer cache.
1507 * 1497 *
1508 * So, if we see any bmap calls here on a modified, data-journaled file, 1498 * So, if we see any bmap calls here on a modified, data-journaled file,
1509 * take extra steps to flush any blocks which might be in the cache. 1499 * take extra steps to flush any blocks which might be in the cache.
1510 */ 1500 */
1511 static sector_t ext3_bmap(struct address_space *mapping, sector_t block) 1501 static sector_t ext3_bmap(struct address_space *mapping, sector_t block)
1512 { 1502 {
1513 struct inode *inode = mapping->host; 1503 struct inode *inode = mapping->host;
1514 journal_t *journal; 1504 journal_t *journal;
1515 int err; 1505 int err;
1516 1506
1517 if (ext3_test_inode_state(inode, EXT3_STATE_JDATA)) { 1507 if (ext3_test_inode_state(inode, EXT3_STATE_JDATA)) {
1518 /* 1508 /*
1519 * This is a REALLY heavyweight approach, but the use of 1509 * This is a REALLY heavyweight approach, but the use of
1520 * bmap on dirty files is expected to be extremely rare: 1510 * bmap on dirty files is expected to be extremely rare:
1521 * only if we run lilo or swapon on a freshly made file 1511 * only if we run lilo or swapon on a freshly made file
1522 * do we expect this to happen. 1512 * do we expect this to happen.
1523 * 1513 *
1524 * (bmap requires CAP_SYS_RAWIO so this does not 1514 * (bmap requires CAP_SYS_RAWIO so this does not
1525 * represent an unprivileged user DOS attack --- we'd be 1515 * represent an unprivileged user DOS attack --- we'd be
1526 * in trouble if mortal users could trigger this path at 1516 * in trouble if mortal users could trigger this path at
1527 * will.) 1517 * will.)
1528 * 1518 *
1529 * NB. EXT3_STATE_JDATA is not set on files other than 1519 * NB. EXT3_STATE_JDATA is not set on files other than
1530 * regular files. If somebody wants to bmap a directory 1520 * regular files. If somebody wants to bmap a directory
1531 * or symlink and gets confused because the buffer 1521 * or symlink and gets confused because the buffer
1532 * hasn't yet been flushed to disk, they deserve 1522 * hasn't yet been flushed to disk, they deserve
1533 * everything they get. 1523 * everything they get.
1534 */ 1524 */
1535 1525
1536 ext3_clear_inode_state(inode, EXT3_STATE_JDATA); 1526 ext3_clear_inode_state(inode, EXT3_STATE_JDATA);
1537 journal = EXT3_JOURNAL(inode); 1527 journal = EXT3_JOURNAL(inode);
1538 journal_lock_updates(journal); 1528 journal_lock_updates(journal);
1539 err = journal_flush(journal); 1529 err = journal_flush(journal);
1540 journal_unlock_updates(journal); 1530 journal_unlock_updates(journal);
1541 1531
1542 if (err) 1532 if (err)
1543 return 0; 1533 return 0;
1544 } 1534 }
1545 1535
1546 return generic_block_bmap(mapping,block,ext3_get_block); 1536 return generic_block_bmap(mapping,block,ext3_get_block);
1547 } 1537 }
1548 1538
1549 static int bget_one(handle_t *handle, struct buffer_head *bh) 1539 static int bget_one(handle_t *handle, struct buffer_head *bh)
1550 { 1540 {
1551 get_bh(bh); 1541 get_bh(bh);
1552 return 0; 1542 return 0;
1553 } 1543 }
1554 1544
1555 static int bput_one(handle_t *handle, struct buffer_head *bh) 1545 static int bput_one(handle_t *handle, struct buffer_head *bh)
1556 { 1546 {
1557 put_bh(bh); 1547 put_bh(bh);
1558 return 0; 1548 return 0;
1559 } 1549 }
1560 1550
1561 static int buffer_unmapped(handle_t *handle, struct buffer_head *bh) 1551 static int buffer_unmapped(handle_t *handle, struct buffer_head *bh)
1562 { 1552 {
1563 return !buffer_mapped(bh); 1553 return !buffer_mapped(bh);
1564 } 1554 }
1565 1555
1566 /* 1556 /*
1567 * Note that we always start a transaction even if we're not journalling 1557 * Note that we always start a transaction even if we're not journalling
1568 * data. This is to preserve ordering: any hole instantiation within 1558 * data. This is to preserve ordering: any hole instantiation within
1569 * __block_write_full_page -> ext3_get_block() should be journalled 1559 * __block_write_full_page -> ext3_get_block() should be journalled
1570 * along with the data so we don't crash and then get metadata which 1560 * along with the data so we don't crash and then get metadata which
1571 * refers to old data. 1561 * refers to old data.
1572 * 1562 *
1573 * In all journalling modes block_write_full_page() will start the I/O. 1563 * In all journalling modes block_write_full_page() will start the I/O.
1574 * 1564 *
1575 * Problem: 1565 * Problem:
1576 * 1566 *
1577 * ext3_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() -> 1567 * ext3_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() ->
1578 * ext3_writepage() 1568 * ext3_writepage()
1579 * 1569 *
1580 * Similar for: 1570 * Similar for:
1581 * 1571 *
1582 * ext3_file_write() -> generic_file_write() -> __alloc_pages() -> ... 1572 * ext3_file_write() -> generic_file_write() -> __alloc_pages() -> ...
1583 * 1573 *
1584 * Same applies to ext3_get_block(). We will deadlock on various things like 1574 * Same applies to ext3_get_block(). We will deadlock on various things like
1585 * lock_journal and i_truncate_mutex. 1575 * lock_journal and i_truncate_mutex.
1586 * 1576 *
1587 * Setting PF_MEMALLOC here doesn't work - too many internal memory 1577 * Setting PF_MEMALLOC here doesn't work - too many internal memory
1588 * allocations fail. 1578 * allocations fail.
1589 * 1579 *
1590 * 16May01: If we're reentered then journal_current_handle() will be 1580 * 16May01: If we're reentered then journal_current_handle() will be
1591 * non-zero. We simply *return*. 1581 * non-zero. We simply *return*.
1592 * 1582 *
1593 * 1 July 2001: @@@ FIXME: 1583 * 1 July 2001: @@@ FIXME:
1594 * In journalled data mode, a data buffer may be metadata against the 1584 * In journalled data mode, a data buffer may be metadata against the
1595 * current transaction. But the same file is part of a shared mapping 1585 * current transaction. But the same file is part of a shared mapping
1596 * and someone does a writepage() on it. 1586 * and someone does a writepage() on it.
1597 * 1587 *
1598 * We will move the buffer onto the async_data list, but *after* it has 1588 * We will move the buffer onto the async_data list, but *after* it has
1599 * been dirtied. So there's a small window where we have dirty data on 1589 * been dirtied. So there's a small window where we have dirty data on
1600 * BJ_Metadata. 1590 * BJ_Metadata.
1601 * 1591 *
1602 * Note that this only applies to the last partial page in the file. The 1592 * Note that this only applies to the last partial page in the file. The
1603 * bit which block_write_full_page() uses prepare/commit for. (That's 1593 * bit which block_write_full_page() uses prepare/commit for. (That's
1604 * broken code anyway: it's wrong for msync()). 1594 * broken code anyway: it's wrong for msync()).
1605 * 1595 *
1606 * It's a rare case: affects the final partial page, for journalled data 1596 * It's a rare case: affects the final partial page, for journalled data
1607 * where the file is subject to bith write() and writepage() in the same 1597 * where the file is subject to bith write() and writepage() in the same
1608 * transction. To fix it we'll need a custom block_write_full_page(). 1598 * transction. To fix it we'll need a custom block_write_full_page().
1609 * We'll probably need that anyway for journalling writepage() output. 1599 * We'll probably need that anyway for journalling writepage() output.
1610 * 1600 *
1611 * We don't honour synchronous mounts for writepage(). That would be 1601 * We don't honour synchronous mounts for writepage(). That would be
1612 * disastrous. Any write() or metadata operation will sync the fs for 1602 * disastrous. Any write() or metadata operation will sync the fs for
1613 * us. 1603 * us.
1614 * 1604 *
1615 * AKPM2: if all the page's buffers are mapped to disk and !data=journal, 1605 * AKPM2: if all the page's buffers are mapped to disk and !data=journal,
1616 * we don't need to open a transaction here. 1606 * we don't need to open a transaction here.
1617 */ 1607 */
1618 static int ext3_ordered_writepage(struct page *page, 1608 static int ext3_ordered_writepage(struct page *page,
1619 struct writeback_control *wbc) 1609 struct writeback_control *wbc)
1620 { 1610 {
1621 struct inode *inode = page->mapping->host; 1611 struct inode *inode = page->mapping->host;
1622 struct buffer_head *page_bufs; 1612 struct buffer_head *page_bufs;
1623 handle_t *handle = NULL; 1613 handle_t *handle = NULL;
1624 int ret = 0; 1614 int ret = 0;
1625 int err; 1615 int err;
1626 1616
1627 J_ASSERT(PageLocked(page)); 1617 J_ASSERT(PageLocked(page));
1628 /* 1618 /*
1629 * We don't want to warn for emergency remount. The condition is 1619 * We don't want to warn for emergency remount. The condition is
1630 * ordered to avoid dereferencing inode->i_sb in non-error case to 1620 * ordered to avoid dereferencing inode->i_sb in non-error case to
1631 * avoid slow-downs. 1621 * avoid slow-downs.
1632 */ 1622 */
1633 WARN_ON_ONCE(IS_RDONLY(inode) && 1623 WARN_ON_ONCE(IS_RDONLY(inode) &&
1634 !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS)); 1624 !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS));
1635 1625
1636 /* 1626 /*
1637 * We give up here if we're reentered, because it might be for a 1627 * We give up here if we're reentered, because it might be for a
1638 * different filesystem. 1628 * different filesystem.
1639 */ 1629 */
1640 if (ext3_journal_current_handle()) 1630 if (ext3_journal_current_handle())
1641 goto out_fail; 1631 goto out_fail;
1642 1632
1643 trace_ext3_ordered_writepage(page); 1633 trace_ext3_ordered_writepage(page);
1644 if (!page_has_buffers(page)) { 1634 if (!page_has_buffers(page)) {
1645 create_empty_buffers(page, inode->i_sb->s_blocksize, 1635 create_empty_buffers(page, inode->i_sb->s_blocksize,
1646 (1 << BH_Dirty)|(1 << BH_Uptodate)); 1636 (1 << BH_Dirty)|(1 << BH_Uptodate));
1647 page_bufs = page_buffers(page); 1637 page_bufs = page_buffers(page);
1648 } else { 1638 } else {
1649 page_bufs = page_buffers(page); 1639 page_bufs = page_buffers(page);
1650 if (!walk_page_buffers(NULL, page_bufs, 0, PAGE_CACHE_SIZE, 1640 if (!walk_page_buffers(NULL, page_bufs, 0, PAGE_CACHE_SIZE,
1651 NULL, buffer_unmapped)) { 1641 NULL, buffer_unmapped)) {
1652 /* Provide NULL get_block() to catch bugs if buffers 1642 /* Provide NULL get_block() to catch bugs if buffers
1653 * weren't really mapped */ 1643 * weren't really mapped */
1654 return block_write_full_page(page, NULL, wbc); 1644 return block_write_full_page(page, NULL, wbc);
1655 } 1645 }
1656 } 1646 }
1657 handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode)); 1647 handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));
1658 1648
1659 if (IS_ERR(handle)) { 1649 if (IS_ERR(handle)) {
1660 ret = PTR_ERR(handle); 1650 ret = PTR_ERR(handle);
1661 goto out_fail; 1651 goto out_fail;
1662 } 1652 }
1663 1653
1664 walk_page_buffers(handle, page_bufs, 0, 1654 walk_page_buffers(handle, page_bufs, 0,
1665 PAGE_CACHE_SIZE, NULL, bget_one); 1655 PAGE_CACHE_SIZE, NULL, bget_one);
1666 1656
1667 ret = block_write_full_page(page, ext3_get_block, wbc); 1657 ret = block_write_full_page(page, ext3_get_block, wbc);
1668 1658
1669 /* 1659 /*
1670 * The page can become unlocked at any point now, and 1660 * The page can become unlocked at any point now, and
1671 * truncate can then come in and change things. So we 1661 * truncate can then come in and change things. So we
1672 * can't touch *page from now on. But *page_bufs is 1662 * can't touch *page from now on. But *page_bufs is
1673 * safe due to elevated refcount. 1663 * safe due to elevated refcount.
1674 */ 1664 */
1675 1665
1676 /* 1666 /*
1677 * And attach them to the current transaction. But only if 1667 * And attach them to the current transaction. But only if
1678 * block_write_full_page() succeeded. Otherwise they are unmapped, 1668 * block_write_full_page() succeeded. Otherwise they are unmapped,
1679 * and generally junk. 1669 * and generally junk.
1680 */ 1670 */
1681 if (ret == 0) { 1671 if (ret == 0) {
1682 err = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE, 1672 err = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE,
1683 NULL, journal_dirty_data_fn); 1673 NULL, journal_dirty_data_fn);
1684 if (!ret) 1674 if (!ret)
1685 ret = err; 1675 ret = err;
1686 } 1676 }
1687 walk_page_buffers(handle, page_bufs, 0, 1677 walk_page_buffers(handle, page_bufs, 0,
1688 PAGE_CACHE_SIZE, NULL, bput_one); 1678 PAGE_CACHE_SIZE, NULL, bput_one);
1689 err = ext3_journal_stop(handle); 1679 err = ext3_journal_stop(handle);
1690 if (!ret) 1680 if (!ret)
1691 ret = err; 1681 ret = err;
1692 return ret; 1682 return ret;
1693 1683
1694 out_fail: 1684 out_fail:
1695 redirty_page_for_writepage(wbc, page); 1685 redirty_page_for_writepage(wbc, page);
1696 unlock_page(page); 1686 unlock_page(page);
1697 return ret; 1687 return ret;
1698 } 1688 }
1699 1689
/*
 * ->writepage for data=writeback mode: no data/metadata ordering is
 * enforced - just push the page through block_write_full_page() with
 * ext3_get_block().  A journal handle is held across the writeout;
 * presumably so ext3_get_block() can allocate blocks for unmapped
 * (mmapped-hole) buffers - see ext3_writepage_trans_blocks().
 */
static int ext3_writeback_writepage(struct page *page,
				struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	handle_t *handle = NULL;
	int ret = 0;
	int err;

	J_ASSERT(PageLocked(page));
	/*
	 * We don't want to warn for emergency remount. The condition is
	 * ordered to avoid dereferencing inode->i_sb in non-error case to
	 * avoid slow-downs.
	 */
	WARN_ON_ONCE(IS_RDONLY(inode) &&
		     !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS));

	/* Reentered while a handle is open - might be for a different fs. */
	if (ext3_journal_current_handle())
		goto out_fail;

	trace_ext3_writeback_writepage(page);
	if (page_has_buffers(page)) {
		if (!walk_page_buffers(NULL, page_buffers(page), 0,
				      PAGE_CACHE_SIZE, NULL, buffer_unmapped)) {
			/* Provide NULL get_block() to catch bugs if buffers
			 * weren't really mapped */
			return block_write_full_page(page, NULL, wbc);
		}
	}

	handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		goto out_fail;
	}

	ret = block_write_full_page(page, ext3_get_block, wbc);

	/* Writeout error wins over journal-stop error. */
	err = ext3_journal_stop(handle);
	if (!ret)
		ret = err;
	return ret;

out_fail:
	redirty_page_for_writepage(wbc, page);
	unlock_page(page);
	return ret;
}
1748 1738
/*
 * ->writepage for data=journal mode.  Two cases:
 *  - The page was dirtied via ext3_journalled_set_page_dirty() (PageChecked
 *    set) or has no buffers yet: attach buffers and journal the data itself
 *    through do_journal_get_write_access()/write_end_fn.
 *  - Otherwise the page may hold checkpoint-mode buffers; fall back to
 *    block_write_full_page(), which does the right thing for those.
 */
static int ext3_journalled_writepage(struct page *page,
				struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	handle_t *handle = NULL;
	int ret = 0;
	int err;

	J_ASSERT(PageLocked(page));
	/*
	 * We don't want to warn for emergency remount. The condition is
	 * ordered to avoid dereferencing inode->i_sb in non-error case to
	 * avoid slow-downs.
	 */
	WARN_ON_ONCE(IS_RDONLY(inode) &&
		     !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS));

	/* Reentered while a handle is open - might be for a different fs. */
	if (ext3_journal_current_handle())
		goto no_write;

	trace_ext3_journalled_writepage(page);
	handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		goto no_write;
	}

	if (!page_has_buffers(page) || PageChecked(page)) {
		/*
		 * It's mmapped pagecache.  Add buffers and journal it.  There
		 * doesn't seem much point in redirtying the page here.
		 */
		ClearPageChecked(page);
		ret = __block_write_begin(page, 0, PAGE_CACHE_SIZE,
					  ext3_get_block);
		if (ret != 0) {
			ext3_journal_stop(handle);
			goto out_unlock;
		}
		ret = walk_page_buffers(handle, page_buffers(page), 0,
			PAGE_CACHE_SIZE, NULL, do_journal_get_write_access);

		/* Even if get_write_access failed, still run write_end_fn
		 * over all buffers; first error is what gets reported. */
		err = walk_page_buffers(handle, page_buffers(page), 0,
				PAGE_CACHE_SIZE, NULL, write_end_fn);
		if (ret == 0)
			ret = err;
		ext3_set_inode_state(inode, EXT3_STATE_JDATA);
		/* Remember which transaction carries this data for fsync. */
		atomic_set(&EXT3_I(inode)->i_datasync_tid,
			   handle->h_transaction->t_tid);
		unlock_page(page);
	} else {
		/*
		 * It may be a page full of checkpoint-mode buffers.  We don't
		 * really know unless we go poke around in the buffer_heads.
		 * But block_write_full_page will do the right thing.
		 */
		ret = block_write_full_page(page, ext3_get_block, wbc);
	}
	err = ext3_journal_stop(handle);
	if (!ret)
		ret = err;
out:
	return ret;

no_write:
	redirty_page_for_writepage(wbc, page);
out_unlock:
	unlock_page(page);
	goto out;
}
1819 1809
/* ->readpage: read one page through the generic mpage path, using
 * ext3_get_block() to map file blocks to disk blocks. */
static int ext3_readpage(struct file *file, struct page *page)
{
	trace_ext3_readpage(page);
	return mpage_readpage(page, ext3_get_block);
}
1825 1815
/* ->readpages: readahead of @nr_pages pages via the generic mpage
 * helper, again with ext3_get_block() as the block mapper. */
static int
ext3_readpages(struct file *file, struct address_space *mapping,
		struct list_head *pages, unsigned nr_pages)
{
	return mpage_readpages(mapping, pages, nr_pages, ext3_get_block);
}
1832 1822
/*
 * ->invalidatepage: hand the (partial) page invalidation to the journal
 * layer.  @offset is the start of the invalidated region within the page.
 */
static void ext3_invalidatepage(struct page *page, unsigned long offset)
{
	journal_t *journal = EXT3_JOURNAL(page->mapping->host);

	trace_ext3_invalidatepage(page, offset);

	/*
	 * If it's a full truncate we just forget about the pending dirtying
	 */
	if (offset == 0)
		ClearPageChecked(page);

	journal_invalidatepage(journal, page, offset);
}
1847 1837
/*
 * ->releasepage: let the journal decide whether the page's buffers can
 * be freed.  Returns 0 (refuse) for buffer-less pages; a page still
 * marked PageChecked here indicates pending journalled dirt, hence the
 * WARN_ON.
 */
static int ext3_releasepage(struct page *page, gfp_t wait)
{
	journal_t *journal = EXT3_JOURNAL(page->mapping->host);

	trace_ext3_releasepage(page);
	WARN_ON(PageChecked(page));
	if (!page_has_buffers(page))
		return 0;
	return journal_try_to_free_buffers(journal, page, wait);
}
1858 1848
1859 /* 1849 /*
1860 * If the O_DIRECT write will extend the file then add this inode to the 1850 * If the O_DIRECT write will extend the file then add this inode to the
1861 * orphan list. So recovery will truncate it back to the original size 1851 * orphan list. So recovery will truncate it back to the original size
1862 * if the machine crashes during the write. 1852 * if the machine crashes during the write.
1863 * 1853 *
1864 * If the O_DIRECT write is instantiating holes inside i_size and the machine 1854 * If the O_DIRECT write is instantiating holes inside i_size and the machine
1865 * crashes then stale disk data _may_ be exposed inside the file. But current 1855 * crashes then stale disk data _may_ be exposed inside the file. But current
1866 * VFS code falls back into buffered path in that case so we are safe. 1856 * VFS code falls back into buffered path in that case so we are safe.
1867 */ 1857 */
static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
			const struct iovec *iov, loff_t offset,
			unsigned long nr_segs)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	struct ext3_inode_info *ei = EXT3_I(inode);
	handle_t *handle;
	ssize_t ret;
	int orphan = 0;
	size_t count = iov_length(iov, nr_segs);
	int retries = 0;

	trace_ext3_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);

	if (rw == WRITE) {
		loff_t final_size = offset + count;

		if (final_size > inode->i_size) {
			/* Credits for sb + inode write */
			handle = ext3_journal_start(inode, 2);
			if (IS_ERR(handle)) {
				ret = PTR_ERR(handle);
				goto out;
			}
			/* Size-extending write: put the inode on the orphan
			 * list so a crash mid-write gets truncated back to
			 * the original size on recovery (see comment above). */
			ret = ext3_orphan_add(handle, inode);
			if (ret) {
				ext3_journal_stop(handle);
				goto out;
			}
			orphan = 1;
			ei->i_disksize = inode->i_size;
			ext3_journal_stop(handle);
		}
	}

retry:
	ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
				 ext3_get_block);
	/*
	 * In case of error extending write may have instantiated a few
	 * blocks outside i_size. Trim these off again.
	 */
	if (unlikely((rw & WRITE) && ret < 0)) {
		loff_t isize = i_size_read(inode);
		loff_t end = offset + iov_length(iov, nr_segs);

		if (end > isize)
			ext3_truncate_failed_direct_write(inode);
	}
	/* ENOSPC may be transient while a transaction commits frees. */
	if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
		goto retry;

	if (orphan) {
		int err;

		/* Credits for sb + inode write */
		handle = ext3_journal_start(inode, 2);
		if (IS_ERR(handle)) {
			/* This is really bad luck. We've written the data
			 * but cannot extend i_size. Truncate allocated blocks
			 * and pretend the write failed... */
			ext3_truncate_failed_direct_write(inode);
			ret = PTR_ERR(handle);
			goto out;
		}
		/* i_nlink == 0 means a real unlink raced us; leave the
		 * orphan entry so the inode gets cleaned up. */
		if (inode->i_nlink)
			ext3_orphan_del(handle, inode);
		if (ret > 0) {
			loff_t end = offset + ret;
			if (end > inode->i_size) {
				ei->i_disksize = end;
				i_size_write(inode, end);
				/*
				 * We're going to return a positive `ret'
				 * here due to non-zero-length I/O, so there's
				 * no way of reporting error returns from
				 * ext3_mark_inode_dirty() to userspace.  So
				 * ignore it.
				 */
				ext3_mark_inode_dirty(handle, inode);
			}
		}
		err = ext3_journal_stop(handle);
		if (ret == 0)
			ret = err;
	}
out:
	trace_ext3_direct_IO_exit(inode, offset,
				iov_length(iov, nr_segs), rw, ret);
	return ret;
}
1960 1950
1961 /* 1951 /*
1962 * Pages can be marked dirty completely asynchronously from ext3's journalling 1952 * Pages can be marked dirty completely asynchronously from ext3's journalling
1963 * activity. By filemap_sync_pte(), try_to_unmap_one(), etc. We cannot do 1953 * activity. By filemap_sync_pte(), try_to_unmap_one(), etc. We cannot do
1964 * much here because ->set_page_dirty is called under VFS locks. The page is 1954 * much here because ->set_page_dirty is called under VFS locks. The page is
1965 * not necessarily locked. 1955 * not necessarily locked.
1966 * 1956 *
1967 * We cannot just dirty the page and leave attached buffers clean, because the 1957 * We cannot just dirty the page and leave attached buffers clean, because the
1968 * buffers' dirty state is "definitive". We cannot just set the buffers dirty 1958 * buffers' dirty state is "definitive". We cannot just set the buffers dirty
1969 * or jbddirty because all the journalling code will explode. 1959 * or jbddirty because all the journalling code will explode.
1970 * 1960 *
1971 * So what we do is to mark the page "pending dirty" and next time writepage 1961 * So what we do is to mark the page "pending dirty" and next time writepage
1972 * is called, propagate that into the buffers appropriately. 1962 * is called, propagate that into the buffers appropriately.
1973 */ 1963 */
/*
 * See the comment above: mark the page "pending dirty" via PageChecked so
 * ext3_journalled_writepage() knows to journal its buffers later, and
 * dirty the page without touching the attached buffers' dirty state.
 */
static int ext3_journalled_set_page_dirty(struct page *page)
{
	SetPageChecked(page);
	return __set_page_dirty_nobuffers(page);
}
1979 1969
/* Address-space operations installed for data=ordered inodes
 * (see ext3_set_aops() below). */
static const struct address_space_operations ext3_ordered_aops = {
	.readpage		= ext3_readpage,
	.readpages		= ext3_readpages,
	.writepage		= ext3_ordered_writepage,
	.write_begin		= ext3_write_begin,
	.write_end		= ext3_ordered_write_end,
	.bmap			= ext3_bmap,
	.invalidatepage		= ext3_invalidatepage,
	.releasepage		= ext3_releasepage,
	.direct_IO		= ext3_direct_IO,
	.migratepage		= buffer_migrate_page,
	.is_partially_uptodate  = block_is_partially_uptodate,
	.error_remove_page	= generic_error_remove_page,
};
1994 1984
/* Address-space operations for data=writeback inodes; differs from the
 * ordered set only in ->writepage and ->write_end. */
static const struct address_space_operations ext3_writeback_aops = {
	.readpage		= ext3_readpage,
	.readpages		= ext3_readpages,
	.writepage		= ext3_writeback_writepage,
	.write_begin		= ext3_write_begin,
	.write_end		= ext3_writeback_write_end,
	.bmap			= ext3_bmap,
	.invalidatepage		= ext3_invalidatepage,
	.releasepage		= ext3_releasepage,
	.direct_IO		= ext3_direct_IO,
	.migratepage		= buffer_migrate_page,
	.is_partially_uptodate  = block_is_partially_uptodate,
	.error_remove_page	= generic_error_remove_page,
};
2009 1999
/* Address-space operations for data=journal inodes.  Note: unlike the
 * other two sets this one overrides ->set_page_dirty and provides no
 * ->direct_IO or ->migratepage. */
static const struct address_space_operations ext3_journalled_aops = {
	.readpage		= ext3_readpage,
	.readpages		= ext3_readpages,
	.writepage		= ext3_journalled_writepage,
	.write_begin		= ext3_write_begin,
	.write_end		= ext3_journalled_write_end,
	.set_page_dirty		= ext3_journalled_set_page_dirty,
	.bmap			= ext3_bmap,
	.invalidatepage		= ext3_invalidatepage,
	.releasepage		= ext3_releasepage,
	.is_partially_uptodate  = block_is_partially_uptodate,
	.error_remove_page	= generic_error_remove_page,
};
2023 2013
2024 void ext3_set_aops(struct inode *inode) 2014 void ext3_set_aops(struct inode *inode)
2025 { 2015 {
2026 if (ext3_should_order_data(inode)) 2016 if (ext3_should_order_data(inode))
2027 inode->i_mapping->a_ops = &ext3_ordered_aops; 2017 inode->i_mapping->a_ops = &ext3_ordered_aops;
2028 else if (ext3_should_writeback_data(inode)) 2018 else if (ext3_should_writeback_data(inode))
2029 inode->i_mapping->a_ops = &ext3_writeback_aops; 2019 inode->i_mapping->a_ops = &ext3_writeback_aops;
2030 else 2020 else
2031 inode->i_mapping->a_ops = &ext3_journalled_aops; 2021 inode->i_mapping->a_ops = &ext3_journalled_aops;
2032 } 2022 }
2033 2023
2034 /* 2024 /*
2035 * ext3_block_truncate_page() zeroes out a mapping from file offset `from' 2025 * ext3_block_truncate_page() zeroes out a mapping from file offset `from'
2036 * up to the end of the block which corresponds to `from'. 2026 * up to the end of the block which corresponds to `from'.
2037 * This required during truncate. We need to physically zero the tail end 2027 * This required during truncate. We need to physically zero the tail end
2038 * of that block so it doesn't yield old data if the file is later grown. 2028 * of that block so it doesn't yield old data if the file is later grown.
2039 */ 2029 */
static int ext3_block_truncate_page(struct inode *inode, loff_t from)
{
	ext3_fsblk_t index = from >> PAGE_CACHE_SHIFT;
	unsigned offset = from & (PAGE_CACHE_SIZE - 1);
	unsigned blocksize, iblock, length, pos;
	struct page *page;
	handle_t *handle = NULL;
	struct buffer_head *bh;
	int err = 0;

	/* Truncated on block boundary - nothing to do */
	blocksize = inode->i_sb->s_blocksize;
	if ((from & (blocksize - 1)) == 0)
		return 0;

	page = grab_cache_page(inode->i_mapping, index);
	if (!page)
		return -ENOMEM;
	/* Number of bytes to zero: from `offset' to the end of its block. */
	length = blocksize - (offset & (blocksize - 1));
	iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);

	if (!page_has_buffers(page))
		create_empty_buffers(page, blocksize, 0);

	/* Find the buffer that contains "offset" */
	bh = page_buffers(page);
	pos = blocksize;
	while (offset >= pos) {
		bh = bh->b_this_page;
		iblock++;
		pos += blocksize;
	}

	err = 0;
	if (buffer_freed(bh)) {
		BUFFER_TRACE(bh, "freed: skip");
		goto unlock;
	}

	if (!buffer_mapped(bh)) {
		BUFFER_TRACE(bh, "unmapped");
		/* Read-only mapping lookup (create == 0). */
		ext3_get_block(inode, iblock, bh, 0);
		/* unmapped? It's a hole - nothing to do */
		if (!buffer_mapped(bh)) {
			BUFFER_TRACE(bh, "still unmapped");
			goto unlock;
		}
	}

	/* Ok, it's mapped. Make sure it's up-to-date */
	if (PageUptodate(page))
		set_buffer_uptodate(bh);

	if (!bh_uptodate_or_lock(bh)) {
		err = bh_submit_read(bh);
		/* Uhhuh. Read error. Complain and punt. */
		if (err)
			goto unlock;
	}

	/* data=writeback mode doesn't need transaction to zero-out data */
	if (!ext3_should_writeback_data(inode)) {
		/* We journal at most one block */
		handle = ext3_journal_start(inode, 1);
		if (IS_ERR(handle)) {
			clear_highpage(page);
			flush_dcache_page(page);
			err = PTR_ERR(handle);
			goto unlock;
		}
	}

	if (ext3_should_journal_data(inode)) {
		BUFFER_TRACE(bh, "get write access");
		err = ext3_journal_get_write_access(handle, bh);
		if (err)
			goto stop;
	}

	zero_user(page, offset, length);
	BUFFER_TRACE(bh, "zeroed end of block");

	err = 0;
	if (ext3_should_journal_data(inode)) {
		err = ext3_journal_dirty_metadata(handle, bh);
	} else {
		/* Ordered mode: file the data buffer on the transaction
		 * before marking it dirty for regular writeback. */
		if (ext3_should_order_data(inode))
			err = ext3_journal_dirty_data(handle, bh);
		mark_buffer_dirty(bh);
	}
stop:
	if (handle)
		ext3_journal_stop(handle);

unlock:
	unlock_page(page);
	page_cache_release(page);
	return err;
}
2139 2129
2140 /* 2130 /*
2141 * Probably it should be a library function... search for first non-zero word 2131 * Probably it should be a library function... search for first non-zero word
2142 * or memcmp with zero_page, whatever is better for particular architecture. 2132 * or memcmp with zero_page, whatever is better for particular architecture.
2143 * Linus? 2133 * Linus?
2144 */ 2134 */
2145 static inline int all_zeroes(__le32 *p, __le32 *q) 2135 static inline int all_zeroes(__le32 *p, __le32 *q)
2146 { 2136 {
2147 while (p < q) 2137 while (p < q)
2148 if (*p++) 2138 if (*p++)
2149 return 0; 2139 return 0;
2150 return 1; 2140 return 1;
2151 } 2141 }
2152 2142
/**
 *	ext3_find_shared - find the indirect blocks for partial truncation.
 *	@inode:	  inode in question
 *	@depth:	  depth of the affected branch
 *	@offsets: offsets of pointers in that branch (see ext3_block_to_path)
 *	@chain:	  place to store the pointers to partial indirect blocks
 *	@top:	  place to the (detached) top of branch
 *
 *	This is a helper function used by ext3_truncate().
 *
 *	When we do truncate() we may have to clean the ends of several
 *	indirect blocks but leave the blocks themselves alive. Block is
 *	partially truncated if some data below the new i_size is referred
 *	from it (and it is on the path to the first completely truncated
 *	data block, indeed).  We have to free the top of that path along
 *	with everything to the right of the path. Since no allocation
 *	past the truncation point is possible until ext3_truncate()
 *	finishes, we may safely do the latter, but top of branch may
 *	require special attention - pageout below the truncation point
 *	might try to populate it.
 *
 *	We atomically detach the top of branch from the tree, store the
 *	block number of its root in *@top, pointers to buffer_heads of
 *	partially truncated blocks - in @chain[].bh and pointers to
 *	their last elements that should not be removed - in
 *	@chain[].p. Return value is the pointer to last filled element
 *	of @chain.
 *
 *	The work left to caller to do the actual freeing of subtrees:
 *		a) free the subtree starting from *@top
 *		b) free the subtrees whose roots are stored in
 *			(@chain[i].p+1 .. end of @chain[i].bh->b_data)
 *		c) free the subtrees growing from the inode past the @chain[0].
 *			(no partially truncated stuff there).  */

static Indirect *ext3_find_shared(struct inode *inode, int depth,
			int offsets[4], Indirect chain[4], __le32 *top)
{
	Indirect *partial, *p;
	int k, err;

	*top = 0;
	/* Make k index the deepest non-null offset + 1 */
	for (k = depth; k > 1 && !offsets[k-1]; k--)
		;
	/* Walk the branch; on a hole @partial points at the missing level. */
	partial = ext3_get_branch(inode, k, offsets, chain, &err);
	/* Writer: pointers */
	if (!partial)
		partial = chain + k-1;
	/*
	 * If the branch acquired continuation since we've looked at it -
	 * fine, it should all survive and (new) top doesn't belong to us.
	 */
	if (!partial->key && *partial->p)
		/* Writer: end */
		goto no_top;
	/* Step back over levels whose earlier pointers are all zero. */
	for (p=partial; p>chain && all_zeroes((__le32*)p->bh->b_data,p->p); p--)
		;
	/*
	 * OK, we've found the last block that must survive. The rest of our
	 * branch should be detached before unlocking. However, if that rest
	 * of branch is all ours and does not grow immediately from the inode
	 * it's easier to cheat and just decrement partial->p.
	 */
	if (p == chain + k - 1 && p > chain) {
		p->p--;
	} else {
		*top = *p->p;
		/* Nope, don't do this in ext3.  Must leave the tree intact */
#if 0
		*p->p = 0;
#endif
	}
	/* Writer: end */

	/* Release buffer_heads for the levels below the split point. */
	while(partial > p) {
		brelse(partial->bh);
		partial--;
	}
no_top:
	return partial;
}
2235 2225
/*
 * Zero a number of block pointers in either an inode or an indirect block.
 * If we restart the transaction we must again get write access to the
 * indirect block for further modification.
 *
 * We release `count' blocks on disk, but (last - first) may be greater
 * than `count' because there can be holes in there.
 */
static void ext3_clear_blocks(handle_t *handle, struct inode *inode,
		struct buffer_head *bh, ext3_fsblk_t block_to_free,
		unsigned long count, __le32 *first, __le32 *last)
{
	__le32 *p;
	/*
	 * Make sure the transaction has enough credits left; if it had to be
	 * restarted, the previously journaled @bh must be dirtied first and
	 * write access to it re-acquired afterwards.
	 */
	if (try_to_extend_transaction(handle, inode)) {
		if (bh) {
			BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
			if (ext3_journal_dirty_metadata(handle, bh))
				return;
		}
		ext3_mark_inode_dirty(handle, inode);
		truncate_restart_transaction(handle, inode);
		if (bh) {
			BUFFER_TRACE(bh, "retaking write access");
			if (ext3_journal_get_write_access(handle, bh))
				return;
		}
	}

	/*
	 * Any buffers which are on the journal will be in memory. We find
	 * them on the hash table so journal_revoke() will run journal_forget()
	 * on them.  We've already detached each block from the file, so
	 * bforget() in journal_forget() should be safe.
	 *
	 * AKPM: turn on bforget in journal_forget()!!!
	 */
	for (p = first; p < last; p++) {
		u32 nr = le32_to_cpu(*p);
		if (nr) {
			struct buffer_head *bh;

			/* Zero the on-disk pointer, then forget the buffer. */
			*p = 0;
			bh = sb_find_get_block(inode->i_sb, nr);
			ext3_forget(handle, 0, inode, bh, nr);
		}
	}

	ext3_free_blocks(handle, inode, block_to_free, count);
}
2285 2275
/**
 * ext3_free_data - free a list of data blocks
 * @handle:	handle for this transaction
 * @inode:	inode we are dealing with
 * @this_bh:	indirect buffer_head which contains *@first and *@last
 * @first:	array of block numbers
 * @last:	points immediately past the end of array
 *
 * We are freeing all blocks referred from that array (numbers are stored as
 * little-endian 32-bit) and updating @inode->i_blocks appropriately.
 *
 * We accumulate contiguous runs of blocks to free.  Conveniently, if these
 * blocks are contiguous then releasing them at one time will only affect one
 * or two bitmap blocks (+ group descriptor(s) and superblock) and we won't
 * actually use a lot of journal space.
 *
 * @this_bh will be %NULL if @first and @last point into the inode's direct
 * block pointers.
 */
static void ext3_free_data(handle_t *handle, struct inode *inode,
			   struct buffer_head *this_bh,
			   __le32 *first, __le32 *last)
{
	ext3_fsblk_t block_to_free = 0;    /* Starting block # of a run */
	unsigned long count = 0;	    /* Number of blocks in the run */
	__le32 *block_to_free_p = NULL;	    /* Pointer into inode/ind
					       corresponding to
					       block_to_free */
	ext3_fsblk_t nr;		    /* Current block # */
	__le32 *p;			    /* Pointer into inode/ind
					       for current block */
	int err;

	if (this_bh) {				/* For indirect block */
		BUFFER_TRACE(this_bh, "get_write_access");
		err = ext3_journal_get_write_access(handle, this_bh);
		/* Important: if we can't update the indirect pointers
		 * to the blocks, we can't free them. */
		if (err)
			return;
	}

	/*
	 * Walk the pointer array, coalescing physically contiguous blocks
	 * into a single run and handing each completed run to
	 * ext3_clear_blocks() (which zeroes the pointers and frees the run).
	 */
	for (p = first; p < last; p++) {
		nr = le32_to_cpu(*p);
		if (nr) {
			/* accumulate blocks to free if they're contiguous */
			if (count == 0) {
				block_to_free = nr;
				block_to_free_p = p;
				count = 1;
			} else if (nr == block_to_free + count) {
				count++;
			} else {
				ext3_clear_blocks(handle, inode, this_bh,
						  block_to_free,
						  count, block_to_free_p, p);
				block_to_free = nr;
				block_to_free_p = p;
				count = 1;
			}
		}
	}

	/* Flush the final (possibly only) run. */
	if (count > 0)
		ext3_clear_blocks(handle, inode, this_bh, block_to_free,
				  count, block_to_free_p, p);

	if (this_bh) {
		BUFFER_TRACE(this_bh, "call ext3_journal_dirty_metadata");

		/*
		 * The buffer head should have an attached journal head at this
		 * point. However, if the data is corrupted and an indirect
		 * block pointed to itself, it would have been detached when
		 * the block was cleared. Check for this instead of OOPSing.
		 */
		if (bh2jh(this_bh))
			ext3_journal_dirty_metadata(handle, this_bh);
		else
			ext3_error(inode->i_sb, "ext3_free_data",
				   "circular indirect block detected, "
				   "inode=%lu, block=%llu",
				   inode->i_ino,
				   (unsigned long long)this_bh->b_blocknr);
	}
}
2372 2362
/**
 *	ext3_free_branches - free an array of branches
 *	@handle: JBD handle for this transaction
 *	@inode:	inode we are dealing with
 *	@parent_bh: the buffer_head which contains *@first and *@last
 *	@first:	array of block numbers
 *	@last:	pointer immediately past the end of array
 *	@depth:	depth of the branches to free
 *
 *	We are freeing all blocks referred from these branches (numbers are
 *	stored as little-endian 32-bit) and updating @inode->i_blocks
 *	appropriately.
 *
 *	Recurses depth-first: at @depth > 0 each entry is an indirect block
 *	whose children are freed before the block itself; at @depth == 0 the
 *	entries are plain data blocks and are handed to ext3_free_data().
 */
static void ext3_free_branches(handle_t *handle, struct inode *inode,
			       struct buffer_head *parent_bh,
			       __le32 *first, __le32 *last, int depth)
{
	ext3_fsblk_t nr;
	__le32 *p;

	if (is_handle_aborted(handle))
		return;

	if (depth--) {
		struct buffer_head *bh;
		int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb);
		p = last;
		/* Bottom up, right to left: see comment in ext3_truncate(). */
		while (--p >= first) {
			nr = le32_to_cpu(*p);
			if (!nr)
				continue;		/* A hole */

			/* Go read the buffer for the next level down */
			bh = sb_bread(inode->i_sb, nr);

			/*
			 * A read failure? Report error and clear slot
			 * (should be rare).
			 */
			if (!bh) {
				ext3_error(inode->i_sb, "ext3_free_branches",
					   "Read failure, inode=%lu, block="E3FSBLK,
					   inode->i_ino, nr);
				continue;
			}

			/* This zaps the entire block.  Bottom up. */
			BUFFER_TRACE(bh, "free child branches");
			ext3_free_branches(handle, inode, bh,
					   (__le32*)bh->b_data,
					   (__le32*)bh->b_data + addr_per_block,
					   depth);

			/*
			 * Everything below this this pointer has been
			 * released.  Now let this top-of-subtree go.
			 *
			 * We want the freeing of this indirect block to be
			 * atomic in the journal with the updating of the
			 * bitmap block which owns it.  So make some room in
			 * the journal.
			 *
			 * We zero the parent pointer *after* freeing its
			 * pointee in the bitmaps, so if extend_transaction()
			 * for some reason fails to put the bitmap changes and
			 * the release into the same transaction, recovery
			 * will merely complain about releasing a free block,
			 * rather than leaking blocks.
			 */
			if (is_handle_aborted(handle))
				return;
			if (try_to_extend_transaction(handle, inode)) {
				ext3_mark_inode_dirty(handle, inode);
				truncate_restart_transaction(handle, inode);
			}

			/*
			 * We've probably journalled the indirect block several
			 * times during the truncate.  But it's no longer
			 * needed and we now drop it from the transaction via
			 * journal_revoke().
			 *
			 * That's easy if it's exclusively part of this
			 * transaction.  But if it's part of the committing
			 * transaction then journal_forget() will simply
			 * brelse() it.  That means that if the underlying
			 * block is reallocated in ext3_get_block(),
			 * unmap_underlying_metadata() will find this block
			 * and will try to get rid of it.  damn, damn. Thus
			 * we don't allow a block to be reallocated until
			 * a transaction freeing it has fully committed.
			 *
			 * We also have to make sure journal replay after a
			 * crash does not overwrite non-journaled data blocks
			 * with old metadata when the block got reallocated for
			 * data.  Thus we have to store a revoke record for a
			 * block in the same transaction in which we free the
			 * block.
			 */
			ext3_forget(handle, 1, inode, bh, bh->b_blocknr);

			ext3_free_blocks(handle, inode, nr, 1);

			if (parent_bh) {
				/*
				 * The block which we have just freed is
				 * pointed to by an indirect block: journal it
				 */
				BUFFER_TRACE(parent_bh, "get_write_access");
				if (!ext3_journal_get_write_access(handle,
								   parent_bh)){
					*p = 0;
					BUFFER_TRACE(parent_bh,
					"call ext3_journal_dirty_metadata");
					ext3_journal_dirty_metadata(handle,
								    parent_bh);
				}
			}
		}
	} else {
		/* We have reached the bottom of the tree. */
		BUFFER_TRACE(parent_bh, "free data blocks");
		ext3_free_data(handle, inode, parent_bh, first, last);
	}
}
2498 2488
2499 int ext3_can_truncate(struct inode *inode) 2489 int ext3_can_truncate(struct inode *inode)
2500 { 2490 {
2501 if (S_ISREG(inode->i_mode)) 2491 if (S_ISREG(inode->i_mode))
2502 return 1; 2492 return 1;
2503 if (S_ISDIR(inode->i_mode)) 2493 if (S_ISDIR(inode->i_mode))
2504 return 1; 2494 return 1;
2505 if (S_ISLNK(inode->i_mode)) 2495 if (S_ISLNK(inode->i_mode))
2506 return !ext3_inode_is_fast_symlink(inode); 2496 return !ext3_inode_is_fast_symlink(inode);
2507 return 0; 2497 return 0;
2508 } 2498 }
2509 2499
2510 /* 2500 /*
2511 * ext3_truncate() 2501 * ext3_truncate()
2512 * 2502 *
2513 * We block out ext3_get_block() block instantiations across the entire 2503 * We block out ext3_get_block() block instantiations across the entire
2514 * transaction, and VFS/VM ensures that ext3_truncate() cannot run 2504 * transaction, and VFS/VM ensures that ext3_truncate() cannot run
2515 * simultaneously on behalf of the same inode. 2505 * simultaneously on behalf of the same inode.
2516 * 2506 *
2517 * As we work through the truncate and commit bits of it to the journal there 2507 * As we work through the truncate and commit bits of it to the journal there
2518 * is one core, guiding principle: the file's tree must always be consistent on 2508 * is one core, guiding principle: the file's tree must always be consistent on
2519 * disk. We must be able to restart the truncate after a crash. 2509 * disk. We must be able to restart the truncate after a crash.
2520 * 2510 *
2521 * The file's tree may be transiently inconsistent in memory (although it 2511 * The file's tree may be transiently inconsistent in memory (although it
2522 * probably isn't), but whenever we close off and commit a journal transaction, 2512 * probably isn't), but whenever we close off and commit a journal transaction,
2523 * the contents of (the filesystem + the journal) must be consistent and 2513 * the contents of (the filesystem + the journal) must be consistent and
2524 * restartable. It's pretty simple, really: bottom up, right to left (although 2514 * restartable. It's pretty simple, really: bottom up, right to left (although
2525 * left-to-right works OK too). 2515 * left-to-right works OK too).
2526 * 2516 *
2527 * Note that at recovery time, journal replay occurs *before* the restart of 2517 * Note that at recovery time, journal replay occurs *before* the restart of
2528 * truncate against the orphan inode list. 2518 * truncate against the orphan inode list.
2529 * 2519 *
2530 * The committed inode has the new, desired i_size (which is the same as 2520 * The committed inode has the new, desired i_size (which is the same as
2531 * i_disksize in this case). After a crash, ext3_orphan_cleanup() will see 2521 * i_disksize in this case). After a crash, ext3_orphan_cleanup() will see
2532 * that this inode's truncate did not complete and it will again call 2522 * that this inode's truncate did not complete and it will again call
2533 * ext3_truncate() to have another go. So there will be instantiated blocks 2523 * ext3_truncate() to have another go. So there will be instantiated blocks
2534 * to the right of the truncation point in a crashed ext3 filesystem. But 2524 * to the right of the truncation point in a crashed ext3 filesystem. But
2535 * that's fine - as long as they are linked from the inode, the post-crash 2525 * that's fine - as long as they are linked from the inode, the post-crash
2536 * ext3_truncate() run will find them and release them. 2526 * ext3_truncate() run will find them and release them.
2537 */ 2527 */
2538 void ext3_truncate(struct inode *inode) 2528 void ext3_truncate(struct inode *inode)
2539 { 2529 {
2540 handle_t *handle; 2530 handle_t *handle;
2541 struct ext3_inode_info *ei = EXT3_I(inode); 2531 struct ext3_inode_info *ei = EXT3_I(inode);
2542 __le32 *i_data = ei->i_data; 2532 __le32 *i_data = ei->i_data;
2543 int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb); 2533 int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb);
2544 int offsets[4]; 2534 int offsets[4];
2545 Indirect chain[4]; 2535 Indirect chain[4];
2546 Indirect *partial; 2536 Indirect *partial;
2547 __le32 nr = 0; 2537 __le32 nr = 0;
2548 int n; 2538 int n;
2549 long last_block; 2539 long last_block;
2550 unsigned blocksize = inode->i_sb->s_blocksize; 2540 unsigned blocksize = inode->i_sb->s_blocksize;
2551 2541
2552 trace_ext3_truncate_enter(inode); 2542 trace_ext3_truncate_enter(inode);
2553 2543
2554 if (!ext3_can_truncate(inode)) 2544 if (!ext3_can_truncate(inode))
2555 goto out_notrans; 2545 goto out_notrans;
2556 2546
2557 if (inode->i_size == 0 && ext3_should_writeback_data(inode)) 2547 if (inode->i_size == 0 && ext3_should_writeback_data(inode))
2558 ext3_set_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE); 2548 ext3_set_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE);
2559 2549
2560 handle = start_transaction(inode); 2550 handle = start_transaction(inode);
2561 if (IS_ERR(handle)) 2551 if (IS_ERR(handle))
2562 goto out_notrans; 2552 goto out_notrans;
2563 2553
2564 last_block = (inode->i_size + blocksize-1) 2554 last_block = (inode->i_size + blocksize-1)
2565 >> EXT3_BLOCK_SIZE_BITS(inode->i_sb); 2555 >> EXT3_BLOCK_SIZE_BITS(inode->i_sb);
2566 n = ext3_block_to_path(inode, last_block, offsets, NULL); 2556 n = ext3_block_to_path(inode, last_block, offsets, NULL);
2567 if (n == 0) 2557 if (n == 0)
2568 goto out_stop; /* error */ 2558 goto out_stop; /* error */
2569 2559
2570 /* 2560 /*
2571 * OK. This truncate is going to happen. We add the inode to the 2561 * OK. This truncate is going to happen. We add the inode to the
2572 * orphan list, so that if this truncate spans multiple transactions, 2562 * orphan list, so that if this truncate spans multiple transactions,
2573 * and we crash, we will resume the truncate when the filesystem 2563 * and we crash, we will resume the truncate when the filesystem
2574 * recovers. It also marks the inode dirty, to catch the new size. 2564 * recovers. It also marks the inode dirty, to catch the new size.
2575 * 2565 *
2576 * Implication: the file must always be in a sane, consistent 2566 * Implication: the file must always be in a sane, consistent
2577 * truncatable state while each transaction commits. 2567 * truncatable state while each transaction commits.
2578 */ 2568 */
2579 if (ext3_orphan_add(handle, inode)) 2569 if (ext3_orphan_add(handle, inode))
2580 goto out_stop; 2570 goto out_stop;
2581 2571
2582 /* 2572 /*
2583 * The orphan list entry will now protect us from any crash which 2573 * The orphan list entry will now protect us from any crash which
2584 * occurs before the truncate completes, so it is now safe to propagate 2574 * occurs before the truncate completes, so it is now safe to propagate
2585 * the new, shorter inode size (held for now in i_size) into the 2575 * the new, shorter inode size (held for now in i_size) into the
2586 * on-disk inode. We do this via i_disksize, which is the value which 2576 * on-disk inode. We do this via i_disksize, which is the value which
2587 * ext3 *really* writes onto the disk inode. 2577 * ext3 *really* writes onto the disk inode.
2588 */ 2578 */
2589 ei->i_disksize = inode->i_size; 2579 ei->i_disksize = inode->i_size;
2590 2580
2591 /* 2581 /*
2592 * From here we block out all ext3_get_block() callers who want to 2582 * From here we block out all ext3_get_block() callers who want to
2593 * modify the block allocation tree. 2583 * modify the block allocation tree.
2594 */ 2584 */
2595 mutex_lock(&ei->truncate_mutex); 2585 mutex_lock(&ei->truncate_mutex);
2596 2586
2597 if (n == 1) { /* direct blocks */ 2587 if (n == 1) { /* direct blocks */
2598 ext3_free_data(handle, inode, NULL, i_data+offsets[0], 2588 ext3_free_data(handle, inode, NULL, i_data+offsets[0],
2599 i_data + EXT3_NDIR_BLOCKS); 2589 i_data + EXT3_NDIR_BLOCKS);
2600 goto do_indirects; 2590 goto do_indirects;
2601 } 2591 }
2602 2592
2603 partial = ext3_find_shared(inode, n, offsets, chain, &nr); 2593 partial = ext3_find_shared(inode, n, offsets, chain, &nr);
2604 /* Kill the top of shared branch (not detached) */ 2594 /* Kill the top of shared branch (not detached) */
2605 if (nr) { 2595 if (nr) {
2606 if (partial == chain) { 2596 if (partial == chain) {
2607 /* Shared branch grows from the inode */ 2597 /* Shared branch grows from the inode */
2608 ext3_free_branches(handle, inode, NULL, 2598 ext3_free_branches(handle, inode, NULL,
2609 &nr, &nr+1, (chain+n-1) - partial); 2599 &nr, &nr+1, (chain+n-1) - partial);
2610 *partial->p = 0; 2600 *partial->p = 0;
2611 /* 2601 /*
2612 * We mark the inode dirty prior to restart, 2602 * We mark the inode dirty prior to restart,
2613 * and prior to stop. No need for it here. 2603 * and prior to stop. No need for it here.
2614 */ 2604 */
2615 } else { 2605 } else {
2616 /* Shared branch grows from an indirect block */ 2606 /* Shared branch grows from an indirect block */
2617 ext3_free_branches(handle, inode, partial->bh, 2607 ext3_free_branches(handle, inode, partial->bh,
2618 partial->p, 2608 partial->p,
2619 partial->p+1, (chain+n-1) - partial); 2609 partial->p+1, (chain+n-1) - partial);
2620 } 2610 }
2621 } 2611 }
2622 /* Clear the ends of indirect blocks on the shared branch */ 2612 /* Clear the ends of indirect blocks on the shared branch */
2623 while (partial > chain) { 2613 while (partial > chain) {
2624 ext3_free_branches(handle, inode, partial->bh, partial->p + 1, 2614 ext3_free_branches(handle, inode, partial->bh, partial->p + 1,
2625 (__le32*)partial->bh->b_data+addr_per_block, 2615 (__le32*)partial->bh->b_data+addr_per_block,
2626 (chain+n-1) - partial); 2616 (chain+n-1) - partial);
2627 BUFFER_TRACE(partial->bh, "call brelse"); 2617 BUFFER_TRACE(partial->bh, "call brelse");
2628 brelse (partial->bh); 2618 brelse (partial->bh);
2629 partial--; 2619 partial--;
2630 } 2620 }
2631 do_indirects: 2621 do_indirects:
2632 /* Kill the remaining (whole) subtrees */ 2622 /* Kill the remaining (whole) subtrees */
2633 switch (offsets[0]) { 2623 switch (offsets[0]) {
2634 default: 2624 default:
2635 nr = i_data[EXT3_IND_BLOCK]; 2625 nr = i_data[EXT3_IND_BLOCK];
2636 if (nr) { 2626 if (nr) {
2637 ext3_free_branches(handle, inode, NULL, &nr, &nr+1, 1); 2627 ext3_free_branches(handle, inode, NULL, &nr, &nr+1, 1);
2638 i_data[EXT3_IND_BLOCK] = 0; 2628 i_data[EXT3_IND_BLOCK] = 0;
2639 } 2629 }
2640 case EXT3_IND_BLOCK: 2630 case EXT3_IND_BLOCK:
2641 nr = i_data[EXT3_DIND_BLOCK]; 2631 nr = i_data[EXT3_DIND_BLOCK];
2642 if (nr) { 2632 if (nr) {
2643 ext3_free_branches(handle, inode, NULL, &nr, &nr+1, 2); 2633 ext3_free_branches(handle, inode, NULL, &nr, &nr+1, 2);
2644 i_data[EXT3_DIND_BLOCK] = 0; 2634 i_data[EXT3_DIND_BLOCK] = 0;
2645 } 2635 }
2646 case EXT3_DIND_BLOCK: 2636 case EXT3_DIND_BLOCK:
2647 nr = i_data[EXT3_TIND_BLOCK]; 2637 nr = i_data[EXT3_TIND_BLOCK];
2648 if (nr) { 2638 if (nr) {
2649 ext3_free_branches(handle, inode, NULL, &nr, &nr+1, 3); 2639 ext3_free_branches(handle, inode, NULL, &nr, &nr+1, 3);
2650 i_data[EXT3_TIND_BLOCK] = 0; 2640 i_data[EXT3_TIND_BLOCK] = 0;
2651 } 2641 }
2652 case EXT3_TIND_BLOCK: 2642 case EXT3_TIND_BLOCK:
2653 ; 2643 ;
2654 } 2644 }
2655 2645
2656 ext3_discard_reservation(inode); 2646 ext3_discard_reservation(inode);
2657 2647
2658 mutex_unlock(&ei->truncate_mutex); 2648 mutex_unlock(&ei->truncate_mutex);
2659 inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; 2649 inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
2660 ext3_mark_inode_dirty(handle, inode); 2650 ext3_mark_inode_dirty(handle, inode);
2661 2651
2662 /* 2652 /*
2663 * In a multi-transaction truncate, we only make the final transaction 2653 * In a multi-transaction truncate, we only make the final transaction
2664 * synchronous 2654 * synchronous
2665 */ 2655 */
2666 if (IS_SYNC(inode)) 2656 if (IS_SYNC(inode))
2667 handle->h_sync = 1; 2657 handle->h_sync = 1;
2668 out_stop: 2658 out_stop:
2669 /* 2659 /*
2670 * If this was a simple ftruncate(), and the file will remain alive 2660 * If this was a simple ftruncate(), and the file will remain alive
2671 * then we need to clear up the orphan record which we created above. 2661 * then we need to clear up the orphan record which we created above.
2672 * However, if this was a real unlink then we were called by 2662 * However, if this was a real unlink then we were called by
2673 * ext3_evict_inode(), and we allow that function to clean up the 2663 * ext3_evict_inode(), and we allow that function to clean up the
2674 * orphan info for us. 2664 * orphan info for us.
2675 */ 2665 */
2676 if (inode->i_nlink) 2666 if (inode->i_nlink)
2677 ext3_orphan_del(handle, inode); 2667 ext3_orphan_del(handle, inode);
2678 2668
2679 ext3_journal_stop(handle); 2669 ext3_journal_stop(handle);
2680 trace_ext3_truncate_exit(inode); 2670 trace_ext3_truncate_exit(inode);
2681 return; 2671 return;
2682 out_notrans: 2672 out_notrans:
2683 /* 2673 /*
2684 * Delete the inode from orphan list so that it doesn't stay there 2674 * Delete the inode from orphan list so that it doesn't stay there
2685 * forever and trigger assertion on umount. 2675 * forever and trigger assertion on umount.
2686 */ 2676 */
2687 if (inode->i_nlink) 2677 if (inode->i_nlink)
2688 ext3_orphan_del(NULL, inode); 2678 ext3_orphan_del(NULL, inode);
2689 trace_ext3_truncate_exit(inode); 2679 trace_ext3_truncate_exit(inode);
2690 } 2680 }
2691 2681
2692 static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb, 2682 static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb,
2693 unsigned long ino, struct ext3_iloc *iloc) 2683 unsigned long ino, struct ext3_iloc *iloc)
2694 { 2684 {
2695 unsigned long block_group; 2685 unsigned long block_group;
2696 unsigned long offset; 2686 unsigned long offset;
2697 ext3_fsblk_t block; 2687 ext3_fsblk_t block;
2698 struct ext3_group_desc *gdp; 2688 struct ext3_group_desc *gdp;
2699 2689
2700 if (!ext3_valid_inum(sb, ino)) { 2690 if (!ext3_valid_inum(sb, ino)) {
2701 /* 2691 /*
2702 * This error is already checked for in namei.c unless we are 2692 * This error is already checked for in namei.c unless we are
2703 * looking at an NFS filehandle, in which case no error 2693 * looking at an NFS filehandle, in which case no error
2704 * report is needed 2694 * report is needed
2705 */ 2695 */
2706 return 0; 2696 return 0;
2707 } 2697 }
2708 2698
2709 block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb); 2699 block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb);
2710 gdp = ext3_get_group_desc(sb, block_group, NULL); 2700 gdp = ext3_get_group_desc(sb, block_group, NULL);
2711 if (!gdp) 2701 if (!gdp)
2712 return 0; 2702 return 0;
2713 /* 2703 /*
2714 * Figure out the offset within the block group inode table 2704 * Figure out the offset within the block group inode table
2715 */ 2705 */
2716 offset = ((ino - 1) % EXT3_INODES_PER_GROUP(sb)) * 2706 offset = ((ino - 1) % EXT3_INODES_PER_GROUP(sb)) *
2717 EXT3_INODE_SIZE(sb); 2707 EXT3_INODE_SIZE(sb);
2718 block = le32_to_cpu(gdp->bg_inode_table) + 2708 block = le32_to_cpu(gdp->bg_inode_table) +
2719 (offset >> EXT3_BLOCK_SIZE_BITS(sb)); 2709 (offset >> EXT3_BLOCK_SIZE_BITS(sb));
2720 2710
2721 iloc->block_group = block_group; 2711 iloc->block_group = block_group;
2722 iloc->offset = offset & (EXT3_BLOCK_SIZE(sb) - 1); 2712 iloc->offset = offset & (EXT3_BLOCK_SIZE(sb) - 1);
2723 return block; 2713 return block;
2724 } 2714 }
2725 2715
2726 /* 2716 /*
2727 * ext3_get_inode_loc returns with an extra refcount against the inode's 2717 * ext3_get_inode_loc returns with an extra refcount against the inode's
2728 * underlying buffer_head on success. If 'in_mem' is true, we have all 2718 * underlying buffer_head on success. If 'in_mem' is true, we have all
2729 * data in memory that is needed to recreate the on-disk version of this 2719 * data in memory that is needed to recreate the on-disk version of this
2730 * inode. 2720 * inode.
2731 */ 2721 */
2732 static int __ext3_get_inode_loc(struct inode *inode, 2722 static int __ext3_get_inode_loc(struct inode *inode,
2733 struct ext3_iloc *iloc, int in_mem) 2723 struct ext3_iloc *iloc, int in_mem)
2734 { 2724 {
2735 ext3_fsblk_t block; 2725 ext3_fsblk_t block;
2736 struct buffer_head *bh; 2726 struct buffer_head *bh;
2737 2727
2738 block = ext3_get_inode_block(inode->i_sb, inode->i_ino, iloc); 2728 block = ext3_get_inode_block(inode->i_sb, inode->i_ino, iloc);
2739 if (!block) 2729 if (!block)
2740 return -EIO; 2730 return -EIO;
2741 2731
2742 bh = sb_getblk(inode->i_sb, block); 2732 bh = sb_getblk(inode->i_sb, block);
2743 if (!bh) { 2733 if (!bh) {
2744 ext3_error (inode->i_sb, "ext3_get_inode_loc", 2734 ext3_error (inode->i_sb, "ext3_get_inode_loc",
2745 "unable to read inode block - " 2735 "unable to read inode block - "
2746 "inode=%lu, block="E3FSBLK, 2736 "inode=%lu, block="E3FSBLK,
2747 inode->i_ino, block); 2737 inode->i_ino, block);
2748 return -EIO; 2738 return -EIO;
2749 } 2739 }
2750 if (!buffer_uptodate(bh)) { 2740 if (!buffer_uptodate(bh)) {
2751 lock_buffer(bh); 2741 lock_buffer(bh);
2752 2742
2753 /* 2743 /*
2754 * If the buffer has the write error flag, we have failed 2744 * If the buffer has the write error flag, we have failed
2755 * to write out another inode in the same block. In this 2745 * to write out another inode in the same block. In this
2756 * case, we don't have to read the block because we may 2746 * case, we don't have to read the block because we may
2757 * read the old inode data successfully. 2747 * read the old inode data successfully.
2758 */ 2748 */
2759 if (buffer_write_io_error(bh) && !buffer_uptodate(bh)) 2749 if (buffer_write_io_error(bh) && !buffer_uptodate(bh))
2760 set_buffer_uptodate(bh); 2750 set_buffer_uptodate(bh);
2761 2751
2762 if (buffer_uptodate(bh)) { 2752 if (buffer_uptodate(bh)) {
2763 /* someone brought it uptodate while we waited */ 2753 /* someone brought it uptodate while we waited */
2764 unlock_buffer(bh); 2754 unlock_buffer(bh);
2765 goto has_buffer; 2755 goto has_buffer;
2766 } 2756 }
2767 2757
2768 /* 2758 /*
2769 * If we have all information of the inode in memory and this 2759 * If we have all information of the inode in memory and this
2770 * is the only valid inode in the block, we need not read the 2760 * is the only valid inode in the block, we need not read the
2771 * block. 2761 * block.
2772 */ 2762 */
2773 if (in_mem) { 2763 if (in_mem) {
2774 struct buffer_head *bitmap_bh; 2764 struct buffer_head *bitmap_bh;
2775 struct ext3_group_desc *desc; 2765 struct ext3_group_desc *desc;
2776 int inodes_per_buffer; 2766 int inodes_per_buffer;
2777 int inode_offset, i; 2767 int inode_offset, i;
2778 int block_group; 2768 int block_group;
2779 int start; 2769 int start;
2780 2770
2781 block_group = (inode->i_ino - 1) / 2771 block_group = (inode->i_ino - 1) /
2782 EXT3_INODES_PER_GROUP(inode->i_sb); 2772 EXT3_INODES_PER_GROUP(inode->i_sb);
2783 inodes_per_buffer = bh->b_size / 2773 inodes_per_buffer = bh->b_size /
2784 EXT3_INODE_SIZE(inode->i_sb); 2774 EXT3_INODE_SIZE(inode->i_sb);
2785 inode_offset = ((inode->i_ino - 1) % 2775 inode_offset = ((inode->i_ino - 1) %
2786 EXT3_INODES_PER_GROUP(inode->i_sb)); 2776 EXT3_INODES_PER_GROUP(inode->i_sb));
2787 start = inode_offset & ~(inodes_per_buffer - 1); 2777 start = inode_offset & ~(inodes_per_buffer - 1);
2788 2778
2789 /* Is the inode bitmap in cache? */ 2779 /* Is the inode bitmap in cache? */
2790 desc = ext3_get_group_desc(inode->i_sb, 2780 desc = ext3_get_group_desc(inode->i_sb,
2791 block_group, NULL); 2781 block_group, NULL);
2792 if (!desc) 2782 if (!desc)
2793 goto make_io; 2783 goto make_io;
2794 2784
2795 bitmap_bh = sb_getblk(inode->i_sb, 2785 bitmap_bh = sb_getblk(inode->i_sb,
2796 le32_to_cpu(desc->bg_inode_bitmap)); 2786 le32_to_cpu(desc->bg_inode_bitmap));
2797 if (!bitmap_bh) 2787 if (!bitmap_bh)
2798 goto make_io; 2788 goto make_io;
2799 2789
2800 /* 2790 /*
2801 * If the inode bitmap isn't in cache then the 2791 * If the inode bitmap isn't in cache then the
2802 * optimisation may end up performing two reads instead 2792 * optimisation may end up performing two reads instead
2803 * of one, so skip it. 2793 * of one, so skip it.
2804 */ 2794 */
2805 if (!buffer_uptodate(bitmap_bh)) { 2795 if (!buffer_uptodate(bitmap_bh)) {
2806 brelse(bitmap_bh); 2796 brelse(bitmap_bh);
2807 goto make_io; 2797 goto make_io;
2808 } 2798 }
2809 for (i = start; i < start + inodes_per_buffer; i++) { 2799 for (i = start; i < start + inodes_per_buffer; i++) {
2810 if (i == inode_offset) 2800 if (i == inode_offset)
2811 continue; 2801 continue;
2812 if (ext3_test_bit(i, bitmap_bh->b_data)) 2802 if (ext3_test_bit(i, bitmap_bh->b_data))
2813 break; 2803 break;
2814 } 2804 }
2815 brelse(bitmap_bh); 2805 brelse(bitmap_bh);
2816 if (i == start + inodes_per_buffer) { 2806 if (i == start + inodes_per_buffer) {
2817 /* all other inodes are free, so skip I/O */ 2807 /* all other inodes are free, so skip I/O */
2818 memset(bh->b_data, 0, bh->b_size); 2808 memset(bh->b_data, 0, bh->b_size);
2819 set_buffer_uptodate(bh); 2809 set_buffer_uptodate(bh);
2820 unlock_buffer(bh); 2810 unlock_buffer(bh);
2821 goto has_buffer; 2811 goto has_buffer;
2822 } 2812 }
2823 } 2813 }
2824 2814
2825 make_io: 2815 make_io:
2826 /* 2816 /*
2827 * There are other valid inodes in the buffer, this inode 2817 * There are other valid inodes in the buffer, this inode
2828 * has in-inode xattrs, or we don't have this inode in memory. 2818 * has in-inode xattrs, or we don't have this inode in memory.
2829 * Read the block from disk. 2819 * Read the block from disk.
2830 */ 2820 */
2831 trace_ext3_load_inode(inode); 2821 trace_ext3_load_inode(inode);
2832 get_bh(bh); 2822 get_bh(bh);
2833 bh->b_end_io = end_buffer_read_sync; 2823 bh->b_end_io = end_buffer_read_sync;
2834 submit_bh(READ | REQ_META | REQ_PRIO, bh); 2824 submit_bh(READ | REQ_META | REQ_PRIO, bh);
2835 wait_on_buffer(bh); 2825 wait_on_buffer(bh);
2836 if (!buffer_uptodate(bh)) { 2826 if (!buffer_uptodate(bh)) {
2837 ext3_error(inode->i_sb, "ext3_get_inode_loc", 2827 ext3_error(inode->i_sb, "ext3_get_inode_loc",
2838 "unable to read inode block - " 2828 "unable to read inode block - "
2839 "inode=%lu, block="E3FSBLK, 2829 "inode=%lu, block="E3FSBLK,
2840 inode->i_ino, block); 2830 inode->i_ino, block);
2841 brelse(bh); 2831 brelse(bh);
2842 return -EIO; 2832 return -EIO;
2843 } 2833 }
2844 } 2834 }
2845 has_buffer: 2835 has_buffer:
2846 iloc->bh = bh; 2836 iloc->bh = bh;
2847 return 0; 2837 return 0;
2848 } 2838 }
2849 2839
2850 int ext3_get_inode_loc(struct inode *inode, struct ext3_iloc *iloc) 2840 int ext3_get_inode_loc(struct inode *inode, struct ext3_iloc *iloc)
2851 { 2841 {
2852 /* We have all inode data except xattrs in memory here. */ 2842 /* We have all inode data except xattrs in memory here. */
2853 return __ext3_get_inode_loc(inode, iloc, 2843 return __ext3_get_inode_loc(inode, iloc,
2854 !ext3_test_inode_state(inode, EXT3_STATE_XATTR)); 2844 !ext3_test_inode_state(inode, EXT3_STATE_XATTR));
2855 } 2845 }
2856 2846
2857 void ext3_set_inode_flags(struct inode *inode) 2847 void ext3_set_inode_flags(struct inode *inode)
2858 { 2848 {
2859 unsigned int flags = EXT3_I(inode)->i_flags; 2849 unsigned int flags = EXT3_I(inode)->i_flags;
2860 2850
2861 inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); 2851 inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
2862 if (flags & EXT3_SYNC_FL) 2852 if (flags & EXT3_SYNC_FL)
2863 inode->i_flags |= S_SYNC; 2853 inode->i_flags |= S_SYNC;
2864 if (flags & EXT3_APPEND_FL) 2854 if (flags & EXT3_APPEND_FL)
2865 inode->i_flags |= S_APPEND; 2855 inode->i_flags |= S_APPEND;
2866 if (flags & EXT3_IMMUTABLE_FL) 2856 if (flags & EXT3_IMMUTABLE_FL)
2867 inode->i_flags |= S_IMMUTABLE; 2857 inode->i_flags |= S_IMMUTABLE;
2868 if (flags & EXT3_NOATIME_FL) 2858 if (flags & EXT3_NOATIME_FL)
2869 inode->i_flags |= S_NOATIME; 2859 inode->i_flags |= S_NOATIME;
2870 if (flags & EXT3_DIRSYNC_FL) 2860 if (flags & EXT3_DIRSYNC_FL)
2871 inode->i_flags |= S_DIRSYNC; 2861 inode->i_flags |= S_DIRSYNC;
2872 } 2862 }
2873 2863
2874 /* Propagate flags from i_flags to EXT3_I(inode)->i_flags */ 2864 /* Propagate flags from i_flags to EXT3_I(inode)->i_flags */
2875 void ext3_get_inode_flags(struct ext3_inode_info *ei) 2865 void ext3_get_inode_flags(struct ext3_inode_info *ei)
2876 { 2866 {
2877 unsigned int flags = ei->vfs_inode.i_flags; 2867 unsigned int flags = ei->vfs_inode.i_flags;
2878 2868
2879 ei->i_flags &= ~(EXT3_SYNC_FL|EXT3_APPEND_FL| 2869 ei->i_flags &= ~(EXT3_SYNC_FL|EXT3_APPEND_FL|
2880 EXT3_IMMUTABLE_FL|EXT3_NOATIME_FL|EXT3_DIRSYNC_FL); 2870 EXT3_IMMUTABLE_FL|EXT3_NOATIME_FL|EXT3_DIRSYNC_FL);
2881 if (flags & S_SYNC) 2871 if (flags & S_SYNC)
2882 ei->i_flags |= EXT3_SYNC_FL; 2872 ei->i_flags |= EXT3_SYNC_FL;
2883 if (flags & S_APPEND) 2873 if (flags & S_APPEND)
2884 ei->i_flags |= EXT3_APPEND_FL; 2874 ei->i_flags |= EXT3_APPEND_FL;
2885 if (flags & S_IMMUTABLE) 2875 if (flags & S_IMMUTABLE)
2886 ei->i_flags |= EXT3_IMMUTABLE_FL; 2876 ei->i_flags |= EXT3_IMMUTABLE_FL;
2887 if (flags & S_NOATIME) 2877 if (flags & S_NOATIME)
2888 ei->i_flags |= EXT3_NOATIME_FL; 2878 ei->i_flags |= EXT3_NOATIME_FL;
2889 if (flags & S_DIRSYNC) 2879 if (flags & S_DIRSYNC)
2890 ei->i_flags |= EXT3_DIRSYNC_FL; 2880 ei->i_flags |= EXT3_DIRSYNC_FL;
2891 } 2881 }
2892 2882
2893 struct inode *ext3_iget(struct super_block *sb, unsigned long ino) 2883 struct inode *ext3_iget(struct super_block *sb, unsigned long ino)
2894 { 2884 {
2895 struct ext3_iloc iloc; 2885 struct ext3_iloc iloc;
2896 struct ext3_inode *raw_inode; 2886 struct ext3_inode *raw_inode;
2897 struct ext3_inode_info *ei; 2887 struct ext3_inode_info *ei;
2898 struct buffer_head *bh; 2888 struct buffer_head *bh;
2899 struct inode *inode; 2889 struct inode *inode;
2900 journal_t *journal = EXT3_SB(sb)->s_journal; 2890 journal_t *journal = EXT3_SB(sb)->s_journal;
2901 transaction_t *transaction; 2891 transaction_t *transaction;
2902 long ret; 2892 long ret;
2903 int block; 2893 int block;
2904 2894
2905 inode = iget_locked(sb, ino); 2895 inode = iget_locked(sb, ino);
2906 if (!inode) 2896 if (!inode)
2907 return ERR_PTR(-ENOMEM); 2897 return ERR_PTR(-ENOMEM);
2908 if (!(inode->i_state & I_NEW)) 2898 if (!(inode->i_state & I_NEW))
2909 return inode; 2899 return inode;
2910 2900
2911 ei = EXT3_I(inode); 2901 ei = EXT3_I(inode);
2912 ei->i_block_alloc_info = NULL; 2902 ei->i_block_alloc_info = NULL;
2913 2903
2914 ret = __ext3_get_inode_loc(inode, &iloc, 0); 2904 ret = __ext3_get_inode_loc(inode, &iloc, 0);
2915 if (ret < 0) 2905 if (ret < 0)
2916 goto bad_inode; 2906 goto bad_inode;
2917 bh = iloc.bh; 2907 bh = iloc.bh;
2918 raw_inode = ext3_raw_inode(&iloc); 2908 raw_inode = ext3_raw_inode(&iloc);
2919 inode->i_mode = le16_to_cpu(raw_inode->i_mode); 2909 inode->i_mode = le16_to_cpu(raw_inode->i_mode);
2920 inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); 2910 inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
2921 inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); 2911 inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
2922 if(!(test_opt (inode->i_sb, NO_UID32))) { 2912 if(!(test_opt (inode->i_sb, NO_UID32))) {
2923 inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; 2913 inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
2924 inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; 2914 inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
2925 } 2915 }
2926 set_nlink(inode, le16_to_cpu(raw_inode->i_links_count)); 2916 set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
2927 inode->i_size = le32_to_cpu(raw_inode->i_size); 2917 inode->i_size = le32_to_cpu(raw_inode->i_size);
2928 inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime); 2918 inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime);
2929 inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime); 2919 inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime);
2930 inode->i_mtime.tv_sec = (signed)le32_to_cpu(raw_inode->i_mtime); 2920 inode->i_mtime.tv_sec = (signed)le32_to_cpu(raw_inode->i_mtime);
2931 inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_mtime.tv_nsec = 0; 2921 inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_mtime.tv_nsec = 0;
2932 2922
2933 ei->i_state_flags = 0; 2923 ei->i_state_flags = 0;
2934 ei->i_dir_start_lookup = 0; 2924 ei->i_dir_start_lookup = 0;
2935 ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); 2925 ei->i_dtime = le32_to_cpu(raw_inode->i_dtime);
2936 /* We now have enough fields to check if the inode was active or not. 2926 /* We now have enough fields to check if the inode was active or not.
2937 * This is needed because nfsd might try to access dead inodes 2927 * This is needed because nfsd might try to access dead inodes
2938 * the test is that same one that e2fsck uses 2928 * the test is that same one that e2fsck uses
2939 * NeilBrown 1999oct15 2929 * NeilBrown 1999oct15
2940 */ 2930 */
2941 if (inode->i_nlink == 0) { 2931 if (inode->i_nlink == 0) {
2942 if (inode->i_mode == 0 || 2932 if (inode->i_mode == 0 ||
2943 !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ORPHAN_FS)) { 2933 !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ORPHAN_FS)) {
2944 /* this inode is deleted */ 2934 /* this inode is deleted */
2945 brelse (bh); 2935 brelse (bh);
2946 ret = -ESTALE; 2936 ret = -ESTALE;
2947 goto bad_inode; 2937 goto bad_inode;
2948 } 2938 }
2949 /* The only unlinked inodes we let through here have 2939 /* The only unlinked inodes we let through here have
2950 * valid i_mode and are being read by the orphan 2940 * valid i_mode and are being read by the orphan
2951 * recovery code: that's fine, we're about to complete 2941 * recovery code: that's fine, we're about to complete
2952 * the process of deleting those. */ 2942 * the process of deleting those. */
2953 } 2943 }
2954 inode->i_blocks = le32_to_cpu(raw_inode->i_blocks); 2944 inode->i_blocks = le32_to_cpu(raw_inode->i_blocks);
2955 ei->i_flags = le32_to_cpu(raw_inode->i_flags); 2945 ei->i_flags = le32_to_cpu(raw_inode->i_flags);
2956 #ifdef EXT3_FRAGMENTS 2946 #ifdef EXT3_FRAGMENTS
2957 ei->i_faddr = le32_to_cpu(raw_inode->i_faddr); 2947 ei->i_faddr = le32_to_cpu(raw_inode->i_faddr);
2958 ei->i_frag_no = raw_inode->i_frag; 2948 ei->i_frag_no = raw_inode->i_frag;
2959 ei->i_frag_size = raw_inode->i_fsize; 2949 ei->i_frag_size = raw_inode->i_fsize;
2960 #endif 2950 #endif
2961 ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl); 2951 ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl);
2962 if (!S_ISREG(inode->i_mode)) { 2952 if (!S_ISREG(inode->i_mode)) {
2963 ei->i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl); 2953 ei->i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl);
2964 } else { 2954 } else {
2965 inode->i_size |= 2955 inode->i_size |=
2966 ((__u64)le32_to_cpu(raw_inode->i_size_high)) << 32; 2956 ((__u64)le32_to_cpu(raw_inode->i_size_high)) << 32;
2967 } 2957 }
2968 ei->i_disksize = inode->i_size; 2958 ei->i_disksize = inode->i_size;
2969 inode->i_generation = le32_to_cpu(raw_inode->i_generation); 2959 inode->i_generation = le32_to_cpu(raw_inode->i_generation);
2970 ei->i_block_group = iloc.block_group; 2960 ei->i_block_group = iloc.block_group;
2971 /* 2961 /*
2972 * NOTE! The in-memory inode i_data array is in little-endian order 2962 * NOTE! The in-memory inode i_data array is in little-endian order
2973 * even on big-endian machines: we do NOT byteswap the block numbers! 2963 * even on big-endian machines: we do NOT byteswap the block numbers!
2974 */ 2964 */
2975 for (block = 0; block < EXT3_N_BLOCKS; block++) 2965 for (block = 0; block < EXT3_N_BLOCKS; block++)
2976 ei->i_data[block] = raw_inode->i_block[block]; 2966 ei->i_data[block] = raw_inode->i_block[block];
2977 INIT_LIST_HEAD(&ei->i_orphan); 2967 INIT_LIST_HEAD(&ei->i_orphan);
2978 2968
2979 /* 2969 /*
2980 * Set transaction id's of transactions that have to be committed 2970 * Set transaction id's of transactions that have to be committed
2981 * to finish f[data]sync. We set them to currently running transaction 2971 * to finish f[data]sync. We set them to currently running transaction
2982 * as we cannot be sure that the inode or some of its metadata isn't 2972 * as we cannot be sure that the inode or some of its metadata isn't
2983 * part of the transaction - the inode could have been reclaimed and 2973 * part of the transaction - the inode could have been reclaimed and
2984 * now it is reread from disk. 2974 * now it is reread from disk.
2985 */ 2975 */
2986 if (journal) { 2976 if (journal) {
2987 tid_t tid; 2977 tid_t tid;
2988 2978
2989 spin_lock(&journal->j_state_lock); 2979 spin_lock(&journal->j_state_lock);
2990 if (journal->j_running_transaction) 2980 if (journal->j_running_transaction)
2991 transaction = journal->j_running_transaction; 2981 transaction = journal->j_running_transaction;
2992 else 2982 else
2993 transaction = journal->j_committing_transaction; 2983 transaction = journal->j_committing_transaction;
2994 if (transaction) 2984 if (transaction)
2995 tid = transaction->t_tid; 2985 tid = transaction->t_tid;
2996 else 2986 else
2997 tid = journal->j_commit_sequence; 2987 tid = journal->j_commit_sequence;
2998 spin_unlock(&journal->j_state_lock); 2988 spin_unlock(&journal->j_state_lock);
2999 atomic_set(&ei->i_sync_tid, tid); 2989 atomic_set(&ei->i_sync_tid, tid);
3000 atomic_set(&ei->i_datasync_tid, tid); 2990 atomic_set(&ei->i_datasync_tid, tid);
3001 } 2991 }
3002 2992
3003 if (inode->i_ino >= EXT3_FIRST_INO(inode->i_sb) + 1 && 2993 if (inode->i_ino >= EXT3_FIRST_INO(inode->i_sb) + 1 &&
3004 EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) { 2994 EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) {
3005 /* 2995 /*
3006 * When mke2fs creates big inodes it does not zero out 2996 * When mke2fs creates big inodes it does not zero out
3007 * the unused bytes above EXT3_GOOD_OLD_INODE_SIZE, 2997 * the unused bytes above EXT3_GOOD_OLD_INODE_SIZE,
3008 * so ignore those first few inodes. 2998 * so ignore those first few inodes.
3009 */ 2999 */
3010 ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); 3000 ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
3011 if (EXT3_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > 3001 if (EXT3_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
3012 EXT3_INODE_SIZE(inode->i_sb)) { 3002 EXT3_INODE_SIZE(inode->i_sb)) {
3013 brelse (bh); 3003 brelse (bh);
3014 ret = -EIO; 3004 ret = -EIO;
3015 goto bad_inode; 3005 goto bad_inode;
3016 } 3006 }
3017 if (ei->i_extra_isize == 0) { 3007 if (ei->i_extra_isize == 0) {
3018 /* The extra space is currently unused. Use it. */ 3008 /* The extra space is currently unused. Use it. */
3019 ei->i_extra_isize = sizeof(struct ext3_inode) - 3009 ei->i_extra_isize = sizeof(struct ext3_inode) -
3020 EXT3_GOOD_OLD_INODE_SIZE; 3010 EXT3_GOOD_OLD_INODE_SIZE;
3021 } else { 3011 } else {
3022 __le32 *magic = (void *)raw_inode + 3012 __le32 *magic = (void *)raw_inode +
3023 EXT3_GOOD_OLD_INODE_SIZE + 3013 EXT3_GOOD_OLD_INODE_SIZE +
3024 ei->i_extra_isize; 3014 ei->i_extra_isize;
3025 if (*magic == cpu_to_le32(EXT3_XATTR_MAGIC)) 3015 if (*magic == cpu_to_le32(EXT3_XATTR_MAGIC))
3026 ext3_set_inode_state(inode, EXT3_STATE_XATTR); 3016 ext3_set_inode_state(inode, EXT3_STATE_XATTR);
3027 } 3017 }
3028 } else 3018 } else
3029 ei->i_extra_isize = 0; 3019 ei->i_extra_isize = 0;
3030 3020
3031 if (S_ISREG(inode->i_mode)) { 3021 if (S_ISREG(inode->i_mode)) {
3032 inode->i_op = &ext3_file_inode_operations; 3022 inode->i_op = &ext3_file_inode_operations;
3033 inode->i_fop = &ext3_file_operations; 3023 inode->i_fop = &ext3_file_operations;
3034 ext3_set_aops(inode); 3024 ext3_set_aops(inode);
3035 } else if (S_ISDIR(inode->i_mode)) { 3025 } else if (S_ISDIR(inode->i_mode)) {
3036 inode->i_op = &ext3_dir_inode_operations; 3026 inode->i_op = &ext3_dir_inode_operations;
3037 inode->i_fop = &ext3_dir_operations; 3027 inode->i_fop = &ext3_dir_operations;
3038 } else if (S_ISLNK(inode->i_mode)) { 3028 } else if (S_ISLNK(inode->i_mode)) {
3039 if (ext3_inode_is_fast_symlink(inode)) { 3029 if (ext3_inode_is_fast_symlink(inode)) {
3040 inode->i_op = &ext3_fast_symlink_inode_operations; 3030 inode->i_op = &ext3_fast_symlink_inode_operations;
3041 nd_terminate_link(ei->i_data, inode->i_size, 3031 nd_terminate_link(ei->i_data, inode->i_size,
3042 sizeof(ei->i_data) - 1); 3032 sizeof(ei->i_data) - 1);
3043 } else { 3033 } else {
3044 inode->i_op = &ext3_symlink_inode_operations; 3034 inode->i_op = &ext3_symlink_inode_operations;
3045 ext3_set_aops(inode); 3035 ext3_set_aops(inode);
3046 } 3036 }
3047 } else { 3037 } else {
3048 inode->i_op = &ext3_special_inode_operations; 3038 inode->i_op = &ext3_special_inode_operations;
3049 if (raw_inode->i_block[0]) 3039 if (raw_inode->i_block[0])
3050 init_special_inode(inode, inode->i_mode, 3040 init_special_inode(inode, inode->i_mode,
3051 old_decode_dev(le32_to_cpu(raw_inode->i_block[0]))); 3041 old_decode_dev(le32_to_cpu(raw_inode->i_block[0])));
3052 else 3042 else
3053 init_special_inode(inode, inode->i_mode, 3043 init_special_inode(inode, inode->i_mode,
3054 new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); 3044 new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
3055 } 3045 }
3056 brelse (iloc.bh); 3046 brelse (iloc.bh);
3057 ext3_set_inode_flags(inode); 3047 ext3_set_inode_flags(inode);
3058 unlock_new_inode(inode); 3048 unlock_new_inode(inode);
3059 return inode; 3049 return inode;
3060 3050
3061 bad_inode: 3051 bad_inode:
3062 iget_failed(inode); 3052 iget_failed(inode);
3063 return ERR_PTR(ret); 3053 return ERR_PTR(ret);
3064 } 3054 }
3065 3055
3066 /* 3056 /*
3067 * Post the struct inode info into an on-disk inode location in the 3057 * Post the struct inode info into an on-disk inode location in the
3068 * buffer-cache. This gobbles the caller's reference to the 3058 * buffer-cache. This gobbles the caller's reference to the
3069 * buffer_head in the inode location struct. 3059 * buffer_head in the inode location struct.
3070 * 3060 *
3071 * The caller must have write access to iloc->bh. 3061 * The caller must have write access to iloc->bh.
3072 */ 3062 */
3073 static int ext3_do_update_inode(handle_t *handle, 3063 static int ext3_do_update_inode(handle_t *handle,
3074 struct inode *inode, 3064 struct inode *inode,
3075 struct ext3_iloc *iloc) 3065 struct ext3_iloc *iloc)
3076 { 3066 {
3077 struct ext3_inode *raw_inode = ext3_raw_inode(iloc); 3067 struct ext3_inode *raw_inode = ext3_raw_inode(iloc);
3078 struct ext3_inode_info *ei = EXT3_I(inode); 3068 struct ext3_inode_info *ei = EXT3_I(inode);
3079 struct buffer_head *bh = iloc->bh; 3069 struct buffer_head *bh = iloc->bh;
3080 int err = 0, rc, block; 3070 int err = 0, rc, block;
3081 3071
3082 again: 3072 again:
3083 /* we can't allow multiple procs in here at once, its a bit racey */ 3073 /* we can't allow multiple procs in here at once, its a bit racey */
3084 lock_buffer(bh); 3074 lock_buffer(bh);
3085 3075
3086 /* For fields not not tracking in the in-memory inode, 3076 /* For fields not not tracking in the in-memory inode,
3087 * initialise them to zero for new inodes. */ 3077 * initialise them to zero for new inodes. */
3088 if (ext3_test_inode_state(inode, EXT3_STATE_NEW)) 3078 if (ext3_test_inode_state(inode, EXT3_STATE_NEW))
3089 memset(raw_inode, 0, EXT3_SB(inode->i_sb)->s_inode_size); 3079 memset(raw_inode, 0, EXT3_SB(inode->i_sb)->s_inode_size);
3090 3080
3091 ext3_get_inode_flags(ei); 3081 ext3_get_inode_flags(ei);
3092 raw_inode->i_mode = cpu_to_le16(inode->i_mode); 3082 raw_inode->i_mode = cpu_to_le16(inode->i_mode);
3093 if(!(test_opt(inode->i_sb, NO_UID32))) { 3083 if(!(test_opt(inode->i_sb, NO_UID32))) {
3094 raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); 3084 raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid));
3095 raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid)); 3085 raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid));
3096 /* 3086 /*
3097 * Fix up interoperability with old kernels. Otherwise, old inodes get 3087 * Fix up interoperability with old kernels. Otherwise, old inodes get
3098 * re-used with the upper 16 bits of the uid/gid intact 3088 * re-used with the upper 16 bits of the uid/gid intact
3099 */ 3089 */
3100 if(!ei->i_dtime) { 3090 if(!ei->i_dtime) {
3101 raw_inode->i_uid_high = 3091 raw_inode->i_uid_high =
3102 cpu_to_le16(high_16_bits(inode->i_uid)); 3092 cpu_to_le16(high_16_bits(inode->i_uid));
3103 raw_inode->i_gid_high = 3093 raw_inode->i_gid_high =
3104 cpu_to_le16(high_16_bits(inode->i_gid)); 3094 cpu_to_le16(high_16_bits(inode->i_gid));
3105 } else { 3095 } else {
3106 raw_inode->i_uid_high = 0; 3096 raw_inode->i_uid_high = 0;
3107 raw_inode->i_gid_high = 0; 3097 raw_inode->i_gid_high = 0;
3108 } 3098 }
3109 } else { 3099 } else {
3110 raw_inode->i_uid_low = 3100 raw_inode->i_uid_low =
3111 cpu_to_le16(fs_high2lowuid(inode->i_uid)); 3101 cpu_to_le16(fs_high2lowuid(inode->i_uid));
3112 raw_inode->i_gid_low = 3102 raw_inode->i_gid_low =
3113 cpu_to_le16(fs_high2lowgid(inode->i_gid)); 3103 cpu_to_le16(fs_high2lowgid(inode->i_gid));
3114 raw_inode->i_uid_high = 0; 3104 raw_inode->i_uid_high = 0;
3115 raw_inode->i_gid_high = 0; 3105 raw_inode->i_gid_high = 0;
3116 } 3106 }
3117 raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); 3107 raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
3118 raw_inode->i_size = cpu_to_le32(ei->i_disksize); 3108 raw_inode->i_size = cpu_to_le32(ei->i_disksize);
3119 raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); 3109 raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
3120 raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); 3110 raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
3121 raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); 3111 raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
3122 raw_inode->i_blocks = cpu_to_le32(inode->i_blocks); 3112 raw_inode->i_blocks = cpu_to_le32(inode->i_blocks);
3123 raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); 3113 raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
3124 raw_inode->i_flags = cpu_to_le32(ei->i_flags); 3114 raw_inode->i_flags = cpu_to_le32(ei->i_flags);
3125 #ifdef EXT3_FRAGMENTS 3115 #ifdef EXT3_FRAGMENTS
3126 raw_inode->i_faddr = cpu_to_le32(ei->i_faddr); 3116 raw_inode->i_faddr = cpu_to_le32(ei->i_faddr);
3127 raw_inode->i_frag = ei->i_frag_no; 3117 raw_inode->i_frag = ei->i_frag_no;
3128 raw_inode->i_fsize = ei->i_frag_size; 3118 raw_inode->i_fsize = ei->i_frag_size;
3129 #endif 3119 #endif
3130 raw_inode->i_file_acl = cpu_to_le32(ei->i_file_acl); 3120 raw_inode->i_file_acl = cpu_to_le32(ei->i_file_acl);
3131 if (!S_ISREG(inode->i_mode)) { 3121 if (!S_ISREG(inode->i_mode)) {
3132 raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl); 3122 raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl);
3133 } else { 3123 } else {
3134 raw_inode->i_size_high = 3124 raw_inode->i_size_high =
3135 cpu_to_le32(ei->i_disksize >> 32); 3125 cpu_to_le32(ei->i_disksize >> 32);
3136 if (ei->i_disksize > 0x7fffffffULL) { 3126 if (ei->i_disksize > 0x7fffffffULL) {
3137 struct super_block *sb = inode->i_sb; 3127 struct super_block *sb = inode->i_sb;
3138 if (!EXT3_HAS_RO_COMPAT_FEATURE(sb, 3128 if (!EXT3_HAS_RO_COMPAT_FEATURE(sb,
3139 EXT3_FEATURE_RO_COMPAT_LARGE_FILE) || 3129 EXT3_FEATURE_RO_COMPAT_LARGE_FILE) ||
3140 EXT3_SB(sb)->s_es->s_rev_level == 3130 EXT3_SB(sb)->s_es->s_rev_level ==
3141 cpu_to_le32(EXT3_GOOD_OLD_REV)) { 3131 cpu_to_le32(EXT3_GOOD_OLD_REV)) {
3142 /* If this is the first large file 3132 /* If this is the first large file
3143 * created, add a flag to the superblock. 3133 * created, add a flag to the superblock.
3144 */ 3134 */
3145 unlock_buffer(bh); 3135 unlock_buffer(bh);
3146 err = ext3_journal_get_write_access(handle, 3136 err = ext3_journal_get_write_access(handle,
3147 EXT3_SB(sb)->s_sbh); 3137 EXT3_SB(sb)->s_sbh);
3148 if (err) 3138 if (err)
3149 goto out_brelse; 3139 goto out_brelse;
3150 3140
3151 ext3_update_dynamic_rev(sb); 3141 ext3_update_dynamic_rev(sb);
3152 EXT3_SET_RO_COMPAT_FEATURE(sb, 3142 EXT3_SET_RO_COMPAT_FEATURE(sb,
3153 EXT3_FEATURE_RO_COMPAT_LARGE_FILE); 3143 EXT3_FEATURE_RO_COMPAT_LARGE_FILE);
3154 handle->h_sync = 1; 3144 handle->h_sync = 1;
3155 err = ext3_journal_dirty_metadata(handle, 3145 err = ext3_journal_dirty_metadata(handle,
3156 EXT3_SB(sb)->s_sbh); 3146 EXT3_SB(sb)->s_sbh);
3157 /* get our lock and start over */ 3147 /* get our lock and start over */
3158 goto again; 3148 goto again;
3159 } 3149 }
3160 } 3150 }
3161 } 3151 }
3162 raw_inode->i_generation = cpu_to_le32(inode->i_generation); 3152 raw_inode->i_generation = cpu_to_le32(inode->i_generation);
3163 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { 3153 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
3164 if (old_valid_dev(inode->i_rdev)) { 3154 if (old_valid_dev(inode->i_rdev)) {
3165 raw_inode->i_block[0] = 3155 raw_inode->i_block[0] =
3166 cpu_to_le32(old_encode_dev(inode->i_rdev)); 3156 cpu_to_le32(old_encode_dev(inode->i_rdev));
3167 raw_inode->i_block[1] = 0; 3157 raw_inode->i_block[1] = 0;
3168 } else { 3158 } else {
3169 raw_inode->i_block[0] = 0; 3159 raw_inode->i_block[0] = 0;
3170 raw_inode->i_block[1] = 3160 raw_inode->i_block[1] =
3171 cpu_to_le32(new_encode_dev(inode->i_rdev)); 3161 cpu_to_le32(new_encode_dev(inode->i_rdev));
3172 raw_inode->i_block[2] = 0; 3162 raw_inode->i_block[2] = 0;
3173 } 3163 }
3174 } else for (block = 0; block < EXT3_N_BLOCKS; block++) 3164 } else for (block = 0; block < EXT3_N_BLOCKS; block++)
3175 raw_inode->i_block[block] = ei->i_data[block]; 3165 raw_inode->i_block[block] = ei->i_data[block];
3176 3166
3177 if (ei->i_extra_isize) 3167 if (ei->i_extra_isize)
3178 raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); 3168 raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize);
3179 3169
3180 BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); 3170 BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
3181 unlock_buffer(bh); 3171 unlock_buffer(bh);
3182 rc = ext3_journal_dirty_metadata(handle, bh); 3172 rc = ext3_journal_dirty_metadata(handle, bh);
3183 if (!err) 3173 if (!err)
3184 err = rc; 3174 err = rc;
3185 ext3_clear_inode_state(inode, EXT3_STATE_NEW); 3175 ext3_clear_inode_state(inode, EXT3_STATE_NEW);
3186 3176
3187 atomic_set(&ei->i_sync_tid, handle->h_transaction->t_tid); 3177 atomic_set(&ei->i_sync_tid, handle->h_transaction->t_tid);
3188 out_brelse: 3178 out_brelse:
3189 brelse (bh); 3179 brelse (bh);
3190 ext3_std_error(inode->i_sb, err); 3180 ext3_std_error(inode->i_sb, err);
3191 return err; 3181 return err;
3192 } 3182 }
3193 3183
3194 /* 3184 /*
3195 * ext3_write_inode() 3185 * ext3_write_inode()
3196 * 3186 *
3197 * We are called from a few places: 3187 * We are called from a few places:
3198 * 3188 *
3199 * - Within generic_file_write() for O_SYNC files. 3189 * - Within generic_file_write() for O_SYNC files.
3200 * Here, there will be no transaction running. We wait for any running 3190 * Here, there will be no transaction running. We wait for any running
3201 * trasnaction to commit. 3191 * trasnaction to commit.
3202 * 3192 *
3203 * - Within sys_sync(), kupdate and such. 3193 * - Within sys_sync(), kupdate and such.
3204 * We wait on commit, if tol to. 3194 * We wait on commit, if tol to.
3205 * 3195 *
3206 * - Within prune_icache() (PF_MEMALLOC == true) 3196 * - Within prune_icache() (PF_MEMALLOC == true)
3207 * Here we simply return. We can't afford to block kswapd on the 3197 * Here we simply return. We can't afford to block kswapd on the
3208 * journal commit. 3198 * journal commit.
3209 * 3199 *
3210 * In all cases it is actually safe for us to return without doing anything, 3200 * In all cases it is actually safe for us to return without doing anything,
3211 * because the inode has been copied into a raw inode buffer in 3201 * because the inode has been copied into a raw inode buffer in
3212 * ext3_mark_inode_dirty(). This is a correctness thing for O_SYNC and for 3202 * ext3_mark_inode_dirty(). This is a correctness thing for O_SYNC and for
3213 * knfsd. 3203 * knfsd.
3214 * 3204 *
3215 * Note that we are absolutely dependent upon all inode dirtiers doing the 3205 * Note that we are absolutely dependent upon all inode dirtiers doing the
3216 * right thing: they *must* call mark_inode_dirty() after dirtying info in 3206 * right thing: they *must* call mark_inode_dirty() after dirtying info in
3217 * which we are interested. 3207 * which we are interested.
3218 * 3208 *
3219 * It would be a bug for them to not do this. The code: 3209 * It would be a bug for them to not do this. The code:
3220 * 3210 *
3221 * mark_inode_dirty(inode) 3211 * mark_inode_dirty(inode)
3222 * stuff(); 3212 * stuff();
3223 * inode->i_size = expr; 3213 * inode->i_size = expr;
3224 * 3214 *
3225 * is in error because a kswapd-driven write_inode() could occur while 3215 * is in error because a kswapd-driven write_inode() could occur while
3226 * `stuff()' is running, and the new i_size will be lost. Plus the inode 3216 * `stuff()' is running, and the new i_size will be lost. Plus the inode
3227 * will no longer be on the superblock's dirty inode list. 3217 * will no longer be on the superblock's dirty inode list.
3228 */ 3218 */
3229 int ext3_write_inode(struct inode *inode, struct writeback_control *wbc) 3219 int ext3_write_inode(struct inode *inode, struct writeback_control *wbc)
3230 { 3220 {
3231 if (current->flags & PF_MEMALLOC) 3221 if (current->flags & PF_MEMALLOC)
3232 return 0; 3222 return 0;
3233 3223
3234 if (ext3_journal_current_handle()) { 3224 if (ext3_journal_current_handle()) {
3235 jbd_debug(1, "called recursively, non-PF_MEMALLOC!\n"); 3225 jbd_debug(1, "called recursively, non-PF_MEMALLOC!\n");
3236 dump_stack(); 3226 dump_stack();
3237 return -EIO; 3227 return -EIO;
3238 } 3228 }
3239 3229
3240 if (wbc->sync_mode != WB_SYNC_ALL) 3230 if (wbc->sync_mode != WB_SYNC_ALL)
3241 return 0; 3231 return 0;
3242 3232
3243 return ext3_force_commit(inode->i_sb); 3233 return ext3_force_commit(inode->i_sb);
3244 } 3234 }
3245 3235
3246 /* 3236 /*
3247 * ext3_setattr() 3237 * ext3_setattr()
3248 * 3238 *
3249 * Called from notify_change. 3239 * Called from notify_change.
3250 * 3240 *
3251 * We want to trap VFS attempts to truncate the file as soon as 3241 * We want to trap VFS attempts to truncate the file as soon as
3252 * possible. In particular, we want to make sure that when the VFS 3242 * possible. In particular, we want to make sure that when the VFS
3253 * shrinks i_size, we put the inode on the orphan list and modify 3243 * shrinks i_size, we put the inode on the orphan list and modify
3254 * i_disksize immediately, so that during the subsequent flushing of 3244 * i_disksize immediately, so that during the subsequent flushing of
3255 * dirty pages and freeing of disk blocks, we can guarantee that any 3245 * dirty pages and freeing of disk blocks, we can guarantee that any
3256 * commit will leave the blocks being flushed in an unused state on 3246 * commit will leave the blocks being flushed in an unused state on
3257 * disk. (On recovery, the inode will get truncated and the blocks will 3247 * disk. (On recovery, the inode will get truncated and the blocks will
3258 * be freed, so we have a strong guarantee that no future commit will 3248 * be freed, so we have a strong guarantee that no future commit will
3259 * leave these blocks visible to the user.) 3249 * leave these blocks visible to the user.)
3260 * 3250 *
3261 * Called with inode->sem down. 3251 * Called with inode->sem down.
3262 */ 3252 */
3263 int ext3_setattr(struct dentry *dentry, struct iattr *attr) 3253 int ext3_setattr(struct dentry *dentry, struct iattr *attr)
3264 { 3254 {
3265 struct inode *inode = dentry->d_inode; 3255 struct inode *inode = dentry->d_inode;
3266 int error, rc = 0; 3256 int error, rc = 0;
3267 const unsigned int ia_valid = attr->ia_valid; 3257 const unsigned int ia_valid = attr->ia_valid;
3268 3258
3269 error = inode_change_ok(inode, attr); 3259 error = inode_change_ok(inode, attr);
3270 if (error) 3260 if (error)
3271 return error; 3261 return error;
3272 3262
3273 if (is_quota_modification(inode, attr)) 3263 if (is_quota_modification(inode, attr))
3274 dquot_initialize(inode); 3264 dquot_initialize(inode);
3275 if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || 3265 if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
3276 (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { 3266 (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
3277 handle_t *handle; 3267 handle_t *handle;
3278 3268
3279 /* (user+group)*(old+new) structure, inode write (sb, 3269 /* (user+group)*(old+new) structure, inode write (sb,
3280 * inode block, ? - but truncate inode update has it) */ 3270 * inode block, ? - but truncate inode update has it) */
3281 handle = ext3_journal_start(inode, EXT3_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+ 3271 handle = ext3_journal_start(inode, EXT3_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+
3282 EXT3_MAXQUOTAS_DEL_BLOCKS(inode->i_sb)+3); 3272 EXT3_MAXQUOTAS_DEL_BLOCKS(inode->i_sb)+3);
3283 if (IS_ERR(handle)) { 3273 if (IS_ERR(handle)) {
3284 error = PTR_ERR(handle); 3274 error = PTR_ERR(handle);
3285 goto err_out; 3275 goto err_out;
3286 } 3276 }
3287 error = dquot_transfer(inode, attr); 3277 error = dquot_transfer(inode, attr);
3288 if (error) { 3278 if (error) {
3289 ext3_journal_stop(handle); 3279 ext3_journal_stop(handle);
3290 return error; 3280 return error;
3291 } 3281 }
3292 /* Update corresponding info in inode so that everything is in 3282 /* Update corresponding info in inode so that everything is in
3293 * one transaction */ 3283 * one transaction */
3294 if (attr->ia_valid & ATTR_UID) 3284 if (attr->ia_valid & ATTR_UID)
3295 inode->i_uid = attr->ia_uid; 3285 inode->i_uid = attr->ia_uid;
3296 if (attr->ia_valid & ATTR_GID) 3286 if (attr->ia_valid & ATTR_GID)
3297 inode->i_gid = attr->ia_gid; 3287 inode->i_gid = attr->ia_gid;
3298 error = ext3_mark_inode_dirty(handle, inode); 3288 error = ext3_mark_inode_dirty(handle, inode);
3299 ext3_journal_stop(handle); 3289 ext3_journal_stop(handle);
3300 } 3290 }
3301 3291
3302 if (attr->ia_valid & ATTR_SIZE) 3292 if (attr->ia_valid & ATTR_SIZE)
3303 inode_dio_wait(inode); 3293 inode_dio_wait(inode);
3304 3294
3305 if (S_ISREG(inode->i_mode) && 3295 if (S_ISREG(inode->i_mode) &&
3306 attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) { 3296 attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) {
3307 handle_t *handle; 3297 handle_t *handle;
3308 3298
3309 handle = ext3_journal_start(inode, 3); 3299 handle = ext3_journal_start(inode, 3);
3310 if (IS_ERR(handle)) { 3300 if (IS_ERR(handle)) {
3311 error = PTR_ERR(handle); 3301 error = PTR_ERR(handle);
3312 goto err_out; 3302 goto err_out;
3313 } 3303 }
3314 3304
3315 error = ext3_orphan_add(handle, inode); 3305 error = ext3_orphan_add(handle, inode);
3316 if (error) { 3306 if (error) {
3317 ext3_journal_stop(handle); 3307 ext3_journal_stop(handle);
3318 goto err_out; 3308 goto err_out;
3319 } 3309 }
3320 EXT3_I(inode)->i_disksize = attr->ia_size; 3310 EXT3_I(inode)->i_disksize = attr->ia_size;
3321 error = ext3_mark_inode_dirty(handle, inode); 3311 error = ext3_mark_inode_dirty(handle, inode);
3322 ext3_journal_stop(handle); 3312 ext3_journal_stop(handle);
3323 if (error) { 3313 if (error) {
3324 /* Some hard fs error must have happened. Bail out. */ 3314 /* Some hard fs error must have happened. Bail out. */
3325 ext3_orphan_del(NULL, inode); 3315 ext3_orphan_del(NULL, inode);
3326 goto err_out; 3316 goto err_out;
3327 } 3317 }
3328 rc = ext3_block_truncate_page(inode, attr->ia_size); 3318 rc = ext3_block_truncate_page(inode, attr->ia_size);
3329 if (rc) { 3319 if (rc) {
3330 /* Cleanup orphan list and exit */ 3320 /* Cleanup orphan list and exit */
3331 handle = ext3_journal_start(inode, 3); 3321 handle = ext3_journal_start(inode, 3);
3332 if (IS_ERR(handle)) { 3322 if (IS_ERR(handle)) {
3333 ext3_orphan_del(NULL, inode); 3323 ext3_orphan_del(NULL, inode);
3334 goto err_out; 3324 goto err_out;
3335 } 3325 }
3336 ext3_orphan_del(handle, inode); 3326 ext3_orphan_del(handle, inode);
3337 ext3_journal_stop(handle); 3327 ext3_journal_stop(handle);
3338 goto err_out; 3328 goto err_out;
3339 } 3329 }
3340 } 3330 }
3341 3331
3342 if ((attr->ia_valid & ATTR_SIZE) && 3332 if ((attr->ia_valid & ATTR_SIZE) &&
3343 attr->ia_size != i_size_read(inode)) { 3333 attr->ia_size != i_size_read(inode)) {
3344 truncate_setsize(inode, attr->ia_size); 3334 truncate_setsize(inode, attr->ia_size);
3345 ext3_truncate(inode); 3335 ext3_truncate(inode);
3346 } 3336 }
3347 3337
3348 setattr_copy(inode, attr); 3338 setattr_copy(inode, attr);
3349 mark_inode_dirty(inode); 3339 mark_inode_dirty(inode);
3350 3340
3351 if (ia_valid & ATTR_MODE) 3341 if (ia_valid & ATTR_MODE)
3352 rc = ext3_acl_chmod(inode); 3342 rc = ext3_acl_chmod(inode);
3353 3343
3354 err_out: 3344 err_out:
3355 ext3_std_error(inode->i_sb, error); 3345 ext3_std_error(inode->i_sb, error);
3356 if (!error) 3346 if (!error)
3357 error = rc; 3347 error = rc;
3358 return error; 3348 return error;
3359 } 3349 }
3360 3350
3361 3351
3362 /* 3352 /*
3363 * How many blocks doth make a writepage()? 3353 * How many blocks doth make a writepage()?
3364 * 3354 *
3365 * With N blocks per page, it may be: 3355 * With N blocks per page, it may be:
3366 * N data blocks 3356 * N data blocks
3367 * 2 indirect block 3357 * 2 indirect block
3368 * 2 dindirect 3358 * 2 dindirect
3369 * 1 tindirect 3359 * 1 tindirect
3370 * N+5 bitmap blocks (from the above) 3360 * N+5 bitmap blocks (from the above)
3371 * N+5 group descriptor summary blocks 3361 * N+5 group descriptor summary blocks
3372 * 1 inode block 3362 * 1 inode block
3373 * 1 superblock. 3363 * 1 superblock.
3374 * 2 * EXT3_SINGLEDATA_TRANS_BLOCKS for the quote files 3364 * 2 * EXT3_SINGLEDATA_TRANS_BLOCKS for the quote files
3375 * 3365 *
3376 * 3 * (N + 5) + 2 + 2 * EXT3_SINGLEDATA_TRANS_BLOCKS 3366 * 3 * (N + 5) + 2 + 2 * EXT3_SINGLEDATA_TRANS_BLOCKS
3377 * 3367 *
3378 * With ordered or writeback data it's the same, less the N data blocks. 3368 * With ordered or writeback data it's the same, less the N data blocks.
3379 * 3369 *
3380 * If the inode's direct blocks can hold an integral number of pages then a 3370 * If the inode's direct blocks can hold an integral number of pages then a
3381 * page cannot straddle two indirect blocks, and we can only touch one indirect 3371 * page cannot straddle two indirect blocks, and we can only touch one indirect
3382 * and dindirect block, and the "5" above becomes "3". 3372 * and dindirect block, and the "5" above becomes "3".
3383 * 3373 *
3384 * This still overestimates under most circumstances. If we were to pass the 3374 * This still overestimates under most circumstances. If we were to pass the
3385 * start and end offsets in here as well we could do block_to_path() on each 3375 * start and end offsets in here as well we could do block_to_path() on each
3386 * block and work out the exact number of indirects which are touched. Pah. 3376 * block and work out the exact number of indirects which are touched. Pah.
3387 */ 3377 */
3388 3378
3389 static int ext3_writepage_trans_blocks(struct inode *inode) 3379 static int ext3_writepage_trans_blocks(struct inode *inode)
3390 { 3380 {
3391 int bpp = ext3_journal_blocks_per_page(inode); 3381 int bpp = ext3_journal_blocks_per_page(inode);
3392 int indirects = (EXT3_NDIR_BLOCKS % bpp) ? 5 : 3; 3382 int indirects = (EXT3_NDIR_BLOCKS % bpp) ? 5 : 3;
3393 int ret; 3383 int ret;
3394 3384
3395 if (ext3_should_journal_data(inode)) 3385 if (ext3_should_journal_data(inode))
3396 ret = 3 * (bpp + indirects) + 2; 3386 ret = 3 * (bpp + indirects) + 2;
3397 else 3387 else
3398 ret = 2 * (bpp + indirects) + indirects + 2; 3388 ret = 2 * (bpp + indirects) + indirects + 2;
3399 3389
3400 #ifdef CONFIG_QUOTA 3390 #ifdef CONFIG_QUOTA
3401 /* We know that structure was already allocated during dquot_initialize so 3391 /* We know that structure was already allocated during dquot_initialize so
3402 * we will be updating only the data blocks + inodes */ 3392 * we will be updating only the data blocks + inodes */
3403 ret += EXT3_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb); 3393 ret += EXT3_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
3404 #endif 3394 #endif
3405 3395
3406 return ret; 3396 return ret;
3407 } 3397 }
3408 3398
3409 /* 3399 /*
3410 * The caller must have previously called ext3_reserve_inode_write(). 3400 * The caller must have previously called ext3_reserve_inode_write().
3411 * Give this, we know that the caller already has write access to iloc->bh. 3401 * Give this, we know that the caller already has write access to iloc->bh.
3412 */ 3402 */
3413 int ext3_mark_iloc_dirty(handle_t *handle, 3403 int ext3_mark_iloc_dirty(handle_t *handle,
3414 struct inode *inode, struct ext3_iloc *iloc) 3404 struct inode *inode, struct ext3_iloc *iloc)
3415 { 3405 {
3416 int err = 0; 3406 int err = 0;
3417 3407
3418 /* the do_update_inode consumes one bh->b_count */ 3408 /* the do_update_inode consumes one bh->b_count */
3419 get_bh(iloc->bh); 3409 get_bh(iloc->bh);
3420 3410
3421 /* ext3_do_update_inode() does journal_dirty_metadata */ 3411 /* ext3_do_update_inode() does journal_dirty_metadata */
3422 err = ext3_do_update_inode(handle, inode, iloc); 3412 err = ext3_do_update_inode(handle, inode, iloc);
3423 put_bh(iloc->bh); 3413 put_bh(iloc->bh);
3424 return err; 3414 return err;
3425 } 3415 }
3426 3416
3427 /* 3417 /*
3428 * On success, We end up with an outstanding reference count against 3418 * On success, We end up with an outstanding reference count against
3429 * iloc->bh. This _must_ be cleaned up later. 3419 * iloc->bh. This _must_ be cleaned up later.
3430 */ 3420 */
3431 3421
3432 int 3422 int
3433 ext3_reserve_inode_write(handle_t *handle, struct inode *inode, 3423 ext3_reserve_inode_write(handle_t *handle, struct inode *inode,
3434 struct ext3_iloc *iloc) 3424 struct ext3_iloc *iloc)
3435 { 3425 {
3436 int err = 0; 3426 int err = 0;
3437 if (handle) { 3427 if (handle) {
3438 err = ext3_get_inode_loc(inode, iloc); 3428 err = ext3_get_inode_loc(inode, iloc);
3439 if (!err) { 3429 if (!err) {
3440 BUFFER_TRACE(iloc->bh, "get_write_access"); 3430 BUFFER_TRACE(iloc->bh, "get_write_access");
3441 err = ext3_journal_get_write_access(handle, iloc->bh); 3431 err = ext3_journal_get_write_access(handle, iloc->bh);
3442 if (err) { 3432 if (err) {
3443 brelse(iloc->bh); 3433 brelse(iloc->bh);
3444 iloc->bh = NULL; 3434 iloc->bh = NULL;
3445 } 3435 }
3446 } 3436 }
3447 } 3437 }
3448 ext3_std_error(inode->i_sb, err); 3438 ext3_std_error(inode->i_sb, err);
3449 return err; 3439 return err;
3450 } 3440 }
3451 3441
3452 /* 3442 /*
3453 * What we do here is to mark the in-core inode as clean with respect to inode 3443 * What we do here is to mark the in-core inode as clean with respect to inode
3454 * dirtiness (it may still be data-dirty). 3444 * dirtiness (it may still be data-dirty).
3455 * This means that the in-core inode may be reaped by prune_icache 3445 * This means that the in-core inode may be reaped by prune_icache
3456 * without having to perform any I/O. This is a very good thing, 3446 * without having to perform any I/O. This is a very good thing,
3457 * because *any* task may call prune_icache - even ones which 3447 * because *any* task may call prune_icache - even ones which
3458 * have a transaction open against a different journal. 3448 * have a transaction open against a different journal.
3459 * 3449 *
3460 * Is this cheating? Not really. Sure, we haven't written the 3450 * Is this cheating? Not really. Sure, we haven't written the
3461 * inode out, but prune_icache isn't a user-visible syncing function. 3451 * inode out, but prune_icache isn't a user-visible syncing function.
3462 * Whenever the user wants stuff synced (sys_sync, sys_msync, sys_fsync) 3452 * Whenever the user wants stuff synced (sys_sync, sys_msync, sys_fsync)
3463 * we start and wait on commits. 3453 * we start and wait on commits.
3464 * 3454 *
3465 * Is this efficient/effective? Well, we're being nice to the system 3455 * Is this efficient/effective? Well, we're being nice to the system
3466 * by cleaning up our inodes proactively so they can be reaped 3456 * by cleaning up our inodes proactively so they can be reaped
3467 * without I/O. But we are potentially leaving up to five seconds' 3457 * without I/O. But we are potentially leaving up to five seconds'
3468 * worth of inodes floating about which prune_icache wants us to 3458 * worth of inodes floating about which prune_icache wants us to
3469 * write out. One way to fix that would be to get prune_icache() 3459 * write out. One way to fix that would be to get prune_icache()
3470 * to do a write_super() to free up some memory. It has the desired 3460 * to do a write_super() to free up some memory. It has the desired
3471 * effect. 3461 * effect.
3472 */ 3462 */
3473 int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode) 3463 int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode)
3474 { 3464 {
3475 struct ext3_iloc iloc; 3465 struct ext3_iloc iloc;
3476 int err; 3466 int err;
3477 3467
3478 might_sleep(); 3468 might_sleep();
3479 trace_ext3_mark_inode_dirty(inode, _RET_IP_); 3469 trace_ext3_mark_inode_dirty(inode, _RET_IP_);
3480 err = ext3_reserve_inode_write(handle, inode, &iloc); 3470 err = ext3_reserve_inode_write(handle, inode, &iloc);
3481 if (!err) 3471 if (!err)
3482 err = ext3_mark_iloc_dirty(handle, inode, &iloc); 3472 err = ext3_mark_iloc_dirty(handle, inode, &iloc);
3483 return err; 3473 return err;
3484 } 3474 }
3485 3475
3486 /* 3476 /*
3487 * ext3_dirty_inode() is called from __mark_inode_dirty() 3477 * ext3_dirty_inode() is called from __mark_inode_dirty()
3488 * 3478 *
3489 * We're really interested in the case where a file is being extended. 3479 * We're really interested in the case where a file is being extended.
3490 * i_size has been changed by generic_commit_write() and we thus need 3480 * i_size has been changed by generic_commit_write() and we thus need
3491 * to include the updated inode in the current transaction. 3481 * to include the updated inode in the current transaction.
3492 * 3482 *
3493 * Also, dquot_alloc_space() will always dirty the inode when blocks 3483 * Also, dquot_alloc_space() will always dirty the inode when blocks
3494 * are allocated to the file. 3484 * are allocated to the file.
3495 * 3485 *
3496 * If the inode is marked synchronous, we don't honour that here - doing 3486 * If the inode is marked synchronous, we don't honour that here - doing
3497 * so would cause a commit on atime updates, which we don't bother doing. 3487 * so would cause a commit on atime updates, which we don't bother doing.
3498 * We handle synchronous inodes at the highest possible level. 3488 * We handle synchronous inodes at the highest possible level.
3499 */ 3489 */
3500 void ext3_dirty_inode(struct inode *inode, int flags) 3490 void ext3_dirty_inode(struct inode *inode, int flags)
3501 { 3491 {
3502 handle_t *current_handle = ext3_journal_current_handle(); 3492 handle_t *current_handle = ext3_journal_current_handle();
3503 handle_t *handle; 3493 handle_t *handle;
3504 3494
3505 handle = ext3_journal_start(inode, 2); 3495 handle = ext3_journal_start(inode, 2);
3506 if (IS_ERR(handle)) 3496 if (IS_ERR(handle))
3507 goto out; 3497 goto out;
3508 if (current_handle && 3498 if (current_handle &&
3509 current_handle->h_transaction != handle->h_transaction) { 3499 current_handle->h_transaction != handle->h_transaction) {
3510 /* This task has a transaction open against a different fs */ 3500 /* This task has a transaction open against a different fs */
3511 printk(KERN_EMERG "%s: transactions do not match!\n", 3501 printk(KERN_EMERG "%s: transactions do not match!\n",
3512 __func__); 3502 __func__);
3513 } else { 3503 } else {
3514 jbd_debug(5, "marking dirty. outer handle=%p\n", 3504 jbd_debug(5, "marking dirty. outer handle=%p\n",
3515 current_handle); 3505 current_handle);
3516 ext3_mark_inode_dirty(handle, inode); 3506 ext3_mark_inode_dirty(handle, inode);
3517 } 3507 }
3518 ext3_journal_stop(handle); 3508 ext3_journal_stop(handle);
3519 out: 3509 out:
3520 return; 3510 return;
3521 } 3511 }
3522 3512
3523 #if 0 3513 #if 0
3524 /* 3514 /*
3525 * Bind an inode's backing buffer_head into this transaction, to prevent 3515 * Bind an inode's backing buffer_head into this transaction, to prevent
3526 * it from being flushed to disk early. Unlike 3516 * it from being flushed to disk early. Unlike
3527 * ext3_reserve_inode_write, this leaves behind no bh reference and 3517 * ext3_reserve_inode_write, this leaves behind no bh reference and
3528 * returns no iloc structure, so the caller needs to repeat the iloc 3518 * returns no iloc structure, so the caller needs to repeat the iloc
3529 * lookup to mark the inode dirty later. 3519 * lookup to mark the inode dirty later.
3530 */ 3520 */
3531 static int ext3_pin_inode(handle_t *handle, struct inode *inode) 3521 static int ext3_pin_inode(handle_t *handle, struct inode *inode)
3532 { 3522 {
3533 struct ext3_iloc iloc; 3523 struct ext3_iloc iloc;
3534 3524
3535 int err = 0; 3525 int err = 0;
3536 if (handle) { 3526 if (handle) {
3537 err = ext3_get_inode_loc(inode, &iloc); 3527 err = ext3_get_inode_loc(inode, &iloc);
3538 if (!err) { 3528 if (!err) {
3539 BUFFER_TRACE(iloc.bh, "get_write_access"); 3529 BUFFER_TRACE(iloc.bh, "get_write_access");
3540 err = journal_get_write_access(handle, iloc.bh); 3530 err = journal_get_write_access(handle, iloc.bh);
3541 if (!err) 3531 if (!err)
3542 err = ext3_journal_dirty_metadata(handle, 3532 err = ext3_journal_dirty_metadata(handle,
3543 iloc.bh); 3533 iloc.bh);
3544 brelse(iloc.bh); 3534 brelse(iloc.bh);
3545 } 3535 }
3546 } 3536 }
3547 ext3_std_error(inode->i_sb, err); 3537 ext3_std_error(inode->i_sb, err);
3548 return err; 3538 return err;
3549 } 3539 }
3550 #endif 3540 #endif
3551 3541
3552 int ext3_change_inode_journal_flag(struct inode *inode, int val) 3542 int ext3_change_inode_journal_flag(struct inode *inode, int val)
3553 { 3543 {
3554 journal_t *journal; 3544 journal_t *journal;
3555 handle_t *handle; 3545 handle_t *handle;
3556 int err; 3546 int err;
3557 3547
3558 /* 3548 /*
3559 * We have to be very careful here: changing a data block's 3549 * We have to be very careful here: changing a data block's
3560 * journaling status dynamically is dangerous. If we write a 3550 * journaling status dynamically is dangerous. If we write a
3561 * data block to the journal, change the status and then delete 3551 * data block to the journal, change the status and then delete
3562 * that block, we risk forgetting to revoke the old log record 3552 * that block, we risk forgetting to revoke the old log record
3563 * from the journal and so a subsequent replay can corrupt data. 3553 * from the journal and so a subsequent replay can corrupt data.
3564 * So, first we make sure that the journal is empty and that 3554 * So, first we make sure that the journal is empty and that
3565 * nobody is changing anything. 3555 * nobody is changing anything.
3566 */ 3556 */
3567 3557
3568 journal = EXT3_JOURNAL(inode); 3558 journal = EXT3_JOURNAL(inode);
3569 if (is_journal_aborted(journal)) 3559 if (is_journal_aborted(journal))
3570 return -EROFS; 3560 return -EROFS;
3571 3561
3572 journal_lock_updates(journal); 3562 journal_lock_updates(journal);
3573 journal_flush(journal); 3563 journal_flush(journal);
3574 3564
3575 /* 3565 /*
3576 * OK, there are no updates running now, and all cached data is 3566 * OK, there are no updates running now, and all cached data is
3577 * synced to disk. We are now in a completely consistent state 3567 * synced to disk. We are now in a completely consistent state
3578 * which doesn't have anything in the journal, and we know that 3568 * which doesn't have anything in the journal, and we know that
3579 * no filesystem updates are running, so it is safe to modify 3569 * no filesystem updates are running, so it is safe to modify
3580 * the inode's in-core data-journaling state flag now. 3570 * the inode's in-core data-journaling state flag now.
3581 */ 3571 */
3582 3572
3583 if (val) 3573 if (val)
3584 EXT3_I(inode)->i_flags |= EXT3_JOURNAL_DATA_FL; 3574 EXT3_I(inode)->i_flags |= EXT3_JOURNAL_DATA_FL;
3585 else 3575 else
3586 EXT3_I(inode)->i_flags &= ~EXT3_JOURNAL_DATA_FL; 3576 EXT3_I(inode)->i_flags &= ~EXT3_JOURNAL_DATA_FL;
3587 ext3_set_aops(inode); 3577 ext3_set_aops(inode);
3588 3578
3589 journal_unlock_updates(journal); 3579 journal_unlock_updates(journal);
3590 3580
3591 /* Finally we can mark the inode as dirty. */ 3581 /* Finally we can mark the inode as dirty. */
3592 3582
3593 handle = ext3_journal_start(inode, 1); 3583 handle = ext3_journal_start(inode, 1);
3594 if (IS_ERR(handle)) 3584 if (IS_ERR(handle))
3595 return PTR_ERR(handle); 3585 return PTR_ERR(handle);
3596 3586
3597 err = ext3_mark_inode_dirty(handle, inode); 3587 err = ext3_mark_inode_dirty(handle, inode);
3598 handle->h_sync = 1; 3588 handle->h_sync = 1;
3599 ext3_journal_stop(handle); 3589 ext3_journal_stop(handle);
3600 ext3_std_error(inode->i_sb, err); 3590 ext3_std_error(inode->i_sb, err);
3601 3591
3602 return err; 3592 return err;
3603 } 3593 }
3604 3594
1 /* 1 /*
2 * linux/fs/ext3/ioctl.c 2 * linux/fs/ext3/ioctl.c
3 * 3 *
4 * Copyright (C) 1993, 1994, 1995 4 * Copyright (C) 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr) 5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal 6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI) 7 * Universite Pierre et Marie Curie (Paris VI)
8 */ 8 */
9 9
10 #include <linux/fs.h>
11 #include <linux/jbd.h>
12 #include <linux/capability.h>
13 #include <linux/ext3_fs.h>
14 #include <linux/ext3_jbd.h>
15 #include <linux/mount.h> 10 #include <linux/mount.h>
16 #include <linux/time.h>
17 #include <linux/compat.h> 11 #include <linux/compat.h>
18 #include <asm/uaccess.h> 12 #include <asm/uaccess.h>
13 #include "ext3.h"
19 14
20 long ext3_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 15 long ext3_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
21 { 16 {
22 struct inode *inode = filp->f_dentry->d_inode; 17 struct inode *inode = filp->f_dentry->d_inode;
23 struct ext3_inode_info *ei = EXT3_I(inode); 18 struct ext3_inode_info *ei = EXT3_I(inode);
24 unsigned int flags; 19 unsigned int flags;
25 unsigned short rsv_window_size; 20 unsigned short rsv_window_size;
26 21
27 ext3_debug ("cmd = %u, arg = %lu\n", cmd, arg); 22 ext3_debug ("cmd = %u, arg = %lu\n", cmd, arg);
28 23
29 switch (cmd) { 24 switch (cmd) {
30 case EXT3_IOC_GETFLAGS: 25 case EXT3_IOC_GETFLAGS:
31 ext3_get_inode_flags(ei); 26 ext3_get_inode_flags(ei);
32 flags = ei->i_flags & EXT3_FL_USER_VISIBLE; 27 flags = ei->i_flags & EXT3_FL_USER_VISIBLE;
33 return put_user(flags, (int __user *) arg); 28 return put_user(flags, (int __user *) arg);
34 case EXT3_IOC_SETFLAGS: { 29 case EXT3_IOC_SETFLAGS: {
35 handle_t *handle = NULL; 30 handle_t *handle = NULL;
36 int err; 31 int err;
37 struct ext3_iloc iloc; 32 struct ext3_iloc iloc;
38 unsigned int oldflags; 33 unsigned int oldflags;
39 unsigned int jflag; 34 unsigned int jflag;
40 35
41 if (!inode_owner_or_capable(inode)) 36 if (!inode_owner_or_capable(inode))
42 return -EACCES; 37 return -EACCES;
43 38
44 if (get_user(flags, (int __user *) arg)) 39 if (get_user(flags, (int __user *) arg))
45 return -EFAULT; 40 return -EFAULT;
46 41
47 err = mnt_want_write_file(filp); 42 err = mnt_want_write_file(filp);
48 if (err) 43 if (err)
49 return err; 44 return err;
50 45
51 flags = ext3_mask_flags(inode->i_mode, flags); 46 flags = ext3_mask_flags(inode->i_mode, flags);
52 47
53 mutex_lock(&inode->i_mutex); 48 mutex_lock(&inode->i_mutex);
54 49
55 /* Is it quota file? Do not allow user to mess with it */ 50 /* Is it quota file? Do not allow user to mess with it */
56 err = -EPERM; 51 err = -EPERM;
57 if (IS_NOQUOTA(inode)) 52 if (IS_NOQUOTA(inode))
58 goto flags_out; 53 goto flags_out;
59 54
60 oldflags = ei->i_flags; 55 oldflags = ei->i_flags;
61 56
62 /* The JOURNAL_DATA flag is modifiable only by root */ 57 /* The JOURNAL_DATA flag is modifiable only by root */
63 jflag = flags & EXT3_JOURNAL_DATA_FL; 58 jflag = flags & EXT3_JOURNAL_DATA_FL;
64 59
65 /* 60 /*
66 * The IMMUTABLE and APPEND_ONLY flags can only be changed by 61 * The IMMUTABLE and APPEND_ONLY flags can only be changed by
67 * the relevant capability. 62 * the relevant capability.
68 * 63 *
69 * This test looks nicer. Thanks to Pauline Middelink 64 * This test looks nicer. Thanks to Pauline Middelink
70 */ 65 */
71 if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL)) { 66 if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL)) {
72 if (!capable(CAP_LINUX_IMMUTABLE)) 67 if (!capable(CAP_LINUX_IMMUTABLE))
73 goto flags_out; 68 goto flags_out;
74 } 69 }
75 70
76 /* 71 /*
77 * The JOURNAL_DATA flag can only be changed by 72 * The JOURNAL_DATA flag can only be changed by
78 * the relevant capability. 73 * the relevant capability.
79 */ 74 */
80 if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) { 75 if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) {
81 if (!capable(CAP_SYS_RESOURCE)) 76 if (!capable(CAP_SYS_RESOURCE))
82 goto flags_out; 77 goto flags_out;
83 } 78 }
84 79
85 handle = ext3_journal_start(inode, 1); 80 handle = ext3_journal_start(inode, 1);
86 if (IS_ERR(handle)) { 81 if (IS_ERR(handle)) {
87 err = PTR_ERR(handle); 82 err = PTR_ERR(handle);
88 goto flags_out; 83 goto flags_out;
89 } 84 }
90 if (IS_SYNC(inode)) 85 if (IS_SYNC(inode))
91 handle->h_sync = 1; 86 handle->h_sync = 1;
92 err = ext3_reserve_inode_write(handle, inode, &iloc); 87 err = ext3_reserve_inode_write(handle, inode, &iloc);
93 if (err) 88 if (err)
94 goto flags_err; 89 goto flags_err;
95 90
96 flags = flags & EXT3_FL_USER_MODIFIABLE; 91 flags = flags & EXT3_FL_USER_MODIFIABLE;
97 flags |= oldflags & ~EXT3_FL_USER_MODIFIABLE; 92 flags |= oldflags & ~EXT3_FL_USER_MODIFIABLE;
98 ei->i_flags = flags; 93 ei->i_flags = flags;
99 94
100 ext3_set_inode_flags(inode); 95 ext3_set_inode_flags(inode);
101 inode->i_ctime = CURRENT_TIME_SEC; 96 inode->i_ctime = CURRENT_TIME_SEC;
102 97
103 err = ext3_mark_iloc_dirty(handle, inode, &iloc); 98 err = ext3_mark_iloc_dirty(handle, inode, &iloc);
104 flags_err: 99 flags_err:
105 ext3_journal_stop(handle); 100 ext3_journal_stop(handle);
106 if (err) 101 if (err)
107 goto flags_out; 102 goto flags_out;
108 103
109 if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) 104 if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL))
110 err = ext3_change_inode_journal_flag(inode, jflag); 105 err = ext3_change_inode_journal_flag(inode, jflag);
111 flags_out: 106 flags_out:
112 mutex_unlock(&inode->i_mutex); 107 mutex_unlock(&inode->i_mutex);
113 mnt_drop_write_file(filp); 108 mnt_drop_write_file(filp);
114 return err; 109 return err;
115 } 110 }
116 case EXT3_IOC_GETVERSION: 111 case EXT3_IOC_GETVERSION:
117 case EXT3_IOC_GETVERSION_OLD: 112 case EXT3_IOC_GETVERSION_OLD:
118 return put_user(inode->i_generation, (int __user *) arg); 113 return put_user(inode->i_generation, (int __user *) arg);
119 case EXT3_IOC_SETVERSION: 114 case EXT3_IOC_SETVERSION:
120 case EXT3_IOC_SETVERSION_OLD: { 115 case EXT3_IOC_SETVERSION_OLD: {
121 handle_t *handle; 116 handle_t *handle;
122 struct ext3_iloc iloc; 117 struct ext3_iloc iloc;
123 __u32 generation; 118 __u32 generation;
124 int err; 119 int err;
125 120
126 if (!inode_owner_or_capable(inode)) 121 if (!inode_owner_or_capable(inode))
127 return -EPERM; 122 return -EPERM;
128 123
129 err = mnt_want_write_file(filp); 124 err = mnt_want_write_file(filp);
130 if (err) 125 if (err)
131 return err; 126 return err;
132 if (get_user(generation, (int __user *) arg)) { 127 if (get_user(generation, (int __user *) arg)) {
133 err = -EFAULT; 128 err = -EFAULT;
134 goto setversion_out; 129 goto setversion_out;
135 } 130 }
136 131
137 mutex_lock(&inode->i_mutex); 132 mutex_lock(&inode->i_mutex);
138 handle = ext3_journal_start(inode, 1); 133 handle = ext3_journal_start(inode, 1);
139 if (IS_ERR(handle)) { 134 if (IS_ERR(handle)) {
140 err = PTR_ERR(handle); 135 err = PTR_ERR(handle);
141 goto unlock_out; 136 goto unlock_out;
142 } 137 }
143 err = ext3_reserve_inode_write(handle, inode, &iloc); 138 err = ext3_reserve_inode_write(handle, inode, &iloc);
144 if (err == 0) { 139 if (err == 0) {
145 inode->i_ctime = CURRENT_TIME_SEC; 140 inode->i_ctime = CURRENT_TIME_SEC;
146 inode->i_generation = generation; 141 inode->i_generation = generation;
147 err = ext3_mark_iloc_dirty(handle, inode, &iloc); 142 err = ext3_mark_iloc_dirty(handle, inode, &iloc);
148 } 143 }
149 ext3_journal_stop(handle); 144 ext3_journal_stop(handle);
150 145
151 unlock_out: 146 unlock_out:
152 mutex_unlock(&inode->i_mutex); 147 mutex_unlock(&inode->i_mutex);
153 setversion_out: 148 setversion_out:
154 mnt_drop_write_file(filp); 149 mnt_drop_write_file(filp);
155 return err; 150 return err;
156 } 151 }
157 case EXT3_IOC_GETRSVSZ: 152 case EXT3_IOC_GETRSVSZ:
158 if (test_opt(inode->i_sb, RESERVATION) 153 if (test_opt(inode->i_sb, RESERVATION)
159 && S_ISREG(inode->i_mode) 154 && S_ISREG(inode->i_mode)
160 && ei->i_block_alloc_info) { 155 && ei->i_block_alloc_info) {
161 rsv_window_size = ei->i_block_alloc_info->rsv_window_node.rsv_goal_size; 156 rsv_window_size = ei->i_block_alloc_info->rsv_window_node.rsv_goal_size;
162 return put_user(rsv_window_size, (int __user *)arg); 157 return put_user(rsv_window_size, (int __user *)arg);
163 } 158 }
164 return -ENOTTY; 159 return -ENOTTY;
165 case EXT3_IOC_SETRSVSZ: { 160 case EXT3_IOC_SETRSVSZ: {
166 int err; 161 int err;
167 162
168 if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) 163 if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode))
169 return -ENOTTY; 164 return -ENOTTY;
170 165
171 err = mnt_want_write_file(filp); 166 err = mnt_want_write_file(filp);
172 if (err) 167 if (err)
173 return err; 168 return err;
174 169
175 if (!inode_owner_or_capable(inode)) { 170 if (!inode_owner_or_capable(inode)) {
176 err = -EACCES; 171 err = -EACCES;
177 goto setrsvsz_out; 172 goto setrsvsz_out;
178 } 173 }
179 174
180 if (get_user(rsv_window_size, (int __user *)arg)) { 175 if (get_user(rsv_window_size, (int __user *)arg)) {
181 err = -EFAULT; 176 err = -EFAULT;
182 goto setrsvsz_out; 177 goto setrsvsz_out;
183 } 178 }
184 179
185 if (rsv_window_size > EXT3_MAX_RESERVE_BLOCKS) 180 if (rsv_window_size > EXT3_MAX_RESERVE_BLOCKS)
186 rsv_window_size = EXT3_MAX_RESERVE_BLOCKS; 181 rsv_window_size = EXT3_MAX_RESERVE_BLOCKS;
187 182
188 /* 183 /*
189 * need to allocate reservation structure for this inode 184 * need to allocate reservation structure for this inode
190 * before set the window size 185 * before set the window size
191 */ 186 */
192 mutex_lock(&ei->truncate_mutex); 187 mutex_lock(&ei->truncate_mutex);
193 if (!ei->i_block_alloc_info) 188 if (!ei->i_block_alloc_info)
194 ext3_init_block_alloc_info(inode); 189 ext3_init_block_alloc_info(inode);
195 190
196 if (ei->i_block_alloc_info){ 191 if (ei->i_block_alloc_info){
197 struct ext3_reserve_window_node *rsv = &ei->i_block_alloc_info->rsv_window_node; 192 struct ext3_reserve_window_node *rsv = &ei->i_block_alloc_info->rsv_window_node;
198 rsv->rsv_goal_size = rsv_window_size; 193 rsv->rsv_goal_size = rsv_window_size;
199 } 194 }
200 mutex_unlock(&ei->truncate_mutex); 195 mutex_unlock(&ei->truncate_mutex);
201 setrsvsz_out: 196 setrsvsz_out:
202 mnt_drop_write_file(filp); 197 mnt_drop_write_file(filp);
203 return err; 198 return err;
204 } 199 }
205 case EXT3_IOC_GROUP_EXTEND: { 200 case EXT3_IOC_GROUP_EXTEND: {
206 ext3_fsblk_t n_blocks_count; 201 ext3_fsblk_t n_blocks_count;
207 struct super_block *sb = inode->i_sb; 202 struct super_block *sb = inode->i_sb;
208 int err, err2; 203 int err, err2;
209 204
210 if (!capable(CAP_SYS_RESOURCE)) 205 if (!capable(CAP_SYS_RESOURCE))
211 return -EPERM; 206 return -EPERM;
212 207
213 err = mnt_want_write_file(filp); 208 err = mnt_want_write_file(filp);
214 if (err) 209 if (err)
215 return err; 210 return err;
216 211
217 if (get_user(n_blocks_count, (__u32 __user *)arg)) { 212 if (get_user(n_blocks_count, (__u32 __user *)arg)) {
218 err = -EFAULT; 213 err = -EFAULT;
219 goto group_extend_out; 214 goto group_extend_out;
220 } 215 }
221 err = ext3_group_extend(sb, EXT3_SB(sb)->s_es, n_blocks_count); 216 err = ext3_group_extend(sb, EXT3_SB(sb)->s_es, n_blocks_count);
222 journal_lock_updates(EXT3_SB(sb)->s_journal); 217 journal_lock_updates(EXT3_SB(sb)->s_journal);
223 err2 = journal_flush(EXT3_SB(sb)->s_journal); 218 err2 = journal_flush(EXT3_SB(sb)->s_journal);
224 journal_unlock_updates(EXT3_SB(sb)->s_journal); 219 journal_unlock_updates(EXT3_SB(sb)->s_journal);
225 if (err == 0) 220 if (err == 0)
226 err = err2; 221 err = err2;
227 group_extend_out: 222 group_extend_out:
228 mnt_drop_write_file(filp); 223 mnt_drop_write_file(filp);
229 return err; 224 return err;
230 } 225 }
231 case EXT3_IOC_GROUP_ADD: { 226 case EXT3_IOC_GROUP_ADD: {
232 struct ext3_new_group_data input; 227 struct ext3_new_group_data input;
233 struct super_block *sb = inode->i_sb; 228 struct super_block *sb = inode->i_sb;
234 int err, err2; 229 int err, err2;
235 230
236 if (!capable(CAP_SYS_RESOURCE)) 231 if (!capable(CAP_SYS_RESOURCE))
237 return -EPERM; 232 return -EPERM;
238 233
239 err = mnt_want_write_file(filp); 234 err = mnt_want_write_file(filp);
240 if (err) 235 if (err)
241 return err; 236 return err;
242 237
243 if (copy_from_user(&input, (struct ext3_new_group_input __user *)arg, 238 if (copy_from_user(&input, (struct ext3_new_group_input __user *)arg,
244 sizeof(input))) { 239 sizeof(input))) {
245 err = -EFAULT; 240 err = -EFAULT;
246 goto group_add_out; 241 goto group_add_out;
247 } 242 }
248 243
249 err = ext3_group_add(sb, &input); 244 err = ext3_group_add(sb, &input);
250 journal_lock_updates(EXT3_SB(sb)->s_journal); 245 journal_lock_updates(EXT3_SB(sb)->s_journal);
251 err2 = journal_flush(EXT3_SB(sb)->s_journal); 246 err2 = journal_flush(EXT3_SB(sb)->s_journal);
252 journal_unlock_updates(EXT3_SB(sb)->s_journal); 247 journal_unlock_updates(EXT3_SB(sb)->s_journal);
253 if (err == 0) 248 if (err == 0)
254 err = err2; 249 err = err2;
255 group_add_out: 250 group_add_out:
256 mnt_drop_write_file(filp); 251 mnt_drop_write_file(filp);
257 return err; 252 return err;
258 } 253 }
259 case FITRIM: { 254 case FITRIM: {
260 255
261 struct super_block *sb = inode->i_sb; 256 struct super_block *sb = inode->i_sb;
262 struct fstrim_range range; 257 struct fstrim_range range;
263 int ret = 0; 258 int ret = 0;
264 259
265 if (!capable(CAP_SYS_ADMIN)) 260 if (!capable(CAP_SYS_ADMIN))
266 return -EPERM; 261 return -EPERM;
267 262
268 if (copy_from_user(&range, (struct fstrim_range __user *)arg, 263 if (copy_from_user(&range, (struct fstrim_range __user *)arg,
269 sizeof(range))) 264 sizeof(range)))
270 return -EFAULT; 265 return -EFAULT;
271 266
272 ret = ext3_trim_fs(sb, &range); 267 ret = ext3_trim_fs(sb, &range);
273 if (ret < 0) 268 if (ret < 0)
274 return ret; 269 return ret;
275 270
276 if (copy_to_user((struct fstrim_range __user *)arg, &range, 271 if (copy_to_user((struct fstrim_range __user *)arg, &range,
277 sizeof(range))) 272 sizeof(range)))
278 return -EFAULT; 273 return -EFAULT;
279 274
280 return 0; 275 return 0;
281 } 276 }
282 277
283 default: 278 default:
284 return -ENOTTY; 279 return -ENOTTY;
285 } 280 }
286 } 281 }
287 282
288 #ifdef CONFIG_COMPAT 283 #ifdef CONFIG_COMPAT
289 long ext3_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 284 long ext3_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
290 { 285 {
291 /* These are just misnamed, they actually get/put from/to user an int */ 286 /* These are just misnamed, they actually get/put from/to user an int */
292 switch (cmd) { 287 switch (cmd) {
293 case EXT3_IOC32_GETFLAGS: 288 case EXT3_IOC32_GETFLAGS:
294 cmd = EXT3_IOC_GETFLAGS; 289 cmd = EXT3_IOC_GETFLAGS;
295 break; 290 break;
296 case EXT3_IOC32_SETFLAGS: 291 case EXT3_IOC32_SETFLAGS:
297 cmd = EXT3_IOC_SETFLAGS; 292 cmd = EXT3_IOC_SETFLAGS;
298 break; 293 break;
299 case EXT3_IOC32_GETVERSION: 294 case EXT3_IOC32_GETVERSION:
300 cmd = EXT3_IOC_GETVERSION; 295 cmd = EXT3_IOC_GETVERSION;
301 break; 296 break;
302 case EXT3_IOC32_SETVERSION: 297 case EXT3_IOC32_SETVERSION:
303 cmd = EXT3_IOC_SETVERSION; 298 cmd = EXT3_IOC_SETVERSION;
304 break; 299 break;
305 case EXT3_IOC32_GROUP_EXTEND: 300 case EXT3_IOC32_GROUP_EXTEND:
306 cmd = EXT3_IOC_GROUP_EXTEND; 301 cmd = EXT3_IOC_GROUP_EXTEND;
307 break; 302 break;
308 case EXT3_IOC32_GETVERSION_OLD: 303 case EXT3_IOC32_GETVERSION_OLD:
309 cmd = EXT3_IOC_GETVERSION_OLD; 304 cmd = EXT3_IOC_GETVERSION_OLD;
310 break; 305 break;
311 case EXT3_IOC32_SETVERSION_OLD: 306 case EXT3_IOC32_SETVERSION_OLD:
312 cmd = EXT3_IOC_SETVERSION_OLD; 307 cmd = EXT3_IOC_SETVERSION_OLD;
313 break; 308 break;
314 #ifdef CONFIG_JBD_DEBUG 309 #ifdef CONFIG_JBD_DEBUG
315 case EXT3_IOC32_WAIT_FOR_READONLY: 310 case EXT3_IOC32_WAIT_FOR_READONLY:
316 cmd = EXT3_IOC_WAIT_FOR_READONLY; 311 cmd = EXT3_IOC_WAIT_FOR_READONLY;
317 break; 312 break;
318 #endif 313 #endif
319 case EXT3_IOC32_GETRSVSZ: 314 case EXT3_IOC32_GETRSVSZ:
320 cmd = EXT3_IOC_GETRSVSZ; 315 cmd = EXT3_IOC_GETRSVSZ;
321 break; 316 break;
322 case EXT3_IOC32_SETRSVSZ: 317 case EXT3_IOC32_SETRSVSZ:
323 cmd = EXT3_IOC_SETRSVSZ; 318 cmd = EXT3_IOC_SETRSVSZ;
324 break; 319 break;
325 case EXT3_IOC_GROUP_ADD: 320 case EXT3_IOC_GROUP_ADD:
326 break; 321 break;
327 default: 322 default:
328 return -ENOIOCTLCMD; 323 return -ENOIOCTLCMD;
329 } 324 }
330 return ext3_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); 325 return ext3_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
331 } 326 }
332 #endif 327 #endif
1 /* 1 /*
2 * linux/fs/ext3/namei.c 2 * linux/fs/ext3/namei.c
3 * 3 *
4 * Copyright (C) 1992, 1993, 1994, 1995 4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr) 5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal 6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI) 7 * Universite Pierre et Marie Curie (Paris VI)
8 * 8 *
9 * from 9 * from
10 * 10 *
11 * linux/fs/minix/namei.c 11 * linux/fs/minix/namei.c
12 * 12 *
13 * Copyright (C) 1991, 1992 Linus Torvalds 13 * Copyright (C) 1991, 1992 Linus Torvalds
14 * 14 *
15 * Big-endian to little-endian byte-swapping/bitmaps by 15 * Big-endian to little-endian byte-swapping/bitmaps by
16 * David S. Miller (davem@caip.rutgers.edu), 1995 16 * David S. Miller (davem@caip.rutgers.edu), 1995
17 * Directory entry file type support and forward compatibility hooks 17 * Directory entry file type support and forward compatibility hooks
18 * for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998 18 * for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998
19 * Hash Tree Directory indexing (c) 19 * Hash Tree Directory indexing (c)
20 * Daniel Phillips, 2001 20 * Daniel Phillips, 2001
21 * Hash Tree Directory indexing porting 21 * Hash Tree Directory indexing porting
22 * Christopher Li, 2002 22 * Christopher Li, 2002
23 * Hash Tree Directory indexing cleanup 23 * Hash Tree Directory indexing cleanup
24 * Theodore Ts'o, 2002 24 * Theodore Ts'o, 2002
25 */ 25 */
26 26
27 #include <linux/fs.h>
28 #include <linux/pagemap.h>
29 #include <linux/jbd.h>
30 #include <linux/time.h>
31 #include <linux/ext3_fs.h>
32 #include <linux/ext3_jbd.h>
33 #include <linux/fcntl.h>
34 #include <linux/stat.h>
35 #include <linux/string.h>
36 #include <linux/quotaops.h> 27 #include <linux/quotaops.h>
37 #include <linux/buffer_head.h> 28 #include "ext3.h"
38 #include <linux/bio.h>
39 #include <trace/events/ext3.h>
40
41 #include "namei.h" 29 #include "namei.h"
42 #include "xattr.h" 30 #include "xattr.h"
43 #include "acl.h" 31 #include "acl.h"
44 32
45 /* 33 /*
46 * define how far ahead to read directories while searching them. 34 * define how far ahead to read directories while searching them.
47 */ 35 */
48 #define NAMEI_RA_CHUNKS 2 36 #define NAMEI_RA_CHUNKS 2
49 #define NAMEI_RA_BLOCKS 4 37 #define NAMEI_RA_BLOCKS 4
50 #define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS) 38 #define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
51 #define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b)) 39 #define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b))
52 40
53 static struct buffer_head *ext3_append(handle_t *handle, 41 static struct buffer_head *ext3_append(handle_t *handle,
54 struct inode *inode, 42 struct inode *inode,
55 u32 *block, int *err) 43 u32 *block, int *err)
56 { 44 {
57 struct buffer_head *bh; 45 struct buffer_head *bh;
58 46
59 *block = inode->i_size >> inode->i_sb->s_blocksize_bits; 47 *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
60 48
61 bh = ext3_bread(handle, inode, *block, 1, err); 49 bh = ext3_bread(handle, inode, *block, 1, err);
62 if (bh) { 50 if (bh) {
63 inode->i_size += inode->i_sb->s_blocksize; 51 inode->i_size += inode->i_sb->s_blocksize;
64 EXT3_I(inode)->i_disksize = inode->i_size; 52 EXT3_I(inode)->i_disksize = inode->i_size;
65 *err = ext3_journal_get_write_access(handle, bh); 53 *err = ext3_journal_get_write_access(handle, bh);
66 if (*err) { 54 if (*err) {
67 brelse(bh); 55 brelse(bh);
68 bh = NULL; 56 bh = NULL;
69 } 57 }
70 } 58 }
71 return bh; 59 return bh;
72 } 60 }
73 61
74 #ifndef assert 62 #ifndef assert
75 #define assert(test) J_ASSERT(test) 63 #define assert(test) J_ASSERT(test)
76 #endif 64 #endif
77 65
78 #ifdef DX_DEBUG 66 #ifdef DX_DEBUG
79 #define dxtrace(command) command 67 #define dxtrace(command) command
80 #else 68 #else
81 #define dxtrace(command) 69 #define dxtrace(command)
82 #endif 70 #endif
83 71
84 struct fake_dirent 72 struct fake_dirent
85 { 73 {
86 __le32 inode; 74 __le32 inode;
87 __le16 rec_len; 75 __le16 rec_len;
88 u8 name_len; 76 u8 name_len;
89 u8 file_type; 77 u8 file_type;
90 }; 78 };
91 79
92 struct dx_countlimit 80 struct dx_countlimit
93 { 81 {
94 __le16 limit; 82 __le16 limit;
95 __le16 count; 83 __le16 count;
96 }; 84 };
97 85
98 struct dx_entry 86 struct dx_entry
99 { 87 {
100 __le32 hash; 88 __le32 hash;
101 __le32 block; 89 __le32 block;
102 }; 90 };
103 91
104 /* 92 /*
105 * dx_root_info is laid out so that if it should somehow get overlaid by a 93 * dx_root_info is laid out so that if it should somehow get overlaid by a
106 * dirent the two low bits of the hash version will be zero. Therefore, the 94 * dirent the two low bits of the hash version will be zero. Therefore, the
107 * hash version mod 4 should never be 0. Sincerely, the paranoia department. 95 * hash version mod 4 should never be 0. Sincerely, the paranoia department.
108 */ 96 */
109 97
110 struct dx_root 98 struct dx_root
111 { 99 {
112 struct fake_dirent dot; 100 struct fake_dirent dot;
113 char dot_name[4]; 101 char dot_name[4];
114 struct fake_dirent dotdot; 102 struct fake_dirent dotdot;
115 char dotdot_name[4]; 103 char dotdot_name[4];
116 struct dx_root_info 104 struct dx_root_info
117 { 105 {
118 __le32 reserved_zero; 106 __le32 reserved_zero;
119 u8 hash_version; 107 u8 hash_version;
120 u8 info_length; /* 8 */ 108 u8 info_length; /* 8 */
121 u8 indirect_levels; 109 u8 indirect_levels;
122 u8 unused_flags; 110 u8 unused_flags;
123 } 111 }
124 info; 112 info;
125 struct dx_entry entries[0]; 113 struct dx_entry entries[0];
126 }; 114 };
127 115
128 struct dx_node 116 struct dx_node
129 { 117 {
130 struct fake_dirent fake; 118 struct fake_dirent fake;
131 struct dx_entry entries[0]; 119 struct dx_entry entries[0];
132 }; 120 };
133 121
134 122
135 struct dx_frame 123 struct dx_frame
136 { 124 {
137 struct buffer_head *bh; 125 struct buffer_head *bh;
138 struct dx_entry *entries; 126 struct dx_entry *entries;
139 struct dx_entry *at; 127 struct dx_entry *at;
140 }; 128 };
141 129
142 struct dx_map_entry 130 struct dx_map_entry
143 { 131 {
144 u32 hash; 132 u32 hash;
145 u16 offs; 133 u16 offs;
146 u16 size; 134 u16 size;
147 }; 135 };
148 136
149 static inline unsigned dx_get_block (struct dx_entry *entry); 137 static inline unsigned dx_get_block (struct dx_entry *entry);
150 static void dx_set_block (struct dx_entry *entry, unsigned value); 138 static void dx_set_block (struct dx_entry *entry, unsigned value);
151 static inline unsigned dx_get_hash (struct dx_entry *entry); 139 static inline unsigned dx_get_hash (struct dx_entry *entry);
152 static void dx_set_hash (struct dx_entry *entry, unsigned value); 140 static void dx_set_hash (struct dx_entry *entry, unsigned value);
153 static unsigned dx_get_count (struct dx_entry *entries); 141 static unsigned dx_get_count (struct dx_entry *entries);
154 static unsigned dx_get_limit (struct dx_entry *entries); 142 static unsigned dx_get_limit (struct dx_entry *entries);
155 static void dx_set_count (struct dx_entry *entries, unsigned value); 143 static void dx_set_count (struct dx_entry *entries, unsigned value);
156 static void dx_set_limit (struct dx_entry *entries, unsigned value); 144 static void dx_set_limit (struct dx_entry *entries, unsigned value);
157 static unsigned dx_root_limit (struct inode *dir, unsigned infosize); 145 static unsigned dx_root_limit (struct inode *dir, unsigned infosize);
158 static unsigned dx_node_limit (struct inode *dir); 146 static unsigned dx_node_limit (struct inode *dir);
159 static struct dx_frame *dx_probe(struct qstr *entry, 147 static struct dx_frame *dx_probe(struct qstr *entry,
160 struct inode *dir, 148 struct inode *dir,
161 struct dx_hash_info *hinfo, 149 struct dx_hash_info *hinfo,
162 struct dx_frame *frame, 150 struct dx_frame *frame,
163 int *err); 151 int *err);
164 static void dx_release (struct dx_frame *frames); 152 static void dx_release (struct dx_frame *frames);
165 static int dx_make_map(struct ext3_dir_entry_2 *de, unsigned blocksize, 153 static int dx_make_map(struct ext3_dir_entry_2 *de, unsigned blocksize,
166 struct dx_hash_info *hinfo, struct dx_map_entry map[]); 154 struct dx_hash_info *hinfo, struct dx_map_entry map[]);
167 static void dx_sort_map(struct dx_map_entry *map, unsigned count); 155 static void dx_sort_map(struct dx_map_entry *map, unsigned count);
168 static struct ext3_dir_entry_2 *dx_move_dirents (char *from, char *to, 156 static struct ext3_dir_entry_2 *dx_move_dirents (char *from, char *to,
169 struct dx_map_entry *offsets, int count); 157 struct dx_map_entry *offsets, int count);
170 static struct ext3_dir_entry_2 *dx_pack_dirents(char *base, unsigned blocksize); 158 static struct ext3_dir_entry_2 *dx_pack_dirents(char *base, unsigned blocksize);
171 static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block); 159 static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block);
172 static int ext3_htree_next_block(struct inode *dir, __u32 hash, 160 static int ext3_htree_next_block(struct inode *dir, __u32 hash,
173 struct dx_frame *frame, 161 struct dx_frame *frame,
174 struct dx_frame *frames, 162 struct dx_frame *frames,
175 __u32 *start_hash); 163 __u32 *start_hash);
176 static struct buffer_head * ext3_dx_find_entry(struct inode *dir, 164 static struct buffer_head * ext3_dx_find_entry(struct inode *dir,
177 struct qstr *entry, struct ext3_dir_entry_2 **res_dir, 165 struct qstr *entry, struct ext3_dir_entry_2 **res_dir,
178 int *err); 166 int *err);
179 static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, 167 static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
180 struct inode *inode); 168 struct inode *inode);
181 169
182 /* 170 /*
183 * p is at least 6 bytes before the end of page 171 * p is at least 6 bytes before the end of page
184 */ 172 */
185 static inline struct ext3_dir_entry_2 * 173 static inline struct ext3_dir_entry_2 *
186 ext3_next_entry(struct ext3_dir_entry_2 *p) 174 ext3_next_entry(struct ext3_dir_entry_2 *p)
187 { 175 {
188 return (struct ext3_dir_entry_2 *)((char *)p + 176 return (struct ext3_dir_entry_2 *)((char *)p +
189 ext3_rec_len_from_disk(p->rec_len)); 177 ext3_rec_len_from_disk(p->rec_len));
190 } 178 }
191 179
192 /* 180 /*
193 * Future: use high four bits of block for coalesce-on-delete flags 181 * Future: use high four bits of block for coalesce-on-delete flags
194 * Mask them off for now. 182 * Mask them off for now.
195 */ 183 */
196 184
197 static inline unsigned dx_get_block (struct dx_entry *entry) 185 static inline unsigned dx_get_block (struct dx_entry *entry)
198 { 186 {
199 return le32_to_cpu(entry->block) & 0x00ffffff; 187 return le32_to_cpu(entry->block) & 0x00ffffff;
200 } 188 }
201 189
202 static inline void dx_set_block (struct dx_entry *entry, unsigned value) 190 static inline void dx_set_block (struct dx_entry *entry, unsigned value)
203 { 191 {
204 entry->block = cpu_to_le32(value); 192 entry->block = cpu_to_le32(value);
205 } 193 }
206 194
207 static inline unsigned dx_get_hash (struct dx_entry *entry) 195 static inline unsigned dx_get_hash (struct dx_entry *entry)
208 { 196 {
209 return le32_to_cpu(entry->hash); 197 return le32_to_cpu(entry->hash);
210 } 198 }
211 199
212 static inline void dx_set_hash (struct dx_entry *entry, unsigned value) 200 static inline void dx_set_hash (struct dx_entry *entry, unsigned value)
213 { 201 {
214 entry->hash = cpu_to_le32(value); 202 entry->hash = cpu_to_le32(value);
215 } 203 }
216 204
217 static inline unsigned dx_get_count (struct dx_entry *entries) 205 static inline unsigned dx_get_count (struct dx_entry *entries)
218 { 206 {
219 return le16_to_cpu(((struct dx_countlimit *) entries)->count); 207 return le16_to_cpu(((struct dx_countlimit *) entries)->count);
220 } 208 }
221 209
222 static inline unsigned dx_get_limit (struct dx_entry *entries) 210 static inline unsigned dx_get_limit (struct dx_entry *entries)
223 { 211 {
224 return le16_to_cpu(((struct dx_countlimit *) entries)->limit); 212 return le16_to_cpu(((struct dx_countlimit *) entries)->limit);
225 } 213 }
226 214
227 static inline void dx_set_count (struct dx_entry *entries, unsigned value) 215 static inline void dx_set_count (struct dx_entry *entries, unsigned value)
228 { 216 {
229 ((struct dx_countlimit *) entries)->count = cpu_to_le16(value); 217 ((struct dx_countlimit *) entries)->count = cpu_to_le16(value);
230 } 218 }
231 219
232 static inline void dx_set_limit (struct dx_entry *entries, unsigned value) 220 static inline void dx_set_limit (struct dx_entry *entries, unsigned value)
233 { 221 {
234 ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value); 222 ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
235 } 223 }
236 224
237 static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize) 225 static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize)
238 { 226 {
239 unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) - 227 unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) -
240 EXT3_DIR_REC_LEN(2) - infosize; 228 EXT3_DIR_REC_LEN(2) - infosize;
241 return entry_space / sizeof(struct dx_entry); 229 return entry_space / sizeof(struct dx_entry);
242 } 230 }
243 231
244 static inline unsigned dx_node_limit (struct inode *dir) 232 static inline unsigned dx_node_limit (struct inode *dir)
245 { 233 {
246 unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0); 234 unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0);
247 return entry_space / sizeof(struct dx_entry); 235 return entry_space / sizeof(struct dx_entry);
248 } 236 }
249 237
250 /* 238 /*
251 * Debug 239 * Debug
252 */ 240 */
253 #ifdef DX_DEBUG 241 #ifdef DX_DEBUG
254 static void dx_show_index (char * label, struct dx_entry *entries) 242 static void dx_show_index (char * label, struct dx_entry *entries)
255 { 243 {
256 int i, n = dx_get_count (entries); 244 int i, n = dx_get_count (entries);
257 printk("%s index ", label); 245 printk("%s index ", label);
258 for (i = 0; i < n; i++) 246 for (i = 0; i < n; i++)
259 { 247 {
260 printk("%x->%u ", i? dx_get_hash(entries + i): 0, dx_get_block(entries + i)); 248 printk("%x->%u ", i? dx_get_hash(entries + i): 0, dx_get_block(entries + i));
261 } 249 }
262 printk("\n"); 250 printk("\n");
263 } 251 }
264 252
/* Aggregate results of a DX_DEBUG tree/leaf dump. */
struct stats
{
	unsigned names;		/* live (inode != 0) entries seen */
	unsigned space;		/* bytes those entries would minimally occupy */
	unsigned bcount;	/* leaf blocks visited */
};
271 259
/*
 * DX_DEBUG helper: walk one leaf directory block, optionally printing each
 * name with its hash and offset, and return {names, space, 1} for it.
 */
static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_entry_2 *de,
				 int size, int show_names)
{
	unsigned names = 0, space = 0;
	char *base = (char *) de;
	/* local copy: ext3fs_dirhash() writes the hash back into it */
	struct dx_hash_info h = *hinfo;

	printk("names: ");
	while ((char *) de < base + size)
	{
		if (de->inode)	/* inode == 0 marks a deleted entry */
		{
			if (show_names)
			{
				int len = de->name_len;
				char *name = de->name;
				while (len--) printk("%c", *name++);
				ext3fs_dirhash(de->name, de->name_len, &h);
				printk(":%x.%u ", h.hash,
				       (unsigned) ((char *) de - base));
			}
			/* minimal record length for this name, not rec_len */
			space += EXT3_DIR_REC_LEN(de->name_len);
			names++;
		}
		de = ext3_next_entry(de);
	}
	printk("(%i)\n", names);
	return (struct stats) { names, space, 1 };
}
301 289
/*
 * DX_DEBUG helper: recursively dump an index block and everything below it.
 * @levels is the number of interior levels still beneath @entries; at 0 the
 * referenced blocks are leaves.  Returns accumulated stats for the subtree.
 * Read errors on individual blocks are skipped, not propagated.
 */
struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
			     struct dx_entry *entries, int levels)
{
	unsigned blocksize = dir->i_sb->s_blocksize;
	unsigned count = dx_get_count (entries), names = 0, space = 0, i;
	unsigned bcount = 0;
	struct buffer_head *bh;
	int err;
	printk("%i indexed blocks...\n", count);
	for (i = 0; i < count; i++, entries++)
	{
		/* entry 0 covers everything below the first real hash */
		u32 block = dx_get_block(entries), hash = i? dx_get_hash(entries): 0;
		/* range of hash values this entry is responsible for */
		u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash;
		struct stats stats;
		printk("%s%3u:%03u hash %8x/%8x ",levels?"":" ", i, block, hash, range);
		if (!(bh = ext3_bread (NULL,dir, block, 0,&err))) continue;
		stats = levels?
		   dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
		   dx_show_leaf(hinfo, (struct ext3_dir_entry_2 *) bh->b_data, blocksize, 0);
		names += stats.names;
		space += stats.space;
		bcount += stats.bcount;
		brelse (bh);
	}
	if (bcount)
		printk("%snames %u, fullness %u (%u%%)\n", levels?"":" ",
			names, space/bcount,(space/bcount)*100/blocksize);
	return (struct stats) { names, space, bcount};
}
331 #endif /* DX_DEBUG */ 319 #endif /* DX_DEBUG */
332 320
/*
 * Probe for a directory leaf block to search.
 *
 * Descends the htree for @dir, filling one dx_frame per level into the
 * caller-supplied @frame_in array, and returns a pointer to the deepest
 * frame (whose ->at entry points at the leaf to search for the hash of
 * @entry; if @entry is NULL, hinfo->hash is used as-is, which readdir
 * relies on).  On failure returns NULL and sets *err.
 *
 * dx_probe can return ERR_BAD_DX_DIR, which means there was a format
 * error in the directory index, and the caller should fall back to
 * searching the directory normally.  The callers of dx_probe **MUST**
 * check for this error code, and make sure it never gets reflected
 * back to userspace.
 */
static struct dx_frame *
dx_probe(struct qstr *entry, struct inode *dir,
	 struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err)
{
	unsigned count, indirect;
	struct dx_entry *at, *entries, *p, *q, *m;
	struct dx_root *root;
	struct buffer_head *bh;
	struct dx_frame *frame = frame_in;
	u32 hash;

	frame->bh = NULL;
	/* the dx root lives in logical block 0 of the directory */
	if (!(bh = ext3_bread (NULL,dir, 0, 0, err)))
		goto fail;
	root = (struct dx_root *) bh->b_data;
	if (root->info.hash_version != DX_HASH_TEA &&
	    root->info.hash_version != DX_HASH_HALF_MD4 &&
	    root->info.hash_version != DX_HASH_LEGACY) {
		ext3_warning(dir->i_sb, __func__,
			     "Unrecognised inode hash code %d",
			     root->info.hash_version);
		brelse(bh);
		*err = ERR_BAD_DX_DIR;
		goto fail;
	}
	hinfo->hash_version = root->info.hash_version;
	/* select the unsigned variant of the hash if the sb demands it */
	if (hinfo->hash_version <= DX_HASH_TEA)
		hinfo->hash_version += EXT3_SB(dir->i_sb)->s_hash_unsigned;
	hinfo->seed = EXT3_SB(dir->i_sb)->s_hash_seed;
	if (entry)
		ext3fs_dirhash(entry->name, entry->len, hinfo);
	hash = hinfo->hash;

	if (root->info.unused_flags & 1) {
		ext3_warning(dir->i_sb, __func__,
			     "Unimplemented inode hash flags: %#06x",
			     root->info.unused_flags);
		brelse(bh);
		*err = ERR_BAD_DX_DIR;
		goto fail;
	}

	/* only depths 0 and 1 (two-level trees) are implemented */
	if ((indirect = root->info.indirect_levels) > 1) {
		ext3_warning(dir->i_sb, __func__,
			     "Unimplemented inode hash depth: %#06x",
			     root->info.indirect_levels);
		brelse(bh);
		*err = ERR_BAD_DX_DIR;
		goto fail;
	}

	entries = (struct dx_entry *) (((char *)&root->info) +
				       root->info.info_length);

	if (dx_get_limit(entries) != dx_root_limit(dir,
						   root->info.info_length)) {
		ext3_warning(dir->i_sb, __func__,
			     "dx entry: limit != root limit");
		brelse(bh);
		*err = ERR_BAD_DX_DIR;
		goto fail;
	}

	dxtrace (printk("Look up %x", hash));
	while (1)
	{
		count = dx_get_count(entries);
		if (!count || count > dx_get_limit(entries)) {
			ext3_warning(dir->i_sb, __func__,
				     "dx entry: no count or count > limit");
			brelse(bh);
			*err = ERR_BAD_DX_DIR;
			goto fail2;
		}

		/*
		 * Binary search for the last entry with hash <= target.
		 * Entry 0 is skipped: it has no hash and covers the low end.
		 */
		p = entries + 1;
		q = entries + count - 1;
		while (p <= q)
		{
			m = p + (q - p)/2;
			dxtrace(printk("."));
			if (dx_get_hash(m) > hash)
				q = m - 1;
			else
				p = m + 1;
		}

		if (0) // linear search cross check
		{
			unsigned n = count - 1;
			at = entries;
			while (n--)
			{
				dxtrace(printk(","));
				if (dx_get_hash(++at) > hash)
				{
					at--;
					break;
				}
			}
			assert (at == p - 1);
		}

		at = p - 1;
		dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at)));
		frame->bh = bh;
		frame->entries = entries;
		frame->at = at;
		if (!indirect--) return frame;
		/* descend one level: read the interior node at's block points to */
		if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err)))
			goto fail2;
		at = entries = ((struct dx_node *) bh->b_data)->entries;
		if (dx_get_limit(entries) != dx_node_limit (dir)) {
			ext3_warning(dir->i_sb, __func__,
				     "dx entry: limit != node limit");
			brelse(bh);
			*err = ERR_BAD_DX_DIR;
			goto fail2;
		}
		frame++;
		frame->bh = NULL;
	}
fail2:
	/* release every frame already pinned, deepest first */
	while (frame >= frame_in) {
		brelse(frame->bh);
		frame--;
	}
fail:
	if (*err == ERR_BAD_DX_DIR)
		ext3_warning(dir->i_sb, __func__,
			     "Corrupt dir inode %ld, running e2fsck is "
			     "recommended.", dir->i_ino);
	return NULL;
}
476 464
477 static void dx_release (struct dx_frame *frames) 465 static void dx_release (struct dx_frame *frames)
478 { 466 {
479 if (frames[0].bh == NULL) 467 if (frames[0].bh == NULL)
480 return; 468 return;
481 469
482 if (((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels) 470 if (((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels)
483 brelse(frames[1].bh); 471 brelse(frames[1].bh);
484 brelse(frames[0].bh); 472 brelse(frames[0].bh);
485 } 473 }
486 474
/*
 * This function increments the frame pointer to search the next leaf
 * block, and reads in the necessary intervening nodes if the search
 * should be necessary.  Whether or not the search is necessary is
 * controlled by the hash parameter.  If the hash value is even, then
 * the search is only continued if the next block starts with that
 * hash value.  This is used if we are searching for a specific file.
 *
 * If the hash value is HASH_NB_ALWAYS, then always go to the next block.
 *
 * This function returns 1 if the caller should continue to search,
 * or 0 if it should not.  If there is an error reading one of the
 * index blocks, it will return a negative error code.
 *
 * If start_hash is non-null, it will be filled in with the starting
 * hash of the next page.
 */
static int ext3_htree_next_block(struct inode *dir, __u32 hash,
				 struct dx_frame *frame,
				 struct dx_frame *frames,
				 __u32 *start_hash)
{
	struct dx_frame *p;
	struct buffer_head *bh;
	int err, num_frames = 0;
	__u32 bhash;

	p = frame;
	/*
	 * Find the next leaf page by incrementing the frame pointer.
	 * If we run out of entries in the interior node, loop around and
	 * increment pointer in the parent node.  When we break out of
	 * this loop, num_frames indicates the number of interior
	 * nodes need to be read.
	 */
	while (1) {
		if (++(p->at) < p->entries + dx_get_count(p->entries))
			break;
		if (p == frames)
			return 0;	/* exhausted the root: no more leaves */
		num_frames++;
		p--;
	}

	/*
	 * If the hash is 1, then continue only if the next page has a
	 * continuation hash of any value.  This is used for readdir
	 * handling.  Otherwise, check to see if the hash matches the
	 * desired continuation hash.  If it doesn't, return since
	 * there's no point to read in the successive index pages.
	 */
	bhash = dx_get_hash(p->at);
	if (start_hash)
		*start_hash = bhash;
	if ((hash & 1) == 0) {
		if ((bhash & ~1) != hash)
			return 0;
	}
	/*
	 * If the hash is HASH_NB_ALWAYS, we always go to the next
	 * block so no check is necessary
	 */
	while (num_frames--) {
		/* re-read the child node and reset its cursor to entry 0 */
		if (!(bh = ext3_bread(NULL, dir, dx_get_block(p->at),
				      0, &err)))
			return err;	/* Failure */
		p++;
		brelse (p->bh);
		p->bh = bh;
		p->at = p->entries = ((struct dx_node *) bh->b_data)->entries;
	}
	return 1;
}
560 548
561 549
/*
 * This function fills a red-black tree with information from a
 * directory block.  It returns the number of directory entries loaded
 * into the tree.  If there is an error it is returned in err.
 */
static int htree_dirblock_to_tree(struct file *dir_file,
				  struct inode *dir, int block,
				  struct dx_hash_info *hinfo,
				  __u32 start_hash, __u32 start_minor_hash)
{
	struct buffer_head *bh;
	struct ext3_dir_entry_2 *de, *top;
	int err, count = 0;

	dxtrace(printk("In htree dirblock_to_tree: block %d\n", block));
	if (!(bh = ext3_bread (NULL, dir, block, 0, &err)))
		return err;

	/* top: last offset at which a (zero-length) entry could still start */
	de = (struct ext3_dir_entry_2 *) bh->b_data;
	top = (struct ext3_dir_entry_2 *) ((char *) de +
					   dir->i_sb->s_blocksize -
					   EXT3_DIR_REC_LEN(0));
	for (; de < top; de = ext3_next_entry(de)) {
		if (!ext3_check_dir_entry("htree_dirblock_to_tree", dir, de, bh,
					(block<<EXT3_BLOCK_SIZE_BITS(dir->i_sb))
						+((char *)de - bh->b_data))) {
			/* On error, skip the f_pos to the next block. */
			dir_file->f_pos = (dir_file->f_pos |
					(dir->i_sb->s_blocksize - 1)) + 1;
			brelse (bh);
			/* partial success: report what was stored so far */
			return count;
		}
		ext3fs_dirhash(de->name, de->name_len, hinfo);
		/* skip entries hashing below the requested starting point */
		if ((hinfo->hash < start_hash) ||
		    ((hinfo->hash == start_hash) &&
		     (hinfo->minor_hash < start_minor_hash)))
			continue;
		if (de->inode == 0)	/* deleted entry */
			continue;
		if ((err = ext3_htree_store_dirent(dir_file,
			   hinfo->hash, hinfo->minor_hash, de)) != 0) {
			brelse(bh);
			return err;
		}
		count++;
	}
	brelse(bh);
	return count;
}
611 599
612 600
/*
 * This function fills a red-black tree with information from a
 * directory.  We start scanning the directory in hash order, starting
 * at start_hash and start_minor_hash.
 *
 * This function returns the number of entries inserted into the tree,
 * or a negative error code.  *next_hash receives the hash at which a
 * subsequent call should resume.
 */
int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
			 __u32 start_minor_hash, __u32 *next_hash)
{
	struct dx_hash_info hinfo;
	struct ext3_dir_entry_2 *de;
	struct dx_frame frames[2], *frame;
	struct inode *dir;
	int block, err;
	int count = 0;
	int ret;
	__u32 hashval;

	dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash,
		       start_minor_hash));
	dir = dir_file->f_path.dentry->d_inode;
	/* Non-indexed directory: a single pass over block 0 covers it all. */
	if (!(EXT3_I(dir)->i_flags & EXT3_INDEX_FL)) {
		hinfo.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version;
		if (hinfo.hash_version <= DX_HASH_TEA)
			hinfo.hash_version +=
				EXT3_SB(dir->i_sb)->s_hash_unsigned;
		hinfo.seed = EXT3_SB(dir->i_sb)->s_hash_seed;
		count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo,
					       start_hash, start_minor_hash);
		*next_hash = ~0;
		return count;
	}
	hinfo.hash = start_hash;
	hinfo.minor_hash = 0;
	/* entry == NULL: probe by hinfo.hash rather than by a name */
	frame = dx_probe(NULL, dir_file->f_path.dentry->d_inode, &hinfo, frames, &err);
	if (!frame)
		return err;

	/* Add '.' and '..' from the htree header */
	if (!start_hash && !start_minor_hash) {
		de = (struct ext3_dir_entry_2 *) frames[0].bh->b_data;
		if ((err = ext3_htree_store_dirent(dir_file, 0, 0, de)) != 0)
			goto errout;
		count++;
	}
	if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) {
		de = (struct ext3_dir_entry_2 *) frames[0].bh->b_data;
		de = ext3_next_entry(de);
		if ((err = ext3_htree_store_dirent(dir_file, 2, 0, de)) != 0)
			goto errout;
		count++;
	}

	while (1) {
		block = dx_get_block(frame->at);
		ret = htree_dirblock_to_tree(dir_file, dir, block, &hinfo,
					     start_hash, start_minor_hash);
		if (ret < 0) {
			err = ret;
			goto errout;
		}
		count += ret;
		hashval = ~0;
		ret = ext3_htree_next_block(dir, HASH_NB_ALWAYS,
					    frame, frames, &hashval);
		*next_hash = hashval;
		if (ret < 0) {
			err = ret;
			goto errout;
		}
		/*
		 * Stop if:  (a) there are no more entries, or
		 * (b) we have inserted at least one entry and the
		 * next hash value is not a continuation
		 */
		if ((ret == 0) ||
		    (count && ((hashval & 1) == 0)))
			break;
	}
	dx_release(frames);
	dxtrace(printk("Fill tree: returned %d entries, next hash: %x\n",
		       count, *next_hash));
	return count;
errout:
	dx_release(frames);
	return (err);
}
702 690
703 691
704 /* 692 /*
705 * Directory block splitting, compacting 693 * Directory block splitting, compacting
706 */ 694 */
707 695
/*
 * Create map of hash values, offsets, and sizes, stored at end of block.
 * Returns number of entries mapped.
 *
 * @map_tail points one past the end of the map area; the map is filled
 * downward (map_tail is pre-decremented), so after return the entries
 * live in map_tail[-count .. -1].
 */
static int dx_make_map(struct ext3_dir_entry_2 *de, unsigned blocksize,
		struct dx_hash_info *hinfo, struct dx_map_entry *map_tail)
{
	int count = 0;
	char *base = (char *) de;
	/* local copy: ext3fs_dirhash() writes the hash back into it */
	struct dx_hash_info h = *hinfo;

	while ((char *) de < base + blocksize)
	{
		/* only live entries (non-empty name, allocated inode) */
		if (de->name_len && de->inode) {
			ext3fs_dirhash(de->name, de->name_len, &h);
			map_tail--;
			map_tail->hash = h.hash;
			map_tail->offs = (u16) ((char *) de - base);
			map_tail->size = le16_to_cpu(de->rec_len);
			count++;
			cond_resched();
		}
		/* XXX: do we need to check rec_len == 0 case? -Chris */
		de = ext3_next_entry(de);
	}
	return count;
}
735 723
/*
 * Sort map by hash value (ascending, in place).
 *
 * First a combsort pass with shrink factor 13/10 roughly orders the
 * array, then a bubble sort finishes it; the bubble pass is cheap once
 * the array is nearly sorted.
 */
static void dx_sort_map (struct dx_map_entry *map, unsigned count)
{
	struct dx_map_entry *p, *q, *top = map + count - 1;
	int more;
	/* Combsort until bubble sort doesn't suck */
	while (count > 2)
	{
		count = count*10/13;
		if (count - 9 < 2) /* 9, 10 -> 11 */
			count = 11;
		for (p = top, q = p - count; q >= map; p--, q--)
			if (p->hash < q->hash)
				swap(*p, *q);
	}
	/* Garden variety bubble sort */
	do {
		more = 0;
		q = top;
		while (q-- > map)
		{
			if (q[1].hash >= q[0].hash)
				continue;
			swap(*(q+1), *q);
			more = 1;
		}
	} while(more);
}
764 752
765 static void dx_insert_block(struct dx_frame *frame, u32 hash, u32 block) 753 static void dx_insert_block(struct dx_frame *frame, u32 hash, u32 block)
766 { 754 {
767 struct dx_entry *entries = frame->entries; 755 struct dx_entry *entries = frame->entries;
768 struct dx_entry *old = frame->at, *new = old + 1; 756 struct dx_entry *old = frame->at, *new = old + 1;
769 int count = dx_get_count(entries); 757 int count = dx_get_count(entries);
770 758
771 assert(count < dx_get_limit(entries)); 759 assert(count < dx_get_limit(entries));
772 assert(old < entries + count); 760 assert(old < entries + count);
773 memmove(new + 1, new, (char *)(entries + count) - (char *)(new)); 761 memmove(new + 1, new, (char *)(entries + count) - (char *)(new));
774 dx_set_hash(new, hash); 762 dx_set_hash(new, hash);
775 dx_set_block(new, block); 763 dx_set_block(new, block);
776 dx_set_count(entries, count + 1); 764 dx_set_count(entries, count + 1);
777 } 765 }
778 766
779 static void ext3_update_dx_flag(struct inode *inode) 767 static void ext3_update_dx_flag(struct inode *inode)
780 { 768 {
781 if (!EXT3_HAS_COMPAT_FEATURE(inode->i_sb, 769 if (!EXT3_HAS_COMPAT_FEATURE(inode->i_sb,
782 EXT3_FEATURE_COMPAT_DIR_INDEX)) 770 EXT3_FEATURE_COMPAT_DIR_INDEX))
783 EXT3_I(inode)->i_flags &= ~EXT3_INDEX_FL; 771 EXT3_I(inode)->i_flags &= ~EXT3_INDEX_FL;
784 } 772 }
785 773
786 /* 774 /*
787 * NOTE! unlike strncmp, ext3_match returns 1 for success, 0 for failure. 775 * NOTE! unlike strncmp, ext3_match returns 1 for success, 0 for failure.
788 * 776 *
789 * `len <= EXT3_NAME_LEN' is guaranteed by caller. 777 * `len <= EXT3_NAME_LEN' is guaranteed by caller.
790 * `de != NULL' is guaranteed by caller. 778 * `de != NULL' is guaranteed by caller.
791 */ 779 */
792 static inline int ext3_match (int len, const char * const name, 780 static inline int ext3_match (int len, const char * const name,
793 struct ext3_dir_entry_2 * de) 781 struct ext3_dir_entry_2 * de)
794 { 782 {
795 if (len != de->name_len) 783 if (len != de->name_len)
796 return 0; 784 return 0;
797 if (!de->inode) 785 if (!de->inode)
798 return 0; 786 return 0;
799 return !memcmp(name, de->name, len); 787 return !memcmp(name, de->name, len);
800 } 788 }
801 789
/*
 * Scan one directory block for an entry named @child.
 *
 * Returns 0 if not found, -1 on failure, and 1 on success.  On success
 * *res_dir points at the matching entry inside bh->b_data.  @offset is
 * the byte offset of this block within the directory, used only for
 * error reporting by ext3_check_dir_entry().
 */
static inline int search_dirblock(struct buffer_head * bh,
				  struct inode *dir,
				  struct qstr *child,
				  unsigned long offset,
				  struct ext3_dir_entry_2 ** res_dir)
{
	struct ext3_dir_entry_2 * de;
	char * dlimit;
	int de_len;
	const char *name = child->name;
	int namelen = child->len;

	de = (struct ext3_dir_entry_2 *) bh->b_data;
	dlimit = bh->b_data + dir->i_sb->s_blocksize;	/* end of block */
	while ((char *) de < dlimit) {
		/* this code is executed quadratically often */
		/* do minimal checking `by hand' */

		/* cheap pre-check: name must fit before the block end */
		if ((char *) de + namelen <= dlimit &&
		    ext3_match (namelen, name, de)) {
			/* found a match - just to be sure, do a full check */
			if (!ext3_check_dir_entry("ext3_find_entry",
						  dir, de, bh, offset))
				return -1;
			*res_dir = de;
			return 1;
		}
		/* prevent looping on a bad block */
		de_len = ext3_rec_len_from_disk(de->rec_len);
		if (de_len <= 0)
			return -1;
		offset += de_len;
		de = (struct ext3_dir_entry_2 *) ((char *) de + de_len);
	}
	return 0;
}
841 829
842 830
/*
 * ext3_find_entry()
 *
 * finds an entry in the specified directory with the wanted name. It
 * returns the cache buffer in which the entry was found, and the entry
 * itself (as a parameter - res_dir). It does NOT read the inode of the
 * entry - you'll have to do that yourself if you want to.
 *
 * The returned buffer_head has ->b_count elevated. The caller is expected
 * to brelse() it when appropriate.
 *
 * For htree directories the indexed lookup is tried first; a linear scan
 * with NAMEI_RA_SIZE-deep block readahead is the fallback.
 */
static struct buffer_head *ext3_find_entry(struct inode *dir,
					struct qstr *entry,
					struct ext3_dir_entry_2 **res_dir)
{
	struct super_block * sb;
	struct buffer_head * bh_use[NAMEI_RA_SIZE];
	struct buffer_head * bh, *ret = NULL;
	unsigned long start, block, b;
	const u8 *name = entry->name;
	int ra_max = 0;		/* Number of bh's in the readahead
				   buffer, bh_use[] */
	int ra_ptr = 0;		/* Current index into readahead
				   buffer */
	int num = 0;
	int nblocks, i, err;
	int namelen;

	*res_dir = NULL;
	sb = dir->i_sb;
	namelen = entry->len;
	if (namelen > EXT3_NAME_LEN)
		return NULL;
	if ((namelen <= 2) && (name[0] == '.') &&
	    (name[1] == '.' || name[1] == 0)) {
		/*
		 * "." or ".." will only be in the first block
		 * NFS may look up ".."; "." should be handled by the VFS
		 */
		block = start = 0;
		nblocks = 1;
		goto restart;
	}
	if (is_dx(dir)) {
		bh = ext3_dx_find_entry(dir, entry, res_dir, &err);
		/*
		 * On success, or if the error was file not found,
		 * return.  Otherwise, fall back to doing a search the
		 * old fashioned way.
		 */
		if (bh || (err != ERR_BAD_DX_DIR))
			return bh;
		dxtrace(printk("ext3_find_entry: dx failed, falling back\n"));
	}
	nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb);
	/* resume the linear scan where the last successful lookup ended */
	start = EXT3_I(dir)->i_dir_start_lookup;
	if (start >= nblocks)
		start = 0;
	block = start;
restart:
	do {
		/*
		 * We deal with the read-ahead logic here.
		 */
		if (ra_ptr >= ra_max) {
			/* Refill the readahead buffer */
			ra_ptr = 0;
			b = block;
			for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) {
				/*
				 * Terminate if we reach the end of the
				 * directory and must wrap, or if our
				 * search has finished at this block.
				 */
				if (b >= nblocks || (num && block == start)) {
					bh_use[ra_max] = NULL;
					break;
				}
				num++;
				bh = ext3_getblk(NULL, dir, b++, 0, &err);
				bh_use[ra_max] = bh;
				/* kick off async read for not-yet-valid buffers */
				if (bh && !bh_uptodate_or_lock(bh)) {
					get_bh(bh);
					bh->b_end_io = end_buffer_read_sync;
					submit_bh(READ | REQ_META | REQ_PRIO,
						  bh);
				}
			}
		}
		if ((bh = bh_use[ra_ptr++]) == NULL)
			goto next;
		wait_on_buffer(bh);
		if (!buffer_uptodate(bh)) {
			/* read error, skip block & hope for the best */
			ext3_error(sb, __func__, "reading directory #%lu "
				   "offset %lu", dir->i_ino, block);
			brelse(bh);
			goto next;
		}
		i = search_dirblock(bh, dir, entry,
			    block << EXT3_BLOCK_SIZE_BITS(sb), res_dir);
		if (i == 1) {
			/* remember where we found it to speed up next lookup */
			EXT3_I(dir)->i_dir_start_lookup = block;
			ret = bh;
			goto cleanup_and_exit;
		} else {
			brelse(bh);
			if (i < 0)
				goto cleanup_and_exit;
		}
	next:
		if (++block >= nblocks)
			block = 0;
	} while (block != start);

	/*
	 * If the directory has grown while we were searching, then
	 * search the last part of the directory before giving up.
	 */
	block = nblocks;
	nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb);
	if (block < nblocks) {
		start = 0;
		goto restart;
	}

cleanup_and_exit:
	/* Clean up the read-ahead blocks */
	for (; ra_ptr < ra_max; ra_ptr++)
		brelse (bh_use[ra_ptr]);
	return ret;
}
975 963
/*
 * Hash-indexed lookup: probe the htree for @entry's hash, then scan the
 * candidate leaf block(s).  Returns the buffer holding the match (with
 * *res_dir set) or NULL with *err set (-ENOENT when simply absent,
 * ERR_BAD_DX_DIR on index corruption, or a read error).
 */
static struct buffer_head * ext3_dx_find_entry(struct inode *dir,
			struct qstr *entry, struct ext3_dir_entry_2 **res_dir,
			int *err)
{
	struct super_block *sb = dir->i_sb;
	struct dx_hash_info	hinfo;
	struct dx_frame frames[2], *frame;
	struct buffer_head *bh;
	unsigned long block;
	int retval;

	if (!(frame = dx_probe(entry, dir, &hinfo, frames, err)))
		return NULL;
	do {
		block = dx_get_block(frame->at);
		if (!(bh = ext3_bread (NULL,dir, block, 0, err)))
			goto errout;

		retval = search_dirblock(bh, dir, entry,
					 block << EXT3_BLOCK_SIZE_BITS(sb),
					 res_dir);
		if (retval == 1) {
			/* found: release the index frames, hand back the leaf */
			dx_release(frames);
			return bh;
		}
		brelse(bh);
		if (retval == -1) {
			*err = ERR_BAD_DX_DIR;
			goto errout;
		}

		/* Check to see if we should continue to search */
		retval = ext3_htree_next_block(dir, hinfo.hash, frame,
						frames, NULL);
		if (retval < 0) {
			ext3_warning(sb, __func__,
			     "error reading index page in directory #%lu",
			     dir->i_ino);
			*err = retval;
			goto errout;
		}
	} while (retval == 1);

	*err = -ENOENT;
errout:
	dxtrace(printk("%s not found\n", entry->name));
	dx_release (frames);
	return NULL;
}
1025 1013
/*
 * VFS ->lookup() for ext3 directories: find @dentry's name in @dir,
 * fetch the inode it names, and splice it into the dcache.  A lookup
 * miss yields a negative dentry (inode == NULL) via d_splice_alias().
 */
static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
{
	struct inode * inode;
	struct ext3_dir_entry_2 * de;
	struct buffer_head * bh;

	if (dentry->d_name.len > EXT3_NAME_LEN)
		return ERR_PTR(-ENAMETOOLONG);

	bh = ext3_find_entry(dir, &dentry->d_name, &de);
	inode = NULL;
	if (bh) {
		unsigned long ino = le32_to_cpu(de->inode);
		brelse (bh);
		/* on-disk inode number outside the valid range => corruption */
		if (!ext3_valid_inum(dir->i_sb, ino)) {
			ext3_error(dir->i_sb, "ext3_lookup",
				   "bad inode number: %lu", ino);
			return ERR_PTR(-EIO);
		}
		/* a dirent pointing at a deleted inode is fs corruption */
		inode = ext3_iget(dir->i_sb, ino);
		if (inode == ERR_PTR(-ESTALE)) {
			ext3_error(dir->i_sb, __func__,
					"deleted inode referenced: %lu",
					ino);
			return ERR_PTR(-EIO);
		}
		/*
		 * NOTE(review): ERR_PTR values from ext3_iget() other than
		 * -ESTALE fall through to d_splice_alias() -- confirm the
		 * VFS tolerates error pointers here.
		 */
	}
	return d_splice_alias(inode, dentry);
}
1055 1043
1056 1044
1057 struct dentry *ext3_get_parent(struct dentry *child) 1045 struct dentry *ext3_get_parent(struct dentry *child)
1058 { 1046 {
1059 unsigned long ino; 1047 unsigned long ino;
1060 struct qstr dotdot = {.name = "..", .len = 2}; 1048 struct qstr dotdot = {.name = "..", .len = 2};
1061 struct ext3_dir_entry_2 * de; 1049 struct ext3_dir_entry_2 * de;
1062 struct buffer_head *bh; 1050 struct buffer_head *bh;
1063 1051
1064 bh = ext3_find_entry(child->d_inode, &dotdot, &de); 1052 bh = ext3_find_entry(child->d_inode, &dotdot, &de);
1065 if (!bh) 1053 if (!bh)
1066 return ERR_PTR(-ENOENT); 1054 return ERR_PTR(-ENOENT);
1067 ino = le32_to_cpu(de->inode); 1055 ino = le32_to_cpu(de->inode);
1068 brelse(bh); 1056 brelse(bh);
1069 1057
1070 if (!ext3_valid_inum(child->d_inode->i_sb, ino)) { 1058 if (!ext3_valid_inum(child->d_inode->i_sb, ino)) {
1071 ext3_error(child->d_inode->i_sb, "ext3_get_parent", 1059 ext3_error(child->d_inode->i_sb, "ext3_get_parent",
1072 "bad inode number: %lu", ino); 1060 "bad inode number: %lu", ino);
1073 return ERR_PTR(-EIO); 1061 return ERR_PTR(-EIO);
1074 } 1062 }
1075 1063
1076 return d_obtain_alias(ext3_iget(child->d_inode->i_sb, ino)); 1064 return d_obtain_alias(ext3_iget(child->d_inode->i_sb, ino));
1077 } 1065 }
1078 1066
#define S_SHIFT 12
/*
 * Map the S_IFMT bits of an inode mode (shifted down by S_SHIFT) to the
 * on-disk EXT3_FT_* directory-entry file type codes.  Mode values not
 * listed here map to 0.
 */
static unsigned char ext3_type_by_mode[S_IFMT >> S_SHIFT] = {
	[S_IFREG >> S_SHIFT]	= EXT3_FT_REG_FILE,
	[S_IFDIR >> S_SHIFT]	= EXT3_FT_DIR,
	[S_IFCHR >> S_SHIFT]	= EXT3_FT_CHRDEV,
	[S_IFBLK >> S_SHIFT]	= EXT3_FT_BLKDEV,
	[S_IFIFO >> S_SHIFT]	= EXT3_FT_FIFO,
	[S_IFSOCK >> S_SHIFT]	= EXT3_FT_SOCK,
	[S_IFLNK >> S_SHIFT]	= EXT3_FT_SYMLINK,
};
1089 1077
1090 static inline void ext3_set_de_type(struct super_block *sb, 1078 static inline void ext3_set_de_type(struct super_block *sb,
1091 struct ext3_dir_entry_2 *de, 1079 struct ext3_dir_entry_2 *de,
1092 umode_t mode) { 1080 umode_t mode) {
1093 if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_FILETYPE)) 1081 if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_FILETYPE))
1094 de->file_type = ext3_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; 1082 de->file_type = ext3_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
1095 } 1083 }
1096 1084
1097 /* 1085 /*
1098 * Move count entries from end of map between two memory locations. 1086 * Move count entries from end of map between two memory locations.
1099 * Returns pointer to last entry moved. 1087 * Returns pointer to last entry moved.
1100 */ 1088 */
1101 static struct ext3_dir_entry_2 * 1089 static struct ext3_dir_entry_2 *
1102 dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count) 1090 dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
1103 { 1091 {
1104 unsigned rec_len = 0; 1092 unsigned rec_len = 0;
1105 1093
1106 while (count--) { 1094 while (count--) {
1107 struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs); 1095 struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs);
1108 rec_len = EXT3_DIR_REC_LEN(de->name_len); 1096 rec_len = EXT3_DIR_REC_LEN(de->name_len);
1109 memcpy (to, de, rec_len); 1097 memcpy (to, de, rec_len);
1110 ((struct ext3_dir_entry_2 *) to)->rec_len = 1098 ((struct ext3_dir_entry_2 *) to)->rec_len =
1111 ext3_rec_len_to_disk(rec_len); 1099 ext3_rec_len_to_disk(rec_len);
1112 de->inode = 0; 1100 de->inode = 0;
1113 map++; 1101 map++;
1114 to += rec_len; 1102 to += rec_len;
1115 } 1103 }
1116 return (struct ext3_dir_entry_2 *) (to - rec_len); 1104 return (struct ext3_dir_entry_2 *) (to - rec_len);
1117 } 1105 }
1118 1106
/*
 * Compact each dir entry in the range to the minimal rec_len.
 * Entries with a zero inode or zero name_len (deleted slots) are
 * squeezed out in place.
 * Returns pointer to last entry in range.
 */
static struct ext3_dir_entry_2 *dx_pack_dirents(char *base, unsigned blocksize)
{
	struct ext3_dir_entry_2 *next, *to, *prev;
	struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *)base;
	unsigned rec_len = 0;

	prev = to = de;
	while ((char *)de < base + blocksize) {
		/* fetch the successor before we overwrite de's rec_len */
		next = ext3_next_entry(de);
		if (de->inode && de->name_len) {
			rec_len = EXT3_DIR_REC_LEN(de->name_len);
			if (de > to)
				memmove(to, de, rec_len);
			to->rec_len = ext3_rec_len_to_disk(rec_len);
			prev = to;
			to = (struct ext3_dir_entry_2 *) (((char *) to) + rec_len);
		}
		de = next;
	}
	return prev;
}
1144 1132
/*
 * Split a full leaf block to make room for a new dir entry.
 * Allocate a new block, and move entries so that they are approx. equally full.
 * Returns pointer to de in block into which the new entry will be inserted.
 *
 * On return *bh is the (journaled) block that will receive the new entry;
 * on error, *bh is released, set to NULL, and *error carries the code.
 */
static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
			struct buffer_head **bh,struct dx_frame *frame,
			struct dx_hash_info *hinfo, int *error)
{
	unsigned blocksize = dir->i_sb->s_blocksize;
	unsigned count, continued;
	struct buffer_head *bh2;
	u32 newblock;
	u32 hash2;
	struct dx_map_entry *map;
	char *data1 = (*bh)->b_data, *data2;
	unsigned split, move, size;
	struct ext3_dir_entry_2 *de = NULL, *de2;
	int	err = 0, i;

	bh2 = ext3_append (handle, dir, &newblock, &err);
	if (!(bh2)) {
		brelse(*bh);
		*bh = NULL;
		goto errout;
	}

	BUFFER_TRACE(*bh, "get_write_access");
	err = ext3_journal_get_write_access(handle, *bh);
	if (err)
		goto journal_error;

	BUFFER_TRACE(frame->bh, "get_write_access");
	err = ext3_journal_get_write_access(handle, frame->bh);
	if (err)
		goto journal_error;

	data2 = bh2->b_data;

	/* create map in the end of data2 block */
	map = (struct dx_map_entry *) (data2 + blocksize);
	count = dx_make_map ((struct ext3_dir_entry_2 *) data1,
			     blocksize, hinfo, map);
	map -= count;
	/* sort the map by hash so we can pick a clean split point */
	dx_sort_map (map, count);
	/* Split the existing block in the middle, size-wise */
	size = 0;
	move = 0;
	for (i = count-1; i >= 0; i--) {
		/* is more than half of this entry in 2nd half of the block? */
		if (size + map[i].size/2 > blocksize/2)
			break;
		size += map[i].size;
		move++;
	}
	/* map index at which we will split */
	split = count - move;
	hash2 = map[split].hash;
	/*
	 * If the hash at the split point equals its predecessor, the run of
	 * identical hashes straddles both blocks; the flag is folded into
	 * the index hash below so lookups know to continue.
	 */
	continued = hash2 == map[split - 1].hash;
	dxtrace(printk("Split block %i at %x, %i/%i\n",
		dx_get_block(frame->at), hash2, split, count-split));

	/* Fancy dance to stay within two buffers */
	de2 = dx_move_dirents(data1, data2, map + split, count - split);
	de = dx_pack_dirents(data1,blocksize);
	/* extend the last entry of each block to its block end */
	de->rec_len = ext3_rec_len_to_disk(data1 + blocksize - (char *) de);
	de2->rec_len = ext3_rec_len_to_disk(data2 + blocksize - (char *) de2);
	dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data1, blocksize, 1));
	dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data2, blocksize, 1));

	/* Which block gets the new entry? */
	if (hinfo->hash >= hash2)
	{
		swap(*bh, bh2);
		de = de2;
	}
	dx_insert_block (frame, hash2 + continued, newblock);
	err = ext3_journal_dirty_metadata (handle, bh2);
	if (err)
		goto journal_error;
	err = ext3_journal_dirty_metadata (handle, frame->bh);
	if (err)
		goto journal_error;
	brelse (bh2);
	dxtrace(dx_show_index ("frame", frame->entries));
	return de;

journal_error:
	brelse(*bh);
	brelse(bh2);
	*bh = NULL;
	ext3_std_error(dir->i_sb, err);
errout:
	*error = err;
	return NULL;
}
1241 1229
1242 1230
/*
 * Add a new entry into a directory (leaf) block.  If de is non-NULL,
 * it points to a directory entry which is guaranteed to be large
 * enough for new directory entry.  If de is NULL, then
 * add_dirent_to_buf will attempt search the directory block for
 * space.  It will return -ENOSPC if no space is available, and -EIO
 * and -EEXIST if directory entry already exists.
 *
 * NOTE!  bh is NOT released in the case where ENOSPC is returned.  In
 * all other cases bh is released.
 */
static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
			     struct inode *inode, struct ext3_dir_entry_2 *de,
			     struct buffer_head * bh)
{
	struct inode *dir = dentry->d_parent->d_inode;
	const char *name = dentry->d_name.name;
	int namelen = dentry->d_name.len;
	unsigned long offset = 0;
	unsigned short reclen;
	int nlen, rlen, err;
	char *top;

	reclen = EXT3_DIR_REC_LEN(namelen);
	if (!de) {
		/* scan the block for a slot with enough slack space */
		de = (struct ext3_dir_entry_2 *)bh->b_data;
		top = bh->b_data + dir->i_sb->s_blocksize - reclen;
		while ((char *) de <= top) {
			if (!ext3_check_dir_entry("ext3_add_entry", dir, de,
						  bh, offset)) {
				brelse (bh);
				return -EIO;
			}
			if (ext3_match (namelen, name, de)) {
				brelse (bh);
				return -EEXIST;
			}
			nlen = EXT3_DIR_REC_LEN(de->name_len);
			rlen = ext3_rec_len_from_disk(de->rec_len);
			/*
			 * A live entry must have reclen bytes of slack after
			 * its own minimal length; a dead one just needs to be
			 * reclen bytes long.
			 */
			if ((de->inode? rlen - nlen: rlen) >= reclen)
				break;
			de = (struct ext3_dir_entry_2 *)((char *)de + rlen);
			offset += rlen;
		}
		if ((char *) de > top)
			return -ENOSPC;
	}
	BUFFER_TRACE(bh, "get_write_access");
	err = ext3_journal_get_write_access(handle, bh);
	if (err) {
		ext3_std_error(dir->i_sb, err);
		brelse(bh);
		return err;
	}

	/* By now the buffer is marked for journaling */
	nlen = EXT3_DIR_REC_LEN(de->name_len);
	rlen = ext3_rec_len_from_disk(de->rec_len);
	if (de->inode) {
		/* split the live entry: trim it, use its slack for de1 */
		struct ext3_dir_entry_2 *de1 = (struct ext3_dir_entry_2 *)((char *)de + nlen);
		de1->rec_len = ext3_rec_len_to_disk(rlen - nlen);
		de->rec_len = ext3_rec_len_to_disk(nlen);
		de = de1;
	}
	de->file_type = EXT3_FT_UNKNOWN;
	if (inode) {
		de->inode = cpu_to_le32(inode->i_ino);
		ext3_set_de_type(dir->i_sb, de, inode->i_mode);
	} else
		de->inode = 0;
	de->name_len = namelen;
	memcpy (de->name, name, namelen);
	/*
	 * XXX shouldn't update any times until successful
	 * completion of syscall, but too many callers depend
	 * on this.
	 *
	 * XXX similarly, too many callers depend on
	 * ext3_new_inode() setting the times, but error
	 * recovery deletes the inode, so the worst that can
	 * happen is that the times are slightly out of date
	 * and/or different from the directory change time.
	 */
	dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
	ext3_update_dx_flag(dir);
	dir->i_version++;
	ext3_mark_inode_dirty(handle, dir);
	BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
	err = ext3_journal_dirty_metadata(handle, bh);
	if (err)
		ext3_std_error(dir->i_sb, err);
	brelse(bh);
	return 0;
}
1337 1325
/*
 * This converts a one block unindexed directory to a 3 block indexed
 * directory, and adds the dentry to the indexed directory.
 *
 * Block 0 becomes the dx_root (keeping only the "." and ".." dirents),
 * the old dirents are copied into a freshly appended block, and the new
 * name is inserted via do_split()/add_dirent_to_buf().
 */
static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
			struct inode *inode, struct buffer_head *bh)
{
	struct inode *dir = dentry->d_parent->d_inode;
	const char *name = dentry->d_name.name;
	int namelen = dentry->d_name.len;
	struct buffer_head *bh2;
	struct dx_root *root;
	struct dx_frame frames[2], *frame;
	struct dx_entry *entries;
	struct ext3_dir_entry_2 *de, *de2;
	char *data1, *top;
	unsigned len;
	int retval;
	unsigned blocksize;
	struct dx_hash_info hinfo;
	u32 block;
	struct fake_dirent *fde;

	blocksize = dir->i_sb->s_blocksize;
	dxtrace(printk(KERN_DEBUG "Creating index: inode %lu\n", dir->i_ino));
	retval = ext3_journal_get_write_access(handle, bh);
	if (retval) {
		ext3_std_error(dir->i_sb, retval);
		brelse(bh);
		return retval;
	}
	root = (struct dx_root *) bh->b_data;

	/* The 0th block becomes the root, move the dirents out */
	fde = &root->dotdot;
	de = (struct ext3_dir_entry_2 *)((char *)fde +
			ext3_rec_len_from_disk(fde->rec_len));
	/* Sanity check: a corrupt '..' rec_len would make us copy past
	 * the end of the block below. */
	if ((char *) de >= (((char *) root) + blocksize)) {
		ext3_error(dir->i_sb, __func__,
			   "invalid rec_len for '..' in inode %lu",
			   dir->i_ino);
		brelse(bh);
		return -EIO;
	}
	len = ((char *) root) + blocksize - (char *) de;

	/* Append a new data block to receive the existing dirents. */
	bh2 = ext3_append (handle, dir, &block, &retval);
	if (!(bh2)) {
		brelse(bh);
		return retval;
	}
	EXT3_I(dir)->i_flags |= EXT3_INDEX_FL;
	data1 = bh2->b_data;

	memcpy (data1, de, len);
	de = (struct ext3_dir_entry_2 *) data1;
	top = data1 + len;
	/* Walk to the last dirent and stretch its rec_len so the copied
	 * entries cover the whole new block. */
	while ((char *)(de2 = ext3_next_entry(de)) < top)
		de = de2;
	de->rec_len = ext3_rec_len_to_disk(data1 + blocksize - (char *) de);
	/* Initialize the root; the dot dirents already exist */
	de = (struct ext3_dir_entry_2 *) (&root->dotdot);
	de->rec_len = ext3_rec_len_to_disk(blocksize - EXT3_DIR_REC_LEN(2));
	memset (&root->info, 0, sizeof(root->info));
	root->info.info_length = sizeof(root->info);
	root->info.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version;
	entries = root->entries;
	/* Single index entry pointing at block 1 (the moved dirents). */
	dx_set_block (entries, 1);
	dx_set_count (entries, 1);
	dx_set_limit (entries, dx_root_limit(dir, sizeof(root->info)));

	/* Initialize as for dx_probe */
	hinfo.hash_version = root->info.hash_version;
	if (hinfo.hash_version <= DX_HASH_TEA)
		hinfo.hash_version += EXT3_SB(dir->i_sb)->s_hash_unsigned;
	hinfo.seed = EXT3_SB(dir->i_sb)->s_hash_seed;
	ext3fs_dirhash(name, namelen, &hinfo);
	frame = frames;
	frame->entries = entries;
	frame->at = entries;
	frame->bh = bh;
	/* From here on, bh is the dirent block and frame->bh the root. */
	bh = bh2;
	/*
	 * Mark buffers dirty here so that if do_split() fails we write a
	 * consistent set of buffers to disk.
	 */
	ext3_journal_dirty_metadata(handle, frame->bh);
	ext3_journal_dirty_metadata(handle, bh);
	de = do_split(handle,dir, &bh, frame, &hinfo, &retval);
	if (!de) {
		ext3_mark_inode_dirty(handle, dir);
		dx_release(frames);
		return retval;
	}
	dx_release(frames);

	return add_dirent_to_buf(handle, dentry, inode, de, bh);
}
1436 1424
/*
 * ext3_add_entry()
 *
 * adds a file entry to the specified directory, using the same
 * semantics as ext3_find_entry(). It returns NULL if it failed.
 *
 * NOTE!! The inode part of 'de' is left at 0 - which means you
 * may not sleep between calling this and putting something into
 * the entry, as someone else might have used it while you slept.
 */
static int ext3_add_entry (handle_t *handle, struct dentry *dentry,
	struct inode *inode)
{
	struct inode *dir = dentry->d_parent->d_inode;
	struct buffer_head * bh;
	struct ext3_dir_entry_2 *de;
	struct super_block * sb;
	int retval;
	int dx_fallback=0;	/* set once we have given up on the index */
	unsigned blocksize;
	u32 block, blocks;

	sb = dir->i_sb;
	blocksize = sb->s_blocksize;
	if (!dentry->d_name.len)
		return -EINVAL;
	if (is_dx(dir)) {
		retval = ext3_dx_add_entry(handle, dentry, inode);
		if (!retval || (retval != ERR_BAD_DX_DIR))
			return retval;
		/* The hashed index is unusable (ERR_BAD_DX_DIR): clear the
		 * flag and fall back to the linear scan below. */
		EXT3_I(dir)->i_flags &= ~EXT3_INDEX_FL;
		dx_fallback++;
		ext3_mark_inode_dirty(handle, dir);
	}
	blocks = dir->i_size >> sb->s_blocksize_bits;
	/* Linear scan: try each existing block for free space. */
	for (block = 0; block < blocks; block++) {
		bh = ext3_bread(handle, dir, block, 0, &retval);
		if(!bh)
			return retval;
		retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
		if (retval != -ENOSPC)
			return retval;

		/* A single-block directory that just filled up is the
		 * trigger to build an index (unless we just fell back
		 * from one, or the feature is off). */
		if (blocks == 1 && !dx_fallback &&
		    EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX))
			return make_indexed_dir(handle, dentry, inode, bh);
		brelse(bh);
	}
	/* All blocks full: append a fresh block holding one big empty
	 * dirent and insert into it. */
	bh = ext3_append(handle, dir, &block, &retval);
	if (!bh)
		return retval;
	de = (struct ext3_dir_entry_2 *) bh->b_data;
	de->inode = 0;
	de->rec_len = ext3_rec_len_to_disk(blocksize);
	return add_dirent_to_buf(handle, dentry, inode, de, bh);
}
1493 1481
/*
 * Returns 0 for success, or a negative error value
 *
 * Adds an entry to a hashed-index directory: probe the index to the
 * correct leaf, try to insert, and if the leaf is full split it
 * (growing the index by one level if necessary, up to two levels).
 */
static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
	struct inode *inode)
{
	struct dx_frame frames[2], *frame;
	struct dx_entry *entries, *at;
	struct dx_hash_info hinfo;
	struct buffer_head * bh;
	struct inode *dir = dentry->d_parent->d_inode;
	struct super_block * sb = dir->i_sb;
	struct ext3_dir_entry_2 *de;
	int err;

	/* Walk the index down to the leaf that should hold this name. */
	frame = dx_probe(&dentry->d_name, dir, &hinfo, frames, &err);
	if (!frame)
		return err;
	entries = frame->entries;
	at = frame->at;

	if (!(bh = ext3_bread(handle,dir, dx_get_block(frame->at), 0, &err)))
		goto cleanup;

	BUFFER_TRACE(bh, "get_write_access");
	err = ext3_journal_get_write_access(handle, bh);
	if (err)
		goto journal_error;

	err = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
	if (err != -ENOSPC) {
		/* add_dirent_to_buf() owns bh on non-ENOSPC returns (it
		 * brelse()s on its success path) - don't release it again
		 * in cleanup. */
		bh = NULL;
		goto cleanup;
	}

	/* Block full, should compress but for now just split */
	dxtrace(printk("using %u of %u node entries\n",
		       dx_get_count(entries), dx_get_limit(entries)));
	/* Need to split index? */
	if (dx_get_count(entries) == dx_get_limit(entries)) {
		u32 newblock;
		unsigned icount = dx_get_count(entries);
		int levels = frame - frames;
		struct dx_entry *entries2;
		struct dx_node *node2;
		struct buffer_head *bh2;

		/* Two levels maximum: if the root is also full, the index
		 * cannot grow any further. */
		if (levels && (dx_get_count(frames->entries) ==
			       dx_get_limit(frames->entries))) {
			ext3_warning(sb, __func__,
				     "Directory index full!");
			err = -ENOSPC;
			goto cleanup;
		}
		bh2 = ext3_append (handle, dir, &newblock, &err);
		if (!(bh2))
			goto cleanup;
		node2 = (struct dx_node *)(bh2->b_data);
		entries2 = node2->entries;
		/* The fake dirent spanning the block keeps non-dx-aware
		 * readers from interpreting index data as entries. */
		memset(&node2->fake, 0, sizeof(struct fake_dirent));
		node2->fake.rec_len = ext3_rec_len_to_disk(sb->s_blocksize);
		BUFFER_TRACE(frame->bh, "get_write_access");
		err = ext3_journal_get_write_access(handle, frame->bh);
		if (err)
			goto journal_error;
		if (levels) {
			/* Already two levels: split this interior node in
			 * half and hook the upper half into the root. */
			unsigned icount1 = icount/2, icount2 = icount - icount1;
			unsigned hash2 = dx_get_hash(entries + icount1);
			dxtrace(printk("Split index %i/%i\n", icount1, icount2));

			BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
			err = ext3_journal_get_write_access(handle,
							     frames[0].bh);
			if (err)
				goto journal_error;

			memcpy ((char *) entries2, (char *) (entries + icount1),
				icount2 * sizeof(struct dx_entry));
			dx_set_count (entries, icount1);
			dx_set_count (entries2, icount2);
			dx_set_limit (entries2, dx_node_limit(dir));

			/* Which index block gets the new entry? */
			if (at - entries >= icount1) {
				frame->at = at = at - entries - icount1 + entries2;
				frame->entries = entries = entries2;
				swap(frame->bh, bh2);
			}
			dx_insert_block (frames + 0, hash2, newblock);
			dxtrace(dx_show_index ("node", frames[1].entries));
			dxtrace(dx_show_index ("node",
			       ((struct dx_node *) bh2->b_data)->entries));
			err = ext3_journal_dirty_metadata(handle, bh2);
			if (err)
				goto journal_error;
			brelse (bh2);
		} else {
			/* One level and the root is full: push all root
			 * entries into the new node and grow to two levels. */
			dxtrace(printk("Creating second level index...\n"));
			memcpy((char *) entries2, (char *) entries,
			       icount * sizeof(struct dx_entry));
			dx_set_limit(entries2, dx_node_limit(dir));

			/* Set up root */
			dx_set_count(entries, 1);
			dx_set_block(entries + 0, newblock);
			((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1;

			/* Add new access path frame */
			frame = frames + 1;
			frame->at = at = at - entries + entries2;
			frame->entries = entries = entries2;
			frame->bh = bh2;
			err = ext3_journal_get_write_access(handle,
							     frame->bh);
			if (err)
				goto journal_error;
		}
		err = ext3_journal_dirty_metadata(handle, frames[0].bh);
		if (err)
			goto journal_error;
	}
	/* Split the full leaf and insert into the appropriate half. */
	de = do_split(handle, dir, &bh, frame, &hinfo, &err);
	if (!de)
		goto cleanup;
	err = add_dirent_to_buf(handle, dentry, inode, de, bh);
	bh = NULL;
	goto cleanup;

journal_error:
	ext3_std_error(dir->i_sb, err);
cleanup:
	if (bh)
		brelse(bh);
	dx_release(frames);
	return err;
}
1630 1618
/*
 * ext3_delete_entry deletes a directory entry by merging it with the
 * previous entry
 *
 * If the victim is the first entry in the block there is no previous
 * entry to absorb it, so it is instead marked unused (inode = 0).
 * Returns 0 on success, -ENOENT if de_del is not found in bh, or a
 * journaling error.
 */
static int ext3_delete_entry (handle_t *handle,
			      struct inode * dir,
			      struct ext3_dir_entry_2 * de_del,
			      struct buffer_head * bh)
{
	struct ext3_dir_entry_2 * de, * pde;
	int i;

	i = 0;
	pde = NULL;
	de = (struct ext3_dir_entry_2 *) bh->b_data;
	/* Scan the block entry by entry, tracking the predecessor. */
	while (i < bh->b_size) {
		if (!ext3_check_dir_entry("ext3_delete_entry", dir, de, bh, i))
			return -EIO;
		if (de == de_del) {
			int err;

			BUFFER_TRACE(bh, "get_write_access");
			err = ext3_journal_get_write_access(handle, bh);
			if (err)
				goto journal_error;

			if (pde)
				/* Fold the victim's space into the
				 * preceding entry's rec_len. */
				pde->rec_len = ext3_rec_len_to_disk(
					ext3_rec_len_from_disk(pde->rec_len) +
					ext3_rec_len_from_disk(de->rec_len));
			else
				/* First entry in block: just mark unused. */
				de->inode = 0;
			dir->i_version++;
			BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
			err = ext3_journal_dirty_metadata(handle, bh);
			if (err) {
journal_error:
				ext3_std_error(dir->i_sb, err);
				return err;
			}
			return 0;
		}
		i += ext3_rec_len_from_disk(de->rec_len);
		pde = de;
		de = ext3_next_entry(de);
	}
	return -ENOENT;
}
1679 1667
1680 static int ext3_add_nondir(handle_t *handle, 1668 static int ext3_add_nondir(handle_t *handle,
1681 struct dentry *dentry, struct inode *inode) 1669 struct dentry *dentry, struct inode *inode)
1682 { 1670 {
1683 int err = ext3_add_entry(handle, dentry, inode); 1671 int err = ext3_add_entry(handle, dentry, inode);
1684 if (!err) { 1672 if (!err) {
1685 ext3_mark_inode_dirty(handle, inode); 1673 ext3_mark_inode_dirty(handle, inode);
1686 d_instantiate(dentry, inode); 1674 d_instantiate(dentry, inode);
1687 unlock_new_inode(inode); 1675 unlock_new_inode(inode);
1688 return 0; 1676 return 0;
1689 } 1677 }
1690 drop_nlink(inode); 1678 drop_nlink(inode);
1691 unlock_new_inode(inode); 1679 unlock_new_inode(inode);
1692 iput(inode); 1680 iput(inode);
1693 return err; 1681 return err;
1694 } 1682 }
1695 1683
1696 /* 1684 /*
1697 * By the time this is called, we already have created 1685 * By the time this is called, we already have created
1698 * the directory cache entry for the new file, but it 1686 * the directory cache entry for the new file, but it
1699 * is so far negative - it has no inode. 1687 * is so far negative - it has no inode.
1700 * 1688 *
1701 * If the create succeeds, we fill in the inode information 1689 * If the create succeeds, we fill in the inode information
1702 * with d_instantiate(). 1690 * with d_instantiate().
1703 */ 1691 */
1704 static int ext3_create (struct inode * dir, struct dentry * dentry, umode_t mode, 1692 static int ext3_create (struct inode * dir, struct dentry * dentry, umode_t mode,
1705 struct nameidata *nd) 1693 struct nameidata *nd)
1706 { 1694 {
1707 handle_t *handle; 1695 handle_t *handle;
1708 struct inode * inode; 1696 struct inode * inode;
1709 int err, retries = 0; 1697 int err, retries = 0;
1710 1698
1711 dquot_initialize(dir); 1699 dquot_initialize(dir);
1712 1700
1713 retry: 1701 retry:
1714 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + 1702 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
1715 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + 1703 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
1716 EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb)); 1704 EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
1717 if (IS_ERR(handle)) 1705 if (IS_ERR(handle))
1718 return PTR_ERR(handle); 1706 return PTR_ERR(handle);
1719 1707
1720 if (IS_DIRSYNC(dir)) 1708 if (IS_DIRSYNC(dir))
1721 handle->h_sync = 1; 1709 handle->h_sync = 1;
1722 1710
1723 inode = ext3_new_inode (handle, dir, &dentry->d_name, mode); 1711 inode = ext3_new_inode (handle, dir, &dentry->d_name, mode);
1724 err = PTR_ERR(inode); 1712 err = PTR_ERR(inode);
1725 if (!IS_ERR(inode)) { 1713 if (!IS_ERR(inode)) {
1726 inode->i_op = &ext3_file_inode_operations; 1714 inode->i_op = &ext3_file_inode_operations;
1727 inode->i_fop = &ext3_file_operations; 1715 inode->i_fop = &ext3_file_operations;
1728 ext3_set_aops(inode); 1716 ext3_set_aops(inode);
1729 err = ext3_add_nondir(handle, dentry, inode); 1717 err = ext3_add_nondir(handle, dentry, inode);
1730 } 1718 }
1731 ext3_journal_stop(handle); 1719 ext3_journal_stop(handle);
1732 if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) 1720 if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
1733 goto retry; 1721 goto retry;
1734 return err; 1722 return err;
1735 } 1723 }
1736 1724
1737 static int ext3_mknod (struct inode * dir, struct dentry *dentry, 1725 static int ext3_mknod (struct inode * dir, struct dentry *dentry,
1738 umode_t mode, dev_t rdev) 1726 umode_t mode, dev_t rdev)
1739 { 1727 {
1740 handle_t *handle; 1728 handle_t *handle;
1741 struct inode *inode; 1729 struct inode *inode;
1742 int err, retries = 0; 1730 int err, retries = 0;
1743 1731
1744 if (!new_valid_dev(rdev)) 1732 if (!new_valid_dev(rdev))
1745 return -EINVAL; 1733 return -EINVAL;
1746 1734
1747 dquot_initialize(dir); 1735 dquot_initialize(dir);
1748 1736
1749 retry: 1737 retry:
1750 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + 1738 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
1751 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + 1739 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
1752 EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb)); 1740 EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
1753 if (IS_ERR(handle)) 1741 if (IS_ERR(handle))
1754 return PTR_ERR(handle); 1742 return PTR_ERR(handle);
1755 1743
1756 if (IS_DIRSYNC(dir)) 1744 if (IS_DIRSYNC(dir))
1757 handle->h_sync = 1; 1745 handle->h_sync = 1;
1758 1746
1759 inode = ext3_new_inode (handle, dir, &dentry->d_name, mode); 1747 inode = ext3_new_inode (handle, dir, &dentry->d_name, mode);
1760 err = PTR_ERR(inode); 1748 err = PTR_ERR(inode);
1761 if (!IS_ERR(inode)) { 1749 if (!IS_ERR(inode)) {
1762 init_special_inode(inode, inode->i_mode, rdev); 1750 init_special_inode(inode, inode->i_mode, rdev);
1763 #ifdef CONFIG_EXT3_FS_XATTR 1751 #ifdef CONFIG_EXT3_FS_XATTR
1764 inode->i_op = &ext3_special_inode_operations; 1752 inode->i_op = &ext3_special_inode_operations;
1765 #endif 1753 #endif
1766 err = ext3_add_nondir(handle, dentry, inode); 1754 err = ext3_add_nondir(handle, dentry, inode);
1767 } 1755 }
1768 ext3_journal_stop(handle); 1756 ext3_journal_stop(handle);
1769 if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) 1757 if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
1770 goto retry; 1758 goto retry;
1771 return err; 1759 return err;
1772 } 1760 }
1773 1761
/*
 * Create a directory: allocate the inode, build its first block with
 * the "." and ".." entries, then link it into the parent.  Note the
 * out_clear_inode label sits inside the error branch and is also the
 * target of later failures (e.g. marking the parent dirty).
 */
static int ext3_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
{
	handle_t *handle;
	struct inode * inode;
	struct buffer_head * dir_block = NULL;
	struct ext3_dir_entry_2 * de;
	int err, retries = 0;

	/* The new ".." entry will raise the parent's link count. */
	if (dir->i_nlink >= EXT3_LINK_MAX)
		return -EMLINK;

	dquot_initialize(dir);

retry:
	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
					EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
	if (IS_ERR(handle))
		return PTR_ERR(handle);

	if (IS_DIRSYNC(dir))
		handle->h_sync = 1;

	inode = ext3_new_inode (handle, dir, &dentry->d_name, S_IFDIR | mode);
	err = PTR_ERR(inode);
	if (IS_ERR(inode))
		goto out_stop;

	inode->i_op = &ext3_dir_inode_operations;
	inode->i_fop = &ext3_dir_operations;
	inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize;
	/* Allocate (create=1) the directory's first data block. */
	dir_block = ext3_bread (handle, inode, 0, 1, &err);
	if (!dir_block)
		goto out_clear_inode;

	BUFFER_TRACE(dir_block, "get_write_access");
	err = ext3_journal_get_write_access(handle, dir_block);
	if (err)
		goto out_clear_inode;

	/* Build the "." entry ... */
	de = (struct ext3_dir_entry_2 *) dir_block->b_data;
	de->inode = cpu_to_le32(inode->i_ino);
	de->name_len = 1;
	de->rec_len = ext3_rec_len_to_disk(EXT3_DIR_REC_LEN(de->name_len));
	strcpy (de->name, ".");
	ext3_set_de_type(dir->i_sb, de, S_IFDIR);
	/* ... and the ".." entry, which spans the rest of the block. */
	de = ext3_next_entry(de);
	de->inode = cpu_to_le32(dir->i_ino);
	de->rec_len = ext3_rec_len_to_disk(inode->i_sb->s_blocksize -
					EXT3_DIR_REC_LEN(1));
	de->name_len = 2;
	strcpy (de->name, "..");
	ext3_set_de_type(dir->i_sb, de, S_IFDIR);
	/* "." plus the parent's entry for us. */
	set_nlink(inode, 2);
	BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata");
	err = ext3_journal_dirty_metadata(handle, dir_block);
	if (err)
		goto out_clear_inode;

	err = ext3_mark_inode_dirty(handle, inode);
	if (!err)
		err = ext3_add_entry (handle, dentry, inode);

	if (err) {
out_clear_inode:
		/* Tear down the half-built directory inode. */
		clear_nlink(inode);
		unlock_new_inode(inode);
		ext3_mark_inode_dirty(handle, inode);
		iput (inode);
		goto out_stop;
	}
	/* Account for the child's ".." reference to the parent. */
	inc_nlink(dir);
	ext3_update_dx_flag(dir);
	err = ext3_mark_inode_dirty(handle, dir);
	if (err)
		goto out_clear_inode;

	d_instantiate(dentry, inode);
	unlock_new_inode(inode);
out_stop:
	brelse(dir_block);
	ext3_journal_stop(handle);
	if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
		goto retry;
	return err;
}
1860 1848
/*
 * routine to check that the specified directory is empty (for rmdir)
 *
 * Returns 1 if the directory may be removed (it contains only "." and
 * ".." or is too corrupt to walk), 0 if a live entry was found.
 */
static int empty_dir (struct inode * inode)
{
	unsigned long offset;
	struct buffer_head * bh;
	struct ext3_dir_entry_2 * de, * de1;
	struct super_block * sb;
	int err = 0;

	sb = inode->i_sb;
	/* A sane directory holds at least "." and ".."; if it is smaller
	 * than that, or its first block cannot be read, complain and
	 * report it as empty so rmdir can still dispose of it. */
	if (inode->i_size < EXT3_DIR_REC_LEN(1) + EXT3_DIR_REC_LEN(2) ||
	    !(bh = ext3_bread (NULL, inode, 0, 0, &err))) {
		if (err)
			ext3_error(inode->i_sb, __func__,
				   "error %d reading directory #%lu offset 0",
				   err, inode->i_ino);
		else
			ext3_warning(inode->i_sb, __func__,
				     "bad directory (dir #%lu) - no data block",
				     inode->i_ino);
		return 1;
	}
	/* The first two entries must be "." (pointing at this inode)
	 * followed by ".."; otherwise the directory is corrupt. */
	de = (struct ext3_dir_entry_2 *) bh->b_data;
	de1 = ext3_next_entry(de);
	if (le32_to_cpu(de->inode) != inode->i_ino ||
			!le32_to_cpu(de1->inode) ||
			strcmp (".", de->name) ||
			strcmp ("..", de1->name)) {
		ext3_warning (inode->i_sb, "empty_dir",
			      "bad directory (dir #%lu) - no `.' or `..'",
			      inode->i_ino);
		brelse (bh);
		return 1;
	}
	offset = ext3_rec_len_from_disk(de->rec_len) +
			ext3_rec_len_from_disk(de1->rec_len);
	de = ext3_next_entry(de1);
	/* Walk the remaining entries; any entry with a nonzero inode
	 * number means the directory is not empty. */
	while (offset < inode->i_size ) {
		if (!bh ||
			(void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
			/* Crossed a block boundary: release the old buffer
			 * and read the block containing 'offset'. */
			err = 0;
			brelse (bh);
			bh = ext3_bread (NULL, inode,
				offset >> EXT3_BLOCK_SIZE_BITS(sb), 0, &err);
			if (!bh) {
				if (err)
					ext3_error(sb, __func__,
						   "error %d reading directory"
						   " #%lu offset %lu",
						   err, inode->i_ino, offset);
				/* Unreadable (or hole): skip this block and
				 * keep scanning the rest of the directory. */
				offset += sb->s_blocksize;
				continue;
			}
			de = (struct ext3_dir_entry_2 *) bh->b_data;
		}
		if (!ext3_check_dir_entry("empty_dir", inode, de, bh, offset)) {
			/* Corrupt entry: force a resync at the start of the
			 * next block rather than chasing a bad rec_len. */
			de = (struct ext3_dir_entry_2 *)(bh->b_data +
							 sb->s_blocksize);
			offset = (offset | (sb->s_blocksize - 1)) + 1;
			continue;
		}
		if (le32_to_cpu(de->inode)) {
			brelse (bh);
			return 0;
		}
		offset += ext3_rec_len_from_disk(de->rec_len);
		de = ext3_next_entry(de);
	}
	brelse (bh);
	return 1;
}
1934 1922
/* ext3_orphan_add() links an unlinked or truncated inode into a list of
 * such inodes, starting at the superblock, in case we crash before the
 * file is closed/deleted, or in case the inode truncate spans multiple
 * transactions and the last transaction is not recovered after a crash.
 *
 * At filesystem recovery time, we walk this list deleting unlinked
 * inodes and truncating linked inodes in ext3_orphan_cleanup().
 *
 * Returns 0 on success or a negative error code; on error the inode is
 * left off both the on-disk and the in-memory orphan lists.
 */
int ext3_orphan_add(handle_t *handle, struct inode *inode)
{
	struct super_block *sb = inode->i_sb;
	struct ext3_iloc iloc;
	int err = 0, rc;

	mutex_lock(&EXT3_SB(sb)->s_orphan_lock);
	/* Already on the in-memory orphan list => already on disk too. */
	if (!list_empty(&EXT3_I(inode)->i_orphan))
		goto out_unlock;

	/* Orphan handling is only valid for files with data blocks
	 * being truncated, or files being unlinked. */

	/* @@@ FIXME: Observation from aviro:
	 * I think I can trigger J_ASSERT in ext3_orphan_add(). We block
	 * here (on s_orphan_lock), so race with ext3_link() which might bump
	 * ->i_nlink. For, say it, character device. Not a regular file,
	 * not a directory, not a symlink and ->i_nlink > 0.
	 *
	 * tytso, 4/25/2009: I'm not sure how that could happen;
	 * shouldn't the fs core protect us from these sort of
	 * unlink()/link() races?
	 */
	J_ASSERT ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
		S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);

	/* Both the superblock buffer and the inode's on-disk location
	 * will be modified; get journal write access for each first. */
	BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get_write_access");
	err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh);
	if (err)
		goto out_unlock;

	err = ext3_reserve_inode_write(handle, inode, &iloc);
	if (err)
		goto out_unlock;

	/* Insert this inode at the head of the on-disk orphan list... */
	NEXT_ORPHAN(inode) = le32_to_cpu(EXT3_SB(sb)->s_es->s_last_orphan);
	EXT3_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
	err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
	rc = ext3_mark_iloc_dirty(handle, inode, &iloc);
	if (!err)
		err = rc;

	/* Only add to the head of the in-memory list if all the
	 * previous operations succeeded. If the orphan_add is going to
	 * fail (possibly taking the journal offline), we can't risk
	 * leaving the inode on the orphan list: stray orphan-list
	 * entries can cause panics at unmount time.
	 *
	 * This is safe: on error we're going to ignore the orphan list
	 * anyway on the next recovery. */
	if (!err)
		list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan);

	jbd_debug(4, "superblock will point to %lu\n", inode->i_ino);
	jbd_debug(4, "orphan inode %lu will point to %d\n",
			inode->i_ino, NEXT_ORPHAN(inode));
out_unlock:
	mutex_unlock(&EXT3_SB(sb)->s_orphan_lock);
	ext3_std_error(inode->i_sb, err);
	return err;
}
2005 1993
/*
 * ext3_orphan_del() removes an unlinked or truncated inode from the list
 * of such inodes stored on disk, because it is finally being cleaned up.
 *
 * May be called with a NULL handle on error paths; in that case only the
 * in-memory list is updated.  Returns 0 or a negative error code.
 */
int ext3_orphan_del(handle_t *handle, struct inode *inode)
{
	struct list_head *prev;
	struct ext3_inode_info *ei = EXT3_I(inode);
	struct ext3_sb_info *sbi;
	unsigned long ino_next;
	struct ext3_iloc iloc;
	int err = 0;

	mutex_lock(&EXT3_SB(inode->i_sb)->s_orphan_lock);
	if (list_empty(&ei->i_orphan))
		goto out;

	/* Snapshot the on-disk successor and the in-memory predecessor
	 * before unlinking from the in-memory list. */
	ino_next = NEXT_ORPHAN(inode);
	prev = ei->i_orphan.prev;
	sbi = EXT3_SB(inode->i_sb);

	jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino);

	list_del_init(&ei->i_orphan);

	/* If we're on an error path, we may not have a valid
	 * transaction handle with which to update the orphan list on
	 * disk, but we still need to remove the inode from the linked
	 * list in memory. */
	if (!handle)
		goto out;

	err = ext3_reserve_inode_write(handle, inode, &iloc);
	if (err)
		goto out_err;

	if (prev == &sbi->s_orphan) {
		/* This inode was at the head of the list: point the
		 * superblock's s_last_orphan at its successor. */
		jbd_debug(4, "superblock will point to %lu\n", ino_next);
		BUFFER_TRACE(sbi->s_sbh, "get_write_access");
		err = ext3_journal_get_write_access(handle, sbi->s_sbh);
		if (err)
			goto out_brelse;
		sbi->s_es->s_last_orphan = cpu_to_le32(ino_next);
		err = ext3_journal_dirty_metadata(handle, sbi->s_sbh);
	} else {
		struct ext3_iloc iloc2;
		struct inode *i_prev =
			&list_entry(prev, struct ext3_inode_info, i_orphan)->vfs_inode;

		/* Unlinking from the middle: make the previous orphan's
		 * on-disk next pointer skip over this inode. */
		jbd_debug(4, "orphan inode %lu will point to %lu\n",
			  i_prev->i_ino, ino_next);
		err = ext3_reserve_inode_write(handle, i_prev, &iloc2);
		if (err)
			goto out_brelse;
		NEXT_ORPHAN(i_prev) = ino_next;
		err = ext3_mark_iloc_dirty(handle, i_prev, &iloc2);
	}
	if (err)
		goto out_brelse;
	/* Finally clear this inode's own on-disk next pointer. */
	NEXT_ORPHAN(inode) = 0;
	err = ext3_mark_iloc_dirty(handle, inode, &iloc);

out_err:
	ext3_std_error(inode->i_sb, err);
out:
	mutex_unlock(&EXT3_SB(inode->i_sb)->s_orphan_lock);
	return err;

out_brelse:
	brelse(iloc.bh);
	goto out_err;
}
2078 2066
2079 static int ext3_rmdir (struct inode * dir, struct dentry *dentry) 2067 static int ext3_rmdir (struct inode * dir, struct dentry *dentry)
2080 { 2068 {
2081 int retval; 2069 int retval;
2082 struct inode * inode; 2070 struct inode * inode;
2083 struct buffer_head * bh; 2071 struct buffer_head * bh;
2084 struct ext3_dir_entry_2 * de; 2072 struct ext3_dir_entry_2 * de;
2085 handle_t *handle; 2073 handle_t *handle;
2086 2074
2087 /* Initialize quotas before so that eventual writes go in 2075 /* Initialize quotas before so that eventual writes go in
2088 * separate transaction */ 2076 * separate transaction */
2089 dquot_initialize(dir); 2077 dquot_initialize(dir);
2090 dquot_initialize(dentry->d_inode); 2078 dquot_initialize(dentry->d_inode);
2091 2079
2092 handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb)); 2080 handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb));
2093 if (IS_ERR(handle)) 2081 if (IS_ERR(handle))
2094 return PTR_ERR(handle); 2082 return PTR_ERR(handle);
2095 2083
2096 retval = -ENOENT; 2084 retval = -ENOENT;
2097 bh = ext3_find_entry(dir, &dentry->d_name, &de); 2085 bh = ext3_find_entry(dir, &dentry->d_name, &de);
2098 if (!bh) 2086 if (!bh)
2099 goto end_rmdir; 2087 goto end_rmdir;
2100 2088
2101 if (IS_DIRSYNC(dir)) 2089 if (IS_DIRSYNC(dir))
2102 handle->h_sync = 1; 2090 handle->h_sync = 1;
2103 2091
2104 inode = dentry->d_inode; 2092 inode = dentry->d_inode;
2105 2093
2106 retval = -EIO; 2094 retval = -EIO;
2107 if (le32_to_cpu(de->inode) != inode->i_ino) 2095 if (le32_to_cpu(de->inode) != inode->i_ino)
2108 goto end_rmdir; 2096 goto end_rmdir;
2109 2097
2110 retval = -ENOTEMPTY; 2098 retval = -ENOTEMPTY;
2111 if (!empty_dir (inode)) 2099 if (!empty_dir (inode))
2112 goto end_rmdir; 2100 goto end_rmdir;
2113 2101
2114 retval = ext3_delete_entry(handle, dir, de, bh); 2102 retval = ext3_delete_entry(handle, dir, de, bh);
2115 if (retval) 2103 if (retval)
2116 goto end_rmdir; 2104 goto end_rmdir;
2117 if (inode->i_nlink != 2) 2105 if (inode->i_nlink != 2)
2118 ext3_warning (inode->i_sb, "ext3_rmdir", 2106 ext3_warning (inode->i_sb, "ext3_rmdir",
2119 "empty directory has nlink!=2 (%d)", 2107 "empty directory has nlink!=2 (%d)",
2120 inode->i_nlink); 2108 inode->i_nlink);
2121 inode->i_version++; 2109 inode->i_version++;
2122 clear_nlink(inode); 2110 clear_nlink(inode);
2123 /* There's no need to set i_disksize: the fact that i_nlink is 2111 /* There's no need to set i_disksize: the fact that i_nlink is
2124 * zero will ensure that the right thing happens during any 2112 * zero will ensure that the right thing happens during any
2125 * recovery. */ 2113 * recovery. */
2126 inode->i_size = 0; 2114 inode->i_size = 0;
2127 ext3_orphan_add(handle, inode); 2115 ext3_orphan_add(handle, inode);
2128 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; 2116 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
2129 ext3_mark_inode_dirty(handle, inode); 2117 ext3_mark_inode_dirty(handle, inode);
2130 drop_nlink(dir); 2118 drop_nlink(dir);
2131 ext3_update_dx_flag(dir); 2119 ext3_update_dx_flag(dir);
2132 ext3_mark_inode_dirty(handle, dir); 2120 ext3_mark_inode_dirty(handle, dir);
2133 2121
2134 end_rmdir: 2122 end_rmdir:
2135 ext3_journal_stop(handle); 2123 ext3_journal_stop(handle);
2136 brelse (bh); 2124 brelse (bh);
2137 return retval; 2125 return retval;
2138 } 2126 }
2139 2127
2140 static int ext3_unlink(struct inode * dir, struct dentry *dentry) 2128 static int ext3_unlink(struct inode * dir, struct dentry *dentry)
2141 { 2129 {
2142 int retval; 2130 int retval;
2143 struct inode * inode; 2131 struct inode * inode;
2144 struct buffer_head * bh; 2132 struct buffer_head * bh;
2145 struct ext3_dir_entry_2 * de; 2133 struct ext3_dir_entry_2 * de;
2146 handle_t *handle; 2134 handle_t *handle;
2147 2135
2148 trace_ext3_unlink_enter(dir, dentry); 2136 trace_ext3_unlink_enter(dir, dentry);
2149 /* Initialize quotas before so that eventual writes go 2137 /* Initialize quotas before so that eventual writes go
2150 * in separate transaction */ 2138 * in separate transaction */
2151 dquot_initialize(dir); 2139 dquot_initialize(dir);
2152 dquot_initialize(dentry->d_inode); 2140 dquot_initialize(dentry->d_inode);
2153 2141
2154 handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb)); 2142 handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb));
2155 if (IS_ERR(handle)) 2143 if (IS_ERR(handle))
2156 return PTR_ERR(handle); 2144 return PTR_ERR(handle);
2157 2145
2158 if (IS_DIRSYNC(dir)) 2146 if (IS_DIRSYNC(dir))
2159 handle->h_sync = 1; 2147 handle->h_sync = 1;
2160 2148
2161 retval = -ENOENT; 2149 retval = -ENOENT;
2162 bh = ext3_find_entry(dir, &dentry->d_name, &de); 2150 bh = ext3_find_entry(dir, &dentry->d_name, &de);
2163 if (!bh) 2151 if (!bh)
2164 goto end_unlink; 2152 goto end_unlink;
2165 2153
2166 inode = dentry->d_inode; 2154 inode = dentry->d_inode;
2167 2155
2168 retval = -EIO; 2156 retval = -EIO;
2169 if (le32_to_cpu(de->inode) != inode->i_ino) 2157 if (le32_to_cpu(de->inode) != inode->i_ino)
2170 goto end_unlink; 2158 goto end_unlink;
2171 2159
2172 if (!inode->i_nlink) { 2160 if (!inode->i_nlink) {
2173 ext3_warning (inode->i_sb, "ext3_unlink", 2161 ext3_warning (inode->i_sb, "ext3_unlink",
2174 "Deleting nonexistent file (%lu), %d", 2162 "Deleting nonexistent file (%lu), %d",
2175 inode->i_ino, inode->i_nlink); 2163 inode->i_ino, inode->i_nlink);
2176 set_nlink(inode, 1); 2164 set_nlink(inode, 1);
2177 } 2165 }
2178 retval = ext3_delete_entry(handle, dir, de, bh); 2166 retval = ext3_delete_entry(handle, dir, de, bh);
2179 if (retval) 2167 if (retval)
2180 goto end_unlink; 2168 goto end_unlink;
2181 dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; 2169 dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
2182 ext3_update_dx_flag(dir); 2170 ext3_update_dx_flag(dir);
2183 ext3_mark_inode_dirty(handle, dir); 2171 ext3_mark_inode_dirty(handle, dir);
2184 drop_nlink(inode); 2172 drop_nlink(inode);
2185 if (!inode->i_nlink) 2173 if (!inode->i_nlink)
2186 ext3_orphan_add(handle, inode); 2174 ext3_orphan_add(handle, inode);
2187 inode->i_ctime = dir->i_ctime; 2175 inode->i_ctime = dir->i_ctime;
2188 ext3_mark_inode_dirty(handle, inode); 2176 ext3_mark_inode_dirty(handle, inode);
2189 retval = 0; 2177 retval = 0;
2190 2178
2191 end_unlink: 2179 end_unlink:
2192 ext3_journal_stop(handle); 2180 ext3_journal_stop(handle);
2193 brelse (bh); 2181 brelse (bh);
2194 trace_ext3_unlink_exit(dentry, retval); 2182 trace_ext3_unlink_exit(dentry, retval);
2195 return retval; 2183 return retval;
2196 } 2184 }
2197 2185
/*
 * Create a symlink.  Short targets (<= EXT3_N_BLOCKS * 4 bytes including
 * the NUL) are stored directly in the inode ("fast" symlinks); longer
 * ones go into a data block via the page cache.
 */
static int ext3_symlink (struct inode * dir,
		struct dentry *dentry, const char * symname)
{
	handle_t *handle;
	struct inode * inode;
	int l, err, retries = 0;
	int credits;

	/* l counts the terminating NUL as well. */
	l = strlen(symname)+1;
	if (l > dir->i_sb->s_blocksize)
		return -ENAMETOOLONG;

	dquot_initialize(dir);

	if (l > EXT3_N_BLOCKS * 4) {
		/*
		 * For non-fast symlinks, we just allocate inode and put it on
		 * orphan list in the first transaction => we need bitmap,
		 * group descriptor, sb, inode block, quota blocks, and
		 * possibly selinux xattr blocks.
		 */
		credits = 4 + EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) +
			  EXT3_XATTR_TRANS_BLOCKS;
	} else {
		/*
		 * Fast symlink. We have to add entry to directory
		 * (EXT3_DATA_TRANS_BLOCKS + EXT3_INDEX_EXTRA_TRANS_BLOCKS),
		 * allocate new inode (bitmap, group descriptor, inode block,
		 * quota blocks, sb is already counted in previous macros).
		 */
		credits = EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
			  EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
			  EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb);
	}
retry:
	handle = ext3_journal_start(dir, credits);
	if (IS_ERR(handle))
		return PTR_ERR(handle);

	if (IS_DIRSYNC(dir))
		handle->h_sync = 1;

	inode = ext3_new_inode (handle, dir, &dentry->d_name, S_IFLNK|S_IRWXUGO);
	err = PTR_ERR(inode);
	if (IS_ERR(inode))
		goto out_stop;

	if (l > EXT3_N_BLOCKS * 4) {
		inode->i_op = &ext3_symlink_inode_operations;
		ext3_set_aops(inode);
		/*
		 * We cannot call page_symlink() with transaction started
		 * because it calls into ext3_write_begin() which acquires page
		 * lock which ranks below transaction start (and it can also
		 * wait for journal commit if we are running out of space). So
		 * we have to stop transaction now and restart it when symlink
		 * contents is written.
		 *
		 * To keep fs consistent in case of crash, we have to put inode
		 * to orphan list in the mean time.
		 */
		drop_nlink(inode);
		err = ext3_orphan_add(handle, inode);
		ext3_journal_stop(handle);
		if (err)
			goto err_drop_inode;
		err = __page_symlink(inode, symname, l, 1);
		if (err)
			goto err_drop_inode;
		/*
		 * Now inode is being linked into dir (EXT3_DATA_TRANS_BLOCKS
		 * + EXT3_INDEX_EXTRA_TRANS_BLOCKS), inode is also modified
		 */
		handle = ext3_journal_start(dir,
				EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
				EXT3_INDEX_EXTRA_TRANS_BLOCKS + 1);
		if (IS_ERR(handle)) {
			err = PTR_ERR(handle);
			goto err_drop_inode;
		}
		/* Contents written: restore the link count and take the
		 * inode back off the orphan list. */
		set_nlink(inode, 1);
		err = ext3_orphan_del(handle, inode);
		if (err) {
			ext3_journal_stop(handle);
			drop_nlink(inode);
			goto err_drop_inode;
		}
	} else {
		/* Fast symlink: target (including NUL) is copied straight
		 * into the inode's i_data array, no data block needed. */
		inode->i_op = &ext3_fast_symlink_inode_operations;
		memcpy((char*)&EXT3_I(inode)->i_data,symname,l);
		inode->i_size = l-1;
	}
	EXT3_I(inode)->i_disksize = inode->i_size;
	err = ext3_add_nondir(handle, dentry, inode);
out_stop:
	ext3_journal_stop(handle);
	if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
		goto retry;
	return err;
err_drop_inode:
	unlock_new_inode(inode);
	iput(inode);
	return err;
}
2302 2290
2303 static int ext3_link (struct dentry * old_dentry, 2291 static int ext3_link (struct dentry * old_dentry,
2304 struct inode * dir, struct dentry *dentry) 2292 struct inode * dir, struct dentry *dentry)
2305 { 2293 {
2306 handle_t *handle; 2294 handle_t *handle;
2307 struct inode *inode = old_dentry->d_inode; 2295 struct inode *inode = old_dentry->d_inode;
2308 int err, retries = 0; 2296 int err, retries = 0;
2309 2297
2310 if (inode->i_nlink >= EXT3_LINK_MAX) 2298 if (inode->i_nlink >= EXT3_LINK_MAX)
2311 return -EMLINK; 2299 return -EMLINK;
2312 2300
2313 dquot_initialize(dir); 2301 dquot_initialize(dir);
2314 2302
2315 retry: 2303 retry:
2316 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + 2304 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
2317 EXT3_INDEX_EXTRA_TRANS_BLOCKS); 2305 EXT3_INDEX_EXTRA_TRANS_BLOCKS);
2318 if (IS_ERR(handle)) 2306 if (IS_ERR(handle))
2319 return PTR_ERR(handle); 2307 return PTR_ERR(handle);
2320 2308
2321 if (IS_DIRSYNC(dir)) 2309 if (IS_DIRSYNC(dir))
2322 handle->h_sync = 1; 2310 handle->h_sync = 1;
2323 2311
2324 inode->i_ctime = CURRENT_TIME_SEC; 2312 inode->i_ctime = CURRENT_TIME_SEC;
2325 inc_nlink(inode); 2313 inc_nlink(inode);
2326 ihold(inode); 2314 ihold(inode);
2327 2315
2328 err = ext3_add_entry(handle, dentry, inode); 2316 err = ext3_add_entry(handle, dentry, inode);
2329 if (!err) { 2317 if (!err) {
2330 ext3_mark_inode_dirty(handle, inode); 2318 ext3_mark_inode_dirty(handle, inode);
2331 d_instantiate(dentry, inode); 2319 d_instantiate(dentry, inode);
2332 } else { 2320 } else {
2333 drop_nlink(inode); 2321 drop_nlink(inode);
2334 iput(inode); 2322 iput(inode);
2335 } 2323 }
2336 ext3_journal_stop(handle); 2324 ext3_journal_stop(handle);
2337 if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) 2325 if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
2338 goto retry; 2326 goto retry;
2339 return err; 2327 return err;
2340 } 2328 }
2341 2329
/*
 * ".." is the entry immediately after "." in a directory's first block,
 * so the parent's inode number is the inode field of the second entry.
 */
#define PARENT_INO(buffer) \
	(ext3_next_entry((struct ext3_dir_entry_2 *)(buffer))->inode)
2344 2332
2345 /* 2333 /*
2346 * Anybody can rename anything with this: the permission checks are left to the 2334 * Anybody can rename anything with this: the permission checks are left to the
2347 * higher-level routines. 2335 * higher-level routines.
2348 */ 2336 */
2349 static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry, 2337 static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
2350 struct inode * new_dir,struct dentry *new_dentry) 2338 struct inode * new_dir,struct dentry *new_dentry)
2351 { 2339 {
2352 handle_t *handle; 2340 handle_t *handle;
2353 struct inode * old_inode, * new_inode; 2341 struct inode * old_inode, * new_inode;
2354 struct buffer_head * old_bh, * new_bh, * dir_bh; 2342 struct buffer_head * old_bh, * new_bh, * dir_bh;
2355 struct ext3_dir_entry_2 * old_de, * new_de; 2343 struct ext3_dir_entry_2 * old_de, * new_de;
2356 int retval, flush_file = 0; 2344 int retval, flush_file = 0;
2357 2345
2358 dquot_initialize(old_dir); 2346 dquot_initialize(old_dir);
2359 dquot_initialize(new_dir); 2347 dquot_initialize(new_dir);
2360 2348
2361 old_bh = new_bh = dir_bh = NULL; 2349 old_bh = new_bh = dir_bh = NULL;
2362 2350
2363 /* Initialize quotas before so that eventual writes go 2351 /* Initialize quotas before so that eventual writes go
2364 * in separate transaction */ 2352 * in separate transaction */
2365 if (new_dentry->d_inode) 2353 if (new_dentry->d_inode)
2366 dquot_initialize(new_dentry->d_inode); 2354 dquot_initialize(new_dentry->d_inode);
2367 handle = ext3_journal_start(old_dir, 2 * 2355 handle = ext3_journal_start(old_dir, 2 *
2368 EXT3_DATA_TRANS_BLOCKS(old_dir->i_sb) + 2356 EXT3_DATA_TRANS_BLOCKS(old_dir->i_sb) +
2369 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2); 2357 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2);
2370 if (IS_ERR(handle)) 2358 if (IS_ERR(handle))
2371 return PTR_ERR(handle); 2359 return PTR_ERR(handle);
2372 2360
2373 if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) 2361 if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
2374 handle->h_sync = 1; 2362 handle->h_sync = 1;
2375 2363
2376 old_bh = ext3_find_entry(old_dir, &old_dentry->d_name, &old_de); 2364 old_bh = ext3_find_entry(old_dir, &old_dentry->d_name, &old_de);
2377 /* 2365 /*
2378 * Check for inode number is _not_ due to possible IO errors. 2366 * Check for inode number is _not_ due to possible IO errors.
2379 * We might rmdir the source, keep it as pwd of some process 2367 * We might rmdir the source, keep it as pwd of some process
2380 * and merrily kill the link to whatever was created under the 2368 * and merrily kill the link to whatever was created under the
2381 * same name. Goodbye sticky bit ;-< 2369 * same name. Goodbye sticky bit ;-<
2382 */ 2370 */
2383 old_inode = old_dentry->d_inode; 2371 old_inode = old_dentry->d_inode;
2384 retval = -ENOENT; 2372 retval = -ENOENT;
2385 if (!old_bh || le32_to_cpu(old_de->inode) != old_inode->i_ino) 2373 if (!old_bh || le32_to_cpu(old_de->inode) != old_inode->i_ino)
2386 goto end_rename; 2374 goto end_rename;
2387 2375
2388 new_inode = new_dentry->d_inode; 2376 new_inode = new_dentry->d_inode;
2389 new_bh = ext3_find_entry(new_dir, &new_dentry->d_name, &new_de); 2377 new_bh = ext3_find_entry(new_dir, &new_dentry->d_name, &new_de);
2390 if (new_bh) { 2378 if (new_bh) {
2391 if (!new_inode) { 2379 if (!new_inode) {
2392 brelse (new_bh); 2380 brelse (new_bh);
2393 new_bh = NULL; 2381 new_bh = NULL;
2394 } 2382 }
2395 } 2383 }
2396 if (S_ISDIR(old_inode->i_mode)) { 2384 if (S_ISDIR(old_inode->i_mode)) {
2397 if (new_inode) { 2385 if (new_inode) {
2398 retval = -ENOTEMPTY; 2386 retval = -ENOTEMPTY;
2399 if (!empty_dir (new_inode)) 2387 if (!empty_dir (new_inode))
2400 goto end_rename; 2388 goto end_rename;
2401 } 2389 }
2402 retval = -EIO; 2390 retval = -EIO;
2403 dir_bh = ext3_bread (handle, old_inode, 0, 0, &retval); 2391 dir_bh = ext3_bread (handle, old_inode, 0, 0, &retval);
2404 if (!dir_bh) 2392 if (!dir_bh)
2405 goto end_rename; 2393 goto end_rename;
2406 if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino) 2394 if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino)
2407 goto end_rename; 2395 goto end_rename;
2408 retval = -EMLINK; 2396 retval = -EMLINK;
2409 if (!new_inode && new_dir!=old_dir && 2397 if (!new_inode && new_dir!=old_dir &&
2410 new_dir->i_nlink >= EXT3_LINK_MAX) 2398 new_dir->i_nlink >= EXT3_LINK_MAX)
2411 goto end_rename; 2399 goto end_rename;
2412 } 2400 }
2413 if (!new_bh) { 2401 if (!new_bh) {
2414 retval = ext3_add_entry (handle, new_dentry, old_inode); 2402 retval = ext3_add_entry (handle, new_dentry, old_inode);
2415 if (retval) 2403 if (retval)
2416 goto end_rename; 2404 goto end_rename;
2417 } else { 2405 } else {
2418 BUFFER_TRACE(new_bh, "get write access"); 2406 BUFFER_TRACE(new_bh, "get write access");
2419 retval = ext3_journal_get_write_access(handle, new_bh); 2407 retval = ext3_journal_get_write_access(handle, new_bh);
2420 if (retval) 2408 if (retval)
2421 goto journal_error; 2409 goto journal_error;
2422 new_de->inode = cpu_to_le32(old_inode->i_ino); 2410 new_de->inode = cpu_to_le32(old_inode->i_ino);
2423 if (EXT3_HAS_INCOMPAT_FEATURE(new_dir->i_sb, 2411 if (EXT3_HAS_INCOMPAT_FEATURE(new_dir->i_sb,
2424 EXT3_FEATURE_INCOMPAT_FILETYPE)) 2412 EXT3_FEATURE_INCOMPAT_FILETYPE))
2425 new_de->file_type = old_de->file_type; 2413 new_de->file_type = old_de->file_type;
2426 new_dir->i_version++; 2414 new_dir->i_version++;
2427 new_dir->i_ctime = new_dir->i_mtime = CURRENT_TIME_SEC; 2415 new_dir->i_ctime = new_dir->i_mtime = CURRENT_TIME_SEC;
2428 ext3_mark_inode_dirty(handle, new_dir); 2416 ext3_mark_inode_dirty(handle, new_dir);
2429 BUFFER_TRACE(new_bh, "call ext3_journal_dirty_metadata"); 2417 BUFFER_TRACE(new_bh, "call ext3_journal_dirty_metadata");
2430 retval = ext3_journal_dirty_metadata(handle, new_bh); 2418 retval = ext3_journal_dirty_metadata(handle, new_bh);
2431 if (retval) 2419 if (retval)
2432 goto journal_error; 2420 goto journal_error;
2433 brelse(new_bh); 2421 brelse(new_bh);
2434 new_bh = NULL; 2422 new_bh = NULL;
2435 } 2423 }
2436 2424
2437 /* 2425 /*
2438 * Like most other Unix systems, set the ctime for inodes on a 2426 * Like most other Unix systems, set the ctime for inodes on a
2439 * rename. 2427 * rename.
2440 */ 2428 */
2441 old_inode->i_ctime = CURRENT_TIME_SEC; 2429 old_inode->i_ctime = CURRENT_TIME_SEC;
2442 ext3_mark_inode_dirty(handle, old_inode); 2430 ext3_mark_inode_dirty(handle, old_inode);
2443 2431
2444 /* 2432 /*
2445 * ok, that's it 2433 * ok, that's it
2446 */ 2434 */
2447 if (le32_to_cpu(old_de->inode) != old_inode->i_ino || 2435 if (le32_to_cpu(old_de->inode) != old_inode->i_ino ||
2448 old_de->name_len != old_dentry->d_name.len || 2436 old_de->name_len != old_dentry->d_name.len ||
2449 strncmp(old_de->name, old_dentry->d_name.name, old_de->name_len) || 2437 strncmp(old_de->name, old_dentry->d_name.name, old_de->name_len) ||
2450 (retval = ext3_delete_entry(handle, old_dir, 2438 (retval = ext3_delete_entry(handle, old_dir,
2451 old_de, old_bh)) == -ENOENT) { 2439 old_de, old_bh)) == -ENOENT) {
2452 /* old_de could have moved from under us during htree split, so 2440 /* old_de could have moved from under us during htree split, so
2453 * make sure that we are deleting the right entry. We might 2441 * make sure that we are deleting the right entry. We might
2454 * also be pointing to a stale entry in the unused part of 2442 * also be pointing to a stale entry in the unused part of
2455 * old_bh so just checking inum and the name isn't enough. */ 2443 * old_bh so just checking inum and the name isn't enough. */
2456 struct buffer_head *old_bh2; 2444 struct buffer_head *old_bh2;
2457 struct ext3_dir_entry_2 *old_de2; 2445 struct ext3_dir_entry_2 *old_de2;
2458 2446
2459 old_bh2 = ext3_find_entry(old_dir, &old_dentry->d_name, 2447 old_bh2 = ext3_find_entry(old_dir, &old_dentry->d_name,
2460 &old_de2); 2448 &old_de2);
2461 if (old_bh2) { 2449 if (old_bh2) {
2462 retval = ext3_delete_entry(handle, old_dir, 2450 retval = ext3_delete_entry(handle, old_dir,
2463 old_de2, old_bh2); 2451 old_de2, old_bh2);
2464 brelse(old_bh2); 2452 brelse(old_bh2);
2465 } 2453 }
2466 } 2454 }
2467 if (retval) { 2455 if (retval) {
2468 ext3_warning(old_dir->i_sb, "ext3_rename", 2456 ext3_warning(old_dir->i_sb, "ext3_rename",
2469 "Deleting old file (%lu), %d, error=%d", 2457 "Deleting old file (%lu), %d, error=%d",
2470 old_dir->i_ino, old_dir->i_nlink, retval); 2458 old_dir->i_ino, old_dir->i_nlink, retval);
2471 } 2459 }
2472 2460
2473 if (new_inode) { 2461 if (new_inode) {
2474 drop_nlink(new_inode); 2462 drop_nlink(new_inode);
2475 new_inode->i_ctime = CURRENT_TIME_SEC; 2463 new_inode->i_ctime = CURRENT_TIME_SEC;
2476 } 2464 }
2477 old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME_SEC; 2465 old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME_SEC;
2478 ext3_update_dx_flag(old_dir); 2466 ext3_update_dx_flag(old_dir);
2479 if (dir_bh) { 2467 if (dir_bh) {
2480 BUFFER_TRACE(dir_bh, "get_write_access"); 2468 BUFFER_TRACE(dir_bh, "get_write_access");
2481 retval = ext3_journal_get_write_access(handle, dir_bh); 2469 retval = ext3_journal_get_write_access(handle, dir_bh);
2482 if (retval) 2470 if (retval)
2483 goto journal_error; 2471 goto journal_error;
2484 PARENT_INO(dir_bh->b_data) = cpu_to_le32(new_dir->i_ino); 2472 PARENT_INO(dir_bh->b_data) = cpu_to_le32(new_dir->i_ino);
2485 BUFFER_TRACE(dir_bh, "call ext3_journal_dirty_metadata"); 2473 BUFFER_TRACE(dir_bh, "call ext3_journal_dirty_metadata");
2486 retval = ext3_journal_dirty_metadata(handle, dir_bh); 2474 retval = ext3_journal_dirty_metadata(handle, dir_bh);
2487 if (retval) { 2475 if (retval) {
2488 journal_error: 2476 journal_error:
2489 ext3_std_error(new_dir->i_sb, retval); 2477 ext3_std_error(new_dir->i_sb, retval);
2490 goto end_rename; 2478 goto end_rename;
2491 } 2479 }
2492 drop_nlink(old_dir); 2480 drop_nlink(old_dir);
2493 if (new_inode) { 2481 if (new_inode) {
2494 drop_nlink(new_inode); 2482 drop_nlink(new_inode);
2495 } else { 2483 } else {
2496 inc_nlink(new_dir); 2484 inc_nlink(new_dir);
2497 ext3_update_dx_flag(new_dir); 2485 ext3_update_dx_flag(new_dir);
2498 ext3_mark_inode_dirty(handle, new_dir); 2486 ext3_mark_inode_dirty(handle, new_dir);
2499 } 2487 }
2500 } 2488 }
2501 ext3_mark_inode_dirty(handle, old_dir); 2489 ext3_mark_inode_dirty(handle, old_dir);
2502 if (new_inode) { 2490 if (new_inode) {
2503 ext3_mark_inode_dirty(handle, new_inode); 2491 ext3_mark_inode_dirty(handle, new_inode);
2504 if (!new_inode->i_nlink) 2492 if (!new_inode->i_nlink)
2505 ext3_orphan_add(handle, new_inode); 2493 ext3_orphan_add(handle, new_inode);
2506 if (ext3_should_writeback_data(new_inode)) 2494 if (ext3_should_writeback_data(new_inode))
2507 flush_file = 1; 2495 flush_file = 1;
2508 } 2496 }
2509 retval = 0; 2497 retval = 0;
2510 2498
2511 end_rename: 2499 end_rename:
2512 brelse (dir_bh); 2500 brelse (dir_bh);
2513 brelse (old_bh); 2501 brelse (old_bh);
2514 brelse (new_bh); 2502 brelse (new_bh);
2515 ext3_journal_stop(handle); 2503 ext3_journal_stop(handle);
2516 if (retval == 0 && flush_file) 2504 if (retval == 0 && flush_file)
2517 filemap_flush(old_inode->i_mapping); 2505 filemap_flush(old_inode->i_mapping);
2518 return retval; 2506 return retval;
2519 } 2507 }
2520 2508
2521 /* 2509 /*
2522 * directories can handle most operations... 2510 * directories can handle most operations...
2523 */ 2511 */
2524 const struct inode_operations ext3_dir_inode_operations = { 2512 const struct inode_operations ext3_dir_inode_operations = {
2525 .create = ext3_create, 2513 .create = ext3_create,
2526 .lookup = ext3_lookup, 2514 .lookup = ext3_lookup,
2527 .link = ext3_link, 2515 .link = ext3_link,
2528 .unlink = ext3_unlink, 2516 .unlink = ext3_unlink,
2529 .symlink = ext3_symlink, 2517 .symlink = ext3_symlink,
2530 .mkdir = ext3_mkdir, 2518 .mkdir = ext3_mkdir,
2531 .rmdir = ext3_rmdir, 2519 .rmdir = ext3_rmdir,
2532 .mknod = ext3_mknod, 2520 .mknod = ext3_mknod,
2533 .rename = ext3_rename, 2521 .rename = ext3_rename,
2534 .setattr = ext3_setattr, 2522 .setattr = ext3_setattr,
2535 #ifdef CONFIG_EXT3_FS_XATTR 2523 #ifdef CONFIG_EXT3_FS_XATTR
2536 .setxattr = generic_setxattr, 2524 .setxattr = generic_setxattr,
2537 .getxattr = generic_getxattr, 2525 .getxattr = generic_getxattr,
2538 .listxattr = ext3_listxattr, 2526 .listxattr = ext3_listxattr,
2539 .removexattr = generic_removexattr, 2527 .removexattr = generic_removexattr,
2540 #endif 2528 #endif
2541 .get_acl = ext3_get_acl, 2529 .get_acl = ext3_get_acl,
2542 }; 2530 };
2543 2531
2544 const struct inode_operations ext3_special_inode_operations = { 2532 const struct inode_operations ext3_special_inode_operations = {
2545 .setattr = ext3_setattr, 2533 .setattr = ext3_setattr,
2546 #ifdef CONFIG_EXT3_FS_XATTR 2534 #ifdef CONFIG_EXT3_FS_XATTR
2547 .setxattr = generic_setxattr, 2535 .setxattr = generic_setxattr,
2548 .getxattr = generic_getxattr, 2536 .getxattr = generic_getxattr,
2549 .listxattr = ext3_listxattr, 2537 .listxattr = ext3_listxattr,
2550 .removexattr = generic_removexattr, 2538 .removexattr = generic_removexattr,
2551 #endif 2539 #endif
2552 .get_acl = ext3_get_acl, 2540 .get_acl = ext3_get_acl,
2553 }; 2541 };
2554 2542
1 /* 1 /*
2 * linux/fs/ext3/resize.c 2 * linux/fs/ext3/resize.c
3 * 3 *
4 * Support for resizing an ext3 filesystem while it is mounted. 4 * Support for resizing an ext3 filesystem while it is mounted.
5 * 5 *
6 * Copyright (C) 2001, 2002 Andreas Dilger <adilger@clusterfs.com> 6 * Copyright (C) 2001, 2002 Andreas Dilger <adilger@clusterfs.com>
7 * 7 *
8 * This could probably be made into a module, because it is not often in use. 8 * This could probably be made into a module, because it is not often in use.
9 */ 9 */
10 10
11 11
12 #define EXT3FS_DEBUG 12 #define EXT3FS_DEBUG
13 13
14 #include <linux/ext3_jbd.h> 14 #include "ext3.h"
15
16 #include <linux/errno.h>
17 #include <linux/slab.h>
18 15
19 16
20 #define outside(b, first, last) ((b) < (first) || (b) >= (last)) 17 #define outside(b, first, last) ((b) < (first) || (b) >= (last))
21 #define inside(b, first, last) ((b) >= (first) && (b) < (last)) 18 #define inside(b, first, last) ((b) >= (first) && (b) < (last))
22 19
23 static int verify_group_input(struct super_block *sb, 20 static int verify_group_input(struct super_block *sb,
24 struct ext3_new_group_data *input) 21 struct ext3_new_group_data *input)
25 { 22 {
26 struct ext3_sb_info *sbi = EXT3_SB(sb); 23 struct ext3_sb_info *sbi = EXT3_SB(sb);
27 struct ext3_super_block *es = sbi->s_es; 24 struct ext3_super_block *es = sbi->s_es;
28 ext3_fsblk_t start = le32_to_cpu(es->s_blocks_count); 25 ext3_fsblk_t start = le32_to_cpu(es->s_blocks_count);
29 ext3_fsblk_t end = start + input->blocks_count; 26 ext3_fsblk_t end = start + input->blocks_count;
30 unsigned group = input->group; 27 unsigned group = input->group;
31 ext3_fsblk_t itend = input->inode_table + sbi->s_itb_per_group; 28 ext3_fsblk_t itend = input->inode_table + sbi->s_itb_per_group;
32 unsigned overhead = ext3_bg_has_super(sb, group) ? 29 unsigned overhead = ext3_bg_has_super(sb, group) ?
33 (1 + ext3_bg_num_gdb(sb, group) + 30 (1 + ext3_bg_num_gdb(sb, group) +
34 le16_to_cpu(es->s_reserved_gdt_blocks)) : 0; 31 le16_to_cpu(es->s_reserved_gdt_blocks)) : 0;
35 ext3_fsblk_t metaend = start + overhead; 32 ext3_fsblk_t metaend = start + overhead;
36 struct buffer_head *bh = NULL; 33 struct buffer_head *bh = NULL;
37 ext3_grpblk_t free_blocks_count; 34 ext3_grpblk_t free_blocks_count;
38 int err = -EINVAL; 35 int err = -EINVAL;
39 36
40 input->free_blocks_count = free_blocks_count = 37 input->free_blocks_count = free_blocks_count =
41 input->blocks_count - 2 - overhead - sbi->s_itb_per_group; 38 input->blocks_count - 2 - overhead - sbi->s_itb_per_group;
42 39
43 if (test_opt(sb, DEBUG)) 40 if (test_opt(sb, DEBUG))
44 printk(KERN_DEBUG "EXT3-fs: adding %s group %u: %u blocks " 41 printk(KERN_DEBUG "EXT3-fs: adding %s group %u: %u blocks "
45 "(%d free, %u reserved)\n", 42 "(%d free, %u reserved)\n",
46 ext3_bg_has_super(sb, input->group) ? "normal" : 43 ext3_bg_has_super(sb, input->group) ? "normal" :
47 "no-super", input->group, input->blocks_count, 44 "no-super", input->group, input->blocks_count,
48 free_blocks_count, input->reserved_blocks); 45 free_blocks_count, input->reserved_blocks);
49 46
50 if (group != sbi->s_groups_count) 47 if (group != sbi->s_groups_count)
51 ext3_warning(sb, __func__, 48 ext3_warning(sb, __func__,
52 "Cannot add at group %u (only %lu groups)", 49 "Cannot add at group %u (only %lu groups)",
53 input->group, sbi->s_groups_count); 50 input->group, sbi->s_groups_count);
54 else if ((start - le32_to_cpu(es->s_first_data_block)) % 51 else if ((start - le32_to_cpu(es->s_first_data_block)) %
55 EXT3_BLOCKS_PER_GROUP(sb)) 52 EXT3_BLOCKS_PER_GROUP(sb))
56 ext3_warning(sb, __func__, "Last group not full"); 53 ext3_warning(sb, __func__, "Last group not full");
57 else if (input->reserved_blocks > input->blocks_count / 5) 54 else if (input->reserved_blocks > input->blocks_count / 5)
58 ext3_warning(sb, __func__, "Reserved blocks too high (%u)", 55 ext3_warning(sb, __func__, "Reserved blocks too high (%u)",
59 input->reserved_blocks); 56 input->reserved_blocks);
60 else if (free_blocks_count < 0) 57 else if (free_blocks_count < 0)
61 ext3_warning(sb, __func__, "Bad blocks count %u", 58 ext3_warning(sb, __func__, "Bad blocks count %u",
62 input->blocks_count); 59 input->blocks_count);
63 else if (!(bh = sb_bread(sb, end - 1))) 60 else if (!(bh = sb_bread(sb, end - 1)))
64 ext3_warning(sb, __func__, 61 ext3_warning(sb, __func__,
65 "Cannot read last block ("E3FSBLK")", 62 "Cannot read last block ("E3FSBLK")",
66 end - 1); 63 end - 1);
67 else if (outside(input->block_bitmap, start, end)) 64 else if (outside(input->block_bitmap, start, end))
68 ext3_warning(sb, __func__, 65 ext3_warning(sb, __func__,
69 "Block bitmap not in group (block %u)", 66 "Block bitmap not in group (block %u)",
70 input->block_bitmap); 67 input->block_bitmap);
71 else if (outside(input->inode_bitmap, start, end)) 68 else if (outside(input->inode_bitmap, start, end))
72 ext3_warning(sb, __func__, 69 ext3_warning(sb, __func__,
73 "Inode bitmap not in group (block %u)", 70 "Inode bitmap not in group (block %u)",
74 input->inode_bitmap); 71 input->inode_bitmap);
75 else if (outside(input->inode_table, start, end) || 72 else if (outside(input->inode_table, start, end) ||
76 outside(itend - 1, start, end)) 73 outside(itend - 1, start, end))
77 ext3_warning(sb, __func__, 74 ext3_warning(sb, __func__,
78 "Inode table not in group (blocks %u-"E3FSBLK")", 75 "Inode table not in group (blocks %u-"E3FSBLK")",
79 input->inode_table, itend - 1); 76 input->inode_table, itend - 1);
80 else if (input->inode_bitmap == input->block_bitmap) 77 else if (input->inode_bitmap == input->block_bitmap)
81 ext3_warning(sb, __func__, 78 ext3_warning(sb, __func__,
82 "Block bitmap same as inode bitmap (%u)", 79 "Block bitmap same as inode bitmap (%u)",
83 input->block_bitmap); 80 input->block_bitmap);
84 else if (inside(input->block_bitmap, input->inode_table, itend)) 81 else if (inside(input->block_bitmap, input->inode_table, itend))
85 ext3_warning(sb, __func__, 82 ext3_warning(sb, __func__,
86 "Block bitmap (%u) in inode table (%u-"E3FSBLK")", 83 "Block bitmap (%u) in inode table (%u-"E3FSBLK")",
87 input->block_bitmap, input->inode_table, itend-1); 84 input->block_bitmap, input->inode_table, itend-1);
88 else if (inside(input->inode_bitmap, input->inode_table, itend)) 85 else if (inside(input->inode_bitmap, input->inode_table, itend))
89 ext3_warning(sb, __func__, 86 ext3_warning(sb, __func__,
90 "Inode bitmap (%u) in inode table (%u-"E3FSBLK")", 87 "Inode bitmap (%u) in inode table (%u-"E3FSBLK")",
91 input->inode_bitmap, input->inode_table, itend-1); 88 input->inode_bitmap, input->inode_table, itend-1);
92 else if (inside(input->block_bitmap, start, metaend)) 89 else if (inside(input->block_bitmap, start, metaend))
93 ext3_warning(sb, __func__, 90 ext3_warning(sb, __func__,
94 "Block bitmap (%u) in GDT table" 91 "Block bitmap (%u) in GDT table"
95 " ("E3FSBLK"-"E3FSBLK")", 92 " ("E3FSBLK"-"E3FSBLK")",
96 input->block_bitmap, start, metaend - 1); 93 input->block_bitmap, start, metaend - 1);
97 else if (inside(input->inode_bitmap, start, metaend)) 94 else if (inside(input->inode_bitmap, start, metaend))
98 ext3_warning(sb, __func__, 95 ext3_warning(sb, __func__,
99 "Inode bitmap (%u) in GDT table" 96 "Inode bitmap (%u) in GDT table"
100 " ("E3FSBLK"-"E3FSBLK")", 97 " ("E3FSBLK"-"E3FSBLK")",
101 input->inode_bitmap, start, metaend - 1); 98 input->inode_bitmap, start, metaend - 1);
102 else if (inside(input->inode_table, start, metaend) || 99 else if (inside(input->inode_table, start, metaend) ||
103 inside(itend - 1, start, metaend)) 100 inside(itend - 1, start, metaend))
104 ext3_warning(sb, __func__, 101 ext3_warning(sb, __func__,
105 "Inode table (%u-"E3FSBLK") overlaps" 102 "Inode table (%u-"E3FSBLK") overlaps"
106 "GDT table ("E3FSBLK"-"E3FSBLK")", 103 "GDT table ("E3FSBLK"-"E3FSBLK")",
107 input->inode_table, itend - 1, start, metaend - 1); 104 input->inode_table, itend - 1, start, metaend - 1);
108 else 105 else
109 err = 0; 106 err = 0;
110 brelse(bh); 107 brelse(bh);
111 108
112 return err; 109 return err;
113 } 110 }
114 111
115 static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, 112 static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,
116 ext3_fsblk_t blk) 113 ext3_fsblk_t blk)
117 { 114 {
118 struct buffer_head *bh; 115 struct buffer_head *bh;
119 int err; 116 int err;
120 117
121 bh = sb_getblk(sb, blk); 118 bh = sb_getblk(sb, blk);
122 if (!bh) 119 if (!bh)
123 return ERR_PTR(-EIO); 120 return ERR_PTR(-EIO);
124 if ((err = ext3_journal_get_write_access(handle, bh))) { 121 if ((err = ext3_journal_get_write_access(handle, bh))) {
125 brelse(bh); 122 brelse(bh);
126 bh = ERR_PTR(err); 123 bh = ERR_PTR(err);
127 } else { 124 } else {
128 lock_buffer(bh); 125 lock_buffer(bh);
129 memset(bh->b_data, 0, sb->s_blocksize); 126 memset(bh->b_data, 0, sb->s_blocksize);
130 set_buffer_uptodate(bh); 127 set_buffer_uptodate(bh);
131 unlock_buffer(bh); 128 unlock_buffer(bh);
132 } 129 }
133 130
134 return bh; 131 return bh;
135 } 132 }
136 133
137 /* 134 /*
138 * To avoid calling the atomic setbit hundreds or thousands of times, we only 135 * To avoid calling the atomic setbit hundreds or thousands of times, we only
139 * need to use it within a single byte (to ensure we get endianness right). 136 * need to use it within a single byte (to ensure we get endianness right).
140 * We can use memset for the rest of the bitmap as there are no other users. 137 * We can use memset for the rest of the bitmap as there are no other users.
141 */ 138 */
142 static void mark_bitmap_end(int start_bit, int end_bit, char *bitmap) 139 static void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
143 { 140 {
144 int i; 141 int i;
145 142
146 if (start_bit >= end_bit) 143 if (start_bit >= end_bit)
147 return; 144 return;
148 145
149 ext3_debug("mark end bits +%d through +%d used\n", start_bit, end_bit); 146 ext3_debug("mark end bits +%d through +%d used\n", start_bit, end_bit);
150 for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++) 147 for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++)
151 ext3_set_bit(i, bitmap); 148 ext3_set_bit(i, bitmap);
152 if (i < end_bit) 149 if (i < end_bit)
153 memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3); 150 memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3);
154 } 151 }
155 152
156 /* 153 /*
157 * If we have fewer than thresh credits, extend by EXT3_MAX_TRANS_DATA. 154 * If we have fewer than thresh credits, extend by EXT3_MAX_TRANS_DATA.
158 * If that fails, restart the transaction & regain write access for the 155 * If that fails, restart the transaction & regain write access for the
159 * buffer head which is used for block_bitmap modifications. 156 * buffer head which is used for block_bitmap modifications.
160 */ 157 */
161 static int extend_or_restart_transaction(handle_t *handle, int thresh, 158 static int extend_or_restart_transaction(handle_t *handle, int thresh,
162 struct buffer_head *bh) 159 struct buffer_head *bh)
163 { 160 {
164 int err; 161 int err;
165 162
166 if (handle->h_buffer_credits >= thresh) 163 if (handle->h_buffer_credits >= thresh)
167 return 0; 164 return 0;
168 165
169 err = ext3_journal_extend(handle, EXT3_MAX_TRANS_DATA); 166 err = ext3_journal_extend(handle, EXT3_MAX_TRANS_DATA);
170 if (err < 0) 167 if (err < 0)
171 return err; 168 return err;
172 if (err) { 169 if (err) {
173 err = ext3_journal_restart(handle, EXT3_MAX_TRANS_DATA); 170 err = ext3_journal_restart(handle, EXT3_MAX_TRANS_DATA);
174 if (err) 171 if (err)
175 return err; 172 return err;
176 err = ext3_journal_get_write_access(handle, bh); 173 err = ext3_journal_get_write_access(handle, bh);
177 if (err) 174 if (err)
178 return err; 175 return err;
179 } 176 }
180 177
181 return 0; 178 return 0;
182 } 179 }
183 180
184 /* 181 /*
185 * Set up the block and inode bitmaps, and the inode table for the new group. 182 * Set up the block and inode bitmaps, and the inode table for the new group.
186 * This doesn't need to be part of the main transaction, since we are only 183 * This doesn't need to be part of the main transaction, since we are only
187 * changing blocks outside the actual filesystem. We still do journaling to 184 * changing blocks outside the actual filesystem. We still do journaling to
188 * ensure the recovery is correct in case of a failure just after resize. 185 * ensure the recovery is correct in case of a failure just after resize.
189 * If any part of this fails, we simply abort the resize. 186 * If any part of this fails, we simply abort the resize.
190 */ 187 */
191 static int setup_new_group_blocks(struct super_block *sb, 188 static int setup_new_group_blocks(struct super_block *sb,
192 struct ext3_new_group_data *input) 189 struct ext3_new_group_data *input)
193 { 190 {
194 struct ext3_sb_info *sbi = EXT3_SB(sb); 191 struct ext3_sb_info *sbi = EXT3_SB(sb);
195 ext3_fsblk_t start = ext3_group_first_block_no(sb, input->group); 192 ext3_fsblk_t start = ext3_group_first_block_no(sb, input->group);
196 int reserved_gdb = ext3_bg_has_super(sb, input->group) ? 193 int reserved_gdb = ext3_bg_has_super(sb, input->group) ?
197 le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0; 194 le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0;
198 unsigned long gdblocks = ext3_bg_num_gdb(sb, input->group); 195 unsigned long gdblocks = ext3_bg_num_gdb(sb, input->group);
199 struct buffer_head *bh; 196 struct buffer_head *bh;
200 handle_t *handle; 197 handle_t *handle;
201 ext3_fsblk_t block; 198 ext3_fsblk_t block;
202 ext3_grpblk_t bit; 199 ext3_grpblk_t bit;
203 int i; 200 int i;
204 int err = 0, err2; 201 int err = 0, err2;
205 202
206 /* This transaction may be extended/restarted along the way */ 203 /* This transaction may be extended/restarted along the way */
207 handle = ext3_journal_start_sb(sb, EXT3_MAX_TRANS_DATA); 204 handle = ext3_journal_start_sb(sb, EXT3_MAX_TRANS_DATA);
208 205
209 if (IS_ERR(handle)) 206 if (IS_ERR(handle))
210 return PTR_ERR(handle); 207 return PTR_ERR(handle);
211 208
212 mutex_lock(&sbi->s_resize_lock); 209 mutex_lock(&sbi->s_resize_lock);
213 if (input->group != sbi->s_groups_count) { 210 if (input->group != sbi->s_groups_count) {
214 err = -EBUSY; 211 err = -EBUSY;
215 goto exit_journal; 212 goto exit_journal;
216 } 213 }
217 214
218 if (IS_ERR(bh = bclean(handle, sb, input->block_bitmap))) { 215 if (IS_ERR(bh = bclean(handle, sb, input->block_bitmap))) {
219 err = PTR_ERR(bh); 216 err = PTR_ERR(bh);
220 goto exit_journal; 217 goto exit_journal;
221 } 218 }
222 219
223 if (ext3_bg_has_super(sb, input->group)) { 220 if (ext3_bg_has_super(sb, input->group)) {
224 ext3_debug("mark backup superblock %#04lx (+0)\n", start); 221 ext3_debug("mark backup superblock %#04lx (+0)\n", start);
225 ext3_set_bit(0, bh->b_data); 222 ext3_set_bit(0, bh->b_data);
226 } 223 }
227 224
228 /* Copy all of the GDT blocks into the backup in this group */ 225 /* Copy all of the GDT blocks into the backup in this group */
229 for (i = 0, bit = 1, block = start + 1; 226 for (i = 0, bit = 1, block = start + 1;
230 i < gdblocks; i++, block++, bit++) { 227 i < gdblocks; i++, block++, bit++) {
231 struct buffer_head *gdb; 228 struct buffer_head *gdb;
232 229
233 ext3_debug("update backup group %#04lx (+%d)\n", block, bit); 230 ext3_debug("update backup group %#04lx (+%d)\n", block, bit);
234 231
235 err = extend_or_restart_transaction(handle, 1, bh); 232 err = extend_or_restart_transaction(handle, 1, bh);
236 if (err) 233 if (err)
237 goto exit_bh; 234 goto exit_bh;
238 235
239 gdb = sb_getblk(sb, block); 236 gdb = sb_getblk(sb, block);
240 if (!gdb) { 237 if (!gdb) {
241 err = -EIO; 238 err = -EIO;
242 goto exit_bh; 239 goto exit_bh;
243 } 240 }
244 if ((err = ext3_journal_get_write_access(handle, gdb))) { 241 if ((err = ext3_journal_get_write_access(handle, gdb))) {
245 brelse(gdb); 242 brelse(gdb);
246 goto exit_bh; 243 goto exit_bh;
247 } 244 }
248 lock_buffer(gdb); 245 lock_buffer(gdb);
249 memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size); 246 memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size);
250 set_buffer_uptodate(gdb); 247 set_buffer_uptodate(gdb);
251 unlock_buffer(gdb); 248 unlock_buffer(gdb);
252 err = ext3_journal_dirty_metadata(handle, gdb); 249 err = ext3_journal_dirty_metadata(handle, gdb);
253 if (err) { 250 if (err) {
254 brelse(gdb); 251 brelse(gdb);
255 goto exit_bh; 252 goto exit_bh;
256 } 253 }
257 ext3_set_bit(bit, bh->b_data); 254 ext3_set_bit(bit, bh->b_data);
258 brelse(gdb); 255 brelse(gdb);
259 } 256 }
260 257
261 /* Zero out all of the reserved backup group descriptor table blocks */ 258 /* Zero out all of the reserved backup group descriptor table blocks */
262 for (i = 0, bit = gdblocks + 1, block = start + bit; 259 for (i = 0, bit = gdblocks + 1, block = start + bit;
263 i < reserved_gdb; i++, block++, bit++) { 260 i < reserved_gdb; i++, block++, bit++) {
264 struct buffer_head *gdb; 261 struct buffer_head *gdb;
265 262
266 ext3_debug("clear reserved block %#04lx (+%d)\n", block, bit); 263 ext3_debug("clear reserved block %#04lx (+%d)\n", block, bit);
267 264
268 err = extend_or_restart_transaction(handle, 1, bh); 265 err = extend_or_restart_transaction(handle, 1, bh);
269 if (err) 266 if (err)
270 goto exit_bh; 267 goto exit_bh;
271 268
272 if (IS_ERR(gdb = bclean(handle, sb, block))) { 269 if (IS_ERR(gdb = bclean(handle, sb, block))) {
273 err = PTR_ERR(gdb); 270 err = PTR_ERR(gdb);
274 goto exit_bh; 271 goto exit_bh;
275 } 272 }
276 err = ext3_journal_dirty_metadata(handle, gdb); 273 err = ext3_journal_dirty_metadata(handle, gdb);
277 if (err) { 274 if (err) {
278 brelse(gdb); 275 brelse(gdb);
279 goto exit_bh; 276 goto exit_bh;
280 } 277 }
281 ext3_set_bit(bit, bh->b_data); 278 ext3_set_bit(bit, bh->b_data);
282 brelse(gdb); 279 brelse(gdb);
283 } 280 }
284 ext3_debug("mark block bitmap %#04x (+%ld)\n", input->block_bitmap, 281 ext3_debug("mark block bitmap %#04x (+%ld)\n", input->block_bitmap,
285 input->block_bitmap - start); 282 input->block_bitmap - start);
286 ext3_set_bit(input->block_bitmap - start, bh->b_data); 283 ext3_set_bit(input->block_bitmap - start, bh->b_data);
287 ext3_debug("mark inode bitmap %#04x (+%ld)\n", input->inode_bitmap, 284 ext3_debug("mark inode bitmap %#04x (+%ld)\n", input->inode_bitmap,
288 input->inode_bitmap - start); 285 input->inode_bitmap - start);
289 ext3_set_bit(input->inode_bitmap - start, bh->b_data); 286 ext3_set_bit(input->inode_bitmap - start, bh->b_data);
290 287
291 /* Zero out all of the inode table blocks */ 288 /* Zero out all of the inode table blocks */
292 for (i = 0, block = input->inode_table, bit = block - start; 289 for (i = 0, block = input->inode_table, bit = block - start;
293 i < sbi->s_itb_per_group; i++, bit++, block++) { 290 i < sbi->s_itb_per_group; i++, bit++, block++) {
294 struct buffer_head *it; 291 struct buffer_head *it;
295 292
296 ext3_debug("clear inode block %#04lx (+%d)\n", block, bit); 293 ext3_debug("clear inode block %#04lx (+%d)\n", block, bit);
297 294
298 err = extend_or_restart_transaction(handle, 1, bh); 295 err = extend_or_restart_transaction(handle, 1, bh);
299 if (err) 296 if (err)
300 goto exit_bh; 297 goto exit_bh;
301 298
302 if (IS_ERR(it = bclean(handle, sb, block))) { 299 if (IS_ERR(it = bclean(handle, sb, block))) {
303 err = PTR_ERR(it); 300 err = PTR_ERR(it);
304 goto exit_bh; 301 goto exit_bh;
305 } 302 }
306 err = ext3_journal_dirty_metadata(handle, it); 303 err = ext3_journal_dirty_metadata(handle, it);
307 if (err) { 304 if (err) {
308 brelse(it); 305 brelse(it);
309 goto exit_bh; 306 goto exit_bh;
310 } 307 }
311 brelse(it); 308 brelse(it);
312 ext3_set_bit(bit, bh->b_data); 309 ext3_set_bit(bit, bh->b_data);
313 } 310 }
314 311
315 err = extend_or_restart_transaction(handle, 2, bh); 312 err = extend_or_restart_transaction(handle, 2, bh);
316 if (err) 313 if (err)
317 goto exit_bh; 314 goto exit_bh;
318 315
319 mark_bitmap_end(input->blocks_count, EXT3_BLOCKS_PER_GROUP(sb), 316 mark_bitmap_end(input->blocks_count, EXT3_BLOCKS_PER_GROUP(sb),
320 bh->b_data); 317 bh->b_data);
321 err = ext3_journal_dirty_metadata(handle, bh); 318 err = ext3_journal_dirty_metadata(handle, bh);
322 if (err) 319 if (err)
323 goto exit_bh; 320 goto exit_bh;
324 brelse(bh); 321 brelse(bh);
325 322
326 /* Mark unused entries in inode bitmap used */ 323 /* Mark unused entries in inode bitmap used */
327 ext3_debug("clear inode bitmap %#04x (+%ld)\n", 324 ext3_debug("clear inode bitmap %#04x (+%ld)\n",
328 input->inode_bitmap, input->inode_bitmap - start); 325 input->inode_bitmap, input->inode_bitmap - start);
329 if (IS_ERR(bh = bclean(handle, sb, input->inode_bitmap))) { 326 if (IS_ERR(bh = bclean(handle, sb, input->inode_bitmap))) {
330 err = PTR_ERR(bh); 327 err = PTR_ERR(bh);
331 goto exit_journal; 328 goto exit_journal;
332 } 329 }
333 330
334 mark_bitmap_end(EXT3_INODES_PER_GROUP(sb), EXT3_BLOCKS_PER_GROUP(sb), 331 mark_bitmap_end(EXT3_INODES_PER_GROUP(sb), EXT3_BLOCKS_PER_GROUP(sb),
335 bh->b_data); 332 bh->b_data);
336 err = ext3_journal_dirty_metadata(handle, bh); 333 err = ext3_journal_dirty_metadata(handle, bh);
337 exit_bh: 334 exit_bh:
338 brelse(bh); 335 brelse(bh);
339 336
340 exit_journal: 337 exit_journal:
341 mutex_unlock(&sbi->s_resize_lock); 338 mutex_unlock(&sbi->s_resize_lock);
342 if ((err2 = ext3_journal_stop(handle)) && !err) 339 if ((err2 = ext3_journal_stop(handle)) && !err)
343 err = err2; 340 err = err2;
344 341
345 return err; 342 return err;
346 } 343 }
347 344
348 /* 345 /*
349 * Iterate through the groups which hold BACKUP superblock/GDT copies in an 346 * Iterate through the groups which hold BACKUP superblock/GDT copies in an
350 * ext3 filesystem. The counters should be initialized to 1, 5, and 7 before 347 * ext3 filesystem. The counters should be initialized to 1, 5, and 7 before
351 * calling this for the first time. In a sparse filesystem it will be the 348 * calling this for the first time. In a sparse filesystem it will be the
352 * sequence of powers of 3, 5, and 7: 1, 3, 5, 7, 9, 25, 27, 49, 81, ... 349 * sequence of powers of 3, 5, and 7: 1, 3, 5, 7, 9, 25, 27, 49, 81, ...
353 * For a non-sparse filesystem it will be every group: 1, 2, 3, 4, ... 350 * For a non-sparse filesystem it will be every group: 1, 2, 3, 4, ...
354 */ 351 */
355 static unsigned ext3_list_backups(struct super_block *sb, unsigned *three, 352 static unsigned ext3_list_backups(struct super_block *sb, unsigned *three,
356 unsigned *five, unsigned *seven) 353 unsigned *five, unsigned *seven)
357 { 354 {
358 unsigned *min = three; 355 unsigned *min = three;
359 int mult = 3; 356 int mult = 3;
360 unsigned ret; 357 unsigned ret;
361 358
362 if (!EXT3_HAS_RO_COMPAT_FEATURE(sb, 359 if (!EXT3_HAS_RO_COMPAT_FEATURE(sb,
363 EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)) { 360 EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)) {
364 ret = *min; 361 ret = *min;
365 *min += 1; 362 *min += 1;
366 return ret; 363 return ret;
367 } 364 }
368 365
369 if (*five < *min) { 366 if (*five < *min) {
370 min = five; 367 min = five;
371 mult = 5; 368 mult = 5;
372 } 369 }
373 if (*seven < *min) { 370 if (*seven < *min) {
374 min = seven; 371 min = seven;
375 mult = 7; 372 mult = 7;
376 } 373 }
377 374
378 ret = *min; 375 ret = *min;
379 *min *= mult; 376 *min *= mult;
380 377
381 return ret; 378 return ret;
382 } 379 }
383 380
384 /* 381 /*
385 * Check that all of the backup GDT blocks are held in the primary GDT block. 382 * Check that all of the backup GDT blocks are held in the primary GDT block.
386 * It is assumed that they are stored in group order. Returns the number of 383 * It is assumed that they are stored in group order. Returns the number of
387 * groups in current filesystem that have BACKUPS, or -ve error code. 384 * groups in current filesystem that have BACKUPS, or -ve error code.
388 */ 385 */
389 static int verify_reserved_gdb(struct super_block *sb, 386 static int verify_reserved_gdb(struct super_block *sb,
390 struct buffer_head *primary) 387 struct buffer_head *primary)
391 { 388 {
392 const ext3_fsblk_t blk = primary->b_blocknr; 389 const ext3_fsblk_t blk = primary->b_blocknr;
393 const unsigned long end = EXT3_SB(sb)->s_groups_count; 390 const unsigned long end = EXT3_SB(sb)->s_groups_count;
394 unsigned three = 1; 391 unsigned three = 1;
395 unsigned five = 5; 392 unsigned five = 5;
396 unsigned seven = 7; 393 unsigned seven = 7;
397 unsigned grp; 394 unsigned grp;
398 __le32 *p = (__le32 *)primary->b_data; 395 __le32 *p = (__le32 *)primary->b_data;
399 int gdbackups = 0; 396 int gdbackups = 0;
400 397
401 while ((grp = ext3_list_backups(sb, &three, &five, &seven)) < end) { 398 while ((grp = ext3_list_backups(sb, &three, &five, &seven)) < end) {
402 if (le32_to_cpu(*p++) != grp * EXT3_BLOCKS_PER_GROUP(sb) + blk){ 399 if (le32_to_cpu(*p++) != grp * EXT3_BLOCKS_PER_GROUP(sb) + blk){
403 ext3_warning(sb, __func__, 400 ext3_warning(sb, __func__,
404 "reserved GDT "E3FSBLK 401 "reserved GDT "E3FSBLK
405 " missing grp %d ("E3FSBLK")", 402 " missing grp %d ("E3FSBLK")",
406 blk, grp, 403 blk, grp,
407 grp * EXT3_BLOCKS_PER_GROUP(sb) + blk); 404 grp * EXT3_BLOCKS_PER_GROUP(sb) + blk);
408 return -EINVAL; 405 return -EINVAL;
409 } 406 }
410 if (++gdbackups > EXT3_ADDR_PER_BLOCK(sb)) 407 if (++gdbackups > EXT3_ADDR_PER_BLOCK(sb))
411 return -EFBIG; 408 return -EFBIG;
412 } 409 }
413 410
414 return gdbackups; 411 return gdbackups;
415 } 412 }
416 413
417 /* 414 /*
418 * Called when we need to bring a reserved group descriptor table block into 415 * Called when we need to bring a reserved group descriptor table block into
419 * use from the resize inode. The primary copy of the new GDT block currently 416 * use from the resize inode. The primary copy of the new GDT block currently
420 * is an indirect block (under the double indirect block in the resize inode). 417 * is an indirect block (under the double indirect block in the resize inode).
421 * The new backup GDT blocks will be stored as leaf blocks in this indirect 418 * The new backup GDT blocks will be stored as leaf blocks in this indirect
422 * block, in group order. Even though we know all the block numbers we need, 419 * block, in group order. Even though we know all the block numbers we need,
423 * we check to ensure that the resize inode has actually reserved these blocks. 420 * we check to ensure that the resize inode has actually reserved these blocks.
424 * 421 *
425 * Don't need to update the block bitmaps because the blocks are still in use. 422 * Don't need to update the block bitmaps because the blocks are still in use.
426 * 423 *
427 * We get all of the error cases out of the way, so that we are sure to not 424 * We get all of the error cases out of the way, so that we are sure to not
428 * fail once we start modifying the data on disk, because JBD has no rollback. 425 * fail once we start modifying the data on disk, because JBD has no rollback.
429 */ 426 */
430 static int add_new_gdb(handle_t *handle, struct inode *inode, 427 static int add_new_gdb(handle_t *handle, struct inode *inode,
431 struct ext3_new_group_data *input, 428 struct ext3_new_group_data *input,
432 struct buffer_head **primary) 429 struct buffer_head **primary)
433 { 430 {
434 struct super_block *sb = inode->i_sb; 431 struct super_block *sb = inode->i_sb;
435 struct ext3_super_block *es = EXT3_SB(sb)->s_es; 432 struct ext3_super_block *es = EXT3_SB(sb)->s_es;
436 unsigned long gdb_num = input->group / EXT3_DESC_PER_BLOCK(sb); 433 unsigned long gdb_num = input->group / EXT3_DESC_PER_BLOCK(sb);
437 ext3_fsblk_t gdblock = EXT3_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num; 434 ext3_fsblk_t gdblock = EXT3_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num;
438 struct buffer_head **o_group_desc, **n_group_desc; 435 struct buffer_head **o_group_desc, **n_group_desc;
439 struct buffer_head *dind; 436 struct buffer_head *dind;
440 int gdbackups; 437 int gdbackups;
441 struct ext3_iloc iloc; 438 struct ext3_iloc iloc;
442 __le32 *data; 439 __le32 *data;
443 int err; 440 int err;
444 441
445 if (test_opt(sb, DEBUG)) 442 if (test_opt(sb, DEBUG))
446 printk(KERN_DEBUG 443 printk(KERN_DEBUG
447 "EXT3-fs: ext3_add_new_gdb: adding group block %lu\n", 444 "EXT3-fs: ext3_add_new_gdb: adding group block %lu\n",
448 gdb_num); 445 gdb_num);
449 446
450 /* 447 /*
451 * If we are not using the primary superblock/GDT copy don't resize, 448 * If we are not using the primary superblock/GDT copy don't resize,
452 * because the user tools have no way of handling this. Probably a 449 * because the user tools have no way of handling this. Probably a
453 * bad time to do it anyways. 450 * bad time to do it anyways.
454 */ 451 */
455 if (EXT3_SB(sb)->s_sbh->b_blocknr != 452 if (EXT3_SB(sb)->s_sbh->b_blocknr !=
456 le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block)) { 453 le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block)) {
457 ext3_warning(sb, __func__, 454 ext3_warning(sb, __func__,
458 "won't resize using backup superblock at %llu", 455 "won't resize using backup superblock at %llu",
459 (unsigned long long)EXT3_SB(sb)->s_sbh->b_blocknr); 456 (unsigned long long)EXT3_SB(sb)->s_sbh->b_blocknr);
460 return -EPERM; 457 return -EPERM;
461 } 458 }
462 459
463 *primary = sb_bread(sb, gdblock); 460 *primary = sb_bread(sb, gdblock);
464 if (!*primary) 461 if (!*primary)
465 return -EIO; 462 return -EIO;
466 463
467 if ((gdbackups = verify_reserved_gdb(sb, *primary)) < 0) { 464 if ((gdbackups = verify_reserved_gdb(sb, *primary)) < 0) {
468 err = gdbackups; 465 err = gdbackups;
469 goto exit_bh; 466 goto exit_bh;
470 } 467 }
471 468
472 data = EXT3_I(inode)->i_data + EXT3_DIND_BLOCK; 469 data = EXT3_I(inode)->i_data + EXT3_DIND_BLOCK;
473 dind = sb_bread(sb, le32_to_cpu(*data)); 470 dind = sb_bread(sb, le32_to_cpu(*data));
474 if (!dind) { 471 if (!dind) {
475 err = -EIO; 472 err = -EIO;
476 goto exit_bh; 473 goto exit_bh;
477 } 474 }
478 475
479 data = (__le32 *)dind->b_data; 476 data = (__le32 *)dind->b_data;
480 if (le32_to_cpu(data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)]) != gdblock) { 477 if (le32_to_cpu(data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)]) != gdblock) {
481 ext3_warning(sb, __func__, 478 ext3_warning(sb, __func__,
482 "new group %u GDT block "E3FSBLK" not reserved", 479 "new group %u GDT block "E3FSBLK" not reserved",
483 input->group, gdblock); 480 input->group, gdblock);
484 err = -EINVAL; 481 err = -EINVAL;
485 goto exit_dind; 482 goto exit_dind;
486 } 483 }
487 484
488 if ((err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh))) 485 if ((err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh)))
489 goto exit_dind; 486 goto exit_dind;
490 487
491 if ((err = ext3_journal_get_write_access(handle, *primary))) 488 if ((err = ext3_journal_get_write_access(handle, *primary)))
492 goto exit_sbh; 489 goto exit_sbh;
493 490
494 if ((err = ext3_journal_get_write_access(handle, dind))) 491 if ((err = ext3_journal_get_write_access(handle, dind)))
495 goto exit_primary; 492 goto exit_primary;
496 493
497 /* ext3_reserve_inode_write() gets a reference on the iloc */ 494 /* ext3_reserve_inode_write() gets a reference on the iloc */
498 if ((err = ext3_reserve_inode_write(handle, inode, &iloc))) 495 if ((err = ext3_reserve_inode_write(handle, inode, &iloc)))
499 goto exit_dindj; 496 goto exit_dindj;
500 497
501 n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *), 498 n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *),
502 GFP_NOFS); 499 GFP_NOFS);
503 if (!n_group_desc) { 500 if (!n_group_desc) {
504 err = -ENOMEM; 501 err = -ENOMEM;
505 ext3_warning (sb, __func__, 502 ext3_warning (sb, __func__,
506 "not enough memory for %lu groups", gdb_num + 1); 503 "not enough memory for %lu groups", gdb_num + 1);
507 goto exit_inode; 504 goto exit_inode;
508 } 505 }
509 506
510 /* 507 /*
511 * Finally, we have all of the possible failures behind us... 508 * Finally, we have all of the possible failures behind us...
512 * 509 *
513 * Remove new GDT block from inode double-indirect block and clear out 510 * Remove new GDT block from inode double-indirect block and clear out
514 * the new GDT block for use (which also "frees" the backup GDT blocks 511 * the new GDT block for use (which also "frees" the backup GDT blocks
515 * from the reserved inode). We don't need to change the bitmaps for 512 * from the reserved inode). We don't need to change the bitmaps for
516 * these blocks, because they are marked as in-use from being in the 513 * these blocks, because they are marked as in-use from being in the
517 * reserved inode, and will become GDT blocks (primary and backup). 514 * reserved inode, and will become GDT blocks (primary and backup).
518 */ 515 */
519 data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)] = 0; 516 data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)] = 0;
520 err = ext3_journal_dirty_metadata(handle, dind); 517 err = ext3_journal_dirty_metadata(handle, dind);
521 if (err) 518 if (err)
522 goto exit_group_desc; 519 goto exit_group_desc;
523 brelse(dind); 520 brelse(dind);
524 dind = NULL; 521 dind = NULL;
525 inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9; 522 inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9;
526 err = ext3_mark_iloc_dirty(handle, inode, &iloc); 523 err = ext3_mark_iloc_dirty(handle, inode, &iloc);
527 if (err) 524 if (err)
528 goto exit_group_desc; 525 goto exit_group_desc;
529 memset((*primary)->b_data, 0, sb->s_blocksize); 526 memset((*primary)->b_data, 0, sb->s_blocksize);
530 err = ext3_journal_dirty_metadata(handle, *primary); 527 err = ext3_journal_dirty_metadata(handle, *primary);
531 if (err) 528 if (err)
532 goto exit_group_desc; 529 goto exit_group_desc;
533 530
534 o_group_desc = EXT3_SB(sb)->s_group_desc; 531 o_group_desc = EXT3_SB(sb)->s_group_desc;
535 memcpy(n_group_desc, o_group_desc, 532 memcpy(n_group_desc, o_group_desc,
536 EXT3_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); 533 EXT3_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
537 n_group_desc[gdb_num] = *primary; 534 n_group_desc[gdb_num] = *primary;
538 EXT3_SB(sb)->s_group_desc = n_group_desc; 535 EXT3_SB(sb)->s_group_desc = n_group_desc;
539 EXT3_SB(sb)->s_gdb_count++; 536 EXT3_SB(sb)->s_gdb_count++;
540 kfree(o_group_desc); 537 kfree(o_group_desc);
541 538
542 le16_add_cpu(&es->s_reserved_gdt_blocks, -1); 539 le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
543 err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); 540 err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
544 if (err) 541 if (err)
545 goto exit_inode; 542 goto exit_inode;
546 543
547 return 0; 544 return 0;
548 545
549 exit_group_desc: 546 exit_group_desc:
550 kfree(n_group_desc); 547 kfree(n_group_desc);
551 exit_inode: 548 exit_inode:
552 //ext3_journal_release_buffer(handle, iloc.bh); 549 //ext3_journal_release_buffer(handle, iloc.bh);
553 brelse(iloc.bh); 550 brelse(iloc.bh);
554 exit_dindj: 551 exit_dindj:
555 //ext3_journal_release_buffer(handle, dind); 552 //ext3_journal_release_buffer(handle, dind);
556 exit_primary: 553 exit_primary:
557 //ext3_journal_release_buffer(handle, *primary); 554 //ext3_journal_release_buffer(handle, *primary);
558 exit_sbh: 555 exit_sbh:
559 //ext3_journal_release_buffer(handle, *primary); 556 //ext3_journal_release_buffer(handle, *primary);
560 exit_dind: 557 exit_dind:
561 brelse(dind); 558 brelse(dind);
562 exit_bh: 559 exit_bh:
563 brelse(*primary); 560 brelse(*primary);
564 561
565 ext3_debug("leaving with error %d\n", err); 562 ext3_debug("leaving with error %d\n", err);
566 return err; 563 return err;
567 } 564 }
568 565
569 /* 566 /*
570 * Called when we are adding a new group which has a backup copy of each of 567 * Called when we are adding a new group which has a backup copy of each of
571 * the GDT blocks (i.e. sparse group) and there are reserved GDT blocks. 568 * the GDT blocks (i.e. sparse group) and there are reserved GDT blocks.
572 * We need to add these reserved backup GDT blocks to the resize inode, so 569 * We need to add these reserved backup GDT blocks to the resize inode, so
573 * that they are kept for future resizing and not allocated to files. 570 * that they are kept for future resizing and not allocated to files.
574 * 571 *
575 * Each reserved backup GDT block will go into a different indirect block. 572 * Each reserved backup GDT block will go into a different indirect block.
576 * The indirect blocks are actually the primary reserved GDT blocks, 573 * The indirect blocks are actually the primary reserved GDT blocks,
577 * so we know in advance what their block numbers are. We only get the 574 * so we know in advance what their block numbers are. We only get the
578 * double-indirect block to verify it is pointing to the primary reserved 575 * double-indirect block to verify it is pointing to the primary reserved
579 * GDT blocks so we don't overwrite a data block by accident. The reserved 576 * GDT blocks so we don't overwrite a data block by accident. The reserved
580 * backup GDT blocks are stored in their reserved primary GDT block. 577 * backup GDT blocks are stored in their reserved primary GDT block.
581 */ 578 */
582 static int reserve_backup_gdb(handle_t *handle, struct inode *inode, 579 static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
583 struct ext3_new_group_data *input) 580 struct ext3_new_group_data *input)
584 { 581 {
585 struct super_block *sb = inode->i_sb; 582 struct super_block *sb = inode->i_sb;
586 int reserved_gdb =le16_to_cpu(EXT3_SB(sb)->s_es->s_reserved_gdt_blocks); 583 int reserved_gdb =le16_to_cpu(EXT3_SB(sb)->s_es->s_reserved_gdt_blocks);
587 struct buffer_head **primary; 584 struct buffer_head **primary;
588 struct buffer_head *dind; 585 struct buffer_head *dind;
589 struct ext3_iloc iloc; 586 struct ext3_iloc iloc;
590 ext3_fsblk_t blk; 587 ext3_fsblk_t blk;
591 __le32 *data, *end; 588 __le32 *data, *end;
592 int gdbackups = 0; 589 int gdbackups = 0;
593 int res, i; 590 int res, i;
594 int err; 591 int err;
595 592
596 primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_NOFS); 593 primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_NOFS);
597 if (!primary) 594 if (!primary)
598 return -ENOMEM; 595 return -ENOMEM;
599 596
600 data = EXT3_I(inode)->i_data + EXT3_DIND_BLOCK; 597 data = EXT3_I(inode)->i_data + EXT3_DIND_BLOCK;
601 dind = sb_bread(sb, le32_to_cpu(*data)); 598 dind = sb_bread(sb, le32_to_cpu(*data));
602 if (!dind) { 599 if (!dind) {
603 err = -EIO; 600 err = -EIO;
604 goto exit_free; 601 goto exit_free;
605 } 602 }
606 603
607 blk = EXT3_SB(sb)->s_sbh->b_blocknr + 1 + EXT3_SB(sb)->s_gdb_count; 604 blk = EXT3_SB(sb)->s_sbh->b_blocknr + 1 + EXT3_SB(sb)->s_gdb_count;
608 data = (__le32 *)dind->b_data + (EXT3_SB(sb)->s_gdb_count % 605 data = (__le32 *)dind->b_data + (EXT3_SB(sb)->s_gdb_count %
609 EXT3_ADDR_PER_BLOCK(sb)); 606 EXT3_ADDR_PER_BLOCK(sb));
610 end = (__le32 *)dind->b_data + EXT3_ADDR_PER_BLOCK(sb); 607 end = (__le32 *)dind->b_data + EXT3_ADDR_PER_BLOCK(sb);
611 608
612 /* Get each reserved primary GDT block and verify it holds backups */ 609 /* Get each reserved primary GDT block and verify it holds backups */
613 for (res = 0; res < reserved_gdb; res++, blk++) { 610 for (res = 0; res < reserved_gdb; res++, blk++) {
614 if (le32_to_cpu(*data) != blk) { 611 if (le32_to_cpu(*data) != blk) {
615 ext3_warning(sb, __func__, 612 ext3_warning(sb, __func__,
616 "reserved block "E3FSBLK 613 "reserved block "E3FSBLK
617 " not at offset %ld", 614 " not at offset %ld",
618 blk, 615 blk,
619 (long)(data - (__le32 *)dind->b_data)); 616 (long)(data - (__le32 *)dind->b_data));
620 err = -EINVAL; 617 err = -EINVAL;
621 goto exit_bh; 618 goto exit_bh;
622 } 619 }
623 primary[res] = sb_bread(sb, blk); 620 primary[res] = sb_bread(sb, blk);
624 if (!primary[res]) { 621 if (!primary[res]) {
625 err = -EIO; 622 err = -EIO;
626 goto exit_bh; 623 goto exit_bh;
627 } 624 }
628 if ((gdbackups = verify_reserved_gdb(sb, primary[res])) < 0) { 625 if ((gdbackups = verify_reserved_gdb(sb, primary[res])) < 0) {
629 brelse(primary[res]); 626 brelse(primary[res]);
630 err = gdbackups; 627 err = gdbackups;
631 goto exit_bh; 628 goto exit_bh;
632 } 629 }
633 if (++data >= end) 630 if (++data >= end)
634 data = (__le32 *)dind->b_data; 631 data = (__le32 *)dind->b_data;
635 } 632 }
636 633
637 for (i = 0; i < reserved_gdb; i++) { 634 for (i = 0; i < reserved_gdb; i++) {
638 if ((err = ext3_journal_get_write_access(handle, primary[i]))) { 635 if ((err = ext3_journal_get_write_access(handle, primary[i]))) {
639 /* 636 /*
640 int j; 637 int j;
641 for (j = 0; j < i; j++) 638 for (j = 0; j < i; j++)
642 ext3_journal_release_buffer(handle, primary[j]); 639 ext3_journal_release_buffer(handle, primary[j]);
643 */ 640 */
644 goto exit_bh; 641 goto exit_bh;
645 } 642 }
646 } 643 }
647 644
648 if ((err = ext3_reserve_inode_write(handle, inode, &iloc))) 645 if ((err = ext3_reserve_inode_write(handle, inode, &iloc)))
649 goto exit_bh; 646 goto exit_bh;
650 647
651 /* 648 /*
652 * Finally we can add each of the reserved backup GDT blocks from 649 * Finally we can add each of the reserved backup GDT blocks from
653 * the new group to its reserved primary GDT block. 650 * the new group to its reserved primary GDT block.
654 */ 651 */
655 blk = input->group * EXT3_BLOCKS_PER_GROUP(sb); 652 blk = input->group * EXT3_BLOCKS_PER_GROUP(sb);
656 for (i = 0; i < reserved_gdb; i++) { 653 for (i = 0; i < reserved_gdb; i++) {
657 int err2; 654 int err2;
658 data = (__le32 *)primary[i]->b_data; 655 data = (__le32 *)primary[i]->b_data;
659 /* printk("reserving backup %lu[%u] = %lu\n", 656 /* printk("reserving backup %lu[%u] = %lu\n",
660 primary[i]->b_blocknr, gdbackups, 657 primary[i]->b_blocknr, gdbackups,
661 blk + primary[i]->b_blocknr); */ 658 blk + primary[i]->b_blocknr); */
662 data[gdbackups] = cpu_to_le32(blk + primary[i]->b_blocknr); 659 data[gdbackups] = cpu_to_le32(blk + primary[i]->b_blocknr);
663 err2 = ext3_journal_dirty_metadata(handle, primary[i]); 660 err2 = ext3_journal_dirty_metadata(handle, primary[i]);
664 if (!err) 661 if (!err)
665 err = err2; 662 err = err2;
666 } 663 }
667 inode->i_blocks += reserved_gdb * sb->s_blocksize >> 9; 664 inode->i_blocks += reserved_gdb * sb->s_blocksize >> 9;
668 ext3_mark_iloc_dirty(handle, inode, &iloc); 665 ext3_mark_iloc_dirty(handle, inode, &iloc);
669 666
670 exit_bh: 667 exit_bh:
671 while (--res >= 0) 668 while (--res >= 0)
672 brelse(primary[res]); 669 brelse(primary[res]);
673 brelse(dind); 670 brelse(dind);
674 671
675 exit_free: 672 exit_free:
676 kfree(primary); 673 kfree(primary);
677 674
678 return err; 675 return err;
679 } 676 }
680 677
681 /* 678 /*
682 * Update the backup copies of the ext3 metadata. These don't need to be part 679 * Update the backup copies of the ext3 metadata. These don't need to be part
683 * of the main resize transaction, because e2fsck will re-write them if there 680 * of the main resize transaction, because e2fsck will re-write them if there
684 * is a problem (basically only OOM will cause a problem). However, we 681 * is a problem (basically only OOM will cause a problem). However, we
685 * _should_ update the backups if possible, in case the primary gets trashed 682 * _should_ update the backups if possible, in case the primary gets trashed
686 * for some reason and we need to run e2fsck from a backup superblock. The 683 * for some reason and we need to run e2fsck from a backup superblock. The
687 * important part is that the new block and inode counts are in the backup 684 * important part is that the new block and inode counts are in the backup
688 * superblocks, and the location of the new group metadata in the GDT backups. 685 * superblocks, and the location of the new group metadata in the GDT backups.
689 * 686 *
690 * We do not need take the s_resize_lock for this, because these 687 * We do not need take the s_resize_lock for this, because these
691 * blocks are not otherwise touched by the filesystem code when it is 688 * blocks are not otherwise touched by the filesystem code when it is
692 * mounted. We don't need to worry about last changing from 689 * mounted. We don't need to worry about last changing from
693 * sbi->s_groups_count, because the worst that can happen is that we 690 * sbi->s_groups_count, because the worst that can happen is that we
694 * do not copy the full number of backups at this time. The resize 691 * do not copy the full number of backups at this time. The resize
695 * which changed s_groups_count will backup again. 692 * which changed s_groups_count will backup again.
696 */ 693 */
697 static void update_backups(struct super_block *sb, 694 static void update_backups(struct super_block *sb,
698 int blk_off, char *data, int size) 695 int blk_off, char *data, int size)
699 { 696 {
700 struct ext3_sb_info *sbi = EXT3_SB(sb); 697 struct ext3_sb_info *sbi = EXT3_SB(sb);
701 const unsigned long last = sbi->s_groups_count; 698 const unsigned long last = sbi->s_groups_count;
702 const int bpg = EXT3_BLOCKS_PER_GROUP(sb); 699 const int bpg = EXT3_BLOCKS_PER_GROUP(sb);
703 unsigned three = 1; 700 unsigned three = 1;
704 unsigned five = 5; 701 unsigned five = 5;
705 unsigned seven = 7; 702 unsigned seven = 7;
706 unsigned group; 703 unsigned group;
707 int rest = sb->s_blocksize - size; 704 int rest = sb->s_blocksize - size;
708 handle_t *handle; 705 handle_t *handle;
709 int err = 0, err2; 706 int err = 0, err2;
710 707
711 handle = ext3_journal_start_sb(sb, EXT3_MAX_TRANS_DATA); 708 handle = ext3_journal_start_sb(sb, EXT3_MAX_TRANS_DATA);
712 if (IS_ERR(handle)) { 709 if (IS_ERR(handle)) {
713 group = 1; 710 group = 1;
714 err = PTR_ERR(handle); 711 err = PTR_ERR(handle);
715 goto exit_err; 712 goto exit_err;
716 } 713 }
717 714
718 while ((group = ext3_list_backups(sb, &three, &five, &seven)) < last) { 715 while ((group = ext3_list_backups(sb, &three, &five, &seven)) < last) {
719 struct buffer_head *bh; 716 struct buffer_head *bh;
720 717
721 /* Out of journal space, and can't get more - abort - so sad */ 718 /* Out of journal space, and can't get more - abort - so sad */
722 if (handle->h_buffer_credits == 0 && 719 if (handle->h_buffer_credits == 0 &&
723 ext3_journal_extend(handle, EXT3_MAX_TRANS_DATA) && 720 ext3_journal_extend(handle, EXT3_MAX_TRANS_DATA) &&
724 (err = ext3_journal_restart(handle, EXT3_MAX_TRANS_DATA))) 721 (err = ext3_journal_restart(handle, EXT3_MAX_TRANS_DATA)))
725 break; 722 break;
726 723
727 bh = sb_getblk(sb, group * bpg + blk_off); 724 bh = sb_getblk(sb, group * bpg + blk_off);
728 if (!bh) { 725 if (!bh) {
729 err = -EIO; 726 err = -EIO;
730 break; 727 break;
731 } 728 }
732 ext3_debug("update metadata backup %#04lx\n", 729 ext3_debug("update metadata backup %#04lx\n",
733 (unsigned long)bh->b_blocknr); 730 (unsigned long)bh->b_blocknr);
734 if ((err = ext3_journal_get_write_access(handle, bh))) { 731 if ((err = ext3_journal_get_write_access(handle, bh))) {
735 brelse(bh); 732 brelse(bh);
736 break; 733 break;
737 } 734 }
738 lock_buffer(bh); 735 lock_buffer(bh);
739 memcpy(bh->b_data, data, size); 736 memcpy(bh->b_data, data, size);
740 if (rest) 737 if (rest)
741 memset(bh->b_data + size, 0, rest); 738 memset(bh->b_data + size, 0, rest);
742 set_buffer_uptodate(bh); 739 set_buffer_uptodate(bh);
743 unlock_buffer(bh); 740 unlock_buffer(bh);
744 err = ext3_journal_dirty_metadata(handle, bh); 741 err = ext3_journal_dirty_metadata(handle, bh);
745 brelse(bh); 742 brelse(bh);
746 if (err) 743 if (err)
747 break; 744 break;
748 } 745 }
749 if ((err2 = ext3_journal_stop(handle)) && !err) 746 if ((err2 = ext3_journal_stop(handle)) && !err)
750 err = err2; 747 err = err2;
751 748
752 /* 749 /*
753 * Ugh! Need to have e2fsck write the backup copies. It is too 750 * Ugh! Need to have e2fsck write the backup copies. It is too
754 * late to revert the resize, we shouldn't fail just because of 751 * late to revert the resize, we shouldn't fail just because of
755 * the backup copies (they are only needed in case of corruption). 752 * the backup copies (they are only needed in case of corruption).
756 * 753 *
757 * However, if we got here we have a journal problem too, so we 754 * However, if we got here we have a journal problem too, so we
758 * can't really start a transaction to mark the superblock. 755 * can't really start a transaction to mark the superblock.
759 * Chicken out and just set the flag on the hope it will be written 756 * Chicken out and just set the flag on the hope it will be written
760 * to disk, and if not - we will simply wait until next fsck. 757 * to disk, and if not - we will simply wait until next fsck.
761 */ 758 */
762 exit_err: 759 exit_err:
763 if (err) { 760 if (err) {
764 ext3_warning(sb, __func__, 761 ext3_warning(sb, __func__,
765 "can't update backup for group %d (err %d), " 762 "can't update backup for group %d (err %d), "
766 "forcing fsck on next reboot", group, err); 763 "forcing fsck on next reboot", group, err);
767 sbi->s_mount_state &= ~EXT3_VALID_FS; 764 sbi->s_mount_state &= ~EXT3_VALID_FS;
768 sbi->s_es->s_state &= cpu_to_le16(~EXT3_VALID_FS); 765 sbi->s_es->s_state &= cpu_to_le16(~EXT3_VALID_FS);
769 mark_buffer_dirty(sbi->s_sbh); 766 mark_buffer_dirty(sbi->s_sbh);
770 } 767 }
771 } 768 }
772 769
773 /* Add group descriptor data to an existing or new group descriptor block. 770 /* Add group descriptor data to an existing or new group descriptor block.
774 * Ensure we handle all possible error conditions _before_ we start modifying 771 * Ensure we handle all possible error conditions _before_ we start modifying
775 * the filesystem, because we cannot abort the transaction and not have it 772 * the filesystem, because we cannot abort the transaction and not have it
776 * write the data to disk. 773 * write the data to disk.
777 * 774 *
778 * If we are on a GDT block boundary, we need to get the reserved GDT block. 775 * If we are on a GDT block boundary, we need to get the reserved GDT block.
779 * Otherwise, we may need to add backup GDT blocks for a sparse group. 776 * Otherwise, we may need to add backup GDT blocks for a sparse group.
780 * 777 *
781 * We only need to hold the superblock lock while we are actually adding 778 * We only need to hold the superblock lock while we are actually adding
782 * in the new group's counts to the superblock. Prior to that we have 779 * in the new group's counts to the superblock. Prior to that we have
783 * not really "added" the group at all. We re-check that we are still 780 * not really "added" the group at all. We re-check that we are still
784 * adding in the last group in case things have changed since verifying. 781 * adding in the last group in case things have changed since verifying.
785 */ 782 */
786 int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input) 783 int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
787 { 784 {
788 struct ext3_sb_info *sbi = EXT3_SB(sb); 785 struct ext3_sb_info *sbi = EXT3_SB(sb);
789 struct ext3_super_block *es = sbi->s_es; 786 struct ext3_super_block *es = sbi->s_es;
790 int reserved_gdb = ext3_bg_has_super(sb, input->group) ? 787 int reserved_gdb = ext3_bg_has_super(sb, input->group) ?
791 le16_to_cpu(es->s_reserved_gdt_blocks) : 0; 788 le16_to_cpu(es->s_reserved_gdt_blocks) : 0;
792 struct buffer_head *primary = NULL; 789 struct buffer_head *primary = NULL;
793 struct ext3_group_desc *gdp; 790 struct ext3_group_desc *gdp;
794 struct inode *inode = NULL; 791 struct inode *inode = NULL;
795 handle_t *handle; 792 handle_t *handle;
796 int gdb_off, gdb_num; 793 int gdb_off, gdb_num;
797 int err, err2; 794 int err, err2;
798 795
799 gdb_num = input->group / EXT3_DESC_PER_BLOCK(sb); 796 gdb_num = input->group / EXT3_DESC_PER_BLOCK(sb);
800 gdb_off = input->group % EXT3_DESC_PER_BLOCK(sb); 797 gdb_off = input->group % EXT3_DESC_PER_BLOCK(sb);
801 798
802 if (gdb_off == 0 && !EXT3_HAS_RO_COMPAT_FEATURE(sb, 799 if (gdb_off == 0 && !EXT3_HAS_RO_COMPAT_FEATURE(sb,
803 EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)) { 800 EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)) {
804 ext3_warning(sb, __func__, 801 ext3_warning(sb, __func__,
805 "Can't resize non-sparse filesystem further"); 802 "Can't resize non-sparse filesystem further");
806 return -EPERM; 803 return -EPERM;
807 } 804 }
808 805
809 if (le32_to_cpu(es->s_blocks_count) + input->blocks_count < 806 if (le32_to_cpu(es->s_blocks_count) + input->blocks_count <
810 le32_to_cpu(es->s_blocks_count)) { 807 le32_to_cpu(es->s_blocks_count)) {
811 ext3_warning(sb, __func__, "blocks_count overflow\n"); 808 ext3_warning(sb, __func__, "blocks_count overflow\n");
812 return -EINVAL; 809 return -EINVAL;
813 } 810 }
814 811
815 if (le32_to_cpu(es->s_inodes_count) + EXT3_INODES_PER_GROUP(sb) < 812 if (le32_to_cpu(es->s_inodes_count) + EXT3_INODES_PER_GROUP(sb) <
816 le32_to_cpu(es->s_inodes_count)) { 813 le32_to_cpu(es->s_inodes_count)) {
817 ext3_warning(sb, __func__, "inodes_count overflow\n"); 814 ext3_warning(sb, __func__, "inodes_count overflow\n");
818 return -EINVAL; 815 return -EINVAL;
819 } 816 }
820 817
821 if (reserved_gdb || gdb_off == 0) { 818 if (reserved_gdb || gdb_off == 0) {
822 if (!EXT3_HAS_COMPAT_FEATURE(sb, 819 if (!EXT3_HAS_COMPAT_FEATURE(sb,
823 EXT3_FEATURE_COMPAT_RESIZE_INODE) 820 EXT3_FEATURE_COMPAT_RESIZE_INODE)
824 || !le16_to_cpu(es->s_reserved_gdt_blocks)) { 821 || !le16_to_cpu(es->s_reserved_gdt_blocks)) {
825 ext3_warning(sb, __func__, 822 ext3_warning(sb, __func__,
826 "No reserved GDT blocks, can't resize"); 823 "No reserved GDT blocks, can't resize");
827 return -EPERM; 824 return -EPERM;
828 } 825 }
829 inode = ext3_iget(sb, EXT3_RESIZE_INO); 826 inode = ext3_iget(sb, EXT3_RESIZE_INO);
830 if (IS_ERR(inode)) { 827 if (IS_ERR(inode)) {
831 ext3_warning(sb, __func__, 828 ext3_warning(sb, __func__,
832 "Error opening resize inode"); 829 "Error opening resize inode");
833 return PTR_ERR(inode); 830 return PTR_ERR(inode);
834 } 831 }
835 } 832 }
836 833
837 if ((err = verify_group_input(sb, input))) 834 if ((err = verify_group_input(sb, input)))
838 goto exit_put; 835 goto exit_put;
839 836
840 if ((err = setup_new_group_blocks(sb, input))) 837 if ((err = setup_new_group_blocks(sb, input)))
841 goto exit_put; 838 goto exit_put;
842 839
843 /* 840 /*
844 * We will always be modifying at least the superblock and a GDT 841 * We will always be modifying at least the superblock and a GDT
845 * block. If we are adding a group past the last current GDT block, 842 * block. If we are adding a group past the last current GDT block,
846 * we will also modify the inode and the dindirect block. If we 843 * we will also modify the inode and the dindirect block. If we
847 * are adding a group with superblock/GDT backups we will also 844 * are adding a group with superblock/GDT backups we will also
848 * modify each of the reserved GDT dindirect blocks. 845 * modify each of the reserved GDT dindirect blocks.
849 */ 846 */
850 handle = ext3_journal_start_sb(sb, 847 handle = ext3_journal_start_sb(sb,
851 ext3_bg_has_super(sb, input->group) ? 848 ext3_bg_has_super(sb, input->group) ?
852 3 + reserved_gdb : 4); 849 3 + reserved_gdb : 4);
853 if (IS_ERR(handle)) { 850 if (IS_ERR(handle)) {
854 err = PTR_ERR(handle); 851 err = PTR_ERR(handle);
855 goto exit_put; 852 goto exit_put;
856 } 853 }
857 854
858 mutex_lock(&sbi->s_resize_lock); 855 mutex_lock(&sbi->s_resize_lock);
859 if (input->group != sbi->s_groups_count) { 856 if (input->group != sbi->s_groups_count) {
860 ext3_warning(sb, __func__, 857 ext3_warning(sb, __func__,
861 "multiple resizers run on filesystem!"); 858 "multiple resizers run on filesystem!");
862 err = -EBUSY; 859 err = -EBUSY;
863 goto exit_journal; 860 goto exit_journal;
864 } 861 }
865 862
866 if ((err = ext3_journal_get_write_access(handle, sbi->s_sbh))) 863 if ((err = ext3_journal_get_write_access(handle, sbi->s_sbh)))
867 goto exit_journal; 864 goto exit_journal;
868 865
869 /* 866 /*
870 * We will only either add reserved group blocks to a backup group 867 * We will only either add reserved group blocks to a backup group
871 * or remove reserved blocks for the first group in a new group block. 868 * or remove reserved blocks for the first group in a new group block.
872 * Doing both would be mean more complex code, and sane people don't 869 * Doing both would be mean more complex code, and sane people don't
873 * use non-sparse filesystems anymore. This is already checked above. 870 * use non-sparse filesystems anymore. This is already checked above.
874 */ 871 */
875 if (gdb_off) { 872 if (gdb_off) {
876 primary = sbi->s_group_desc[gdb_num]; 873 primary = sbi->s_group_desc[gdb_num];
877 if ((err = ext3_journal_get_write_access(handle, primary))) 874 if ((err = ext3_journal_get_write_access(handle, primary)))
878 goto exit_journal; 875 goto exit_journal;
879 876
880 if (reserved_gdb && ext3_bg_num_gdb(sb, input->group) && 877 if (reserved_gdb && ext3_bg_num_gdb(sb, input->group) &&
881 (err = reserve_backup_gdb(handle, inode, input))) 878 (err = reserve_backup_gdb(handle, inode, input)))
882 goto exit_journal; 879 goto exit_journal;
883 } else if ((err = add_new_gdb(handle, inode, input, &primary))) 880 } else if ((err = add_new_gdb(handle, inode, input, &primary)))
884 goto exit_journal; 881 goto exit_journal;
885 882
886 /* 883 /*
887 * OK, now we've set up the new group. Time to make it active. 884 * OK, now we've set up the new group. Time to make it active.
888 * 885 *
889 * We do not lock all allocations via s_resize_lock 886 * We do not lock all allocations via s_resize_lock
890 * so we have to be safe wrt. concurrent accesses the group 887 * so we have to be safe wrt. concurrent accesses the group
891 * data. So we need to be careful to set all of the relevant 888 * data. So we need to be careful to set all of the relevant
892 * group descriptor data etc. *before* we enable the group. 889 * group descriptor data etc. *before* we enable the group.
893 * 890 *
894 * The key field here is sbi->s_groups_count: as long as 891 * The key field here is sbi->s_groups_count: as long as
895 * that retains its old value, nobody is going to access the new 892 * that retains its old value, nobody is going to access the new
896 * group. 893 * group.
897 * 894 *
898 * So first we update all the descriptor metadata for the new 895 * So first we update all the descriptor metadata for the new
899 * group; then we update the total disk blocks count; then we 896 * group; then we update the total disk blocks count; then we
900 * update the groups count to enable the group; then finally we 897 * update the groups count to enable the group; then finally we
901 * update the free space counts so that the system can start 898 * update the free space counts so that the system can start
902 * using the new disk blocks. 899 * using the new disk blocks.
903 */ 900 */
904 901
905 /* Update group descriptor block for new group */ 902 /* Update group descriptor block for new group */
906 gdp = (struct ext3_group_desc *)primary->b_data + gdb_off; 903 gdp = (struct ext3_group_desc *)primary->b_data + gdb_off;
907 904
908 gdp->bg_block_bitmap = cpu_to_le32(input->block_bitmap); 905 gdp->bg_block_bitmap = cpu_to_le32(input->block_bitmap);
909 gdp->bg_inode_bitmap = cpu_to_le32(input->inode_bitmap); 906 gdp->bg_inode_bitmap = cpu_to_le32(input->inode_bitmap);
910 gdp->bg_inode_table = cpu_to_le32(input->inode_table); 907 gdp->bg_inode_table = cpu_to_le32(input->inode_table);
911 gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count); 908 gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count);
912 gdp->bg_free_inodes_count = cpu_to_le16(EXT3_INODES_PER_GROUP(sb)); 909 gdp->bg_free_inodes_count = cpu_to_le16(EXT3_INODES_PER_GROUP(sb));
913 910
914 /* 911 /*
915 * Make the new blocks and inodes valid next. We do this before 912 * Make the new blocks and inodes valid next. We do this before
916 * increasing the group count so that once the group is enabled, 913 * increasing the group count so that once the group is enabled,
917 * all of its blocks and inodes are already valid. 914 * all of its blocks and inodes are already valid.
918 * 915 *
919 * We always allocate group-by-group, then block-by-block or 916 * We always allocate group-by-group, then block-by-block or
920 * inode-by-inode within a group, so enabling these 917 * inode-by-inode within a group, so enabling these
921 * blocks/inodes before the group is live won't actually let us 918 * blocks/inodes before the group is live won't actually let us
922 * allocate the new space yet. 919 * allocate the new space yet.
923 */ 920 */
924 le32_add_cpu(&es->s_blocks_count, input->blocks_count); 921 le32_add_cpu(&es->s_blocks_count, input->blocks_count);
925 le32_add_cpu(&es->s_inodes_count, EXT3_INODES_PER_GROUP(sb)); 922 le32_add_cpu(&es->s_inodes_count, EXT3_INODES_PER_GROUP(sb));
926 923
927 /* 924 /*
928 * We need to protect s_groups_count against other CPUs seeing 925 * We need to protect s_groups_count against other CPUs seeing
929 * inconsistent state in the superblock. 926 * inconsistent state in the superblock.
930 * 927 *
931 * The precise rules we use are: 928 * The precise rules we use are:
932 * 929 *
933 * * Writers of s_groups_count *must* hold s_resize_lock 930 * * Writers of s_groups_count *must* hold s_resize_lock
934 * AND 931 * AND
935 * * Writers must perform a smp_wmb() after updating all dependent 932 * * Writers must perform a smp_wmb() after updating all dependent
936 * data and before modifying the groups count 933 * data and before modifying the groups count
937 * 934 *
938 * * Readers must hold s_resize_lock over the access 935 * * Readers must hold s_resize_lock over the access
939 * OR 936 * OR
940 * * Readers must perform an smp_rmb() after reading the groups count 937 * * Readers must perform an smp_rmb() after reading the groups count
941 * and before reading any dependent data. 938 * and before reading any dependent data.
942 * 939 *
943 * NB. These rules can be relaxed when checking the group count 940 * NB. These rules can be relaxed when checking the group count
944 * while freeing data, as we can only allocate from a block 941 * while freeing data, as we can only allocate from a block
945 * group after serialising against the group count, and we can 942 * group after serialising against the group count, and we can
946 * only then free after serialising in turn against that 943 * only then free after serialising in turn against that
947 * allocation. 944 * allocation.
948 */ 945 */
949 smp_wmb(); 946 smp_wmb();
950 947
951 /* Update the global fs size fields */ 948 /* Update the global fs size fields */
952 sbi->s_groups_count++; 949 sbi->s_groups_count++;
953 950
954 err = ext3_journal_dirty_metadata(handle, primary); 951 err = ext3_journal_dirty_metadata(handle, primary);
955 if (err) 952 if (err)
956 goto exit_journal; 953 goto exit_journal;
957 954
958 /* Update the reserved block counts only once the new group is 955 /* Update the reserved block counts only once the new group is
959 * active. */ 956 * active. */
960 le32_add_cpu(&es->s_r_blocks_count, input->reserved_blocks); 957 le32_add_cpu(&es->s_r_blocks_count, input->reserved_blocks);
961 958
962 /* Update the free space counts */ 959 /* Update the free space counts */
963 percpu_counter_add(&sbi->s_freeblocks_counter, 960 percpu_counter_add(&sbi->s_freeblocks_counter,
964 input->free_blocks_count); 961 input->free_blocks_count);
965 percpu_counter_add(&sbi->s_freeinodes_counter, 962 percpu_counter_add(&sbi->s_freeinodes_counter,
966 EXT3_INODES_PER_GROUP(sb)); 963 EXT3_INODES_PER_GROUP(sb));
967 964
968 err = ext3_journal_dirty_metadata(handle, sbi->s_sbh); 965 err = ext3_journal_dirty_metadata(handle, sbi->s_sbh);
969 966
970 exit_journal: 967 exit_journal:
971 mutex_unlock(&sbi->s_resize_lock); 968 mutex_unlock(&sbi->s_resize_lock);
972 if ((err2 = ext3_journal_stop(handle)) && !err) 969 if ((err2 = ext3_journal_stop(handle)) && !err)
973 err = err2; 970 err = err2;
974 if (!err) { 971 if (!err) {
975 update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, 972 update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es,
976 sizeof(struct ext3_super_block)); 973 sizeof(struct ext3_super_block));
977 update_backups(sb, primary->b_blocknr, primary->b_data, 974 update_backups(sb, primary->b_blocknr, primary->b_data,
978 primary->b_size); 975 primary->b_size);
979 } 976 }
980 exit_put: 977 exit_put:
981 iput(inode); 978 iput(inode);
982 return err; 979 return err;
983 } /* ext3_group_add */ 980 } /* ext3_group_add */
984 981
985 /* Extend the filesystem to the new number of blocks specified. This entry 982 /* Extend the filesystem to the new number of blocks specified. This entry
986 * point is only used to extend the current filesystem to the end of the last 983 * point is only used to extend the current filesystem to the end of the last
987 * existing group. It can be accessed via ioctl, or by "remount,resize=<size>" 984 * existing group. It can be accessed via ioctl, or by "remount,resize=<size>"
988 * for emergencies (because it has no dependencies on reserved blocks). 985 * for emergencies (because it has no dependencies on reserved blocks).
989 * 986 *
990 * If we _really_ wanted, we could use default values to call ext3_group_add() 987 * If we _really_ wanted, we could use default values to call ext3_group_add()
991 * allow the "remount" trick to work for arbitrary resizing, assuming enough 988 * allow the "remount" trick to work for arbitrary resizing, assuming enough
992 * GDT blocks are reserved to grow to the desired size. 989 * GDT blocks are reserved to grow to the desired size.
993 */ 990 */
994 int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es, 991 int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
995 ext3_fsblk_t n_blocks_count) 992 ext3_fsblk_t n_blocks_count)
996 { 993 {
997 ext3_fsblk_t o_blocks_count; 994 ext3_fsblk_t o_blocks_count;
998 ext3_grpblk_t last; 995 ext3_grpblk_t last;
999 ext3_grpblk_t add; 996 ext3_grpblk_t add;
1000 struct buffer_head * bh; 997 struct buffer_head * bh;
1001 handle_t *handle; 998 handle_t *handle;
1002 int err; 999 int err;
1003 unsigned long freed_blocks; 1000 unsigned long freed_blocks;
1004 1001
1005 /* We don't need to worry about locking wrt other resizers just 1002 /* We don't need to worry about locking wrt other resizers just
1006 * yet: we're going to revalidate es->s_blocks_count after 1003 * yet: we're going to revalidate es->s_blocks_count after
1007 * taking the s_resize_lock below. */ 1004 * taking the s_resize_lock below. */
1008 o_blocks_count = le32_to_cpu(es->s_blocks_count); 1005 o_blocks_count = le32_to_cpu(es->s_blocks_count);
1009 1006
1010 if (test_opt(sb, DEBUG)) 1007 if (test_opt(sb, DEBUG))
1011 printk(KERN_DEBUG "EXT3-fs: extending last group from "E3FSBLK 1008 printk(KERN_DEBUG "EXT3-fs: extending last group from "E3FSBLK
1012 " up to "E3FSBLK" blocks\n", 1009 " up to "E3FSBLK" blocks\n",
1013 o_blocks_count, n_blocks_count); 1010 o_blocks_count, n_blocks_count);
1014 1011
1015 if (n_blocks_count == 0 || n_blocks_count == o_blocks_count) 1012 if (n_blocks_count == 0 || n_blocks_count == o_blocks_count)
1016 return 0; 1013 return 0;
1017 1014
1018 if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { 1015 if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
1019 printk(KERN_ERR "EXT3-fs: filesystem on %s:" 1016 printk(KERN_ERR "EXT3-fs: filesystem on %s:"
1020 " too large to resize to "E3FSBLK" blocks safely\n", 1017 " too large to resize to "E3FSBLK" blocks safely\n",
1021 sb->s_id, n_blocks_count); 1018 sb->s_id, n_blocks_count);
1022 if (sizeof(sector_t) < 8) 1019 if (sizeof(sector_t) < 8)
1023 ext3_warning(sb, __func__, 1020 ext3_warning(sb, __func__,
1024 "CONFIG_LBDAF not enabled\n"); 1021 "CONFIG_LBDAF not enabled\n");
1025 return -EINVAL; 1022 return -EINVAL;
1026 } 1023 }
1027 1024
1028 if (n_blocks_count < o_blocks_count) { 1025 if (n_blocks_count < o_blocks_count) {
1029 ext3_warning(sb, __func__, 1026 ext3_warning(sb, __func__,
1030 "can't shrink FS - resize aborted"); 1027 "can't shrink FS - resize aborted");
1031 return -EBUSY; 1028 return -EBUSY;
1032 } 1029 }
1033 1030
1034 /* Handle the remaining blocks in the last group only. */ 1031 /* Handle the remaining blocks in the last group only. */
1035 last = (o_blocks_count - le32_to_cpu(es->s_first_data_block)) % 1032 last = (o_blocks_count - le32_to_cpu(es->s_first_data_block)) %
1036 EXT3_BLOCKS_PER_GROUP(sb); 1033 EXT3_BLOCKS_PER_GROUP(sb);
1037 1034
1038 if (last == 0) { 1035 if (last == 0) {
1039 ext3_warning(sb, __func__, 1036 ext3_warning(sb, __func__,
1040 "need to use ext2online to resize further"); 1037 "need to use ext2online to resize further");
1041 return -EPERM; 1038 return -EPERM;
1042 } 1039 }
1043 1040
1044 add = EXT3_BLOCKS_PER_GROUP(sb) - last; 1041 add = EXT3_BLOCKS_PER_GROUP(sb) - last;
1045 1042
1046 if (o_blocks_count + add < o_blocks_count) { 1043 if (o_blocks_count + add < o_blocks_count) {
1047 ext3_warning(sb, __func__, "blocks_count overflow"); 1044 ext3_warning(sb, __func__, "blocks_count overflow");
1048 return -EINVAL; 1045 return -EINVAL;
1049 } 1046 }
1050 1047
1051 if (o_blocks_count + add > n_blocks_count) 1048 if (o_blocks_count + add > n_blocks_count)
1052 add = n_blocks_count - o_blocks_count; 1049 add = n_blocks_count - o_blocks_count;
1053 1050
1054 if (o_blocks_count + add < n_blocks_count) 1051 if (o_blocks_count + add < n_blocks_count)
1055 ext3_warning(sb, __func__, 1052 ext3_warning(sb, __func__,
1056 "will only finish group ("E3FSBLK 1053 "will only finish group ("E3FSBLK
1057 " blocks, %u new)", 1054 " blocks, %u new)",
1058 o_blocks_count + add, add); 1055 o_blocks_count + add, add);
1059 1056
1060 /* See if the device is actually as big as what was requested */ 1057 /* See if the device is actually as big as what was requested */
1061 bh = sb_bread(sb, o_blocks_count + add -1); 1058 bh = sb_bread(sb, o_blocks_count + add -1);
1062 if (!bh) { 1059 if (!bh) {
1063 ext3_warning(sb, __func__, 1060 ext3_warning(sb, __func__,
1064 "can't read last block, resize aborted"); 1061 "can't read last block, resize aborted");
1065 return -ENOSPC; 1062 return -ENOSPC;
1066 } 1063 }
1067 brelse(bh); 1064 brelse(bh);
1068 1065
1069 /* We will update the superblock, one block bitmap, and 1066 /* We will update the superblock, one block bitmap, and
1070 * one group descriptor via ext3_free_blocks(). 1067 * one group descriptor via ext3_free_blocks().
1071 */ 1068 */
1072 handle = ext3_journal_start_sb(sb, 3); 1069 handle = ext3_journal_start_sb(sb, 3);
1073 if (IS_ERR(handle)) { 1070 if (IS_ERR(handle)) {
1074 err = PTR_ERR(handle); 1071 err = PTR_ERR(handle);
1075 ext3_warning(sb, __func__, "error %d on journal start",err); 1072 ext3_warning(sb, __func__, "error %d on journal start",err);
1076 goto exit_put; 1073 goto exit_put;
1077 } 1074 }
1078 1075
1079 mutex_lock(&EXT3_SB(sb)->s_resize_lock); 1076 mutex_lock(&EXT3_SB(sb)->s_resize_lock);
1080 if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) { 1077 if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) {
1081 ext3_warning(sb, __func__, 1078 ext3_warning(sb, __func__,
1082 "multiple resizers run on filesystem!"); 1079 "multiple resizers run on filesystem!");
1083 mutex_unlock(&EXT3_SB(sb)->s_resize_lock); 1080 mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
1084 ext3_journal_stop(handle); 1081 ext3_journal_stop(handle);
1085 err = -EBUSY; 1082 err = -EBUSY;
1086 goto exit_put; 1083 goto exit_put;
1087 } 1084 }
1088 1085
1089 if ((err = ext3_journal_get_write_access(handle, 1086 if ((err = ext3_journal_get_write_access(handle,
1090 EXT3_SB(sb)->s_sbh))) { 1087 EXT3_SB(sb)->s_sbh))) {
1091 ext3_warning(sb, __func__, 1088 ext3_warning(sb, __func__,
1092 "error %d on journal write access", err); 1089 "error %d on journal write access", err);
1093 mutex_unlock(&EXT3_SB(sb)->s_resize_lock); 1090 mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
1094 ext3_journal_stop(handle); 1091 ext3_journal_stop(handle);
1095 goto exit_put; 1092 goto exit_put;
1096 } 1093 }
1097 es->s_blocks_count = cpu_to_le32(o_blocks_count + add); 1094 es->s_blocks_count = cpu_to_le32(o_blocks_count + add);
1098 err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); 1095 err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
1099 mutex_unlock(&EXT3_SB(sb)->s_resize_lock); 1096 mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
1100 if (err) { 1097 if (err) {
1101 ext3_warning(sb, __func__, 1098 ext3_warning(sb, __func__,
1102 "error %d on journal dirty metadata", err); 1099 "error %d on journal dirty metadata", err);
1103 ext3_journal_stop(handle); 1100 ext3_journal_stop(handle);
1104 goto exit_put; 1101 goto exit_put;
1105 } 1102 }
1106 ext3_debug("freeing blocks "E3FSBLK" through "E3FSBLK"\n", 1103 ext3_debug("freeing blocks "E3FSBLK" through "E3FSBLK"\n",
1107 o_blocks_count, o_blocks_count + add); 1104 o_blocks_count, o_blocks_count + add);
1108 ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks); 1105 ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks);
1109 ext3_debug("freed blocks "E3FSBLK" through "E3FSBLK"\n", 1106 ext3_debug("freed blocks "E3FSBLK" through "E3FSBLK"\n",
1110 o_blocks_count, o_blocks_count + add); 1107 o_blocks_count, o_blocks_count + add);
1111 if ((err = ext3_journal_stop(handle))) 1108 if ((err = ext3_journal_stop(handle)))
1112 goto exit_put; 1109 goto exit_put;
1113 if (test_opt(sb, DEBUG)) 1110 if (test_opt(sb, DEBUG))
1114 printk(KERN_DEBUG "EXT3-fs: extended group to %u blocks\n", 1111 printk(KERN_DEBUG "EXT3-fs: extended group to %u blocks\n",
1115 le32_to_cpu(es->s_blocks_count)); 1112 le32_to_cpu(es->s_blocks_count));
1116 update_backups(sb, EXT3_SB(sb)->s_sbh->b_blocknr, (char *)es, 1113 update_backups(sb, EXT3_SB(sb)->s_sbh->b_blocknr, (char *)es,
1117 sizeof(struct ext3_super_block)); 1114 sizeof(struct ext3_super_block));
1118 exit_put: 1115 exit_put:
1119 return err; 1116 return err;
1120 } /* ext3_group_extend */ 1117 } /* ext3_group_extend */
1121 1118
1 /* 1 /*
2 * linux/fs/ext3/super.c 2 * linux/fs/ext3/super.c
3 * 3 *
4 * Copyright (C) 1992, 1993, 1994, 1995 4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr) 5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal 6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI) 7 * Universite Pierre et Marie Curie (Paris VI)
8 * 8 *
9 * from 9 * from
10 * 10 *
11 * linux/fs/minix/inode.c 11 * linux/fs/minix/inode.c
12 * 12 *
13 * Copyright (C) 1991, 1992 Linus Torvalds 13 * Copyright (C) 1991, 1992 Linus Torvalds
14 * 14 *
15 * Big-endian to little-endian byte-swapping/bitmaps by 15 * Big-endian to little-endian byte-swapping/bitmaps by
16 * David S. Miller (davem@caip.rutgers.edu), 1995 16 * David S. Miller (davem@caip.rutgers.edu), 1995
17 */ 17 */
18 18
19 #include <linux/module.h> 19 #include <linux/module.h>
20 #include <linux/string.h>
21 #include <linux/fs.h>
22 #include <linux/time.h>
23 #include <linux/jbd.h>
24 #include <linux/ext3_fs.h>
25 #include <linux/ext3_jbd.h>
26 #include <linux/slab.h>
27 #include <linux/init.h>
28 #include <linux/blkdev.h> 20 #include <linux/blkdev.h>
29 #include <linux/parser.h> 21 #include <linux/parser.h>
30 #include <linux/buffer_head.h>
31 #include <linux/exportfs.h> 22 #include <linux/exportfs.h>
32 #include <linux/vfs.h> 23 #include <linux/statfs.h>
33 #include <linux/random.h> 24 #include <linux/random.h>
34 #include <linux/mount.h> 25 #include <linux/mount.h>
35 #include <linux/namei.h>
36 #include <linux/quotaops.h> 26 #include <linux/quotaops.h>
37 #include <linux/seq_file.h> 27 #include <linux/seq_file.h>
38 #include <linux/log2.h> 28 #include <linux/log2.h>
39 #include <linux/cleancache.h> 29 #include <linux/cleancache.h>
40 30
41 #include <asm/uaccess.h> 31 #include <asm/uaccess.h>
42 32
33 #define CREATE_TRACE_POINTS
34
35 #include "ext3.h"
43 #include "xattr.h" 36 #include "xattr.h"
44 #include "acl.h" 37 #include "acl.h"
45 #include "namei.h" 38 #include "namei.h"
46
47 #define CREATE_TRACE_POINTS
48 #include <trace/events/ext3.h>
49 39
50 #ifdef CONFIG_EXT3_DEFAULTS_TO_ORDERED 40 #ifdef CONFIG_EXT3_DEFAULTS_TO_ORDERED
51 #define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_ORDERED_DATA 41 #define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_ORDERED_DATA
52 #else 42 #else
53 #define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_WRITEBACK_DATA 43 #define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_WRITEBACK_DATA
54 #endif 44 #endif
55 45
56 static int ext3_load_journal(struct super_block *, struct ext3_super_block *, 46 static int ext3_load_journal(struct super_block *, struct ext3_super_block *,
57 unsigned long journal_devnum); 47 unsigned long journal_devnum);
58 static int ext3_create_journal(struct super_block *, struct ext3_super_block *, 48 static int ext3_create_journal(struct super_block *, struct ext3_super_block *,
59 unsigned int); 49 unsigned int);
60 static int ext3_commit_super(struct super_block *sb, 50 static int ext3_commit_super(struct super_block *sb,
61 struct ext3_super_block *es, 51 struct ext3_super_block *es,
62 int sync); 52 int sync);
63 static void ext3_mark_recovery_complete(struct super_block * sb, 53 static void ext3_mark_recovery_complete(struct super_block * sb,
64 struct ext3_super_block * es); 54 struct ext3_super_block * es);
65 static void ext3_clear_journal_err(struct super_block * sb, 55 static void ext3_clear_journal_err(struct super_block * sb,
66 struct ext3_super_block * es); 56 struct ext3_super_block * es);
67 static int ext3_sync_fs(struct super_block *sb, int wait); 57 static int ext3_sync_fs(struct super_block *sb, int wait);
68 static const char *ext3_decode_error(struct super_block * sb, int errno, 58 static const char *ext3_decode_error(struct super_block * sb, int errno,
69 char nbuf[16]); 59 char nbuf[16]);
70 static int ext3_remount (struct super_block * sb, int * flags, char * data); 60 static int ext3_remount (struct super_block * sb, int * flags, char * data);
71 static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf); 61 static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf);
72 static int ext3_unfreeze(struct super_block *sb); 62 static int ext3_unfreeze(struct super_block *sb);
73 static int ext3_freeze(struct super_block *sb); 63 static int ext3_freeze(struct super_block *sb);
74 64
75 /* 65 /*
76 * Wrappers for journal_start/end. 66 * Wrappers for journal_start/end.
77 * 67 *
78 * The only special thing we need to do here is to make sure that all 68 * The only special thing we need to do here is to make sure that all
79 * journal_end calls result in the superblock being marked dirty, so 69 * journal_end calls result in the superblock being marked dirty, so
80 * that sync() will call the filesystem's write_super callback if 70 * that sync() will call the filesystem's write_super callback if
81 * appropriate. 71 * appropriate.
82 */ 72 */
83 handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks) 73 handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks)
84 { 74 {
85 journal_t *journal; 75 journal_t *journal;
86 76
87 if (sb->s_flags & MS_RDONLY) 77 if (sb->s_flags & MS_RDONLY)
88 return ERR_PTR(-EROFS); 78 return ERR_PTR(-EROFS);
89 79
90 /* Special case here: if the journal has aborted behind our 80 /* Special case here: if the journal has aborted behind our
91 * backs (eg. EIO in the commit thread), then we still need to 81 * backs (eg. EIO in the commit thread), then we still need to
92 * take the FS itself readonly cleanly. */ 82 * take the FS itself readonly cleanly. */
93 journal = EXT3_SB(sb)->s_journal; 83 journal = EXT3_SB(sb)->s_journal;
94 if (is_journal_aborted(journal)) { 84 if (is_journal_aborted(journal)) {
95 ext3_abort(sb, __func__, 85 ext3_abort(sb, __func__,
96 "Detected aborted journal"); 86 "Detected aborted journal");
97 return ERR_PTR(-EROFS); 87 return ERR_PTR(-EROFS);
98 } 88 }
99 89
100 return journal_start(journal, nblocks); 90 return journal_start(journal, nblocks);
101 } 91 }
102 92
103 /* 93 /*
104 * The only special thing we need to do here is to make sure that all 94 * The only special thing we need to do here is to make sure that all
105 * journal_stop calls result in the superblock being marked dirty, so 95 * journal_stop calls result in the superblock being marked dirty, so
106 * that sync() will call the filesystem's write_super callback if 96 * that sync() will call the filesystem's write_super callback if
107 * appropriate. 97 * appropriate.
108 */ 98 */
109 int __ext3_journal_stop(const char *where, handle_t *handle) 99 int __ext3_journal_stop(const char *where, handle_t *handle)
110 { 100 {
111 struct super_block *sb; 101 struct super_block *sb;
112 int err; 102 int err;
113 int rc; 103 int rc;
114 104
115 sb = handle->h_transaction->t_journal->j_private; 105 sb = handle->h_transaction->t_journal->j_private;
116 err = handle->h_err; 106 err = handle->h_err;
117 rc = journal_stop(handle); 107 rc = journal_stop(handle);
118 108
119 if (!err) 109 if (!err)
120 err = rc; 110 err = rc;
121 if (err) 111 if (err)
122 __ext3_std_error(sb, where, err); 112 __ext3_std_error(sb, where, err);
123 return err; 113 return err;
124 } 114 }
125 115
126 void ext3_journal_abort_handle(const char *caller, const char *err_fn, 116 void ext3_journal_abort_handle(const char *caller, const char *err_fn,
127 struct buffer_head *bh, handle_t *handle, int err) 117 struct buffer_head *bh, handle_t *handle, int err)
128 { 118 {
129 char nbuf[16]; 119 char nbuf[16];
130 const char *errstr = ext3_decode_error(NULL, err, nbuf); 120 const char *errstr = ext3_decode_error(NULL, err, nbuf);
131 121
132 if (bh) 122 if (bh)
133 BUFFER_TRACE(bh, "abort"); 123 BUFFER_TRACE(bh, "abort");
134 124
135 if (!handle->h_err) 125 if (!handle->h_err)
136 handle->h_err = err; 126 handle->h_err = err;
137 127
138 if (is_handle_aborted(handle)) 128 if (is_handle_aborted(handle))
139 return; 129 return;
140 130
141 printk(KERN_ERR "EXT3-fs: %s: aborting transaction: %s in %s\n", 131 printk(KERN_ERR "EXT3-fs: %s: aborting transaction: %s in %s\n",
142 caller, errstr, err_fn); 132 caller, errstr, err_fn);
143 133
144 journal_abort_handle(handle); 134 journal_abort_handle(handle);
145 } 135 }
146 136
147 void ext3_msg(struct super_block *sb, const char *prefix, 137 void ext3_msg(struct super_block *sb, const char *prefix,
148 const char *fmt, ...) 138 const char *fmt, ...)
149 { 139 {
150 struct va_format vaf; 140 struct va_format vaf;
151 va_list args; 141 va_list args;
152 142
153 va_start(args, fmt); 143 va_start(args, fmt);
154 144
155 vaf.fmt = fmt; 145 vaf.fmt = fmt;
156 vaf.va = &args; 146 vaf.va = &args;
157 147
158 printk("%sEXT3-fs (%s): %pV\n", prefix, sb->s_id, &vaf); 148 printk("%sEXT3-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
159 149
160 va_end(args); 150 va_end(args);
161 } 151 }
162 152
163 /* Deal with the reporting of failure conditions on a filesystem such as 153 /* Deal with the reporting of failure conditions on a filesystem such as
164 * inconsistencies detected or read IO failures. 154 * inconsistencies detected or read IO failures.
165 * 155 *
166 * On ext2, we can store the error state of the filesystem in the 156 * On ext2, we can store the error state of the filesystem in the
167 * superblock. That is not possible on ext3, because we may have other 157 * superblock. That is not possible on ext3, because we may have other
168 * write ordering constraints on the superblock which prevent us from 158 * write ordering constraints on the superblock which prevent us from
169 * writing it out straight away; and given that the journal is about to 159 * writing it out straight away; and given that the journal is about to
170 * be aborted, we can't rely on the current, or future, transactions to 160 * be aborted, we can't rely on the current, or future, transactions to
171 * write out the superblock safely. 161 * write out the superblock safely.
172 * 162 *
173 * We'll just use the journal_abort() error code to record an error in 163 * We'll just use the journal_abort() error code to record an error in
174 * the journal instead. On recovery, the journal will complain about 164 * the journal instead. On recovery, the journal will complain about
175 * that error until we've noted it down and cleared it. 165 * that error until we've noted it down and cleared it.
176 */ 166 */
177 167
178 static void ext3_handle_error(struct super_block *sb) 168 static void ext3_handle_error(struct super_block *sb)
179 { 169 {
180 struct ext3_super_block *es = EXT3_SB(sb)->s_es; 170 struct ext3_super_block *es = EXT3_SB(sb)->s_es;
181 171
182 EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; 172 EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
183 es->s_state |= cpu_to_le16(EXT3_ERROR_FS); 173 es->s_state |= cpu_to_le16(EXT3_ERROR_FS);
184 174
185 if (sb->s_flags & MS_RDONLY) 175 if (sb->s_flags & MS_RDONLY)
186 return; 176 return;
187 177
188 if (!test_opt (sb, ERRORS_CONT)) { 178 if (!test_opt (sb, ERRORS_CONT)) {
189 journal_t *journal = EXT3_SB(sb)->s_journal; 179 journal_t *journal = EXT3_SB(sb)->s_journal;
190 180
191 set_opt(EXT3_SB(sb)->s_mount_opt, ABORT); 181 set_opt(EXT3_SB(sb)->s_mount_opt, ABORT);
192 if (journal) 182 if (journal)
193 journal_abort(journal, -EIO); 183 journal_abort(journal, -EIO);
194 } 184 }
195 if (test_opt (sb, ERRORS_RO)) { 185 if (test_opt (sb, ERRORS_RO)) {
196 ext3_msg(sb, KERN_CRIT, 186 ext3_msg(sb, KERN_CRIT,
197 "error: remounting filesystem read-only"); 187 "error: remounting filesystem read-only");
198 sb->s_flags |= MS_RDONLY; 188 sb->s_flags |= MS_RDONLY;
199 } 189 }
200 ext3_commit_super(sb, es, 1); 190 ext3_commit_super(sb, es, 1);
201 if (test_opt(sb, ERRORS_PANIC)) 191 if (test_opt(sb, ERRORS_PANIC))
202 panic("EXT3-fs (%s): panic forced after error\n", 192 panic("EXT3-fs (%s): panic forced after error\n",
203 sb->s_id); 193 sb->s_id);
204 } 194 }
205 195
206 void ext3_error(struct super_block *sb, const char *function, 196 void ext3_error(struct super_block *sb, const char *function,
207 const char *fmt, ...) 197 const char *fmt, ...)
208 { 198 {
209 struct va_format vaf; 199 struct va_format vaf;
210 va_list args; 200 va_list args;
211 201
212 va_start(args, fmt); 202 va_start(args, fmt);
213 203
214 vaf.fmt = fmt; 204 vaf.fmt = fmt;
215 vaf.va = &args; 205 vaf.va = &args;
216 206
217 printk(KERN_CRIT "EXT3-fs error (device %s): %s: %pV\n", 207 printk(KERN_CRIT "EXT3-fs error (device %s): %s: %pV\n",
218 sb->s_id, function, &vaf); 208 sb->s_id, function, &vaf);
219 209
220 va_end(args); 210 va_end(args);
221 211
222 ext3_handle_error(sb); 212 ext3_handle_error(sb);
223 } 213 }
224 214
225 static const char *ext3_decode_error(struct super_block * sb, int errno, 215 static const char *ext3_decode_error(struct super_block * sb, int errno,
226 char nbuf[16]) 216 char nbuf[16])
227 { 217 {
228 char *errstr = NULL; 218 char *errstr = NULL;
229 219
230 switch (errno) { 220 switch (errno) {
231 case -EIO: 221 case -EIO:
232 errstr = "IO failure"; 222 errstr = "IO failure";
233 break; 223 break;
234 case -ENOMEM: 224 case -ENOMEM:
235 errstr = "Out of memory"; 225 errstr = "Out of memory";
236 break; 226 break;
237 case -EROFS: 227 case -EROFS:
238 if (!sb || EXT3_SB(sb)->s_journal->j_flags & JFS_ABORT) 228 if (!sb || EXT3_SB(sb)->s_journal->j_flags & JFS_ABORT)
239 errstr = "Journal has aborted"; 229 errstr = "Journal has aborted";
240 else 230 else
241 errstr = "Readonly filesystem"; 231 errstr = "Readonly filesystem";
242 break; 232 break;
243 default: 233 default:
244 /* If the caller passed in an extra buffer for unknown 234 /* If the caller passed in an extra buffer for unknown
245 * errors, textualise them now. Else we just return 235 * errors, textualise them now. Else we just return
246 * NULL. */ 236 * NULL. */
247 if (nbuf) { 237 if (nbuf) {
248 /* Check for truncated error codes... */ 238 /* Check for truncated error codes... */
249 if (snprintf(nbuf, 16, "error %d", -errno) >= 0) 239 if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
250 errstr = nbuf; 240 errstr = nbuf;
251 } 241 }
252 break; 242 break;
253 } 243 }
254 244
255 return errstr; 245 return errstr;
256 } 246 }
257 247
258 /* __ext3_std_error decodes expected errors from journaling functions 248 /* __ext3_std_error decodes expected errors from journaling functions
259 * automatically and invokes the appropriate error response. */ 249 * automatically and invokes the appropriate error response. */
260 250
261 void __ext3_std_error (struct super_block * sb, const char * function, 251 void __ext3_std_error (struct super_block * sb, const char * function,
262 int errno) 252 int errno)
263 { 253 {
264 char nbuf[16]; 254 char nbuf[16];
265 const char *errstr; 255 const char *errstr;
266 256
267 /* Special case: if the error is EROFS, and we're not already 257 /* Special case: if the error is EROFS, and we're not already
268 * inside a transaction, then there's really no point in logging 258 * inside a transaction, then there's really no point in logging
269 * an error. */ 259 * an error. */
270 if (errno == -EROFS && journal_current_handle() == NULL && 260 if (errno == -EROFS && journal_current_handle() == NULL &&
271 (sb->s_flags & MS_RDONLY)) 261 (sb->s_flags & MS_RDONLY))
272 return; 262 return;
273 263
274 errstr = ext3_decode_error(sb, errno, nbuf); 264 errstr = ext3_decode_error(sb, errno, nbuf);
275 ext3_msg(sb, KERN_CRIT, "error in %s: %s", function, errstr); 265 ext3_msg(sb, KERN_CRIT, "error in %s: %s", function, errstr);
276 266
277 ext3_handle_error(sb); 267 ext3_handle_error(sb);
278 } 268 }
279 269
280 /* 270 /*
281 * ext3_abort is a much stronger failure handler than ext3_error. The 271 * ext3_abort is a much stronger failure handler than ext3_error. The
282 * abort function may be used to deal with unrecoverable failures such 272 * abort function may be used to deal with unrecoverable failures such
283 * as journal IO errors or ENOMEM at a critical moment in log management. 273 * as journal IO errors or ENOMEM at a critical moment in log management.
284 * 274 *
285 * We unconditionally force the filesystem into an ABORT|READONLY state, 275 * We unconditionally force the filesystem into an ABORT|READONLY state,
286 * unless the error response on the fs has been set to panic in which 276 * unless the error response on the fs has been set to panic in which
287 * case we take the easy way out and panic immediately. 277 * case we take the easy way out and panic immediately.
288 */ 278 */
289 279
290 void ext3_abort(struct super_block *sb, const char *function, 280 void ext3_abort(struct super_block *sb, const char *function,
291 const char *fmt, ...) 281 const char *fmt, ...)
292 { 282 {
293 struct va_format vaf; 283 struct va_format vaf;
294 va_list args; 284 va_list args;
295 285
296 va_start(args, fmt); 286 va_start(args, fmt);
297 287
298 vaf.fmt = fmt; 288 vaf.fmt = fmt;
299 vaf.va = &args; 289 vaf.va = &args;
300 290
301 printk(KERN_CRIT "EXT3-fs (%s): error: %s: %pV\n", 291 printk(KERN_CRIT "EXT3-fs (%s): error: %s: %pV\n",
302 sb->s_id, function, &vaf); 292 sb->s_id, function, &vaf);
303 293
304 va_end(args); 294 va_end(args);
305 295
306 if (test_opt(sb, ERRORS_PANIC)) 296 if (test_opt(sb, ERRORS_PANIC))
307 panic("EXT3-fs: panic from previous error\n"); 297 panic("EXT3-fs: panic from previous error\n");
308 298
309 if (sb->s_flags & MS_RDONLY) 299 if (sb->s_flags & MS_RDONLY)
310 return; 300 return;
311 301
312 ext3_msg(sb, KERN_CRIT, 302 ext3_msg(sb, KERN_CRIT,
313 "error: remounting filesystem read-only"); 303 "error: remounting filesystem read-only");
314 EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; 304 EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
315 sb->s_flags |= MS_RDONLY; 305 sb->s_flags |= MS_RDONLY;
316 set_opt(EXT3_SB(sb)->s_mount_opt, ABORT); 306 set_opt(EXT3_SB(sb)->s_mount_opt, ABORT);
317 if (EXT3_SB(sb)->s_journal) 307 if (EXT3_SB(sb)->s_journal)
318 journal_abort(EXT3_SB(sb)->s_journal, -EIO); 308 journal_abort(EXT3_SB(sb)->s_journal, -EIO);
319 } 309 }
320 310
321 void ext3_warning(struct super_block *sb, const char *function, 311 void ext3_warning(struct super_block *sb, const char *function,
322 const char *fmt, ...) 312 const char *fmt, ...)
323 { 313 {
324 struct va_format vaf; 314 struct va_format vaf;
325 va_list args; 315 va_list args;
326 316
327 va_start(args, fmt); 317 va_start(args, fmt);
328 318
329 vaf.fmt = fmt; 319 vaf.fmt = fmt;
330 vaf.va = &args; 320 vaf.va = &args;
331 321
332 printk(KERN_WARNING "EXT3-fs (%s): warning: %s: %pV\n", 322 printk(KERN_WARNING "EXT3-fs (%s): warning: %s: %pV\n",
333 sb->s_id, function, &vaf); 323 sb->s_id, function, &vaf);
334 324
335 va_end(args); 325 va_end(args);
336 } 326 }
337 327
338 void ext3_update_dynamic_rev(struct super_block *sb) 328 void ext3_update_dynamic_rev(struct super_block *sb)
339 { 329 {
340 struct ext3_super_block *es = EXT3_SB(sb)->s_es; 330 struct ext3_super_block *es = EXT3_SB(sb)->s_es;
341 331
342 if (le32_to_cpu(es->s_rev_level) > EXT3_GOOD_OLD_REV) 332 if (le32_to_cpu(es->s_rev_level) > EXT3_GOOD_OLD_REV)
343 return; 333 return;
344 334
345 ext3_msg(sb, KERN_WARNING, 335 ext3_msg(sb, KERN_WARNING,
346 "warning: updating to rev %d because of " 336 "warning: updating to rev %d because of "
347 "new feature flag, running e2fsck is recommended", 337 "new feature flag, running e2fsck is recommended",
348 EXT3_DYNAMIC_REV); 338 EXT3_DYNAMIC_REV);
349 339
350 es->s_first_ino = cpu_to_le32(EXT3_GOOD_OLD_FIRST_INO); 340 es->s_first_ino = cpu_to_le32(EXT3_GOOD_OLD_FIRST_INO);
351 es->s_inode_size = cpu_to_le16(EXT3_GOOD_OLD_INODE_SIZE); 341 es->s_inode_size = cpu_to_le16(EXT3_GOOD_OLD_INODE_SIZE);
352 es->s_rev_level = cpu_to_le32(EXT3_DYNAMIC_REV); 342 es->s_rev_level = cpu_to_le32(EXT3_DYNAMIC_REV);
353 /* leave es->s_feature_*compat flags alone */ 343 /* leave es->s_feature_*compat flags alone */
354 /* es->s_uuid will be set by e2fsck if empty */ 344 /* es->s_uuid will be set by e2fsck if empty */
355 345
356 /* 346 /*
357 * The rest of the superblock fields should be zero, and if not it 347 * The rest of the superblock fields should be zero, and if not it
358 * means they are likely already in use, so leave them alone. We 348 * means they are likely already in use, so leave them alone. We
359 * can leave it up to e2fsck to clean up any inconsistencies there. 349 * can leave it up to e2fsck to clean up any inconsistencies there.
360 */ 350 */
361 } 351 }
362 352
363 /* 353 /*
364 * Open the external journal device 354 * Open the external journal device
365 */ 355 */
366 static struct block_device *ext3_blkdev_get(dev_t dev, struct super_block *sb) 356 static struct block_device *ext3_blkdev_get(dev_t dev, struct super_block *sb)
367 { 357 {
368 struct block_device *bdev; 358 struct block_device *bdev;
369 char b[BDEVNAME_SIZE]; 359 char b[BDEVNAME_SIZE];
370 360
371 bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb); 361 bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb);
372 if (IS_ERR(bdev)) 362 if (IS_ERR(bdev))
373 goto fail; 363 goto fail;
374 return bdev; 364 return bdev;
375 365
376 fail: 366 fail:
377 ext3_msg(sb, "error: failed to open journal device %s: %ld", 367 ext3_msg(sb, "error: failed to open journal device %s: %ld",
378 __bdevname(dev, b), PTR_ERR(bdev)); 368 __bdevname(dev, b), PTR_ERR(bdev));
379 369
380 return NULL; 370 return NULL;
381 } 371 }
382 372
383 /* 373 /*
384 * Release the journal device 374 * Release the journal device
385 */ 375 */
386 static int ext3_blkdev_put(struct block_device *bdev) 376 static int ext3_blkdev_put(struct block_device *bdev)
387 { 377 {
388 return blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); 378 return blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
389 } 379 }
390 380
391 static int ext3_blkdev_remove(struct ext3_sb_info *sbi) 381 static int ext3_blkdev_remove(struct ext3_sb_info *sbi)
392 { 382 {
393 struct block_device *bdev; 383 struct block_device *bdev;
394 int ret = -ENODEV; 384 int ret = -ENODEV;
395 385
396 bdev = sbi->journal_bdev; 386 bdev = sbi->journal_bdev;
397 if (bdev) { 387 if (bdev) {
398 ret = ext3_blkdev_put(bdev); 388 ret = ext3_blkdev_put(bdev);
399 sbi->journal_bdev = NULL; 389 sbi->journal_bdev = NULL;
400 } 390 }
401 return ret; 391 return ret;
402 } 392 }
403 393
404 static inline struct inode *orphan_list_entry(struct list_head *l) 394 static inline struct inode *orphan_list_entry(struct list_head *l)
405 { 395 {
406 return &list_entry(l, struct ext3_inode_info, i_orphan)->vfs_inode; 396 return &list_entry(l, struct ext3_inode_info, i_orphan)->vfs_inode;
407 } 397 }
408 398
409 static void dump_orphan_list(struct super_block *sb, struct ext3_sb_info *sbi) 399 static void dump_orphan_list(struct super_block *sb, struct ext3_sb_info *sbi)
410 { 400 {
411 struct list_head *l; 401 struct list_head *l;
412 402
413 ext3_msg(sb, KERN_ERR, "error: sb orphan head is %d", 403 ext3_msg(sb, KERN_ERR, "error: sb orphan head is %d",
414 le32_to_cpu(sbi->s_es->s_last_orphan)); 404 le32_to_cpu(sbi->s_es->s_last_orphan));
415 405
416 ext3_msg(sb, KERN_ERR, "sb_info orphan list:"); 406 ext3_msg(sb, KERN_ERR, "sb_info orphan list:");
417 list_for_each(l, &sbi->s_orphan) { 407 list_for_each(l, &sbi->s_orphan) {
418 struct inode *inode = orphan_list_entry(l); 408 struct inode *inode = orphan_list_entry(l);
419 ext3_msg(sb, KERN_ERR, " " 409 ext3_msg(sb, KERN_ERR, " "
420 "inode %s:%lu at %p: mode %o, nlink %d, next %d\n", 410 "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
421 inode->i_sb->s_id, inode->i_ino, inode, 411 inode->i_sb->s_id, inode->i_ino, inode,
422 inode->i_mode, inode->i_nlink, 412 inode->i_mode, inode->i_nlink,
423 NEXT_ORPHAN(inode)); 413 NEXT_ORPHAN(inode));
424 } 414 }
425 } 415 }
426 416
427 static void ext3_put_super (struct super_block * sb) 417 static void ext3_put_super (struct super_block * sb)
428 { 418 {
429 struct ext3_sb_info *sbi = EXT3_SB(sb); 419 struct ext3_sb_info *sbi = EXT3_SB(sb);
430 struct ext3_super_block *es = sbi->s_es; 420 struct ext3_super_block *es = sbi->s_es;
431 int i, err; 421 int i, err;
432 422
433 dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); 423 dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
434 ext3_xattr_put_super(sb); 424 ext3_xattr_put_super(sb);
435 err = journal_destroy(sbi->s_journal); 425 err = journal_destroy(sbi->s_journal);
436 sbi->s_journal = NULL; 426 sbi->s_journal = NULL;
437 if (err < 0) 427 if (err < 0)
438 ext3_abort(sb, __func__, "Couldn't clean up the journal"); 428 ext3_abort(sb, __func__, "Couldn't clean up the journal");
439 429
440 if (!(sb->s_flags & MS_RDONLY)) { 430 if (!(sb->s_flags & MS_RDONLY)) {
441 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); 431 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
442 es->s_state = cpu_to_le16(sbi->s_mount_state); 432 es->s_state = cpu_to_le16(sbi->s_mount_state);
443 BUFFER_TRACE(sbi->s_sbh, "marking dirty"); 433 BUFFER_TRACE(sbi->s_sbh, "marking dirty");
444 mark_buffer_dirty(sbi->s_sbh); 434 mark_buffer_dirty(sbi->s_sbh);
445 ext3_commit_super(sb, es, 1); 435 ext3_commit_super(sb, es, 1);
446 } 436 }
447 437
448 for (i = 0; i < sbi->s_gdb_count; i++) 438 for (i = 0; i < sbi->s_gdb_count; i++)
449 brelse(sbi->s_group_desc[i]); 439 brelse(sbi->s_group_desc[i]);
450 kfree(sbi->s_group_desc); 440 kfree(sbi->s_group_desc);
451 percpu_counter_destroy(&sbi->s_freeblocks_counter); 441 percpu_counter_destroy(&sbi->s_freeblocks_counter);
452 percpu_counter_destroy(&sbi->s_freeinodes_counter); 442 percpu_counter_destroy(&sbi->s_freeinodes_counter);
453 percpu_counter_destroy(&sbi->s_dirs_counter); 443 percpu_counter_destroy(&sbi->s_dirs_counter);
454 brelse(sbi->s_sbh); 444 brelse(sbi->s_sbh);
455 #ifdef CONFIG_QUOTA 445 #ifdef CONFIG_QUOTA
456 for (i = 0; i < MAXQUOTAS; i++) 446 for (i = 0; i < MAXQUOTAS; i++)
457 kfree(sbi->s_qf_names[i]); 447 kfree(sbi->s_qf_names[i]);
458 #endif 448 #endif
459 449
460 /* Debugging code just in case the in-memory inode orphan list 450 /* Debugging code just in case the in-memory inode orphan list
461 * isn't empty. The on-disk one can be non-empty if we've 451 * isn't empty. The on-disk one can be non-empty if we've
462 * detected an error and taken the fs readonly, but the 452 * detected an error and taken the fs readonly, but the
463 * in-memory list had better be clean by this point. */ 453 * in-memory list had better be clean by this point. */
464 if (!list_empty(&sbi->s_orphan)) 454 if (!list_empty(&sbi->s_orphan))
465 dump_orphan_list(sb, sbi); 455 dump_orphan_list(sb, sbi);
466 J_ASSERT(list_empty(&sbi->s_orphan)); 456 J_ASSERT(list_empty(&sbi->s_orphan));
467 457
468 invalidate_bdev(sb->s_bdev); 458 invalidate_bdev(sb->s_bdev);
469 if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) { 459 if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
470 /* 460 /*
471 * Invalidate the journal device's buffers. We don't want them 461 * Invalidate the journal device's buffers. We don't want them
472 * floating about in memory - the physical journal device may 462 * floating about in memory - the physical journal device may
473 * hotswapped, and it breaks the `ro-after' testing code. 463 * hotswapped, and it breaks the `ro-after' testing code.
474 */ 464 */
475 sync_blockdev(sbi->journal_bdev); 465 sync_blockdev(sbi->journal_bdev);
476 invalidate_bdev(sbi->journal_bdev); 466 invalidate_bdev(sbi->journal_bdev);
477 ext3_blkdev_remove(sbi); 467 ext3_blkdev_remove(sbi);
478 } 468 }
479 sb->s_fs_info = NULL; 469 sb->s_fs_info = NULL;
480 kfree(sbi->s_blockgroup_lock); 470 kfree(sbi->s_blockgroup_lock);
481 kfree(sbi); 471 kfree(sbi);
482 } 472 }
483 473
484 static struct kmem_cache *ext3_inode_cachep; 474 static struct kmem_cache *ext3_inode_cachep;
485 475
486 /* 476 /*
487 * Called inside transaction, so use GFP_NOFS 477 * Called inside transaction, so use GFP_NOFS
488 */ 478 */
489 static struct inode *ext3_alloc_inode(struct super_block *sb) 479 static struct inode *ext3_alloc_inode(struct super_block *sb)
490 { 480 {
491 struct ext3_inode_info *ei; 481 struct ext3_inode_info *ei;
492 482
493 ei = kmem_cache_alloc(ext3_inode_cachep, GFP_NOFS); 483 ei = kmem_cache_alloc(ext3_inode_cachep, GFP_NOFS);
494 if (!ei) 484 if (!ei)
495 return NULL; 485 return NULL;
496 ei->i_block_alloc_info = NULL; 486 ei->i_block_alloc_info = NULL;
497 ei->vfs_inode.i_version = 1; 487 ei->vfs_inode.i_version = 1;
498 atomic_set(&ei->i_datasync_tid, 0); 488 atomic_set(&ei->i_datasync_tid, 0);
499 atomic_set(&ei->i_sync_tid, 0); 489 atomic_set(&ei->i_sync_tid, 0);
500 return &ei->vfs_inode; 490 return &ei->vfs_inode;
501 } 491 }
502 492
/* Trace wrapper around generic_drop_inode(). */
static int ext3_drop_inode(struct inode *inode)
{
	int drop = generic_drop_inode(inode);

	trace_ext3_drop_inode(inode, drop);
	return drop;
}
510 500
511 static void ext3_i_callback(struct rcu_head *head) 501 static void ext3_i_callback(struct rcu_head *head)
512 { 502 {
513 struct inode *inode = container_of(head, struct inode, i_rcu); 503 struct inode *inode = container_of(head, struct inode, i_rcu);
514 kmem_cache_free(ext3_inode_cachep, EXT3_I(inode)); 504 kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
515 } 505 }
516 506
517 static void ext3_destroy_inode(struct inode *inode) 507 static void ext3_destroy_inode(struct inode *inode)
518 { 508 {
519 if (!list_empty(&(EXT3_I(inode)->i_orphan))) { 509 if (!list_empty(&(EXT3_I(inode)->i_orphan))) {
520 printk("EXT3 Inode %p: orphan list check failed!\n", 510 printk("EXT3 Inode %p: orphan list check failed!\n",
521 EXT3_I(inode)); 511 EXT3_I(inode));
522 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4, 512 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
523 EXT3_I(inode), sizeof(struct ext3_inode_info), 513 EXT3_I(inode), sizeof(struct ext3_inode_info),
524 false); 514 false);
525 dump_stack(); 515 dump_stack();
526 } 516 }
527 call_rcu(&inode->i_rcu, ext3_i_callback); 517 call_rcu(&inode->i_rcu, ext3_i_callback);
528 } 518 }
529 519
530 static void init_once(void *foo) 520 static void init_once(void *foo)
531 { 521 {
532 struct ext3_inode_info *ei = (struct ext3_inode_info *) foo; 522 struct ext3_inode_info *ei = (struct ext3_inode_info *) foo;
533 523
534 INIT_LIST_HEAD(&ei->i_orphan); 524 INIT_LIST_HEAD(&ei->i_orphan);
535 #ifdef CONFIG_EXT3_FS_XATTR 525 #ifdef CONFIG_EXT3_FS_XATTR
536 init_rwsem(&ei->xattr_sem); 526 init_rwsem(&ei->xattr_sem);
537 #endif 527 #endif
538 mutex_init(&ei->truncate_mutex); 528 mutex_init(&ei->truncate_mutex);
539 inode_init_once(&ei->vfs_inode); 529 inode_init_once(&ei->vfs_inode);
540 } 530 }
541 531
542 static int init_inodecache(void) 532 static int init_inodecache(void)
543 { 533 {
544 ext3_inode_cachep = kmem_cache_create("ext3_inode_cache", 534 ext3_inode_cachep = kmem_cache_create("ext3_inode_cache",
545 sizeof(struct ext3_inode_info), 535 sizeof(struct ext3_inode_info),
546 0, (SLAB_RECLAIM_ACCOUNT| 536 0, (SLAB_RECLAIM_ACCOUNT|
547 SLAB_MEM_SPREAD), 537 SLAB_MEM_SPREAD),
548 init_once); 538 init_once);
549 if (ext3_inode_cachep == NULL) 539 if (ext3_inode_cachep == NULL)
550 return -ENOMEM; 540 return -ENOMEM;
551 return 0; 541 return 0;
552 } 542 }
553 543
554 static void destroy_inodecache(void) 544 static void destroy_inodecache(void)
555 { 545 {
556 kmem_cache_destroy(ext3_inode_cachep); 546 kmem_cache_destroy(ext3_inode_cachep);
557 } 547 }
558 548
/* Emit quota-related mount options into /proc/mounts output. */
static inline void ext3_show_quota_options(struct seq_file *seq, struct super_block *sb)
{
#if defined(CONFIG_QUOTA)
	struct ext3_sb_info *sbi = EXT3_SB(sb);

	if (sbi->s_jquota_fmt) {
		char *fmtname = "";

		switch (sbi->s_jquota_fmt) {
		case QFMT_VFS_OLD:
			fmtname = "vfsold";
			break;
		case QFMT_VFS_V0:
			fmtname = "vfsv0";
			break;
		case QFMT_VFS_V1:
			fmtname = "vfsv1";
			break;
		}
		seq_printf(seq, ",jqfmt=%s", fmtname);
	}

	/* Journalled quota file names, when configured. */
	if (sbi->s_qf_names[USRQUOTA])
		seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);
	if (sbi->s_qf_names[GRPQUOTA])
		seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]);

	if (test_opt(sb, USRQUOTA))
		seq_puts(seq, ",usrquota");
	if (test_opt(sb, GRPQUOTA))
		seq_puts(seq, ",grpquota");
#endif
}
594 584
595 static char *data_mode_string(unsigned long mode) 585 static char *data_mode_string(unsigned long mode)
596 { 586 {
597 switch (mode) { 587 switch (mode) {
598 case EXT3_MOUNT_JOURNAL_DATA: 588 case EXT3_MOUNT_JOURNAL_DATA:
599 return "journal"; 589 return "journal";
600 case EXT3_MOUNT_ORDERED_DATA: 590 case EXT3_MOUNT_ORDERED_DATA:
601 return "ordered"; 591 return "ordered";
602 case EXT3_MOUNT_WRITEBACK_DATA: 592 case EXT3_MOUNT_WRITEBACK_DATA:
603 return "writeback"; 593 return "writeback";
604 } 594 }
605 return "unknown"; 595 return "unknown";
606 } 596 }
607 597
608 /* 598 /*
609 * Show an option if 599 * Show an option if
610 * - it's set to a non-default value OR 600 * - it's set to a non-default value OR
611 * - if the per-sb default is different from the global default 601 * - if the per-sb default is different from the global default
612 */ 602 */
613 static int ext3_show_options(struct seq_file *seq, struct dentry *root) 603 static int ext3_show_options(struct seq_file *seq, struct dentry *root)
614 { 604 {
615 struct super_block *sb = root->d_sb; 605 struct super_block *sb = root->d_sb;
616 struct ext3_sb_info *sbi = EXT3_SB(sb); 606 struct ext3_sb_info *sbi = EXT3_SB(sb);
617 struct ext3_super_block *es = sbi->s_es; 607 struct ext3_super_block *es = sbi->s_es;
618 unsigned long def_mount_opts; 608 unsigned long def_mount_opts;
619 609
620 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 610 def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
621 611
622 if (sbi->s_sb_block != 1) 612 if (sbi->s_sb_block != 1)
623 seq_printf(seq, ",sb=%lu", sbi->s_sb_block); 613 seq_printf(seq, ",sb=%lu", sbi->s_sb_block);
624 if (test_opt(sb, MINIX_DF)) 614 if (test_opt(sb, MINIX_DF))
625 seq_puts(seq, ",minixdf"); 615 seq_puts(seq, ",minixdf");
626 if (test_opt(sb, GRPID)) 616 if (test_opt(sb, GRPID))
627 seq_puts(seq, ",grpid"); 617 seq_puts(seq, ",grpid");
628 if (!test_opt(sb, GRPID) && (def_mount_opts & EXT3_DEFM_BSDGROUPS)) 618 if (!test_opt(sb, GRPID) && (def_mount_opts & EXT3_DEFM_BSDGROUPS))
629 seq_puts(seq, ",nogrpid"); 619 seq_puts(seq, ",nogrpid");
630 if (sbi->s_resuid != EXT3_DEF_RESUID || 620 if (sbi->s_resuid != EXT3_DEF_RESUID ||
631 le16_to_cpu(es->s_def_resuid) != EXT3_DEF_RESUID) { 621 le16_to_cpu(es->s_def_resuid) != EXT3_DEF_RESUID) {
632 seq_printf(seq, ",resuid=%u", sbi->s_resuid); 622 seq_printf(seq, ",resuid=%u", sbi->s_resuid);
633 } 623 }
634 if (sbi->s_resgid != EXT3_DEF_RESGID || 624 if (sbi->s_resgid != EXT3_DEF_RESGID ||
635 le16_to_cpu(es->s_def_resgid) != EXT3_DEF_RESGID) { 625 le16_to_cpu(es->s_def_resgid) != EXT3_DEF_RESGID) {
636 seq_printf(seq, ",resgid=%u", sbi->s_resgid); 626 seq_printf(seq, ",resgid=%u", sbi->s_resgid);
637 } 627 }
638 if (test_opt(sb, ERRORS_RO)) { 628 if (test_opt(sb, ERRORS_RO)) {
639 int def_errors = le16_to_cpu(es->s_errors); 629 int def_errors = le16_to_cpu(es->s_errors);
640 630
641 if (def_errors == EXT3_ERRORS_PANIC || 631 if (def_errors == EXT3_ERRORS_PANIC ||
642 def_errors == EXT3_ERRORS_CONTINUE) { 632 def_errors == EXT3_ERRORS_CONTINUE) {
643 seq_puts(seq, ",errors=remount-ro"); 633 seq_puts(seq, ",errors=remount-ro");
644 } 634 }
645 } 635 }
646 if (test_opt(sb, ERRORS_CONT)) 636 if (test_opt(sb, ERRORS_CONT))
647 seq_puts(seq, ",errors=continue"); 637 seq_puts(seq, ",errors=continue");
648 if (test_opt(sb, ERRORS_PANIC)) 638 if (test_opt(sb, ERRORS_PANIC))
649 seq_puts(seq, ",errors=panic"); 639 seq_puts(seq, ",errors=panic");
650 if (test_opt(sb, NO_UID32)) 640 if (test_opt(sb, NO_UID32))
651 seq_puts(seq, ",nouid32"); 641 seq_puts(seq, ",nouid32");
652 if (test_opt(sb, DEBUG)) 642 if (test_opt(sb, DEBUG))
653 seq_puts(seq, ",debug"); 643 seq_puts(seq, ",debug");
654 #ifdef CONFIG_EXT3_FS_XATTR 644 #ifdef CONFIG_EXT3_FS_XATTR
655 if (test_opt(sb, XATTR_USER)) 645 if (test_opt(sb, XATTR_USER))
656 seq_puts(seq, ",user_xattr"); 646 seq_puts(seq, ",user_xattr");
657 if (!test_opt(sb, XATTR_USER) && 647 if (!test_opt(sb, XATTR_USER) &&
658 (def_mount_opts & EXT3_DEFM_XATTR_USER)) { 648 (def_mount_opts & EXT3_DEFM_XATTR_USER)) {
659 seq_puts(seq, ",nouser_xattr"); 649 seq_puts(seq, ",nouser_xattr");
660 } 650 }
661 #endif 651 #endif
662 #ifdef CONFIG_EXT3_FS_POSIX_ACL 652 #ifdef CONFIG_EXT3_FS_POSIX_ACL
663 if (test_opt(sb, POSIX_ACL)) 653 if (test_opt(sb, POSIX_ACL))
664 seq_puts(seq, ",acl"); 654 seq_puts(seq, ",acl");
665 if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT3_DEFM_ACL)) 655 if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT3_DEFM_ACL))
666 seq_puts(seq, ",noacl"); 656 seq_puts(seq, ",noacl");
667 #endif 657 #endif
668 if (!test_opt(sb, RESERVATION)) 658 if (!test_opt(sb, RESERVATION))
669 seq_puts(seq, ",noreservation"); 659 seq_puts(seq, ",noreservation");
670 if (sbi->s_commit_interval) { 660 if (sbi->s_commit_interval) {
671 seq_printf(seq, ",commit=%u", 661 seq_printf(seq, ",commit=%u",
672 (unsigned) (sbi->s_commit_interval / HZ)); 662 (unsigned) (sbi->s_commit_interval / HZ));
673 } 663 }
674 664
675 /* 665 /*
676 * Always display barrier state so it's clear what the status is. 666 * Always display barrier state so it's clear what the status is.
677 */ 667 */
678 seq_puts(seq, ",barrier="); 668 seq_puts(seq, ",barrier=");
679 seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0"); 669 seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0");
680 seq_printf(seq, ",data=%s", data_mode_string(test_opt(sb, DATA_FLAGS))); 670 seq_printf(seq, ",data=%s", data_mode_string(test_opt(sb, DATA_FLAGS)));
681 if (test_opt(sb, DATA_ERR_ABORT)) 671 if (test_opt(sb, DATA_ERR_ABORT))
682 seq_puts(seq, ",data_err=abort"); 672 seq_puts(seq, ",data_err=abort");
683 673
684 if (test_opt(sb, NOLOAD)) 674 if (test_opt(sb, NOLOAD))
685 seq_puts(seq, ",norecovery"); 675 seq_puts(seq, ",norecovery");
686 676
687 ext3_show_quota_options(seq, sb); 677 ext3_show_quota_options(seq, sb);
688 678
689 return 0; 679 return 0;
690 } 680 }
691 681
692 682
693 static struct inode *ext3_nfs_get_inode(struct super_block *sb, 683 static struct inode *ext3_nfs_get_inode(struct super_block *sb,
694 u64 ino, u32 generation) 684 u64 ino, u32 generation)
695 { 685 {
696 struct inode *inode; 686 struct inode *inode;
697 687
698 if (ino < EXT3_FIRST_INO(sb) && ino != EXT3_ROOT_INO) 688 if (ino < EXT3_FIRST_INO(sb) && ino != EXT3_ROOT_INO)
699 return ERR_PTR(-ESTALE); 689 return ERR_PTR(-ESTALE);
700 if (ino > le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count)) 690 if (ino > le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count))
701 return ERR_PTR(-ESTALE); 691 return ERR_PTR(-ESTALE);
702 692
703 /* iget isn't really right if the inode is currently unallocated!! 693 /* iget isn't really right if the inode is currently unallocated!!
704 * 694 *
705 * ext3_read_inode will return a bad_inode if the inode had been 695 * ext3_read_inode will return a bad_inode if the inode had been
706 * deleted, so we should be safe. 696 * deleted, so we should be safe.
707 * 697 *
708 * Currently we don't know the generation for parent directory, so 698 * Currently we don't know the generation for parent directory, so
709 * a generation of 0 means "accept any" 699 * a generation of 0 means "accept any"
710 */ 700 */
711 inode = ext3_iget(sb, ino); 701 inode = ext3_iget(sb, ino);
712 if (IS_ERR(inode)) 702 if (IS_ERR(inode))
713 return ERR_CAST(inode); 703 return ERR_CAST(inode);
714 if (generation && inode->i_generation != generation) { 704 if (generation && inode->i_generation != generation) {
715 iput(inode); 705 iput(inode);
716 return ERR_PTR(-ESTALE); 706 return ERR_PTR(-ESTALE);
717 } 707 }
718 708
719 return inode; 709 return inode;
720 } 710 }
721 711
722 static struct dentry *ext3_fh_to_dentry(struct super_block *sb, struct fid *fid, 712 static struct dentry *ext3_fh_to_dentry(struct super_block *sb, struct fid *fid,
723 int fh_len, int fh_type) 713 int fh_len, int fh_type)
724 { 714 {
725 return generic_fh_to_dentry(sb, fid, fh_len, fh_type, 715 return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
726 ext3_nfs_get_inode); 716 ext3_nfs_get_inode);
727 } 717 }
728 718
729 static struct dentry *ext3_fh_to_parent(struct super_block *sb, struct fid *fid, 719 static struct dentry *ext3_fh_to_parent(struct super_block *sb, struct fid *fid,
730 int fh_len, int fh_type) 720 int fh_len, int fh_type)
731 { 721 {
732 return generic_fh_to_parent(sb, fid, fh_len, fh_type, 722 return generic_fh_to_parent(sb, fid, fh_len, fh_type,
733 ext3_nfs_get_inode); 723 ext3_nfs_get_inode);
734 } 724 }
735 725
736 /* 726 /*
737 * Try to release metadata pages (indirect blocks, directories) which are 727 * Try to release metadata pages (indirect blocks, directories) which are
738 * mapped via the block device. Since these pages could have journal heads 728 * mapped via the block device. Since these pages could have journal heads
739 * which would prevent try_to_free_buffers() from freeing them, we must use 729 * which would prevent try_to_free_buffers() from freeing them, we must use
740 * jbd layer's try_to_free_buffers() function to release them. 730 * jbd layer's try_to_free_buffers() function to release them.
741 */ 731 */
742 static int bdev_try_to_free_page(struct super_block *sb, struct page *page, 732 static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
743 gfp_t wait) 733 gfp_t wait)
744 { 734 {
745 journal_t *journal = EXT3_SB(sb)->s_journal; 735 journal_t *journal = EXT3_SB(sb)->s_journal;
746 736
747 WARN_ON(PageChecked(page)); 737 WARN_ON(PageChecked(page));
748 if (!page_has_buffers(page)) 738 if (!page_has_buffers(page))
749 return 0; 739 return 0;
750 if (journal) 740 if (journal)
751 return journal_try_to_free_buffers(journal, page, 741 return journal_try_to_free_buffers(journal, page,
752 wait & ~__GFP_WAIT); 742 wait & ~__GFP_WAIT);
753 return try_to_free_buffers(page); 743 return try_to_free_buffers(page);
754 } 744 }
755 745
#ifdef CONFIG_QUOTA
/* Human-readable name of a quota type, for log messages. */
#define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group")
/* Paste a prefix onto the per-type journaled-quota mount-option name. */
#define QTYPE2MOPT(on, t) ((t) == USRQUOTA ? ((on)##USRJQUOTA) : ((on)##GRPJQUOTA))

static int ext3_write_dquot(struct dquot *dquot);
static int ext3_acquire_dquot(struct dquot *dquot);
static int ext3_release_dquot(struct dquot *dquot);
static int ext3_mark_dquot_dirty(struct dquot *dquot);
static int ext3_write_info(struct super_block *sb, int type);
static int ext3_quota_on(struct super_block *sb, int type, int format_id,
			 struct path *path);
static int ext3_quota_on_mount(struct super_block *sb, int type);
static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data,
			       size_t len, loff_t off);
static ssize_t ext3_quota_write(struct super_block *sb, int type,
				const char *data, size_t len, loff_t off);

/*
 * Low-level dquot operations: ext3 wraps the journalled variants so that
 * quota updates go through the transaction machinery.
 */
static const struct dquot_operations ext3_quota_operations = {
	.write_dquot	= ext3_write_dquot,
	.acquire_dquot	= ext3_acquire_dquot,
	.release_dquot	= ext3_release_dquot,
	.mark_dirty	= ext3_mark_dquot_dirty,
	.write_info	= ext3_write_info,
	.alloc_dquot	= dquot_alloc,
	.destroy_dquot	= dquot_destroy,
};

/*
 * Quotactl interface: only quota_on needs an ext3-specific wrapper, the
 * rest is handled by the generic dquot implementations.
 */
static const struct quotactl_ops ext3_qctl_operations = {
	.quota_on	= ext3_quota_on,
	.quota_off	= dquot_quota_off,
	.quota_sync	= dquot_quota_sync,
	.get_info	= dquot_get_dqinfo,
	.set_info	= dquot_set_dqinfo,
	.get_dqblk	= dquot_get_dqblk,
	.set_dqblk	= dquot_set_dqblk
};
#endif
793 783
794 static const struct super_operations ext3_sops = { 784 static const struct super_operations ext3_sops = {
795 .alloc_inode = ext3_alloc_inode, 785 .alloc_inode = ext3_alloc_inode,
796 .destroy_inode = ext3_destroy_inode, 786 .destroy_inode = ext3_destroy_inode,
797 .write_inode = ext3_write_inode, 787 .write_inode = ext3_write_inode,
798 .dirty_inode = ext3_dirty_inode, 788 .dirty_inode = ext3_dirty_inode,
799 .drop_inode = ext3_drop_inode, 789 .drop_inode = ext3_drop_inode,
800 .evict_inode = ext3_evict_inode, 790 .evict_inode = ext3_evict_inode,
801 .put_super = ext3_put_super, 791 .put_super = ext3_put_super,
802 .sync_fs = ext3_sync_fs, 792 .sync_fs = ext3_sync_fs,
803 .freeze_fs = ext3_freeze, 793 .freeze_fs = ext3_freeze,
804 .unfreeze_fs = ext3_unfreeze, 794 .unfreeze_fs = ext3_unfreeze,
805 .statfs = ext3_statfs, 795 .statfs = ext3_statfs,
806 .remount_fs = ext3_remount, 796 .remount_fs = ext3_remount,
807 .show_options = ext3_show_options, 797 .show_options = ext3_show_options,
808 #ifdef CONFIG_QUOTA 798 #ifdef CONFIG_QUOTA
809 .quota_read = ext3_quota_read, 799 .quota_read = ext3_quota_read,
810 .quota_write = ext3_quota_write, 800 .quota_write = ext3_quota_write,
811 #endif 801 #endif
812 .bdev_try_to_free_page = bdev_try_to_free_page, 802 .bdev_try_to_free_page = bdev_try_to_free_page,
813 }; 803 };
814 804
815 static const struct export_operations ext3_export_ops = { 805 static const struct export_operations ext3_export_ops = {
816 .fh_to_dentry = ext3_fh_to_dentry, 806 .fh_to_dentry = ext3_fh_to_dentry,
817 .fh_to_parent = ext3_fh_to_parent, 807 .fh_to_parent = ext3_fh_to_parent,
818 .get_parent = ext3_get_parent, 808 .get_parent = ext3_get_parent,
819 }; 809 };
820 810
/*
 * Token identifiers for mount-option parsing; each corresponds to one
 * or more pattern entries in the tokens[] match table below.
 */
enum {
	/* statfs behaviour and id/ownership options */
	Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
	Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
	Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov,
	/* extended attributes and ACLs */
	Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
	/* allocation, journal and data-mode options */
	Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh,
	Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
	Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
	Opt_data_err_abort, Opt_data_err_ignore,
	/* quota options */
	Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
	Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
	Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err,
	Opt_resize, Opt_usrquota, Opt_grpquota
};
835 825
836 static const match_table_t tokens = { 826 static const match_table_t tokens = {
837 {Opt_bsd_df, "bsddf"}, 827 {Opt_bsd_df, "bsddf"},
838 {Opt_minix_df, "minixdf"}, 828 {Opt_minix_df, "minixdf"},
839 {Opt_grpid, "grpid"}, 829 {Opt_grpid, "grpid"},
840 {Opt_grpid, "bsdgroups"}, 830 {Opt_grpid, "bsdgroups"},
841 {Opt_nogrpid, "nogrpid"}, 831 {Opt_nogrpid, "nogrpid"},
842 {Opt_nogrpid, "sysvgroups"}, 832 {Opt_nogrpid, "sysvgroups"},
843 {Opt_resgid, "resgid=%u"}, 833 {Opt_resgid, "resgid=%u"},
844 {Opt_resuid, "resuid=%u"}, 834 {Opt_resuid, "resuid=%u"},
845 {Opt_sb, "sb=%u"}, 835 {Opt_sb, "sb=%u"},
846 {Opt_err_cont, "errors=continue"}, 836 {Opt_err_cont, "errors=continue"},
847 {Opt_err_panic, "errors=panic"}, 837 {Opt_err_panic, "errors=panic"},
848 {Opt_err_ro, "errors=remount-ro"}, 838 {Opt_err_ro, "errors=remount-ro"},
849 {Opt_nouid32, "nouid32"}, 839 {Opt_nouid32, "nouid32"},
850 {Opt_nocheck, "nocheck"}, 840 {Opt_nocheck, "nocheck"},
851 {Opt_nocheck, "check=none"}, 841 {Opt_nocheck, "check=none"},
852 {Opt_debug, "debug"}, 842 {Opt_debug, "debug"},
853 {Opt_oldalloc, "oldalloc"}, 843 {Opt_oldalloc, "oldalloc"},
854 {Opt_orlov, "orlov"}, 844 {Opt_orlov, "orlov"},
855 {Opt_user_xattr, "user_xattr"}, 845 {Opt_user_xattr, "user_xattr"},
856 {Opt_nouser_xattr, "nouser_xattr"}, 846 {Opt_nouser_xattr, "nouser_xattr"},
857 {Opt_acl, "acl"}, 847 {Opt_acl, "acl"},
858 {Opt_noacl, "noacl"}, 848 {Opt_noacl, "noacl"},
859 {Opt_reservation, "reservation"}, 849 {Opt_reservation, "reservation"},
860 {Opt_noreservation, "noreservation"}, 850 {Opt_noreservation, "noreservation"},
861 {Opt_noload, "noload"}, 851 {Opt_noload, "noload"},
862 {Opt_noload, "norecovery"}, 852 {Opt_noload, "norecovery"},
863 {Opt_nobh, "nobh"}, 853 {Opt_nobh, "nobh"},
864 {Opt_bh, "bh"}, 854 {Opt_bh, "bh"},
865 {Opt_commit, "commit=%u"}, 855 {Opt_commit, "commit=%u"},
866 {Opt_journal_update, "journal=update"}, 856 {Opt_journal_update, "journal=update"},
867 {Opt_journal_inum, "journal=%u"}, 857 {Opt_journal_inum, "journal=%u"},
868 {Opt_journal_dev, "journal_dev=%u"}, 858 {Opt_journal_dev, "journal_dev=%u"},
869 {Opt_abort, "abort"}, 859 {Opt_abort, "abort"},
870 {Opt_data_journal, "data=journal"}, 860 {Opt_data_journal, "data=journal"},
871 {Opt_data_ordered, "data=ordered"}, 861 {Opt_data_ordered, "data=ordered"},
872 {Opt_data_writeback, "data=writeback"}, 862 {Opt_data_writeback, "data=writeback"},
873 {Opt_data_err_abort, "data_err=abort"}, 863 {Opt_data_err_abort, "data_err=abort"},
874 {Opt_data_err_ignore, "data_err=ignore"}, 864 {Opt_data_err_ignore, "data_err=ignore"},
875 {Opt_offusrjquota, "usrjquota="}, 865 {Opt_offusrjquota, "usrjquota="},
876 {Opt_usrjquota, "usrjquota=%s"}, 866 {Opt_usrjquota, "usrjquota=%s"},
877 {Opt_offgrpjquota, "grpjquota="}, 867 {Opt_offgrpjquota, "grpjquota="},
878 {Opt_grpjquota, "grpjquota=%s"}, 868 {Opt_grpjquota, "grpjquota=%s"},
879 {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, 869 {Opt_jqfmt_vfsold, "jqfmt=vfsold"},
880 {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, 870 {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
881 {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"}, 871 {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"},
882 {Opt_grpquota, "grpquota"}, 872 {Opt_grpquota, "grpquota"},
883 {Opt_noquota, "noquota"}, 873 {Opt_noquota, "noquota"},
884 {Opt_quota, "quota"}, 874 {Opt_quota, "quota"},
885 {Opt_usrquota, "usrquota"}, 875 {Opt_usrquota, "usrquota"},
886 {Opt_barrier, "barrier=%u"}, 876 {Opt_barrier, "barrier=%u"},
887 {Opt_barrier, "barrier"}, 877 {Opt_barrier, "barrier"},
888 {Opt_nobarrier, "nobarrier"}, 878 {Opt_nobarrier, "nobarrier"},
889 {Opt_resize, "resize"}, 879 {Opt_resize, "resize"},
890 {Opt_err, NULL}, 880 {Opt_err, NULL},
891 }; 881 };
892 882
893 static ext3_fsblk_t get_sb_block(void **data, struct super_block *sb) 883 static ext3_fsblk_t get_sb_block(void **data, struct super_block *sb)
894 { 884 {
895 ext3_fsblk_t sb_block; 885 ext3_fsblk_t sb_block;
896 char *options = (char *) *data; 886 char *options = (char *) *data;
897 887
898 if (!options || strncmp(options, "sb=", 3) != 0) 888 if (!options || strncmp(options, "sb=", 3) != 0)
899 return 1; /* Default location */ 889 return 1; /* Default location */
900 options += 3; 890 options += 3;
901 /*todo: use simple_strtoll with >32bit ext3 */ 891 /*todo: use simple_strtoll with >32bit ext3 */
902 sb_block = simple_strtoul(options, &options, 0); 892 sb_block = simple_strtoul(options, &options, 0);
903 if (*options && *options != ',') { 893 if (*options && *options != ',') {
904 ext3_msg(sb, "error: invalid sb specification: %s", 894 ext3_msg(sb, "error: invalid sb specification: %s",
905 (char *) *data); 895 (char *) *data);
906 return 1; 896 return 1;
907 } 897 }
908 if (*options == ',') 898 if (*options == ',')
909 options++; 899 options++;
910 *data = (void *) options; 900 *data = (void *) options;
911 return sb_block; 901 return sb_block;
912 } 902 }
913 903
914 #ifdef CONFIG_QUOTA 904 #ifdef CONFIG_QUOTA
915 static int set_qf_name(struct super_block *sb, int qtype, substring_t *args) 905 static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
916 { 906 {
917 struct ext3_sb_info *sbi = EXT3_SB(sb); 907 struct ext3_sb_info *sbi = EXT3_SB(sb);
918 char *qname; 908 char *qname;
919 909
920 if (sb_any_quota_loaded(sb) && 910 if (sb_any_quota_loaded(sb) &&
921 !sbi->s_qf_names[qtype]) { 911 !sbi->s_qf_names[qtype]) {
922 ext3_msg(sb, KERN_ERR, 912 ext3_msg(sb, KERN_ERR,
923 "Cannot change journaled " 913 "Cannot change journaled "
924 "quota options when quota turned on"); 914 "quota options when quota turned on");
925 return 0; 915 return 0;
926 } 916 }
927 qname = match_strdup(args); 917 qname = match_strdup(args);
928 if (!qname) { 918 if (!qname) {
929 ext3_msg(sb, KERN_ERR, 919 ext3_msg(sb, KERN_ERR,
930 "Not enough memory for storing quotafile name"); 920 "Not enough memory for storing quotafile name");
931 return 0; 921 return 0;
932 } 922 }
933 if (sbi->s_qf_names[qtype] && 923 if (sbi->s_qf_names[qtype] &&
934 strcmp(sbi->s_qf_names[qtype], qname)) { 924 strcmp(sbi->s_qf_names[qtype], qname)) {
935 ext3_msg(sb, KERN_ERR, 925 ext3_msg(sb, KERN_ERR,
936 "%s quota file already specified", QTYPE2NAME(qtype)); 926 "%s quota file already specified", QTYPE2NAME(qtype));
937 kfree(qname); 927 kfree(qname);
938 return 0; 928 return 0;
939 } 929 }
940 sbi->s_qf_names[qtype] = qname; 930 sbi->s_qf_names[qtype] = qname;
941 if (strchr(sbi->s_qf_names[qtype], '/')) { 931 if (strchr(sbi->s_qf_names[qtype], '/')) {
942 ext3_msg(sb, KERN_ERR, 932 ext3_msg(sb, KERN_ERR,
943 "quotafile must be on filesystem root"); 933 "quotafile must be on filesystem root");
944 kfree(sbi->s_qf_names[qtype]); 934 kfree(sbi->s_qf_names[qtype]);
945 sbi->s_qf_names[qtype] = NULL; 935 sbi->s_qf_names[qtype] = NULL;
946 return 0; 936 return 0;
947 } 937 }
948 set_opt(sbi->s_mount_opt, QUOTA); 938 set_opt(sbi->s_mount_opt, QUOTA);
949 return 1; 939 return 1;
950 } 940 }
951 941
952 static int clear_qf_name(struct super_block *sb, int qtype) { 942 static int clear_qf_name(struct super_block *sb, int qtype) {
953 943
954 struct ext3_sb_info *sbi = EXT3_SB(sb); 944 struct ext3_sb_info *sbi = EXT3_SB(sb);
955 945
956 if (sb_any_quota_loaded(sb) && 946 if (sb_any_quota_loaded(sb) &&
957 sbi->s_qf_names[qtype]) { 947 sbi->s_qf_names[qtype]) {
958 ext3_msg(sb, KERN_ERR, "Cannot change journaled quota options" 948 ext3_msg(sb, KERN_ERR, "Cannot change journaled quota options"
959 " when quota turned on"); 949 " when quota turned on");
960 return 0; 950 return 0;
961 } 951 }
962 /* 952 /*
963 * The space will be released later when all options are confirmed 953 * The space will be released later when all options are confirmed
964 * to be correct 954 * to be correct
965 */ 955 */
966 sbi->s_qf_names[qtype] = NULL; 956 sbi->s_qf_names[qtype] = NULL;
967 return 1; 957 return 1;
968 } 958 }
969 #endif 959 #endif
970 960
971 static int parse_options (char *options, struct super_block *sb, 961 static int parse_options (char *options, struct super_block *sb,
972 unsigned int *inum, unsigned long *journal_devnum, 962 unsigned int *inum, unsigned long *journal_devnum,
973 ext3_fsblk_t *n_blocks_count, int is_remount) 963 ext3_fsblk_t *n_blocks_count, int is_remount)
974 { 964 {
975 struct ext3_sb_info *sbi = EXT3_SB(sb); 965 struct ext3_sb_info *sbi = EXT3_SB(sb);
976 char * p; 966 char * p;
977 substring_t args[MAX_OPT_ARGS]; 967 substring_t args[MAX_OPT_ARGS];
978 int data_opt = 0; 968 int data_opt = 0;
979 int option; 969 int option;
980 #ifdef CONFIG_QUOTA 970 #ifdef CONFIG_QUOTA
981 int qfmt; 971 int qfmt;
982 #endif 972 #endif
983 973
984 if (!options) 974 if (!options)
985 return 1; 975 return 1;
986 976
987 while ((p = strsep (&options, ",")) != NULL) { 977 while ((p = strsep (&options, ",")) != NULL) {
988 int token; 978 int token;
989 if (!*p) 979 if (!*p)
990 continue; 980 continue;
991 /* 981 /*
992 * Initialize args struct so we know whether arg was 982 * Initialize args struct so we know whether arg was
993 * found; some options take optional arguments. 983 * found; some options take optional arguments.
994 */ 984 */
995 args[0].to = args[0].from = 0; 985 args[0].to = args[0].from = 0;
996 token = match_token(p, tokens, args); 986 token = match_token(p, tokens, args);
997 switch (token) { 987 switch (token) {
998 case Opt_bsd_df: 988 case Opt_bsd_df:
999 clear_opt (sbi->s_mount_opt, MINIX_DF); 989 clear_opt (sbi->s_mount_opt, MINIX_DF);
1000 break; 990 break;
1001 case Opt_minix_df: 991 case Opt_minix_df:
1002 set_opt (sbi->s_mount_opt, MINIX_DF); 992 set_opt (sbi->s_mount_opt, MINIX_DF);
1003 break; 993 break;
1004 case Opt_grpid: 994 case Opt_grpid:
1005 set_opt (sbi->s_mount_opt, GRPID); 995 set_opt (sbi->s_mount_opt, GRPID);
1006 break; 996 break;
1007 case Opt_nogrpid: 997 case Opt_nogrpid:
1008 clear_opt (sbi->s_mount_opt, GRPID); 998 clear_opt (sbi->s_mount_opt, GRPID);
1009 break; 999 break;
1010 case Opt_resuid: 1000 case Opt_resuid:
1011 if (match_int(&args[0], &option)) 1001 if (match_int(&args[0], &option))
1012 return 0; 1002 return 0;
1013 sbi->s_resuid = option; 1003 sbi->s_resuid = option;
1014 break; 1004 break;
1015 case Opt_resgid: 1005 case Opt_resgid:
1016 if (match_int(&args[0], &option)) 1006 if (match_int(&args[0], &option))
1017 return 0; 1007 return 0;
1018 sbi->s_resgid = option; 1008 sbi->s_resgid = option;
1019 break; 1009 break;
1020 case Opt_sb: 1010 case Opt_sb:
1021 /* handled by get_sb_block() instead of here */ 1011 /* handled by get_sb_block() instead of here */
1022 /* *sb_block = match_int(&args[0]); */ 1012 /* *sb_block = match_int(&args[0]); */
1023 break; 1013 break;
1024 case Opt_err_panic: 1014 case Opt_err_panic:
1025 clear_opt (sbi->s_mount_opt, ERRORS_CONT); 1015 clear_opt (sbi->s_mount_opt, ERRORS_CONT);
1026 clear_opt (sbi->s_mount_opt, ERRORS_RO); 1016 clear_opt (sbi->s_mount_opt, ERRORS_RO);
1027 set_opt (sbi->s_mount_opt, ERRORS_PANIC); 1017 set_opt (sbi->s_mount_opt, ERRORS_PANIC);
1028 break; 1018 break;
1029 case Opt_err_ro: 1019 case Opt_err_ro:
1030 clear_opt (sbi->s_mount_opt, ERRORS_CONT); 1020 clear_opt (sbi->s_mount_opt, ERRORS_CONT);
1031 clear_opt (sbi->s_mount_opt, ERRORS_PANIC); 1021 clear_opt (sbi->s_mount_opt, ERRORS_PANIC);
1032 set_opt (sbi->s_mount_opt, ERRORS_RO); 1022 set_opt (sbi->s_mount_opt, ERRORS_RO);
1033 break; 1023 break;
1034 case Opt_err_cont: 1024 case Opt_err_cont:
1035 clear_opt (sbi->s_mount_opt, ERRORS_RO); 1025 clear_opt (sbi->s_mount_opt, ERRORS_RO);
1036 clear_opt (sbi->s_mount_opt, ERRORS_PANIC); 1026 clear_opt (sbi->s_mount_opt, ERRORS_PANIC);
1037 set_opt (sbi->s_mount_opt, ERRORS_CONT); 1027 set_opt (sbi->s_mount_opt, ERRORS_CONT);
1038 break; 1028 break;
1039 case Opt_nouid32: 1029 case Opt_nouid32:
1040 set_opt (sbi->s_mount_opt, NO_UID32); 1030 set_opt (sbi->s_mount_opt, NO_UID32);
1041 break; 1031 break;
1042 case Opt_nocheck: 1032 case Opt_nocheck:
1043 clear_opt (sbi->s_mount_opt, CHECK); 1033 clear_opt (sbi->s_mount_opt, CHECK);
1044 break; 1034 break;
1045 case Opt_debug: 1035 case Opt_debug:
1046 set_opt (sbi->s_mount_opt, DEBUG); 1036 set_opt (sbi->s_mount_opt, DEBUG);
1047 break; 1037 break;
1048 case Opt_oldalloc: 1038 case Opt_oldalloc:
1049 ext3_msg(sb, KERN_WARNING, 1039 ext3_msg(sb, KERN_WARNING,
1050 "Ignoring deprecated oldalloc option"); 1040 "Ignoring deprecated oldalloc option");
1051 break; 1041 break;
1052 case Opt_orlov: 1042 case Opt_orlov:
1053 ext3_msg(sb, KERN_WARNING, 1043 ext3_msg(sb, KERN_WARNING,
1054 "Ignoring deprecated orlov option"); 1044 "Ignoring deprecated orlov option");
1055 break; 1045 break;
1056 #ifdef CONFIG_EXT3_FS_XATTR 1046 #ifdef CONFIG_EXT3_FS_XATTR
1057 case Opt_user_xattr: 1047 case Opt_user_xattr:
1058 set_opt (sbi->s_mount_opt, XATTR_USER); 1048 set_opt (sbi->s_mount_opt, XATTR_USER);
1059 break; 1049 break;
1060 case Opt_nouser_xattr: 1050 case Opt_nouser_xattr:
1061 clear_opt (sbi->s_mount_opt, XATTR_USER); 1051 clear_opt (sbi->s_mount_opt, XATTR_USER);
1062 break; 1052 break;
1063 #else 1053 #else
1064 case Opt_user_xattr: 1054 case Opt_user_xattr:
1065 case Opt_nouser_xattr: 1055 case Opt_nouser_xattr:
1066 ext3_msg(sb, KERN_INFO, 1056 ext3_msg(sb, KERN_INFO,
1067 "(no)user_xattr options not supported"); 1057 "(no)user_xattr options not supported");
1068 break; 1058 break;
1069 #endif 1059 #endif
1070 #ifdef CONFIG_EXT3_FS_POSIX_ACL 1060 #ifdef CONFIG_EXT3_FS_POSIX_ACL
1071 case Opt_acl: 1061 case Opt_acl:
1072 set_opt(sbi->s_mount_opt, POSIX_ACL); 1062 set_opt(sbi->s_mount_opt, POSIX_ACL);
1073 break; 1063 break;
1074 case Opt_noacl: 1064 case Opt_noacl:
1075 clear_opt(sbi->s_mount_opt, POSIX_ACL); 1065 clear_opt(sbi->s_mount_opt, POSIX_ACL);
1076 break; 1066 break;
1077 #else 1067 #else
1078 case Opt_acl: 1068 case Opt_acl:
1079 case Opt_noacl: 1069 case Opt_noacl:
1080 ext3_msg(sb, KERN_INFO, 1070 ext3_msg(sb, KERN_INFO,
1081 "(no)acl options not supported"); 1071 "(no)acl options not supported");
1082 break; 1072 break;
1083 #endif 1073 #endif
1084 case Opt_reservation: 1074 case Opt_reservation:
1085 set_opt(sbi->s_mount_opt, RESERVATION); 1075 set_opt(sbi->s_mount_opt, RESERVATION);
1086 break; 1076 break;
1087 case Opt_noreservation: 1077 case Opt_noreservation:
1088 clear_opt(sbi->s_mount_opt, RESERVATION); 1078 clear_opt(sbi->s_mount_opt, RESERVATION);
1089 break; 1079 break;
1090 case Opt_journal_update: 1080 case Opt_journal_update:
1091 /* @@@ FIXME */ 1081 /* @@@ FIXME */
1092 /* Eventually we will want to be able to create 1082 /* Eventually we will want to be able to create
1093 a journal file here. For now, only allow the 1083 a journal file here. For now, only allow the
1094 user to specify an existing inode to be the 1084 user to specify an existing inode to be the
1095 journal file. */ 1085 journal file. */
1096 if (is_remount) { 1086 if (is_remount) {
1097 ext3_msg(sb, KERN_ERR, "error: cannot specify " 1087 ext3_msg(sb, KERN_ERR, "error: cannot specify "
1098 "journal on remount"); 1088 "journal on remount");
1099 return 0; 1089 return 0;
1100 } 1090 }
1101 set_opt (sbi->s_mount_opt, UPDATE_JOURNAL); 1091 set_opt (sbi->s_mount_opt, UPDATE_JOURNAL);
1102 break; 1092 break;
1103 case Opt_journal_inum: 1093 case Opt_journal_inum:
1104 if (is_remount) { 1094 if (is_remount) {
1105 ext3_msg(sb, KERN_ERR, "error: cannot specify " 1095 ext3_msg(sb, KERN_ERR, "error: cannot specify "
1106 "journal on remount"); 1096 "journal on remount");
1107 return 0; 1097 return 0;
1108 } 1098 }
1109 if (match_int(&args[0], &option)) 1099 if (match_int(&args[0], &option))
1110 return 0; 1100 return 0;
1111 *inum = option; 1101 *inum = option;
1112 break; 1102 break;
1113 case Opt_journal_dev: 1103 case Opt_journal_dev:
1114 if (is_remount) { 1104 if (is_remount) {
1115 ext3_msg(sb, KERN_ERR, "error: cannot specify " 1105 ext3_msg(sb, KERN_ERR, "error: cannot specify "
1116 "journal on remount"); 1106 "journal on remount");
1117 return 0; 1107 return 0;
1118 } 1108 }
1119 if (match_int(&args[0], &option)) 1109 if (match_int(&args[0], &option))
1120 return 0; 1110 return 0;
1121 *journal_devnum = option; 1111 *journal_devnum = option;
1122 break; 1112 break;
1123 case Opt_noload: 1113 case Opt_noload:
1124 set_opt (sbi->s_mount_opt, NOLOAD); 1114 set_opt (sbi->s_mount_opt, NOLOAD);
1125 break; 1115 break;
1126 case Opt_commit: 1116 case Opt_commit:
1127 if (match_int(&args[0], &option)) 1117 if (match_int(&args[0], &option))
1128 return 0; 1118 return 0;
1129 if (option < 0) 1119 if (option < 0)
1130 return 0; 1120 return 0;
1131 if (option == 0) 1121 if (option == 0)
1132 option = JBD_DEFAULT_MAX_COMMIT_AGE; 1122 option = JBD_DEFAULT_MAX_COMMIT_AGE;
1133 sbi->s_commit_interval = HZ * option; 1123 sbi->s_commit_interval = HZ * option;
1134 break; 1124 break;
1135 case Opt_data_journal: 1125 case Opt_data_journal:
1136 data_opt = EXT3_MOUNT_JOURNAL_DATA; 1126 data_opt = EXT3_MOUNT_JOURNAL_DATA;
1137 goto datacheck; 1127 goto datacheck;
1138 case Opt_data_ordered: 1128 case Opt_data_ordered:
1139 data_opt = EXT3_MOUNT_ORDERED_DATA; 1129 data_opt = EXT3_MOUNT_ORDERED_DATA;
1140 goto datacheck; 1130 goto datacheck;
1141 case Opt_data_writeback: 1131 case Opt_data_writeback:
1142 data_opt = EXT3_MOUNT_WRITEBACK_DATA; 1132 data_opt = EXT3_MOUNT_WRITEBACK_DATA;
1143 datacheck: 1133 datacheck:
1144 if (is_remount) { 1134 if (is_remount) {
1145 if (test_opt(sb, DATA_FLAGS) == data_opt) 1135 if (test_opt(sb, DATA_FLAGS) == data_opt)
1146 break; 1136 break;
1147 ext3_msg(sb, KERN_ERR, 1137 ext3_msg(sb, KERN_ERR,
1148 "error: cannot change " 1138 "error: cannot change "
1149 "data mode on remount. The filesystem " 1139 "data mode on remount. The filesystem "
1150 "is mounted in data=%s mode and you " 1140 "is mounted in data=%s mode and you "
1151 "try to remount it in data=%s mode.", 1141 "try to remount it in data=%s mode.",
1152 data_mode_string(test_opt(sb, 1142 data_mode_string(test_opt(sb,
1153 DATA_FLAGS)), 1143 DATA_FLAGS)),
1154 data_mode_string(data_opt)); 1144 data_mode_string(data_opt));
1155 return 0; 1145 return 0;
1156 } else { 1146 } else {
1157 clear_opt(sbi->s_mount_opt, DATA_FLAGS); 1147 clear_opt(sbi->s_mount_opt, DATA_FLAGS);
1158 sbi->s_mount_opt |= data_opt; 1148 sbi->s_mount_opt |= data_opt;
1159 } 1149 }
1160 break; 1150 break;
1161 case Opt_data_err_abort: 1151 case Opt_data_err_abort:
1162 set_opt(sbi->s_mount_opt, DATA_ERR_ABORT); 1152 set_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
1163 break; 1153 break;
1164 case Opt_data_err_ignore: 1154 case Opt_data_err_ignore:
1165 clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT); 1155 clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
1166 break; 1156 break;
1167 #ifdef CONFIG_QUOTA 1157 #ifdef CONFIG_QUOTA
1168 case Opt_usrjquota: 1158 case Opt_usrjquota:
1169 if (!set_qf_name(sb, USRQUOTA, &args[0])) 1159 if (!set_qf_name(sb, USRQUOTA, &args[0]))
1170 return 0; 1160 return 0;
1171 break; 1161 break;
1172 case Opt_grpjquota: 1162 case Opt_grpjquota:
1173 if (!set_qf_name(sb, GRPQUOTA, &args[0])) 1163 if (!set_qf_name(sb, GRPQUOTA, &args[0]))
1174 return 0; 1164 return 0;
1175 break; 1165 break;
1176 case Opt_offusrjquota: 1166 case Opt_offusrjquota:
1177 if (!clear_qf_name(sb, USRQUOTA)) 1167 if (!clear_qf_name(sb, USRQUOTA))
1178 return 0; 1168 return 0;
1179 break; 1169 break;
1180 case Opt_offgrpjquota: 1170 case Opt_offgrpjquota:
1181 if (!clear_qf_name(sb, GRPQUOTA)) 1171 if (!clear_qf_name(sb, GRPQUOTA))
1182 return 0; 1172 return 0;
1183 break; 1173 break;
1184 case Opt_jqfmt_vfsold: 1174 case Opt_jqfmt_vfsold:
1185 qfmt = QFMT_VFS_OLD; 1175 qfmt = QFMT_VFS_OLD;
1186 goto set_qf_format; 1176 goto set_qf_format;
1187 case Opt_jqfmt_vfsv0: 1177 case Opt_jqfmt_vfsv0:
1188 qfmt = QFMT_VFS_V0; 1178 qfmt = QFMT_VFS_V0;
1189 goto set_qf_format; 1179 goto set_qf_format;
1190 case Opt_jqfmt_vfsv1: 1180 case Opt_jqfmt_vfsv1:
1191 qfmt = QFMT_VFS_V1; 1181 qfmt = QFMT_VFS_V1;
1192 set_qf_format: 1182 set_qf_format:
1193 if (sb_any_quota_loaded(sb) && 1183 if (sb_any_quota_loaded(sb) &&
1194 sbi->s_jquota_fmt != qfmt) { 1184 sbi->s_jquota_fmt != qfmt) {
1195 ext3_msg(sb, KERN_ERR, "error: cannot change " 1185 ext3_msg(sb, KERN_ERR, "error: cannot change "
1196 "journaled quota options when " 1186 "journaled quota options when "
1197 "quota turned on."); 1187 "quota turned on.");
1198 return 0; 1188 return 0;
1199 } 1189 }
1200 sbi->s_jquota_fmt = qfmt; 1190 sbi->s_jquota_fmt = qfmt;
1201 break; 1191 break;
1202 case Opt_quota: 1192 case Opt_quota:
1203 case Opt_usrquota: 1193 case Opt_usrquota:
1204 set_opt(sbi->s_mount_opt, QUOTA); 1194 set_opt(sbi->s_mount_opt, QUOTA);
1205 set_opt(sbi->s_mount_opt, USRQUOTA); 1195 set_opt(sbi->s_mount_opt, USRQUOTA);
1206 break; 1196 break;
1207 case Opt_grpquota: 1197 case Opt_grpquota:
1208 set_opt(sbi->s_mount_opt, QUOTA); 1198 set_opt(sbi->s_mount_opt, QUOTA);
1209 set_opt(sbi->s_mount_opt, GRPQUOTA); 1199 set_opt(sbi->s_mount_opt, GRPQUOTA);
1210 break; 1200 break;
1211 case Opt_noquota: 1201 case Opt_noquota:
1212 if (sb_any_quota_loaded(sb)) { 1202 if (sb_any_quota_loaded(sb)) {
1213 ext3_msg(sb, KERN_ERR, "error: cannot change " 1203 ext3_msg(sb, KERN_ERR, "error: cannot change "
1214 "quota options when quota turned on."); 1204 "quota options when quota turned on.");
1215 return 0; 1205 return 0;
1216 } 1206 }
1217 clear_opt(sbi->s_mount_opt, QUOTA); 1207 clear_opt(sbi->s_mount_opt, QUOTA);
1218 clear_opt(sbi->s_mount_opt, USRQUOTA); 1208 clear_opt(sbi->s_mount_opt, USRQUOTA);
1219 clear_opt(sbi->s_mount_opt, GRPQUOTA); 1209 clear_opt(sbi->s_mount_opt, GRPQUOTA);
1220 break; 1210 break;
1221 #else 1211 #else
1222 case Opt_quota: 1212 case Opt_quota:
1223 case Opt_usrquota: 1213 case Opt_usrquota:
1224 case Opt_grpquota: 1214 case Opt_grpquota:
1225 ext3_msg(sb, KERN_ERR, 1215 ext3_msg(sb, KERN_ERR,
1226 "error: quota options not supported."); 1216 "error: quota options not supported.");
1227 break; 1217 break;
1228 case Opt_usrjquota: 1218 case Opt_usrjquota:
1229 case Opt_grpjquota: 1219 case Opt_grpjquota:
1230 case Opt_offusrjquota: 1220 case Opt_offusrjquota:
1231 case Opt_offgrpjquota: 1221 case Opt_offgrpjquota:
1232 case Opt_jqfmt_vfsold: 1222 case Opt_jqfmt_vfsold:
1233 case Opt_jqfmt_vfsv0: 1223 case Opt_jqfmt_vfsv0:
1234 case Opt_jqfmt_vfsv1: 1224 case Opt_jqfmt_vfsv1:
1235 ext3_msg(sb, KERN_ERR, 1225 ext3_msg(sb, KERN_ERR,
1236 "error: journaled quota options not " 1226 "error: journaled quota options not "
1237 "supported."); 1227 "supported.");
1238 break; 1228 break;
1239 case Opt_noquota: 1229 case Opt_noquota:
1240 break; 1230 break;
1241 #endif 1231 #endif
1242 case Opt_abort: 1232 case Opt_abort:
1243 set_opt(sbi->s_mount_opt, ABORT); 1233 set_opt(sbi->s_mount_opt, ABORT);
1244 break; 1234 break;
1245 case Opt_nobarrier: 1235 case Opt_nobarrier:
1246 clear_opt(sbi->s_mount_opt, BARRIER); 1236 clear_opt(sbi->s_mount_opt, BARRIER);
1247 break; 1237 break;
1248 case Opt_barrier: 1238 case Opt_barrier:
1249 if (args[0].from) { 1239 if (args[0].from) {
1250 if (match_int(&args[0], &option)) 1240 if (match_int(&args[0], &option))
1251 return 0; 1241 return 0;
1252 } else 1242 } else
1253 option = 1; /* No argument, default to 1 */ 1243 option = 1; /* No argument, default to 1 */
1254 if (option) 1244 if (option)
1255 set_opt(sbi->s_mount_opt, BARRIER); 1245 set_opt(sbi->s_mount_opt, BARRIER);
1256 else 1246 else
1257 clear_opt(sbi->s_mount_opt, BARRIER); 1247 clear_opt(sbi->s_mount_opt, BARRIER);
1258 break; 1248 break;
1259 case Opt_ignore: 1249 case Opt_ignore:
1260 break; 1250 break;
1261 case Opt_resize: 1251 case Opt_resize:
1262 if (!is_remount) { 1252 if (!is_remount) {
1263 ext3_msg(sb, KERN_ERR, 1253 ext3_msg(sb, KERN_ERR,
1264 "error: resize option only available " 1254 "error: resize option only available "
1265 "for remount"); 1255 "for remount");
1266 return 0; 1256 return 0;
1267 } 1257 }
1268 if (match_int(&args[0], &option) != 0) 1258 if (match_int(&args[0], &option) != 0)
1269 return 0; 1259 return 0;
1270 *n_blocks_count = option; 1260 *n_blocks_count = option;
1271 break; 1261 break;
1272 case Opt_nobh: 1262 case Opt_nobh:
1273 ext3_msg(sb, KERN_WARNING, 1263 ext3_msg(sb, KERN_WARNING,
1274 "warning: ignoring deprecated nobh option"); 1264 "warning: ignoring deprecated nobh option");
1275 break; 1265 break;
1276 case Opt_bh: 1266 case Opt_bh:
1277 ext3_msg(sb, KERN_WARNING, 1267 ext3_msg(sb, KERN_WARNING,
1278 "warning: ignoring deprecated bh option"); 1268 "warning: ignoring deprecated bh option");
1279 break; 1269 break;
1280 default: 1270 default:
1281 ext3_msg(sb, KERN_ERR, 1271 ext3_msg(sb, KERN_ERR,
1282 "error: unrecognized mount option \"%s\" " 1272 "error: unrecognized mount option \"%s\" "
1283 "or missing value", p); 1273 "or missing value", p);
1284 return 0; 1274 return 0;
1285 } 1275 }
1286 } 1276 }
1287 #ifdef CONFIG_QUOTA 1277 #ifdef CONFIG_QUOTA
1288 if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { 1278 if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
1289 if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA]) 1279 if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA])
1290 clear_opt(sbi->s_mount_opt, USRQUOTA); 1280 clear_opt(sbi->s_mount_opt, USRQUOTA);
1291 if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA]) 1281 if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA])
1292 clear_opt(sbi->s_mount_opt, GRPQUOTA); 1282 clear_opt(sbi->s_mount_opt, GRPQUOTA);
1293 1283
1294 if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) { 1284 if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) {
1295 ext3_msg(sb, KERN_ERR, "error: old and new quota " 1285 ext3_msg(sb, KERN_ERR, "error: old and new quota "
1296 "format mixing."); 1286 "format mixing.");
1297 return 0; 1287 return 0;
1298 } 1288 }
1299 1289
1300 if (!sbi->s_jquota_fmt) { 1290 if (!sbi->s_jquota_fmt) {
1301 ext3_msg(sb, KERN_ERR, "error: journaled quota format " 1291 ext3_msg(sb, KERN_ERR, "error: journaled quota format "
1302 "not specified."); 1292 "not specified.");
1303 return 0; 1293 return 0;
1304 } 1294 }
1305 } else { 1295 } else {
1306 if (sbi->s_jquota_fmt) { 1296 if (sbi->s_jquota_fmt) {
1307 ext3_msg(sb, KERN_ERR, "error: journaled quota format " 1297 ext3_msg(sb, KERN_ERR, "error: journaled quota format "
1308 "specified with no journaling " 1298 "specified with no journaling "
1309 "enabled."); 1299 "enabled.");
1310 return 0; 1300 return 0;
1311 } 1301 }
1312 } 1302 }
1313 #endif 1303 #endif
1314 return 1; 1304 return 1;
1315 } 1305 }
1316 1306
/*
 * ext3_setup_super - final superblock checks and bookkeeping at mount time.
 *
 * Warns (via ext3_msg) if the filesystem looks like it wants an fsck:
 * unchecked, carries errors, mount count exceeded, or check interval
 * elapsed.  For a read-write mount it then bumps the mount count, stamps
 * the mount time, sets the RECOVER incompat flag and commits the
 * superblock.
 *
 * Returns 0, or MS_RDONLY if the on-disk revision level is higher than
 * this implementation supports (caller must then force a read-only mount).
 */
static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es,
			    int read_only)
{
	struct ext3_sb_info *sbi = EXT3_SB(sb);
	int res = 0;

	if (le32_to_cpu(es->s_rev_level) > EXT3_MAX_SUPP_REV) {
		ext3_msg(sb, KERN_ERR,
			"error: revision level too high, "
			"forcing read-only mode");
		res = MS_RDONLY;
	}
	/* Read-only mounts never modify the superblock below. */
	if (read_only)
		return res;
	/*
	 * Advisory warnings only — ordering matters, exactly one fires.
	 * EXT3_VALID_FS clear means the fs was not cleanly unmounted.
	 */
	if (!(sbi->s_mount_state & EXT3_VALID_FS))
		ext3_msg(sb, KERN_WARNING,
			"warning: mounting unchecked fs, "
			"running e2fsck is recommended");
	else if ((sbi->s_mount_state & EXT3_ERROR_FS))
		ext3_msg(sb, KERN_WARNING,
			"warning: mounting fs with errors, "
			"running e2fsck is recommended");
	else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 &&
		 le16_to_cpu(es->s_mnt_count) >=
			le16_to_cpu(es->s_max_mnt_count))
		ext3_msg(sb, KERN_WARNING,
			"warning: maximal mount count reached, "
			"running e2fsck is recommended");
	else if (le32_to_cpu(es->s_checkinterval) &&
		(le32_to_cpu(es->s_lastcheck) +
			le32_to_cpu(es->s_checkinterval) <= get_seconds()))
		ext3_msg(sb, KERN_WARNING,
			"warning: checktime reached, "
			"running e2fsck is recommended");
#if 0
		/* @@@ We _will_ want to clear the valid bit if we find
                   inconsistencies, to force a fsck at reboot.  But for
                   a plain journaled filesystem we can keep it set as
                   valid forever! :) */
	es->s_state &= cpu_to_le16(~EXT3_VALID_FS);
#endif
	/* 0 means "never checked" on disk; install the default limit. */
	if (!le16_to_cpu(es->s_max_mnt_count))
		es->s_max_mnt_count = cpu_to_le16(EXT3_DFL_MAX_MNT_COUNT);
	le16_add_cpu(&es->s_mnt_count, 1);
	es->s_mtime = cpu_to_le32(get_seconds());
	ext3_update_dynamic_rev(sb);
	/* Mark the journal as needing recovery until a clean unmount. */
	EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);

	ext3_commit_super(sb, es, 1);
	if (test_opt(sb, DEBUG))
		ext3_msg(sb, KERN_INFO, "[bs=%lu, gc=%lu, "
				"bpg=%lu, ipg=%lu, mo=%04lx]",
			sb->s_blocksize,
			sbi->s_groups_count,
			EXT3_BLOCKS_PER_GROUP(sb),
			EXT3_INODES_PER_GROUP(sb),
			sbi->s_mount_opt);

	/* A NULL j_inode means the journal lives on an external device. */
	if (EXT3_SB(sb)->s_journal->j_inode == NULL) {
		char b[BDEVNAME_SIZE];
		ext3_msg(sb, KERN_INFO, "using external journal on %s",
			bdevname(EXT3_SB(sb)->s_journal->j_dev, b));
	} else {
		ext3_msg(sb, KERN_INFO, "using internal journal");
	}
	cleancache_init_fs(sb);
	return res;
}
1385 1375
1386 /* Called at mount-time, super-block is locked */ 1376 /* Called at mount-time, super-block is locked */
1387 static int ext3_check_descriptors(struct super_block *sb) 1377 static int ext3_check_descriptors(struct super_block *sb)
1388 { 1378 {
1389 struct ext3_sb_info *sbi = EXT3_SB(sb); 1379 struct ext3_sb_info *sbi = EXT3_SB(sb);
1390 int i; 1380 int i;
1391 1381
1392 ext3_debug ("Checking group descriptors"); 1382 ext3_debug ("Checking group descriptors");
1393 1383
1394 for (i = 0; i < sbi->s_groups_count; i++) { 1384 for (i = 0; i < sbi->s_groups_count; i++) {
1395 struct ext3_group_desc *gdp = ext3_get_group_desc(sb, i, NULL); 1385 struct ext3_group_desc *gdp = ext3_get_group_desc(sb, i, NULL);
1396 ext3_fsblk_t first_block = ext3_group_first_block_no(sb, i); 1386 ext3_fsblk_t first_block = ext3_group_first_block_no(sb, i);
1397 ext3_fsblk_t last_block; 1387 ext3_fsblk_t last_block;
1398 1388
1399 if (i == sbi->s_groups_count - 1) 1389 if (i == sbi->s_groups_count - 1)
1400 last_block = le32_to_cpu(sbi->s_es->s_blocks_count) - 1; 1390 last_block = le32_to_cpu(sbi->s_es->s_blocks_count) - 1;
1401 else 1391 else
1402 last_block = first_block + 1392 last_block = first_block +
1403 (EXT3_BLOCKS_PER_GROUP(sb) - 1); 1393 (EXT3_BLOCKS_PER_GROUP(sb) - 1);
1404 1394
1405 if (le32_to_cpu(gdp->bg_block_bitmap) < first_block || 1395 if (le32_to_cpu(gdp->bg_block_bitmap) < first_block ||
1406 le32_to_cpu(gdp->bg_block_bitmap) > last_block) 1396 le32_to_cpu(gdp->bg_block_bitmap) > last_block)
1407 { 1397 {
1408 ext3_error (sb, "ext3_check_descriptors", 1398 ext3_error (sb, "ext3_check_descriptors",
1409 "Block bitmap for group %d" 1399 "Block bitmap for group %d"
1410 " not in group (block %lu)!", 1400 " not in group (block %lu)!",
1411 i, (unsigned long) 1401 i, (unsigned long)
1412 le32_to_cpu(gdp->bg_block_bitmap)); 1402 le32_to_cpu(gdp->bg_block_bitmap));
1413 return 0; 1403 return 0;
1414 } 1404 }
1415 if (le32_to_cpu(gdp->bg_inode_bitmap) < first_block || 1405 if (le32_to_cpu(gdp->bg_inode_bitmap) < first_block ||
1416 le32_to_cpu(gdp->bg_inode_bitmap) > last_block) 1406 le32_to_cpu(gdp->bg_inode_bitmap) > last_block)
1417 { 1407 {
1418 ext3_error (sb, "ext3_check_descriptors", 1408 ext3_error (sb, "ext3_check_descriptors",
1419 "Inode bitmap for group %d" 1409 "Inode bitmap for group %d"
1420 " not in group (block %lu)!", 1410 " not in group (block %lu)!",
1421 i, (unsigned long) 1411 i, (unsigned long)
1422 le32_to_cpu(gdp->bg_inode_bitmap)); 1412 le32_to_cpu(gdp->bg_inode_bitmap));
1423 return 0; 1413 return 0;
1424 } 1414 }
1425 if (le32_to_cpu(gdp->bg_inode_table) < first_block || 1415 if (le32_to_cpu(gdp->bg_inode_table) < first_block ||
1426 le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group - 1 > 1416 le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group - 1 >
1427 last_block) 1417 last_block)
1428 { 1418 {
1429 ext3_error (sb, "ext3_check_descriptors", 1419 ext3_error (sb, "ext3_check_descriptors",
1430 "Inode table for group %d" 1420 "Inode table for group %d"
1431 " not in group (block %lu)!", 1421 " not in group (block %lu)!",
1432 i, (unsigned long) 1422 i, (unsigned long)
1433 le32_to_cpu(gdp->bg_inode_table)); 1423 le32_to_cpu(gdp->bg_inode_table));
1434 return 0; 1424 return 0;
1435 } 1425 }
1436 } 1426 }
1437 1427
1438 sbi->s_es->s_free_blocks_count=cpu_to_le32(ext3_count_free_blocks(sb)); 1428 sbi->s_es->s_free_blocks_count=cpu_to_le32(ext3_count_free_blocks(sb));
1439 sbi->s_es->s_free_inodes_count=cpu_to_le32(ext3_count_free_inodes(sb)); 1429 sbi->s_es->s_free_inodes_count=cpu_to_le32(ext3_count_free_inodes(sb));
1440 return 1; 1430 return 1;
1441 } 1431 }
1442 1432
1443 1433
/* ext3_orphan_cleanup() walks a singly-linked list of inodes (starting at
 * the superblock) which were deleted from all directories, but held open by
 * a process at the time of a crash.  We walk the list and try to delete these
 * inodes at recovery time (only with a read-write filesystem).
 *
 * In order to keep the orphan inode chain consistent during traversal (in
 * case of crash during recovery), we link each inode into the superblock
 * orphan list_head and handle it the same way as an inode deletion during
 * normal operation (which journals the operations for us).
 *
 * We only do an iget() and an iput() on each inode, which is very safe if we
 * accidentally point at an in-use or already deleted inode.  The worst that
 * can happen in this case is that we get a "bit already cleared" message from
 * ext3_free_inode().  The only reason we would point at a wrong inode is if
 * e2fsck was run on this filesystem, and it must have already done the orphan
 * inode cleanup for us, so we can safely abort without any further action.
 */
static void ext3_orphan_cleanup (struct super_block * sb,
				 struct ext3_super_block * es)
{
	/* Saved so the original MS_RDONLY state can be restored on exit. */
	unsigned int s_flags = sb->s_flags;
	int nr_orphans = 0, nr_truncates = 0;
#ifdef CONFIG_QUOTA
	int i;
#endif
	/* s_last_orphan == 0 means the on-disk orphan chain is empty. */
	if (!es->s_last_orphan) {
		jbd_debug(4, "no orphan inodes to clean up\n");
		return;
	}

	/* Cleanup deletes/truncates inodes; a read-only device cannot. */
	if (bdev_read_only(sb->s_bdev)) {
		ext3_msg(sb, KERN_ERR, "error: write access "
			"unavailable, skipping orphan cleanup.");
		return;
	}

	/* Check if feature set allows readwrite operations */
	if (EXT3_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP)) {
		ext3_msg(sb, KERN_INFO, "Skipping orphan cleanup due to "
			 "unknown ROCOMPAT features");
		return;
	}

	/* On an fs already marked with errors, drop the chain and let
	 * e2fsck deal with it rather than risk making things worse. */
	if (EXT3_SB(sb)->s_mount_state & EXT3_ERROR_FS) {
		if (es->s_last_orphan)
			jbd_debug(1, "Errors on filesystem, "
				  "clearing orphan list.\n");
		es->s_last_orphan = 0;
		jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
		return;
	}

	/* Temporarily go read-write; s_flags restores this at the end. */
	if (s_flags & MS_RDONLY) {
		ext3_msg(sb, KERN_INFO, "orphan cleanup on readonly fs");
		sb->s_flags &= ~MS_RDONLY;
	}
#ifdef CONFIG_QUOTA
	/* Needed for iput() to work correctly and not trash data */
	sb->s_flags |= MS_ACTIVE;
	/* Turn on quotas so that they are updated correctly */
	for (i = 0; i < MAXQUOTAS; i++) {
		if (EXT3_SB(sb)->s_qf_names[i]) {
			int ret = ext3_quota_on_mount(sb, i);
			if (ret < 0)
				ext3_msg(sb, KERN_ERR,
					"error: cannot turn on journaled "
					"quota: %d", ret);
		}
	}
#endif

	/* Each iteration consumes the head of the on-disk orphan chain;
	 * iput() below advances es->s_last_orphan as a side effect. */
	while (es->s_last_orphan) {
		struct inode *inode;

		inode = ext3_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
		if (IS_ERR(inode)) {
			/* Bad chain entry — assume e2fsck already ran. */
			es->s_last_orphan = 0;
			break;
		}

		list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan);
		dquot_initialize(inode);
		if (inode->i_nlink) {
			/* Still linked: crash interrupted a truncate. */
			printk(KERN_DEBUG
				"%s: truncating inode %lu to %Ld bytes\n",
				__func__, inode->i_ino, inode->i_size);
			jbd_debug(2, "truncating inode %lu to %Ld bytes\n",
				  inode->i_ino, inode->i_size);
			ext3_truncate(inode);
			nr_truncates++;
		} else {
			/* Unlinked but was held open: delete it now. */
			printk(KERN_DEBUG
				"%s: deleting unreferenced inode %lu\n",
				__func__, inode->i_ino);
			jbd_debug(2, "deleting unreferenced inode %lu\n",
				  inode->i_ino);
			nr_orphans++;
		}
		iput(inode);  /* The delete magic happens here! */
	}

#define PLURAL(x) (x), ((x)==1) ? "" : "s"

	if (nr_orphans)
		ext3_msg(sb, KERN_INFO, "%d orphan inode%s deleted",
		       PLURAL(nr_orphans));
	if (nr_truncates)
		ext3_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
		       PLURAL(nr_truncates));
#ifdef CONFIG_QUOTA
	/* Turn quotas off */
	for (i = 0; i < MAXQUOTAS; i++) {
		if (sb_dqopt(sb)->files[i])
			dquot_quota_off(sb, i);
	}
#endif
	sb->s_flags = s_flags; /* Restore MS_RDONLY status */
}
1562 1552
1563 /* 1553 /*
1564 * Maximal file size. There is a direct, and {,double-,triple-}indirect 1554 * Maximal file size. There is a direct, and {,double-,triple-}indirect
1565 * block limit, and also a limit of (2^32 - 1) 512-byte sectors in i_blocks. 1555 * block limit, and also a limit of (2^32 - 1) 512-byte sectors in i_blocks.
1566 * We need to be 1 filesystem block less than the 2^32 sector limit. 1556 * We need to be 1 filesystem block less than the 2^32 sector limit.
1567 */ 1557 */
1568 static loff_t ext3_max_size(int bits) 1558 static loff_t ext3_max_size(int bits)
1569 { 1559 {
1570 loff_t res = EXT3_NDIR_BLOCKS; 1560 loff_t res = EXT3_NDIR_BLOCKS;
1571 int meta_blocks; 1561 int meta_blocks;
1572 loff_t upper_limit; 1562 loff_t upper_limit;
1573 1563
1574 /* This is calculated to be the largest file size for a 1564 /* This is calculated to be the largest file size for a
1575 * dense, file such that the total number of 1565 * dense, file such that the total number of
1576 * sectors in the file, including data and all indirect blocks, 1566 * sectors in the file, including data and all indirect blocks,
1577 * does not exceed 2^32 -1 1567 * does not exceed 2^32 -1
1578 * __u32 i_blocks representing the total number of 1568 * __u32 i_blocks representing the total number of
1579 * 512 bytes blocks of the file 1569 * 512 bytes blocks of the file
1580 */ 1570 */
1581 upper_limit = (1LL << 32) - 1; 1571 upper_limit = (1LL << 32) - 1;
1582 1572
1583 /* total blocks in file system block size */ 1573 /* total blocks in file system block size */
1584 upper_limit >>= (bits - 9); 1574 upper_limit >>= (bits - 9);
1585 1575
1586 1576
1587 /* indirect blocks */ 1577 /* indirect blocks */
1588 meta_blocks = 1; 1578 meta_blocks = 1;
1589 /* double indirect blocks */ 1579 /* double indirect blocks */
1590 meta_blocks += 1 + (1LL << (bits-2)); 1580 meta_blocks += 1 + (1LL << (bits-2));
1591 /* tripple indirect blocks */ 1581 /* tripple indirect blocks */
1592 meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2))); 1582 meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));
1593 1583
1594 upper_limit -= meta_blocks; 1584 upper_limit -= meta_blocks;
1595 upper_limit <<= bits; 1585 upper_limit <<= bits;
1596 1586
1597 res += 1LL << (bits-2); 1587 res += 1LL << (bits-2);
1598 res += 1LL << (2*(bits-2)); 1588 res += 1LL << (2*(bits-2));
1599 res += 1LL << (3*(bits-2)); 1589 res += 1LL << (3*(bits-2));
1600 res <<= bits; 1590 res <<= bits;
1601 if (res > upper_limit) 1591 if (res > upper_limit)
1602 res = upper_limit; 1592 res = upper_limit;
1603 1593
1604 if (res > MAX_LFS_FILESIZE) 1594 if (res > MAX_LFS_FILESIZE)
1605 res = MAX_LFS_FILESIZE; 1595 res = MAX_LFS_FILESIZE;
1606 1596
1607 return res; 1597 return res;
1608 } 1598 }
1609 1599
1610 static ext3_fsblk_t descriptor_loc(struct super_block *sb, 1600 static ext3_fsblk_t descriptor_loc(struct super_block *sb,
1611 ext3_fsblk_t logic_sb_block, 1601 ext3_fsblk_t logic_sb_block,
1612 int nr) 1602 int nr)
1613 { 1603 {
1614 struct ext3_sb_info *sbi = EXT3_SB(sb); 1604 struct ext3_sb_info *sbi = EXT3_SB(sb);
1615 unsigned long bg, first_meta_bg; 1605 unsigned long bg, first_meta_bg;
1616 int has_super = 0; 1606 int has_super = 0;
1617 1607
1618 first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); 1608 first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
1619 1609
1620 if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_META_BG) || 1610 if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_META_BG) ||
1621 nr < first_meta_bg) 1611 nr < first_meta_bg)
1622 return (logic_sb_block + nr + 1); 1612 return (logic_sb_block + nr + 1);
1623 bg = sbi->s_desc_per_block * nr; 1613 bg = sbi->s_desc_per_block * nr;
1624 if (ext3_bg_has_super(sb, bg)) 1614 if (ext3_bg_has_super(sb, bg))
1625 has_super = 1; 1615 has_super = 1;
1626 return (has_super + ext3_group_first_block_no(sb, bg)); 1616 return (has_super + ext3_group_first_block_no(sb, bg));
1627 } 1617 }
1628 1618
1629 1619
1630 static int ext3_fill_super (struct super_block *sb, void *data, int silent) 1620 static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1631 { 1621 {
1632 struct buffer_head * bh; 1622 struct buffer_head * bh;
1633 struct ext3_super_block *es = NULL; 1623 struct ext3_super_block *es = NULL;
1634 struct ext3_sb_info *sbi; 1624 struct ext3_sb_info *sbi;
1635 ext3_fsblk_t block; 1625 ext3_fsblk_t block;
1636 ext3_fsblk_t sb_block = get_sb_block(&data, sb); 1626 ext3_fsblk_t sb_block = get_sb_block(&data, sb);
1637 ext3_fsblk_t logic_sb_block; 1627 ext3_fsblk_t logic_sb_block;
1638 unsigned long offset = 0; 1628 unsigned long offset = 0;
1639 unsigned int journal_inum = 0; 1629 unsigned int journal_inum = 0;
1640 unsigned long journal_devnum = 0; 1630 unsigned long journal_devnum = 0;
1641 unsigned long def_mount_opts; 1631 unsigned long def_mount_opts;
1642 struct inode *root; 1632 struct inode *root;
1643 int blocksize; 1633 int blocksize;
1644 int hblock; 1634 int hblock;
1645 int db_count; 1635 int db_count;
1646 int i; 1636 int i;
1647 int needs_recovery; 1637 int needs_recovery;
1648 int ret = -EINVAL; 1638 int ret = -EINVAL;
1649 __le32 features; 1639 __le32 features;
1650 int err; 1640 int err;
1651 1641
1652 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 1642 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
1653 if (!sbi) 1643 if (!sbi)
1654 return -ENOMEM; 1644 return -ENOMEM;
1655 1645
1656 sbi->s_blockgroup_lock = 1646 sbi->s_blockgroup_lock =
1657 kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL); 1647 kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
1658 if (!sbi->s_blockgroup_lock) { 1648 if (!sbi->s_blockgroup_lock) {
1659 kfree(sbi); 1649 kfree(sbi);
1660 return -ENOMEM; 1650 return -ENOMEM;
1661 } 1651 }
1662 sb->s_fs_info = sbi; 1652 sb->s_fs_info = sbi;
1663 sbi->s_mount_opt = 0; 1653 sbi->s_mount_opt = 0;
1664 sbi->s_resuid = EXT3_DEF_RESUID; 1654 sbi->s_resuid = EXT3_DEF_RESUID;
1665 sbi->s_resgid = EXT3_DEF_RESGID; 1655 sbi->s_resgid = EXT3_DEF_RESGID;
1666 sbi->s_sb_block = sb_block; 1656 sbi->s_sb_block = sb_block;
1667 1657
1668 blocksize = sb_min_blocksize(sb, EXT3_MIN_BLOCK_SIZE); 1658 blocksize = sb_min_blocksize(sb, EXT3_MIN_BLOCK_SIZE);
1669 if (!blocksize) { 1659 if (!blocksize) {
1670 ext3_msg(sb, KERN_ERR, "error: unable to set blocksize"); 1660 ext3_msg(sb, KERN_ERR, "error: unable to set blocksize");
1671 goto out_fail; 1661 goto out_fail;
1672 } 1662 }
1673 1663
1674 /* 1664 /*
1675 * The ext3 superblock will not be buffer aligned for other than 1kB 1665 * The ext3 superblock will not be buffer aligned for other than 1kB
1676 * block sizes. We need to calculate the offset from buffer start. 1666 * block sizes. We need to calculate the offset from buffer start.
1677 */ 1667 */
1678 if (blocksize != EXT3_MIN_BLOCK_SIZE) { 1668 if (blocksize != EXT3_MIN_BLOCK_SIZE) {
1679 logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize; 1669 logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize;
1680 offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize; 1670 offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize;
1681 } else { 1671 } else {
1682 logic_sb_block = sb_block; 1672 logic_sb_block = sb_block;
1683 } 1673 }
1684 1674
1685 if (!(bh = sb_bread(sb, logic_sb_block))) { 1675 if (!(bh = sb_bread(sb, logic_sb_block))) {
1686 ext3_msg(sb, KERN_ERR, "error: unable to read superblock"); 1676 ext3_msg(sb, KERN_ERR, "error: unable to read superblock");
1687 goto out_fail; 1677 goto out_fail;
1688 } 1678 }
1689 /* 1679 /*
1690 * Note: s_es must be initialized as soon as possible because 1680 * Note: s_es must be initialized as soon as possible because
1691 * some ext3 macro-instructions depend on its value 1681 * some ext3 macro-instructions depend on its value
1692 */ 1682 */
1693 es = (struct ext3_super_block *) (bh->b_data + offset); 1683 es = (struct ext3_super_block *) (bh->b_data + offset);
1694 sbi->s_es = es; 1684 sbi->s_es = es;
1695 sb->s_magic = le16_to_cpu(es->s_magic); 1685 sb->s_magic = le16_to_cpu(es->s_magic);
1696 if (sb->s_magic != EXT3_SUPER_MAGIC) 1686 if (sb->s_magic != EXT3_SUPER_MAGIC)
1697 goto cantfind_ext3; 1687 goto cantfind_ext3;
1698 1688
1699 /* Set defaults before we parse the mount options */ 1689 /* Set defaults before we parse the mount options */
1700 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 1690 def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
1701 if (def_mount_opts & EXT3_DEFM_DEBUG) 1691 if (def_mount_opts & EXT3_DEFM_DEBUG)
1702 set_opt(sbi->s_mount_opt, DEBUG); 1692 set_opt(sbi->s_mount_opt, DEBUG);
1703 if (def_mount_opts & EXT3_DEFM_BSDGROUPS) 1693 if (def_mount_opts & EXT3_DEFM_BSDGROUPS)
1704 set_opt(sbi->s_mount_opt, GRPID); 1694 set_opt(sbi->s_mount_opt, GRPID);
1705 if (def_mount_opts & EXT3_DEFM_UID16) 1695 if (def_mount_opts & EXT3_DEFM_UID16)
1706 set_opt(sbi->s_mount_opt, NO_UID32); 1696 set_opt(sbi->s_mount_opt, NO_UID32);
1707 #ifdef CONFIG_EXT3_FS_XATTR 1697 #ifdef CONFIG_EXT3_FS_XATTR
1708 if (def_mount_opts & EXT3_DEFM_XATTR_USER) 1698 if (def_mount_opts & EXT3_DEFM_XATTR_USER)
1709 set_opt(sbi->s_mount_opt, XATTR_USER); 1699 set_opt(sbi->s_mount_opt, XATTR_USER);
1710 #endif 1700 #endif
1711 #ifdef CONFIG_EXT3_FS_POSIX_ACL 1701 #ifdef CONFIG_EXT3_FS_POSIX_ACL
1712 if (def_mount_opts & EXT3_DEFM_ACL) 1702 if (def_mount_opts & EXT3_DEFM_ACL)
1713 set_opt(sbi->s_mount_opt, POSIX_ACL); 1703 set_opt(sbi->s_mount_opt, POSIX_ACL);
1714 #endif 1704 #endif
1715 if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_DATA) 1705 if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_DATA)
1716 set_opt(sbi->s_mount_opt, JOURNAL_DATA); 1706 set_opt(sbi->s_mount_opt, JOURNAL_DATA);
1717 else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_ORDERED) 1707 else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_ORDERED)
1718 set_opt(sbi->s_mount_opt, ORDERED_DATA); 1708 set_opt(sbi->s_mount_opt, ORDERED_DATA);
1719 else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_WBACK) 1709 else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_WBACK)
1720 set_opt(sbi->s_mount_opt, WRITEBACK_DATA); 1710 set_opt(sbi->s_mount_opt, WRITEBACK_DATA);
1721 1711
1722 if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_PANIC) 1712 if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_PANIC)
1723 set_opt(sbi->s_mount_opt, ERRORS_PANIC); 1713 set_opt(sbi->s_mount_opt, ERRORS_PANIC);
1724 else if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_CONTINUE) 1714 else if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_CONTINUE)
1725 set_opt(sbi->s_mount_opt, ERRORS_CONT); 1715 set_opt(sbi->s_mount_opt, ERRORS_CONT);
1726 else 1716 else
1727 set_opt(sbi->s_mount_opt, ERRORS_RO); 1717 set_opt(sbi->s_mount_opt, ERRORS_RO);
1728 1718
1729 sbi->s_resuid = le16_to_cpu(es->s_def_resuid); 1719 sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
1730 sbi->s_resgid = le16_to_cpu(es->s_def_resgid); 1720 sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
1731 1721
1732 /* enable barriers by default */ 1722 /* enable barriers by default */
1733 set_opt(sbi->s_mount_opt, BARRIER); 1723 set_opt(sbi->s_mount_opt, BARRIER);
1734 set_opt(sbi->s_mount_opt, RESERVATION); 1724 set_opt(sbi->s_mount_opt, RESERVATION);
1735 1725
1736 if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum, 1726 if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum,
1737 NULL, 0)) 1727 NULL, 0))
1738 goto failed_mount; 1728 goto failed_mount;
1739 1729
1740 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 1730 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
1741 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); 1731 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
1742 1732
1743 if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV && 1733 if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV &&
1744 (EXT3_HAS_COMPAT_FEATURE(sb, ~0U) || 1734 (EXT3_HAS_COMPAT_FEATURE(sb, ~0U) ||
1745 EXT3_HAS_RO_COMPAT_FEATURE(sb, ~0U) || 1735 EXT3_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
1746 EXT3_HAS_INCOMPAT_FEATURE(sb, ~0U))) 1736 EXT3_HAS_INCOMPAT_FEATURE(sb, ~0U)))
1747 ext3_msg(sb, KERN_WARNING, 1737 ext3_msg(sb, KERN_WARNING,
1748 "warning: feature flags set on rev 0 fs, " 1738 "warning: feature flags set on rev 0 fs, "
1749 "running e2fsck is recommended"); 1739 "running e2fsck is recommended");
1750 /* 1740 /*
1751 * Check feature flags regardless of the revision level, since we 1741 * Check feature flags regardless of the revision level, since we
1752 * previously didn't change the revision level when setting the flags, 1742 * previously didn't change the revision level when setting the flags,
1753 * so there is a chance incompat flags are set on a rev 0 filesystem. 1743 * so there is a chance incompat flags are set on a rev 0 filesystem.
1754 */ 1744 */
1755 features = EXT3_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP); 1745 features = EXT3_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP);
1756 if (features) { 1746 if (features) {
1757 ext3_msg(sb, KERN_ERR, 1747 ext3_msg(sb, KERN_ERR,
1758 "error: couldn't mount because of unsupported " 1748 "error: couldn't mount because of unsupported "
1759 "optional features (%x)", le32_to_cpu(features)); 1749 "optional features (%x)", le32_to_cpu(features));
1760 goto failed_mount; 1750 goto failed_mount;
1761 } 1751 }
1762 features = EXT3_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP); 1752 features = EXT3_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP);
1763 if (!(sb->s_flags & MS_RDONLY) && features) { 1753 if (!(sb->s_flags & MS_RDONLY) && features) {
1764 ext3_msg(sb, KERN_ERR, 1754 ext3_msg(sb, KERN_ERR,
1765 "error: couldn't mount RDWR because of unsupported " 1755 "error: couldn't mount RDWR because of unsupported "
1766 "optional features (%x)", le32_to_cpu(features)); 1756 "optional features (%x)", le32_to_cpu(features));
1767 goto failed_mount; 1757 goto failed_mount;
1768 } 1758 }
1769 blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); 1759 blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
1770 1760
1771 if (blocksize < EXT3_MIN_BLOCK_SIZE || 1761 if (blocksize < EXT3_MIN_BLOCK_SIZE ||
1772 blocksize > EXT3_MAX_BLOCK_SIZE) { 1762 blocksize > EXT3_MAX_BLOCK_SIZE) {
1773 ext3_msg(sb, KERN_ERR, 1763 ext3_msg(sb, KERN_ERR,
1774 "error: couldn't mount because of unsupported " 1764 "error: couldn't mount because of unsupported "
1775 "filesystem blocksize %d", blocksize); 1765 "filesystem blocksize %d", blocksize);
1776 goto failed_mount; 1766 goto failed_mount;
1777 } 1767 }
1778 1768
1779 hblock = bdev_logical_block_size(sb->s_bdev); 1769 hblock = bdev_logical_block_size(sb->s_bdev);
1780 if (sb->s_blocksize != blocksize) { 1770 if (sb->s_blocksize != blocksize) {
1781 /* 1771 /*
1782 * Make sure the blocksize for the filesystem is larger 1772 * Make sure the blocksize for the filesystem is larger
1783 * than the hardware sectorsize for the machine. 1773 * than the hardware sectorsize for the machine.
1784 */ 1774 */
1785 if (blocksize < hblock) { 1775 if (blocksize < hblock) {
1786 ext3_msg(sb, KERN_ERR, 1776 ext3_msg(sb, KERN_ERR,
1787 "error: fsblocksize %d too small for " 1777 "error: fsblocksize %d too small for "
1788 "hardware sectorsize %d", blocksize, hblock); 1778 "hardware sectorsize %d", blocksize, hblock);
1789 goto failed_mount; 1779 goto failed_mount;
1790 } 1780 }
1791 1781
1792 brelse (bh); 1782 brelse (bh);
1793 if (!sb_set_blocksize(sb, blocksize)) { 1783 if (!sb_set_blocksize(sb, blocksize)) {
1794 ext3_msg(sb, KERN_ERR, 1784 ext3_msg(sb, KERN_ERR,
1795 "error: bad blocksize %d", blocksize); 1785 "error: bad blocksize %d", blocksize);
1796 goto out_fail; 1786 goto out_fail;
1797 } 1787 }
1798 logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize; 1788 logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize;
1799 offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize; 1789 offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize;
1800 bh = sb_bread(sb, logic_sb_block); 1790 bh = sb_bread(sb, logic_sb_block);
1801 if (!bh) { 1791 if (!bh) {
1802 ext3_msg(sb, KERN_ERR, 1792 ext3_msg(sb, KERN_ERR,
1803 "error: can't read superblock on 2nd try"); 1793 "error: can't read superblock on 2nd try");
1804 goto failed_mount; 1794 goto failed_mount;
1805 } 1795 }
1806 es = (struct ext3_super_block *)(bh->b_data + offset); 1796 es = (struct ext3_super_block *)(bh->b_data + offset);
1807 sbi->s_es = es; 1797 sbi->s_es = es;
1808 if (es->s_magic != cpu_to_le16(EXT3_SUPER_MAGIC)) { 1798 if (es->s_magic != cpu_to_le16(EXT3_SUPER_MAGIC)) {
1809 ext3_msg(sb, KERN_ERR, 1799 ext3_msg(sb, KERN_ERR,
1810 "error: magic mismatch"); 1800 "error: magic mismatch");
1811 goto failed_mount; 1801 goto failed_mount;
1812 } 1802 }
1813 } 1803 }
1814 1804
1815 sb->s_maxbytes = ext3_max_size(sb->s_blocksize_bits); 1805 sb->s_maxbytes = ext3_max_size(sb->s_blocksize_bits);
1816 1806
1817 if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV) { 1807 if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV) {
1818 sbi->s_inode_size = EXT3_GOOD_OLD_INODE_SIZE; 1808 sbi->s_inode_size = EXT3_GOOD_OLD_INODE_SIZE;
1819 sbi->s_first_ino = EXT3_GOOD_OLD_FIRST_INO; 1809 sbi->s_first_ino = EXT3_GOOD_OLD_FIRST_INO;
1820 } else { 1810 } else {
1821 sbi->s_inode_size = le16_to_cpu(es->s_inode_size); 1811 sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
1822 sbi->s_first_ino = le32_to_cpu(es->s_first_ino); 1812 sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
1823 if ((sbi->s_inode_size < EXT3_GOOD_OLD_INODE_SIZE) || 1813 if ((sbi->s_inode_size < EXT3_GOOD_OLD_INODE_SIZE) ||
1824 (!is_power_of_2(sbi->s_inode_size)) || 1814 (!is_power_of_2(sbi->s_inode_size)) ||
1825 (sbi->s_inode_size > blocksize)) { 1815 (sbi->s_inode_size > blocksize)) {
1826 ext3_msg(sb, KERN_ERR, 1816 ext3_msg(sb, KERN_ERR,
1827 "error: unsupported inode size: %d", 1817 "error: unsupported inode size: %d",
1828 sbi->s_inode_size); 1818 sbi->s_inode_size);
1829 goto failed_mount; 1819 goto failed_mount;
1830 } 1820 }
1831 } 1821 }
1832 sbi->s_frag_size = EXT3_MIN_FRAG_SIZE << 1822 sbi->s_frag_size = EXT3_MIN_FRAG_SIZE <<
1833 le32_to_cpu(es->s_log_frag_size); 1823 le32_to_cpu(es->s_log_frag_size);
1834 if (blocksize != sbi->s_frag_size) { 1824 if (blocksize != sbi->s_frag_size) {
1835 ext3_msg(sb, KERN_ERR, 1825 ext3_msg(sb, KERN_ERR,
1836 "error: fragsize %lu != blocksize %u (unsupported)", 1826 "error: fragsize %lu != blocksize %u (unsupported)",
1837 sbi->s_frag_size, blocksize); 1827 sbi->s_frag_size, blocksize);
1838 goto failed_mount; 1828 goto failed_mount;
1839 } 1829 }
1840 sbi->s_frags_per_block = 1; 1830 sbi->s_frags_per_block = 1;
1841 sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); 1831 sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
1842 sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group); 1832 sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group);
1843 sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); 1833 sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
1844 if (EXT3_INODE_SIZE(sb) == 0 || EXT3_INODES_PER_GROUP(sb) == 0) 1834 if (EXT3_INODE_SIZE(sb) == 0 || EXT3_INODES_PER_GROUP(sb) == 0)
1845 goto cantfind_ext3; 1835 goto cantfind_ext3;
1846 sbi->s_inodes_per_block = blocksize / EXT3_INODE_SIZE(sb); 1836 sbi->s_inodes_per_block = blocksize / EXT3_INODE_SIZE(sb);
1847 if (sbi->s_inodes_per_block == 0) 1837 if (sbi->s_inodes_per_block == 0)
1848 goto cantfind_ext3; 1838 goto cantfind_ext3;
1849 sbi->s_itb_per_group = sbi->s_inodes_per_group / 1839 sbi->s_itb_per_group = sbi->s_inodes_per_group /
1850 sbi->s_inodes_per_block; 1840 sbi->s_inodes_per_block;
1851 sbi->s_desc_per_block = blocksize / sizeof(struct ext3_group_desc); 1841 sbi->s_desc_per_block = blocksize / sizeof(struct ext3_group_desc);
1852 sbi->s_sbh = bh; 1842 sbi->s_sbh = bh;
1853 sbi->s_mount_state = le16_to_cpu(es->s_state); 1843 sbi->s_mount_state = le16_to_cpu(es->s_state);
1854 sbi->s_addr_per_block_bits = ilog2(EXT3_ADDR_PER_BLOCK(sb)); 1844 sbi->s_addr_per_block_bits = ilog2(EXT3_ADDR_PER_BLOCK(sb));
1855 sbi->s_desc_per_block_bits = ilog2(EXT3_DESC_PER_BLOCK(sb)); 1845 sbi->s_desc_per_block_bits = ilog2(EXT3_DESC_PER_BLOCK(sb));
1856 for (i=0; i < 4; i++) 1846 for (i=0; i < 4; i++)
1857 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); 1847 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
1858 sbi->s_def_hash_version = es->s_def_hash_version; 1848 sbi->s_def_hash_version = es->s_def_hash_version;
1859 i = le32_to_cpu(es->s_flags); 1849 i = le32_to_cpu(es->s_flags);
1860 if (i & EXT2_FLAGS_UNSIGNED_HASH) 1850 if (i & EXT2_FLAGS_UNSIGNED_HASH)
1861 sbi->s_hash_unsigned = 3; 1851 sbi->s_hash_unsigned = 3;
1862 else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) { 1852 else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
1863 #ifdef __CHAR_UNSIGNED__ 1853 #ifdef __CHAR_UNSIGNED__
1864 es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH); 1854 es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
1865 sbi->s_hash_unsigned = 3; 1855 sbi->s_hash_unsigned = 3;
1866 #else 1856 #else
1867 es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH); 1857 es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
1868 #endif 1858 #endif
1869 } 1859 }
1870 1860
1871 if (sbi->s_blocks_per_group > blocksize * 8) { 1861 if (sbi->s_blocks_per_group > blocksize * 8) {
1872 ext3_msg(sb, KERN_ERR, 1862 ext3_msg(sb, KERN_ERR,
1873 "#blocks per group too big: %lu", 1863 "#blocks per group too big: %lu",
1874 sbi->s_blocks_per_group); 1864 sbi->s_blocks_per_group);
1875 goto failed_mount; 1865 goto failed_mount;
1876 } 1866 }
1877 if (sbi->s_frags_per_group > blocksize * 8) { 1867 if (sbi->s_frags_per_group > blocksize * 8) {
1878 ext3_msg(sb, KERN_ERR, 1868 ext3_msg(sb, KERN_ERR,
1879 "error: #fragments per group too big: %lu", 1869 "error: #fragments per group too big: %lu",
1880 sbi->s_frags_per_group); 1870 sbi->s_frags_per_group);
1881 goto failed_mount; 1871 goto failed_mount;
1882 } 1872 }
1883 if (sbi->s_inodes_per_group > blocksize * 8) { 1873 if (sbi->s_inodes_per_group > blocksize * 8) {
1884 ext3_msg(sb, KERN_ERR, 1874 ext3_msg(sb, KERN_ERR,
1885 "error: #inodes per group too big: %lu", 1875 "error: #inodes per group too big: %lu",
1886 sbi->s_inodes_per_group); 1876 sbi->s_inodes_per_group);
1887 goto failed_mount; 1877 goto failed_mount;
1888 } 1878 }
1889 1879
1890 err = generic_check_addressable(sb->s_blocksize_bits, 1880 err = generic_check_addressable(sb->s_blocksize_bits,
1891 le32_to_cpu(es->s_blocks_count)); 1881 le32_to_cpu(es->s_blocks_count));
1892 if (err) { 1882 if (err) {
1893 ext3_msg(sb, KERN_ERR, 1883 ext3_msg(sb, KERN_ERR,
1894 "error: filesystem is too large to mount safely"); 1884 "error: filesystem is too large to mount safely");
1895 if (sizeof(sector_t) < 8) 1885 if (sizeof(sector_t) < 8)
1896 ext3_msg(sb, KERN_ERR, 1886 ext3_msg(sb, KERN_ERR,
1897 "error: CONFIG_LBDAF not enabled"); 1887 "error: CONFIG_LBDAF not enabled");
1898 ret = err; 1888 ret = err;
1899 goto failed_mount; 1889 goto failed_mount;
1900 } 1890 }
1901 1891
1902 if (EXT3_BLOCKS_PER_GROUP(sb) == 0) 1892 if (EXT3_BLOCKS_PER_GROUP(sb) == 0)
1903 goto cantfind_ext3; 1893 goto cantfind_ext3;
1904 sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) - 1894 sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) -
1905 le32_to_cpu(es->s_first_data_block) - 1) 1895 le32_to_cpu(es->s_first_data_block) - 1)
1906 / EXT3_BLOCKS_PER_GROUP(sb)) + 1; 1896 / EXT3_BLOCKS_PER_GROUP(sb)) + 1;
1907 db_count = DIV_ROUND_UP(sbi->s_groups_count, EXT3_DESC_PER_BLOCK(sb)); 1897 db_count = DIV_ROUND_UP(sbi->s_groups_count, EXT3_DESC_PER_BLOCK(sb));
1908 sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *), 1898 sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *),
1909 GFP_KERNEL); 1899 GFP_KERNEL);
1910 if (sbi->s_group_desc == NULL) { 1900 if (sbi->s_group_desc == NULL) {
1911 ext3_msg(sb, KERN_ERR, 1901 ext3_msg(sb, KERN_ERR,
1912 "error: not enough memory"); 1902 "error: not enough memory");
1913 ret = -ENOMEM; 1903 ret = -ENOMEM;
1914 goto failed_mount; 1904 goto failed_mount;
1915 } 1905 }
1916 1906
1917 bgl_lock_init(sbi->s_blockgroup_lock); 1907 bgl_lock_init(sbi->s_blockgroup_lock);
1918 1908
1919 for (i = 0; i < db_count; i++) { 1909 for (i = 0; i < db_count; i++) {
1920 block = descriptor_loc(sb, logic_sb_block, i); 1910 block = descriptor_loc(sb, logic_sb_block, i);
1921 sbi->s_group_desc[i] = sb_bread(sb, block); 1911 sbi->s_group_desc[i] = sb_bread(sb, block);
1922 if (!sbi->s_group_desc[i]) { 1912 if (!sbi->s_group_desc[i]) {
1923 ext3_msg(sb, KERN_ERR, 1913 ext3_msg(sb, KERN_ERR,
1924 "error: can't read group descriptor %d", i); 1914 "error: can't read group descriptor %d", i);
1925 db_count = i; 1915 db_count = i;
1926 goto failed_mount2; 1916 goto failed_mount2;
1927 } 1917 }
1928 } 1918 }
1929 if (!ext3_check_descriptors (sb)) { 1919 if (!ext3_check_descriptors (sb)) {
1930 ext3_msg(sb, KERN_ERR, 1920 ext3_msg(sb, KERN_ERR,
1931 "error: group descriptors corrupted"); 1921 "error: group descriptors corrupted");
1932 goto failed_mount2; 1922 goto failed_mount2;
1933 } 1923 }
1934 sbi->s_gdb_count = db_count; 1924 sbi->s_gdb_count = db_count;
1935 get_random_bytes(&sbi->s_next_generation, sizeof(u32)); 1925 get_random_bytes(&sbi->s_next_generation, sizeof(u32));
1936 spin_lock_init(&sbi->s_next_gen_lock); 1926 spin_lock_init(&sbi->s_next_gen_lock);
1937 1927
1938 /* per fileystem reservation list head & lock */ 1928 /* per fileystem reservation list head & lock */
1939 spin_lock_init(&sbi->s_rsv_window_lock); 1929 spin_lock_init(&sbi->s_rsv_window_lock);
1940 sbi->s_rsv_window_root = RB_ROOT; 1930 sbi->s_rsv_window_root = RB_ROOT;
1941 /* Add a single, static dummy reservation to the start of the 1931 /* Add a single, static dummy reservation to the start of the
1942 * reservation window list --- it gives us a placeholder for 1932 * reservation window list --- it gives us a placeholder for
1943 * append-at-start-of-list which makes the allocation logic 1933 * append-at-start-of-list which makes the allocation logic
1944 * _much_ simpler. */ 1934 * _much_ simpler. */
1945 sbi->s_rsv_window_head.rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; 1935 sbi->s_rsv_window_head.rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
1946 sbi->s_rsv_window_head.rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; 1936 sbi->s_rsv_window_head.rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
1947 sbi->s_rsv_window_head.rsv_alloc_hit = 0; 1937 sbi->s_rsv_window_head.rsv_alloc_hit = 0;
1948 sbi->s_rsv_window_head.rsv_goal_size = 0; 1938 sbi->s_rsv_window_head.rsv_goal_size = 0;
1949 ext3_rsv_window_add(sb, &sbi->s_rsv_window_head); 1939 ext3_rsv_window_add(sb, &sbi->s_rsv_window_head);
1950 1940
1951 /* 1941 /*
1952 * set up enough so that it can read an inode 1942 * set up enough so that it can read an inode
1953 */ 1943 */
1954 sb->s_op = &ext3_sops; 1944 sb->s_op = &ext3_sops;
1955 sb->s_export_op = &ext3_export_ops; 1945 sb->s_export_op = &ext3_export_ops;
1956 sb->s_xattr = ext3_xattr_handlers; 1946 sb->s_xattr = ext3_xattr_handlers;
1957 #ifdef CONFIG_QUOTA 1947 #ifdef CONFIG_QUOTA
1958 sb->s_qcop = &ext3_qctl_operations; 1948 sb->s_qcop = &ext3_qctl_operations;
1959 sb->dq_op = &ext3_quota_operations; 1949 sb->dq_op = &ext3_quota_operations;
1960 #endif 1950 #endif
1961 memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid)); 1951 memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
1962 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 1952 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
1963 mutex_init(&sbi->s_orphan_lock); 1953 mutex_init(&sbi->s_orphan_lock);
1964 mutex_init(&sbi->s_resize_lock); 1954 mutex_init(&sbi->s_resize_lock);
1965 1955
1966 sb->s_root = NULL; 1956 sb->s_root = NULL;
1967 1957
1968 needs_recovery = (es->s_last_orphan != 0 || 1958 needs_recovery = (es->s_last_orphan != 0 ||
1969 EXT3_HAS_INCOMPAT_FEATURE(sb, 1959 EXT3_HAS_INCOMPAT_FEATURE(sb,
1970 EXT3_FEATURE_INCOMPAT_RECOVER)); 1960 EXT3_FEATURE_INCOMPAT_RECOVER));
1971 1961
1972 /* 1962 /*
1973 * The first inode we look at is the journal inode. Don't try 1963 * The first inode we look at is the journal inode. Don't try
1974 * root first: it may be modified in the journal! 1964 * root first: it may be modified in the journal!
1975 */ 1965 */
1976 if (!test_opt(sb, NOLOAD) && 1966 if (!test_opt(sb, NOLOAD) &&
1977 EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) { 1967 EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) {
1978 if (ext3_load_journal(sb, es, journal_devnum)) 1968 if (ext3_load_journal(sb, es, journal_devnum))
1979 goto failed_mount2; 1969 goto failed_mount2;
1980 } else if (journal_inum) { 1970 } else if (journal_inum) {
1981 if (ext3_create_journal(sb, es, journal_inum)) 1971 if (ext3_create_journal(sb, es, journal_inum))
1982 goto failed_mount2; 1972 goto failed_mount2;
1983 } else { 1973 } else {
1984 if (!silent) 1974 if (!silent)
1985 ext3_msg(sb, KERN_ERR, 1975 ext3_msg(sb, KERN_ERR,
1986 "error: no journal found. " 1976 "error: no journal found. "
1987 "mounting ext3 over ext2?"); 1977 "mounting ext3 over ext2?");
1988 goto failed_mount2; 1978 goto failed_mount2;
1989 } 1979 }
1990 err = percpu_counter_init(&sbi->s_freeblocks_counter, 1980 err = percpu_counter_init(&sbi->s_freeblocks_counter,
1991 ext3_count_free_blocks(sb)); 1981 ext3_count_free_blocks(sb));
1992 if (!err) { 1982 if (!err) {
1993 err = percpu_counter_init(&sbi->s_freeinodes_counter, 1983 err = percpu_counter_init(&sbi->s_freeinodes_counter,
1994 ext3_count_free_inodes(sb)); 1984 ext3_count_free_inodes(sb));
1995 } 1985 }
1996 if (!err) { 1986 if (!err) {
1997 err = percpu_counter_init(&sbi->s_dirs_counter, 1987 err = percpu_counter_init(&sbi->s_dirs_counter,
1998 ext3_count_dirs(sb)); 1988 ext3_count_dirs(sb));
1999 } 1989 }
2000 if (err) { 1990 if (err) {
2001 ext3_msg(sb, KERN_ERR, "error: insufficient memory"); 1991 ext3_msg(sb, KERN_ERR, "error: insufficient memory");
2002 ret = err; 1992 ret = err;
2003 goto failed_mount3; 1993 goto failed_mount3;
2004 } 1994 }
2005 1995
2006 /* We have now updated the journal if required, so we can 1996 /* We have now updated the journal if required, so we can
2007 * validate the data journaling mode. */ 1997 * validate the data journaling mode. */
2008 switch (test_opt(sb, DATA_FLAGS)) { 1998 switch (test_opt(sb, DATA_FLAGS)) {
2009 case 0: 1999 case 0:
2010 /* No mode set, assume a default based on the journal 2000 /* No mode set, assume a default based on the journal
2011 capabilities: ORDERED_DATA if the journal can 2001 capabilities: ORDERED_DATA if the journal can
2012 cope, else JOURNAL_DATA */ 2002 cope, else JOURNAL_DATA */
2013 if (journal_check_available_features 2003 if (journal_check_available_features
2014 (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) 2004 (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE))
2015 set_opt(sbi->s_mount_opt, DEFAULT_DATA_MODE); 2005 set_opt(sbi->s_mount_opt, DEFAULT_DATA_MODE);
2016 else 2006 else
2017 set_opt(sbi->s_mount_opt, JOURNAL_DATA); 2007 set_opt(sbi->s_mount_opt, JOURNAL_DATA);
2018 break; 2008 break;
2019 2009
2020 case EXT3_MOUNT_ORDERED_DATA: 2010 case EXT3_MOUNT_ORDERED_DATA:
2021 case EXT3_MOUNT_WRITEBACK_DATA: 2011 case EXT3_MOUNT_WRITEBACK_DATA:
2022 if (!journal_check_available_features 2012 if (!journal_check_available_features
2023 (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) { 2013 (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) {
2024 ext3_msg(sb, KERN_ERR, 2014 ext3_msg(sb, KERN_ERR,
2025 "error: journal does not support " 2015 "error: journal does not support "
2026 "requested data journaling mode"); 2016 "requested data journaling mode");
2027 goto failed_mount3; 2017 goto failed_mount3;
2028 } 2018 }
2029 default: 2019 default:
2030 break; 2020 break;
2031 } 2021 }
2032 2022
2033 /* 2023 /*
2034 * The journal_load will have done any necessary log recovery, 2024 * The journal_load will have done any necessary log recovery,
2035 * so we can safely mount the rest of the filesystem now. 2025 * so we can safely mount the rest of the filesystem now.
2036 */ 2026 */
2037 2027
2038 root = ext3_iget(sb, EXT3_ROOT_INO); 2028 root = ext3_iget(sb, EXT3_ROOT_INO);
2039 if (IS_ERR(root)) { 2029 if (IS_ERR(root)) {
2040 ext3_msg(sb, KERN_ERR, "error: get root inode failed"); 2030 ext3_msg(sb, KERN_ERR, "error: get root inode failed");
2041 ret = PTR_ERR(root); 2031 ret = PTR_ERR(root);
2042 goto failed_mount3; 2032 goto failed_mount3;
2043 } 2033 }
2044 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { 2034 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
2045 iput(root); 2035 iput(root);
2046 ext3_msg(sb, KERN_ERR, "error: corrupt root inode, run e2fsck"); 2036 ext3_msg(sb, KERN_ERR, "error: corrupt root inode, run e2fsck");
2047 goto failed_mount3; 2037 goto failed_mount3;
2048 } 2038 }
2049 sb->s_root = d_make_root(root); 2039 sb->s_root = d_make_root(root);
2050 if (!sb->s_root) { 2040 if (!sb->s_root) {
2051 ext3_msg(sb, KERN_ERR, "error: get root dentry failed"); 2041 ext3_msg(sb, KERN_ERR, "error: get root dentry failed");
2052 ret = -ENOMEM; 2042 ret = -ENOMEM;
2053 goto failed_mount3; 2043 goto failed_mount3;
2054 } 2044 }
2055 2045
2056 ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY); 2046 ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
2057 2047
2058 EXT3_SB(sb)->s_mount_state |= EXT3_ORPHAN_FS; 2048 EXT3_SB(sb)->s_mount_state |= EXT3_ORPHAN_FS;
2059 ext3_orphan_cleanup(sb, es); 2049 ext3_orphan_cleanup(sb, es);
2060 EXT3_SB(sb)->s_mount_state &= ~EXT3_ORPHAN_FS; 2050 EXT3_SB(sb)->s_mount_state &= ~EXT3_ORPHAN_FS;
2061 if (needs_recovery) { 2051 if (needs_recovery) {
2062 ext3_mark_recovery_complete(sb, es); 2052 ext3_mark_recovery_complete(sb, es);
2063 ext3_msg(sb, KERN_INFO, "recovery complete"); 2053 ext3_msg(sb, KERN_INFO, "recovery complete");
2064 } 2054 }
2065 ext3_msg(sb, KERN_INFO, "mounted filesystem with %s data mode", 2055 ext3_msg(sb, KERN_INFO, "mounted filesystem with %s data mode",
2066 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal": 2056 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal":
2067 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered": 2057 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered":
2068 "writeback"); 2058 "writeback");
2069 2059
2070 return 0; 2060 return 0;
2071 2061
2072 cantfind_ext3: 2062 cantfind_ext3:
2073 if (!silent) 2063 if (!silent)
2074 ext3_msg(sb, KERN_INFO, 2064 ext3_msg(sb, KERN_INFO,
2075 "error: can't find ext3 filesystem on dev %s.", 2065 "error: can't find ext3 filesystem on dev %s.",
2076 sb->s_id); 2066 sb->s_id);
2077 goto failed_mount; 2067 goto failed_mount;
2078 2068
2079 failed_mount3: 2069 failed_mount3:
2080 percpu_counter_destroy(&sbi->s_freeblocks_counter); 2070 percpu_counter_destroy(&sbi->s_freeblocks_counter);
2081 percpu_counter_destroy(&sbi->s_freeinodes_counter); 2071 percpu_counter_destroy(&sbi->s_freeinodes_counter);
2082 percpu_counter_destroy(&sbi->s_dirs_counter); 2072 percpu_counter_destroy(&sbi->s_dirs_counter);
2083 journal_destroy(sbi->s_journal); 2073 journal_destroy(sbi->s_journal);
2084 failed_mount2: 2074 failed_mount2:
2085 for (i = 0; i < db_count; i++) 2075 for (i = 0; i < db_count; i++)
2086 brelse(sbi->s_group_desc[i]); 2076 brelse(sbi->s_group_desc[i]);
2087 kfree(sbi->s_group_desc); 2077 kfree(sbi->s_group_desc);
2088 failed_mount: 2078 failed_mount:
2089 #ifdef CONFIG_QUOTA 2079 #ifdef CONFIG_QUOTA
2090 for (i = 0; i < MAXQUOTAS; i++) 2080 for (i = 0; i < MAXQUOTAS; i++)
2091 kfree(sbi->s_qf_names[i]); 2081 kfree(sbi->s_qf_names[i]);
2092 #endif 2082 #endif
2093 ext3_blkdev_remove(sbi); 2083 ext3_blkdev_remove(sbi);
2094 brelse(bh); 2084 brelse(bh);
2095 out_fail: 2085 out_fail:
2096 sb->s_fs_info = NULL; 2086 sb->s_fs_info = NULL;
2097 kfree(sbi->s_blockgroup_lock); 2087 kfree(sbi->s_blockgroup_lock);
2098 kfree(sbi); 2088 kfree(sbi);
2099 return ret; 2089 return ret;
2100 } 2090 }
2101 2091
2102 /* 2092 /*
2103 * Setup any per-fs journal parameters now. We'll do this both on 2093 * Setup any per-fs journal parameters now. We'll do this both on
2104 * initial mount, once the journal has been initialised but before we've 2094 * initial mount, once the journal has been initialised but before we've
2105 * done any recovery; and again on any subsequent remount. 2095 * done any recovery; and again on any subsequent remount.
2106 */ 2096 */
2107 static void ext3_init_journal_params(struct super_block *sb, journal_t *journal) 2097 static void ext3_init_journal_params(struct super_block *sb, journal_t *journal)
2108 { 2098 {
2109 struct ext3_sb_info *sbi = EXT3_SB(sb); 2099 struct ext3_sb_info *sbi = EXT3_SB(sb);
2110 2100
2111 if (sbi->s_commit_interval) 2101 if (sbi->s_commit_interval)
2112 journal->j_commit_interval = sbi->s_commit_interval; 2102 journal->j_commit_interval = sbi->s_commit_interval;
2113 /* We could also set up an ext3-specific default for the commit 2103 /* We could also set up an ext3-specific default for the commit
2114 * interval here, but for now we'll just fall back to the jbd 2104 * interval here, but for now we'll just fall back to the jbd
2115 * default. */ 2105 * default. */
2116 2106
2117 spin_lock(&journal->j_state_lock); 2107 spin_lock(&journal->j_state_lock);
2118 if (test_opt(sb, BARRIER)) 2108 if (test_opt(sb, BARRIER))
2119 journal->j_flags |= JFS_BARRIER; 2109 journal->j_flags |= JFS_BARRIER;
2120 else 2110 else
2121 journal->j_flags &= ~JFS_BARRIER; 2111 journal->j_flags &= ~JFS_BARRIER;
2122 if (test_opt(sb, DATA_ERR_ABORT)) 2112 if (test_opt(sb, DATA_ERR_ABORT))
2123 journal->j_flags |= JFS_ABORT_ON_SYNCDATA_ERR; 2113 journal->j_flags |= JFS_ABORT_ON_SYNCDATA_ERR;
2124 else 2114 else
2125 journal->j_flags &= ~JFS_ABORT_ON_SYNCDATA_ERR; 2115 journal->j_flags &= ~JFS_ABORT_ON_SYNCDATA_ERR;
2126 spin_unlock(&journal->j_state_lock); 2116 spin_unlock(&journal->j_state_lock);
2127 } 2117 }
2128 2118
2129 static journal_t *ext3_get_journal(struct super_block *sb, 2119 static journal_t *ext3_get_journal(struct super_block *sb,
2130 unsigned int journal_inum) 2120 unsigned int journal_inum)
2131 { 2121 {
2132 struct inode *journal_inode; 2122 struct inode *journal_inode;
2133 journal_t *journal; 2123 journal_t *journal;
2134 2124
2135 /* First, test for the existence of a valid inode on disk. Bad 2125 /* First, test for the existence of a valid inode on disk. Bad
2136 * things happen if we iget() an unused inode, as the subsequent 2126 * things happen if we iget() an unused inode, as the subsequent
2137 * iput() will try to delete it. */ 2127 * iput() will try to delete it. */
2138 2128
2139 journal_inode = ext3_iget(sb, journal_inum); 2129 journal_inode = ext3_iget(sb, journal_inum);
2140 if (IS_ERR(journal_inode)) { 2130 if (IS_ERR(journal_inode)) {
2141 ext3_msg(sb, KERN_ERR, "error: no journal found"); 2131 ext3_msg(sb, KERN_ERR, "error: no journal found");
2142 return NULL; 2132 return NULL;
2143 } 2133 }
2144 if (!journal_inode->i_nlink) { 2134 if (!journal_inode->i_nlink) {
2145 make_bad_inode(journal_inode); 2135 make_bad_inode(journal_inode);
2146 iput(journal_inode); 2136 iput(journal_inode);
2147 ext3_msg(sb, KERN_ERR, "error: journal inode is deleted"); 2137 ext3_msg(sb, KERN_ERR, "error: journal inode is deleted");
2148 return NULL; 2138 return NULL;
2149 } 2139 }
2150 2140
2151 jbd_debug(2, "Journal inode found at %p: %Ld bytes\n", 2141 jbd_debug(2, "Journal inode found at %p: %Ld bytes\n",
2152 journal_inode, journal_inode->i_size); 2142 journal_inode, journal_inode->i_size);
2153 if (!S_ISREG(journal_inode->i_mode)) { 2143 if (!S_ISREG(journal_inode->i_mode)) {
2154 ext3_msg(sb, KERN_ERR, "error: invalid journal inode"); 2144 ext3_msg(sb, KERN_ERR, "error: invalid journal inode");
2155 iput(journal_inode); 2145 iput(journal_inode);
2156 return NULL; 2146 return NULL;
2157 } 2147 }
2158 2148
2159 journal = journal_init_inode(journal_inode); 2149 journal = journal_init_inode(journal_inode);
2160 if (!journal) { 2150 if (!journal) {
2161 ext3_msg(sb, KERN_ERR, "error: could not load journal inode"); 2151 ext3_msg(sb, KERN_ERR, "error: could not load journal inode");
2162 iput(journal_inode); 2152 iput(journal_inode);
2163 return NULL; 2153 return NULL;
2164 } 2154 }
2165 journal->j_private = sb; 2155 journal->j_private = sb;
2166 ext3_init_journal_params(sb, journal); 2156 ext3_init_journal_params(sb, journal);
2167 return journal; 2157 return journal;
2168 } 2158 }
2169 2159
2170 static journal_t *ext3_get_dev_journal(struct super_block *sb, 2160 static journal_t *ext3_get_dev_journal(struct super_block *sb,
2171 dev_t j_dev) 2161 dev_t j_dev)
2172 { 2162 {
2173 struct buffer_head * bh; 2163 struct buffer_head * bh;
2174 journal_t *journal; 2164 journal_t *journal;
2175 ext3_fsblk_t start; 2165 ext3_fsblk_t start;
2176 ext3_fsblk_t len; 2166 ext3_fsblk_t len;
2177 int hblock, blocksize; 2167 int hblock, blocksize;
2178 ext3_fsblk_t sb_block; 2168 ext3_fsblk_t sb_block;
2179 unsigned long offset; 2169 unsigned long offset;
2180 struct ext3_super_block * es; 2170 struct ext3_super_block * es;
2181 struct block_device *bdev; 2171 struct block_device *bdev;
2182 2172
2183 bdev = ext3_blkdev_get(j_dev, sb); 2173 bdev = ext3_blkdev_get(j_dev, sb);
2184 if (bdev == NULL) 2174 if (bdev == NULL)
2185 return NULL; 2175 return NULL;
2186 2176
2187 blocksize = sb->s_blocksize; 2177 blocksize = sb->s_blocksize;
2188 hblock = bdev_logical_block_size(bdev); 2178 hblock = bdev_logical_block_size(bdev);
2189 if (blocksize < hblock) { 2179 if (blocksize < hblock) {
2190 ext3_msg(sb, KERN_ERR, 2180 ext3_msg(sb, KERN_ERR,
2191 "error: blocksize too small for journal device"); 2181 "error: blocksize too small for journal device");
2192 goto out_bdev; 2182 goto out_bdev;
2193 } 2183 }
2194 2184
2195 sb_block = EXT3_MIN_BLOCK_SIZE / blocksize; 2185 sb_block = EXT3_MIN_BLOCK_SIZE / blocksize;
2196 offset = EXT3_MIN_BLOCK_SIZE % blocksize; 2186 offset = EXT3_MIN_BLOCK_SIZE % blocksize;
2197 set_blocksize(bdev, blocksize); 2187 set_blocksize(bdev, blocksize);
2198 if (!(bh = __bread(bdev, sb_block, blocksize))) { 2188 if (!(bh = __bread(bdev, sb_block, blocksize))) {
2199 ext3_msg(sb, KERN_ERR, "error: couldn't read superblock of " 2189 ext3_msg(sb, KERN_ERR, "error: couldn't read superblock of "
2200 "external journal"); 2190 "external journal");
2201 goto out_bdev; 2191 goto out_bdev;
2202 } 2192 }
2203 2193
2204 es = (struct ext3_super_block *) (bh->b_data + offset); 2194 es = (struct ext3_super_block *) (bh->b_data + offset);
2205 if ((le16_to_cpu(es->s_magic) != EXT3_SUPER_MAGIC) || 2195 if ((le16_to_cpu(es->s_magic) != EXT3_SUPER_MAGIC) ||
2206 !(le32_to_cpu(es->s_feature_incompat) & 2196 !(le32_to_cpu(es->s_feature_incompat) &
2207 EXT3_FEATURE_INCOMPAT_JOURNAL_DEV)) { 2197 EXT3_FEATURE_INCOMPAT_JOURNAL_DEV)) {
2208 ext3_msg(sb, KERN_ERR, "error: external journal has " 2198 ext3_msg(sb, KERN_ERR, "error: external journal has "
2209 "bad superblock"); 2199 "bad superblock");
2210 brelse(bh); 2200 brelse(bh);
2211 goto out_bdev; 2201 goto out_bdev;
2212 } 2202 }
2213 2203
2214 if (memcmp(EXT3_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { 2204 if (memcmp(EXT3_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
2215 ext3_msg(sb, KERN_ERR, "error: journal UUID does not match"); 2205 ext3_msg(sb, KERN_ERR, "error: journal UUID does not match");
2216 brelse(bh); 2206 brelse(bh);
2217 goto out_bdev; 2207 goto out_bdev;
2218 } 2208 }
2219 2209
2220 len = le32_to_cpu(es->s_blocks_count); 2210 len = le32_to_cpu(es->s_blocks_count);
2221 start = sb_block + 1; 2211 start = sb_block + 1;
2222 brelse(bh); /* we're done with the superblock */ 2212 brelse(bh); /* we're done with the superblock */
2223 2213
2224 journal = journal_init_dev(bdev, sb->s_bdev, 2214 journal = journal_init_dev(bdev, sb->s_bdev,
2225 start, len, blocksize); 2215 start, len, blocksize);
2226 if (!journal) { 2216 if (!journal) {
2227 ext3_msg(sb, KERN_ERR, 2217 ext3_msg(sb, KERN_ERR,
2228 "error: failed to create device journal"); 2218 "error: failed to create device journal");
2229 goto out_bdev; 2219 goto out_bdev;
2230 } 2220 }
2231 journal->j_private = sb; 2221 journal->j_private = sb;
2232 if (!bh_uptodate_or_lock(journal->j_sb_buffer)) { 2222 if (!bh_uptodate_or_lock(journal->j_sb_buffer)) {
2233 if (bh_submit_read(journal->j_sb_buffer)) { 2223 if (bh_submit_read(journal->j_sb_buffer)) {
2234 ext3_msg(sb, KERN_ERR, "I/O error on journal device"); 2224 ext3_msg(sb, KERN_ERR, "I/O error on journal device");
2235 goto out_journal; 2225 goto out_journal;
2236 } 2226 }
2237 } 2227 }
2238 if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { 2228 if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
2239 ext3_msg(sb, KERN_ERR, 2229 ext3_msg(sb, KERN_ERR,
2240 "error: external journal has more than one " 2230 "error: external journal has more than one "
2241 "user (unsupported) - %d", 2231 "user (unsupported) - %d",
2242 be32_to_cpu(journal->j_superblock->s_nr_users)); 2232 be32_to_cpu(journal->j_superblock->s_nr_users));
2243 goto out_journal; 2233 goto out_journal;
2244 } 2234 }
2245 EXT3_SB(sb)->journal_bdev = bdev; 2235 EXT3_SB(sb)->journal_bdev = bdev;
2246 ext3_init_journal_params(sb, journal); 2236 ext3_init_journal_params(sb, journal);
2247 return journal; 2237 return journal;
2248 out_journal: 2238 out_journal:
2249 journal_destroy(journal); 2239 journal_destroy(journal);
2250 out_bdev: 2240 out_bdev:
2251 ext3_blkdev_put(bdev); 2241 ext3_blkdev_put(bdev);
2252 return NULL; 2242 return NULL;
2253 } 2243 }
2254 2244
/*
 * Locate and load the filesystem's journal at mount time.
 *
 * The journal lives either in an inode (es->s_journal_inum) or on an
 * external block device (es->s_journal_dev / the journal_devnum mount
 * option).  Replays pending recovery if needed, then stores the opened
 * journal in EXT3_SB(sb)->s_journal.
 *
 * Returns 0 on success or a negative errno (-EROFS when recovery is
 * required but the device is truly read-only, -EINVAL on conflicting or
 * unopenable journal locations, or the error from the jbd layer).
 */
static int ext3_load_journal(struct super_block *sb,
			     struct ext3_super_block *es,
			     unsigned long journal_devnum)
{
	journal_t *journal;
	unsigned int journal_inum = le32_to_cpu(es->s_journal_inum);
	dev_t journal_dev;
	int err = 0;
	int really_read_only;

	/* A journal_dev= mount option overrides the on-disk device number. */
	if (journal_devnum &&
	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
		ext3_msg(sb, KERN_INFO, "external journal device major/minor "
			"numbers have changed");
		journal_dev = new_decode_dev(journal_devnum);
	} else
		journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));

	really_read_only = bdev_read_only(sb->s_bdev);

	/*
	 * Are we loading a blank journal or performing recovery after a
	 * crash?  For recovery, we need to check in advance whether we
	 * can get read-write access to the device.
	 */

	if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER)) {
		if (sb->s_flags & MS_RDONLY) {
			ext3_msg(sb, KERN_INFO,
				"recovery required on readonly filesystem");
			if (really_read_only) {
				ext3_msg(sb, KERN_ERR, "error: write access "
					"unavailable, cannot proceed");
				return -EROFS;
			}
			ext3_msg(sb, KERN_INFO,
				"write access will be enabled during recovery");
		}
	}

	/* Exactly one journal location may be set, not both. */
	if (journal_inum && journal_dev) {
		ext3_msg(sb, KERN_ERR, "error: filesystem has both journal "
		       "and inode journals");
		return -EINVAL;
	}

	if (journal_inum) {
		if (!(journal = ext3_get_journal(sb, journal_inum)))
			return -EINVAL;
	} else {
		if (!(journal = ext3_get_dev_journal(sb, journal_dev)))
			return -EINVAL;
	}

	if (!(journal->j_flags & JFS_BARRIER))
		printk(KERN_INFO "EXT3-fs: barriers not enabled\n");

	if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) {
		err = journal_update_format(journal);
		if (err) {
			ext3_msg(sb, KERN_ERR, "error updating journal");
			journal_destroy(journal);
			return err;
		}
	}

	/*
	 * No recovery pending: wipe any stale journal contents (write the
	 * wipe only if the device is writable), then load the journal.
	 */
	if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER))
		err = journal_wipe(journal, !really_read_only);
	if (!err)
		err = journal_load(journal);

	if (err) {
		ext3_msg(sb, KERN_ERR, "error loading journal");
		journal_destroy(journal);
		return err;
	}

	EXT3_SB(sb)->s_journal = journal;
	/* Propagate any error the journal recorded in a previous life. */
	ext3_clear_journal_err(sb, es);

	/* Persist a changed external-journal device number. */
	if (!really_read_only && journal_devnum &&
	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
		es->s_journal_dev = cpu_to_le32(journal_devnum);

		/* Make sure we flush the recovery flag to disk. */
		ext3_commit_super(sb, es, 1);
	}

	return 0;
}
2345 2335
/*
 * Create a fresh journal in inode journal_inum (mount-time "journal=update"
 * style path).  Writes the journal, records it in the superblock, and turns
 * on the HAS_JOURNAL/RECOVER feature flags.
 *
 * Returns 0 on success, -EROFS on a read-only mount, -EINVAL if the journal
 * inode cannot be opened, or -EIO if journal creation fails.
 */
static int ext3_create_journal(struct super_block *sb,
			       struct ext3_super_block *es,
			       unsigned int journal_inum)
{
	journal_t *journal;
	int err;

	if (sb->s_flags & MS_RDONLY) {
		ext3_msg(sb, KERN_ERR,
			"error: readonly filesystem when trying to "
			"create journal");
		return -EROFS;
	}

	journal = ext3_get_journal(sb, journal_inum);
	if (!journal)
		return -EINVAL;

	ext3_msg(sb, KERN_INFO, "creating new journal on inode %u",
	       journal_inum);

	err = journal_create(journal);
	if (err) {
		ext3_msg(sb, KERN_ERR, "error creating journal");
		journal_destroy(journal);
		return -EIO;
	}

	EXT3_SB(sb)->s_journal = journal;

	/* Setting feature flags requires at least dynamic-rev superblock. */
	ext3_update_dynamic_rev(sb);
	EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
	EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL);

	es->s_journal_inum = cpu_to_le32(journal_inum);

	/* Make sure we flush the recovery flag to disk. */
	ext3_commit_super(sb, es, 1);

	return 0;
}
2387 2377
/*
 * Write the primary superblock buffer back to disk.
 *
 * Refreshes the free block/inode counts (and, on rw mounts, the write
 * time) in *es before marking the buffer dirty.  If @sync is set the
 * write is submitted and waited on; otherwise it is only marked dirty
 * for later writeback.
 *
 * Returns 0 on success or the error from sync_dirty_buffer().
 */
static int ext3_commit_super(struct super_block *sb,
			       struct ext3_super_block *es,
			       int sync)
{
	struct buffer_head *sbh = EXT3_SB(sb)->s_sbh;
	int error = 0;

	if (!sbh)
		return error;

	if (buffer_write_io_error(sbh)) {
		/*
		 * Oh, dear.  A previous attempt to write the
		 * superblock failed.  This could happen because the
		 * USB device was yanked out.  Or it could happen to
		 * be a transient write error and maybe the block will
		 * be remapped.  Nothing we can do but to retry the
		 * write and hope for the best.
		 */
		ext3_msg(sb, KERN_ERR, "previous I/O error to "
			"superblock detected");
		clear_buffer_write_io_error(sbh);
		set_buffer_uptodate(sbh);
	}
	/*
	 * If the file system is mounted read-only, don't update the
	 * superblock write time.  This avoids updating the superblock
	 * write time when we are mounting the root file system
	 * read/only but we need to replay the journal; at that point,
	 * for people who are east of GMT and who make their clock
	 * tick in localtime for Windows bug-for-bug compatibility,
	 * the clock is set in the future, and this will cause e2fsck
	 * to complain and force a full file system check.
	 */
	if (!(sb->s_flags & MS_RDONLY))
		es->s_wtime = cpu_to_le32(get_seconds());
	es->s_free_blocks_count = cpu_to_le32(ext3_count_free_blocks(sb));
	es->s_free_inodes_count = cpu_to_le32(ext3_count_free_inodes(sb));
	BUFFER_TRACE(sbh, "marking dirty");
	mark_buffer_dirty(sbh);
	if (sync) {
		error = sync_dirty_buffer(sbh);
		if (buffer_write_io_error(sbh)) {
			/* Clear the error so the next commit can retry. */
			ext3_msg(sb, KERN_ERR, "I/O error while writing "
				"superblock");
			clear_buffer_write_io_error(sbh);
			set_buffer_uptodate(sbh);
		}
	}
	return error;
}
2439 2429
2440 2430
2441 /* 2431 /*
2442 * Have we just finished recovery? If so, and if we are mounting (or 2432 * Have we just finished recovery? If so, and if we are mounting (or
2443 * remounting) the filesystem readonly, then we will end up with a 2433 * remounting) the filesystem readonly, then we will end up with a
2444 * consistent fs on disk. Record that fact. 2434 * consistent fs on disk. Record that fact.
2445 */ 2435 */
2446 static void ext3_mark_recovery_complete(struct super_block * sb, 2436 static void ext3_mark_recovery_complete(struct super_block * sb,
2447 struct ext3_super_block * es) 2437 struct ext3_super_block * es)
2448 { 2438 {
2449 journal_t *journal = EXT3_SB(sb)->s_journal; 2439 journal_t *journal = EXT3_SB(sb)->s_journal;
2450 2440
2451 journal_lock_updates(journal); 2441 journal_lock_updates(journal);
2452 if (journal_flush(journal) < 0) 2442 if (journal_flush(journal) < 0)
2453 goto out; 2443 goto out;
2454 2444
2455 if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) && 2445 if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) &&
2456 sb->s_flags & MS_RDONLY) { 2446 sb->s_flags & MS_RDONLY) {
2457 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); 2447 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
2458 ext3_commit_super(sb, es, 1); 2448 ext3_commit_super(sb, es, 1);
2459 } 2449 }
2460 2450
2461 out: 2451 out:
2462 journal_unlock_updates(journal); 2452 journal_unlock_updates(journal);
2463 } 2453 }
2464 2454
2465 /* 2455 /*
2466 * If we are mounting (or read-write remounting) a filesystem whose journal 2456 * If we are mounting (or read-write remounting) a filesystem whose journal
2467 * has recorded an error from a previous lifetime, move that error to the 2457 * has recorded an error from a previous lifetime, move that error to the
2468 * main filesystem now. 2458 * main filesystem now.
2469 */ 2459 */
2470 static void ext3_clear_journal_err(struct super_block *sb, 2460 static void ext3_clear_journal_err(struct super_block *sb,
2471 struct ext3_super_block *es) 2461 struct ext3_super_block *es)
2472 { 2462 {
2473 journal_t *journal; 2463 journal_t *journal;
2474 int j_errno; 2464 int j_errno;
2475 const char *errstr; 2465 const char *errstr;
2476 2466
2477 journal = EXT3_SB(sb)->s_journal; 2467 journal = EXT3_SB(sb)->s_journal;
2478 2468
2479 /* 2469 /*
2480 * Now check for any error status which may have been recorded in the 2470 * Now check for any error status which may have been recorded in the
2481 * journal by a prior ext3_error() or ext3_abort() 2471 * journal by a prior ext3_error() or ext3_abort()
2482 */ 2472 */
2483 2473
2484 j_errno = journal_errno(journal); 2474 j_errno = journal_errno(journal);
2485 if (j_errno) { 2475 if (j_errno) {
2486 char nbuf[16]; 2476 char nbuf[16];
2487 2477
2488 errstr = ext3_decode_error(sb, j_errno, nbuf); 2478 errstr = ext3_decode_error(sb, j_errno, nbuf);
2489 ext3_warning(sb, __func__, "Filesystem error recorded " 2479 ext3_warning(sb, __func__, "Filesystem error recorded "
2490 "from previous mount: %s", errstr); 2480 "from previous mount: %s", errstr);
2491 ext3_warning(sb, __func__, "Marking fs in need of " 2481 ext3_warning(sb, __func__, "Marking fs in need of "
2492 "filesystem check."); 2482 "filesystem check.");
2493 2483
2494 EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; 2484 EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
2495 es->s_state |= cpu_to_le16(EXT3_ERROR_FS); 2485 es->s_state |= cpu_to_le16(EXT3_ERROR_FS);
2496 ext3_commit_super (sb, es, 1); 2486 ext3_commit_super (sb, es, 1);
2497 2487
2498 journal_clear_err(journal); 2488 journal_clear_err(journal);
2499 } 2489 }
2500 } 2490 }
2501 2491
2502 /* 2492 /*
2503 * Force the running and committing transactions to commit, 2493 * Force the running and committing transactions to commit,
2504 * and wait on the commit. 2494 * and wait on the commit.
2505 */ 2495 */
2506 int ext3_force_commit(struct super_block *sb) 2496 int ext3_force_commit(struct super_block *sb)
2507 { 2497 {
2508 journal_t *journal; 2498 journal_t *journal;
2509 int ret; 2499 int ret;
2510 2500
2511 if (sb->s_flags & MS_RDONLY) 2501 if (sb->s_flags & MS_RDONLY)
2512 return 0; 2502 return 0;
2513 2503
2514 journal = EXT3_SB(sb)->s_journal; 2504 journal = EXT3_SB(sb)->s_journal;
2515 ret = ext3_journal_force_commit(journal); 2505 ret = ext3_journal_force_commit(journal);
2516 return ret; 2506 return ret;
2517 } 2507 }
2518 2508
2519 static int ext3_sync_fs(struct super_block *sb, int wait) 2509 static int ext3_sync_fs(struct super_block *sb, int wait)
2520 { 2510 {
2521 tid_t target; 2511 tid_t target;
2522 2512
2523 trace_ext3_sync_fs(sb, wait); 2513 trace_ext3_sync_fs(sb, wait);
2524 if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) { 2514 if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) {
2525 if (wait) 2515 if (wait)
2526 log_wait_commit(EXT3_SB(sb)->s_journal, target); 2516 log_wait_commit(EXT3_SB(sb)->s_journal, target);
2527 } 2517 }
2528 return 0; 2518 return 0;
2529 } 2519 }
2530 2520
/*
 * LVM calls this function before a (read-only) snapshot is created.  This
 * gives us a chance to flush the journal completely and mark the fs clean.
 *
 * On success the journal is left locked (journal_lock_updates); the
 * matching unlock happens in ext3_unfreeze().  On a rw mount the
 * needs_recovery (RECOVER) flag is cleared on disk so the snapshot
 * looks clean; ext3_unfreeze() sets it back.
 */
static int ext3_freeze(struct super_block *sb)
{
	int error = 0;
	journal_t *journal;

	if (!(sb->s_flags & MS_RDONLY)) {
		journal = EXT3_SB(sb)->s_journal;

		/* Now we set up the journal barrier. */
		journal_lock_updates(journal);

		/*
		 * We don't want to clear needs_recovery flag when we failed
		 * to flush the journal.
		 */
		error = journal_flush(journal);
		if (error < 0)
			goto out;

		/* Journal blocked and flushed, clear needs_recovery flag. */
		EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
		error = ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1);
		if (error)
			goto out;
	}
	return 0;

out:
	/* Error path only: drop the barrier we took above. */
	journal_unlock_updates(journal);
	return error;
}
2566 2556
2567 /* 2557 /*
2568 * Called by LVM after the snapshot is done. We need to reset the RECOVER 2558 * Called by LVM after the snapshot is done. We need to reset the RECOVER
2569 * flag here, even though the filesystem is not technically dirty yet. 2559 * flag here, even though the filesystem is not technically dirty yet.
2570 */ 2560 */
2571 static int ext3_unfreeze(struct super_block *sb) 2561 static int ext3_unfreeze(struct super_block *sb)
2572 { 2562 {
2573 if (!(sb->s_flags & MS_RDONLY)) { 2563 if (!(sb->s_flags & MS_RDONLY)) {
2574 lock_super(sb); 2564 lock_super(sb);
2575 /* Reser the needs_recovery flag before the fs is unlocked. */ 2565 /* Reser the needs_recovery flag before the fs is unlocked. */
2576 EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); 2566 EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
2577 ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1); 2567 ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1);
2578 unlock_super(sb); 2568 unlock_super(sb);
2579 journal_unlock_updates(EXT3_SB(sb)->s_journal); 2569 journal_unlock_updates(EXT3_SB(sb)->s_journal);
2580 } 2570 }
2581 return 0; 2571 return 0;
2582 } 2572 }
2583 2573
/*
 * Remount the filesystem with new options (and possibly a new rw/ro
 * state or a grown block count via the resize= option).
 *
 * All current mount options are snapshotted first so that any failure
 * can restore them via the restore_opts: path.  Runs under lock_super().
 *
 * Returns 0 on success or a negative errno; on error the previous
 * options and flags are restored.
 */
static int ext3_remount (struct super_block * sb, int * flags, char * data)
{
	struct ext3_super_block * es;
	struct ext3_sb_info *sbi = EXT3_SB(sb);
	ext3_fsblk_t n_blocks_count = 0;
	unsigned long old_sb_flags;
	struct ext3_mount_options old_opts;
	int enable_quota = 0;
	int err;
#ifdef CONFIG_QUOTA
	int i;
#endif

	/* Store the original options */
	lock_super(sb);
	old_sb_flags = sb->s_flags;
	old_opts.s_mount_opt = sbi->s_mount_opt;
	old_opts.s_resuid = sbi->s_resuid;
	old_opts.s_resgid = sbi->s_resgid;
	old_opts.s_commit_interval = sbi->s_commit_interval;
#ifdef CONFIG_QUOTA
	old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
	for (i = 0; i < MAXQUOTAS; i++)
		old_opts.s_qf_names[i] = sbi->s_qf_names[i];
#endif

	/*
	 * Allow the "check" option to be passed as a remount option.
	 */
	if (!parse_options(data, sb, NULL, NULL, &n_blocks_count, 1)) {
		err = -EINVAL;
		goto restore_opts;
	}

	if (test_opt(sb, ABORT))
		ext3_abort(sb, __func__, "Abort forced by user");

	/* Mirror the (possibly changed) acl option into the VFS flag. */
	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
		(test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);

	es = sbi->s_es;

	ext3_init_journal_params(sb, sbi->s_journal);

	/* Handle a ro<->rw transition and/or an online resize request. */
	if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) ||
		n_blocks_count > le32_to_cpu(es->s_blocks_count)) {
		if (test_opt(sb, ABORT)) {
			err = -EROFS;
			goto restore_opts;
		}

		if (*flags & MS_RDONLY) {
			err = dquot_suspend(sb, -1);
			if (err < 0)
				goto restore_opts;

			/*
			 * First of all, the unconditional stuff we have to do
			 * to disable replay of the journal when we next remount
			 */
			sb->s_flags |= MS_RDONLY;

			/*
			 * OK, test if we are remounting a valid rw partition
			 * readonly, and if so set the rdonly flag and then
			 * mark the partition as valid again.
			 */
			if (!(es->s_state & cpu_to_le16(EXT3_VALID_FS)) &&
			    (sbi->s_mount_state & EXT3_VALID_FS))
				es->s_state = cpu_to_le16(sbi->s_mount_state);

			ext3_mark_recovery_complete(sb, es);
		} else {
			__le32 ret;
			if ((ret = EXT3_HAS_RO_COMPAT_FEATURE(sb,
					~EXT3_FEATURE_RO_COMPAT_SUPP))) {
				ext3_msg(sb, KERN_WARNING,
					"warning: couldn't remount RDWR "
					"because of unsupported optional "
					"features (%x)", le32_to_cpu(ret));
				err = -EROFS;
				goto restore_opts;
			}

			/*
			 * If we have an unprocessed orphan list hanging
			 * around from a previously readonly bdev mount,
			 * require a full umount & mount for now.
			 */
			if (es->s_last_orphan) {
				ext3_msg(sb, KERN_WARNING, "warning: couldn't "
					"remount RDWR because of unprocessed "
					"orphan inode list.  Please "
					"umount & mount instead.");
				err = -EINVAL;
				goto restore_opts;
			}

			/*
			 * Mounting a RDONLY partition read-write, so reread
			 * and store the current valid flag.  (It may have
			 * been changed by e2fsck since we originally mounted
			 * the partition.)
			 */
			ext3_clear_journal_err(sb, es);
			sbi->s_mount_state = le16_to_cpu(es->s_state);
			if ((err = ext3_group_extend(sb, es, n_blocks_count)))
				goto restore_opts;
			if (!ext3_setup_super (sb, es, 0))
				sb->s_flags &= ~MS_RDONLY;
			enable_quota = 1;
		}
	}
#ifdef CONFIG_QUOTA
	/* Release old quota file names */
	for (i = 0; i < MAXQUOTAS; i++)
		if (old_opts.s_qf_names[i] &&
		    old_opts.s_qf_names[i] != sbi->s_qf_names[i])
			kfree(old_opts.s_qf_names[i]);
#endif
	unlock_super(sb);

	/* dquot_resume() may sleep, so do it outside lock_super(). */
	if (enable_quota)
		dquot_resume(sb, -1);
	return 0;
restore_opts:
	/* Roll every option back to the snapshot taken on entry. */
	sb->s_flags = old_sb_flags;
	sbi->s_mount_opt = old_opts.s_mount_opt;
	sbi->s_resuid = old_opts.s_resuid;
	sbi->s_resgid = old_opts.s_resgid;
	sbi->s_commit_interval = old_opts.s_commit_interval;
#ifdef CONFIG_QUOTA
	sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
	for (i = 0; i < MAXQUOTAS; i++) {
		/* Free names allocated by parse_options, keep the old ones. */
		if (sbi->s_qf_names[i] &&
		    old_opts.s_qf_names[i] != sbi->s_qf_names[i])
			kfree(sbi->s_qf_names[i]);
		sbi->s_qf_names[i] = old_opts.s_qf_names[i];
	}
#endif
	unlock_super(sb);
	return err;
}
2727 2717
/*
 * Fill in *buf for statfs(2).
 *
 * The metadata overhead (superblock copies, group descriptors, bitmaps,
 * inode tables) is cached in sbi->s_overhead_last and only recomputed
 * when the block count changes; the smp_rmb()/smp_wmb() pair orders the
 * overhead value against the s_blocks_last tag used to validate it.
 * With the minixdf mount option the overhead is reported as zero.
 */
static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf)
{
	struct super_block *sb = dentry->d_sb;
	struct ext3_sb_info *sbi = EXT3_SB(sb);
	struct ext3_super_block *es = sbi->s_es;
	u64 fsid;

	if (test_opt(sb, MINIX_DF)) {
		sbi->s_overhead_last = 0;
	} else if (sbi->s_blocks_last != le32_to_cpu(es->s_blocks_count)) {
		unsigned long ngroups = sbi->s_groups_count, i;
		ext3_fsblk_t overhead = 0;
		smp_rmb();

		/*
		 * Compute the overhead (FS structures).  This is constant
		 * for a given filesystem unless the number of block groups
		 * changes so we cache the previous value until it does.
		 */

		/*
		 * All of the blocks before first_data_block are
		 * overhead
		 */
		overhead = le32_to_cpu(es->s_first_data_block);

		/*
		 * Add the overhead attributed to the superblock and
		 * block group descriptors.  If the sparse superblocks
		 * feature is turned on, then not all groups have this.
		 */
		for (i = 0; i < ngroups; i++) {
			overhead += ext3_bg_has_super(sb, i) +
				ext3_bg_num_gdb(sb, i);
			cond_resched();
		}

		/*
		 * Every block group has an inode bitmap, a block
		 * bitmap, and an inode table.
		 */
		overhead += ngroups * (2 + sbi->s_itb_per_group);
		sbi->s_overhead_last = overhead;
		smp_wmb();
		sbi->s_blocks_last = le32_to_cpu(es->s_blocks_count);
	}

	buf->f_type = EXT3_SUPER_MAGIC;
	buf->f_bsize = sb->s_blocksize;
	buf->f_blocks = le32_to_cpu(es->s_blocks_count) - sbi->s_overhead_last;
	buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter);
	buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count);
	/* Guard against underflow when free < reserved. */
	if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count))
		buf->f_bavail = 0;
	buf->f_files = le32_to_cpu(es->s_inodes_count);
	buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
	buf->f_namelen = EXT3_NAME_LEN;
	/* Fold the 128-bit filesystem UUID down to the 64-bit f_fsid. */
	fsid = le64_to_cpup((void *)es->s_uuid) ^
	       le64_to_cpup((void *)es->s_uuid + sizeof(u64));
	buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
	buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
	return 0;
}
2791 2781
2792 /* Helper function for writing quotas on sync - we need to start transaction before quota file 2782 /* Helper function for writing quotas on sync - we need to start transaction before quota file
2793 * is locked for write. Otherwise the are possible deadlocks: 2783 * is locked for write. Otherwise the are possible deadlocks:
2794 * Process 1 Process 2 2784 * Process 1 Process 2
2795 * ext3_create() quota_sync() 2785 * ext3_create() quota_sync()
2796 * journal_start() write_dquot() 2786 * journal_start() write_dquot()
2797 * dquot_initialize() down(dqio_mutex) 2787 * dquot_initialize() down(dqio_mutex)
2798 * down(dqio_mutex) journal_start() 2788 * down(dqio_mutex) journal_start()
2799 * 2789 *
2800 */ 2790 */
2801 2791
2802 #ifdef CONFIG_QUOTA 2792 #ifdef CONFIG_QUOTA
2803 2793
2804 static inline struct inode *dquot_to_inode(struct dquot *dquot) 2794 static inline struct inode *dquot_to_inode(struct dquot *dquot)
2805 { 2795 {
2806 return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type]; 2796 return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
2807 } 2797 }
2808 2798
2809 static int ext3_write_dquot(struct dquot *dquot) 2799 static int ext3_write_dquot(struct dquot *dquot)
2810 { 2800 {
2811 int ret, err; 2801 int ret, err;
2812 handle_t *handle; 2802 handle_t *handle;
2813 struct inode *inode; 2803 struct inode *inode;
2814 2804
2815 inode = dquot_to_inode(dquot); 2805 inode = dquot_to_inode(dquot);
2816 handle = ext3_journal_start(inode, 2806 handle = ext3_journal_start(inode,
2817 EXT3_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); 2807 EXT3_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
2818 if (IS_ERR(handle)) 2808 if (IS_ERR(handle))
2819 return PTR_ERR(handle); 2809 return PTR_ERR(handle);
2820 ret = dquot_commit(dquot); 2810 ret = dquot_commit(dquot);
2821 err = ext3_journal_stop(handle); 2811 err = ext3_journal_stop(handle);
2822 if (!ret) 2812 if (!ret)
2823 ret = err; 2813 ret = err;
2824 return ret; 2814 return ret;
2825 } 2815 }
2826 2816
2827 static int ext3_acquire_dquot(struct dquot *dquot) 2817 static int ext3_acquire_dquot(struct dquot *dquot)
2828 { 2818 {
2829 int ret, err; 2819 int ret, err;
2830 handle_t *handle; 2820 handle_t *handle;
2831 2821
2832 handle = ext3_journal_start(dquot_to_inode(dquot), 2822 handle = ext3_journal_start(dquot_to_inode(dquot),
2833 EXT3_QUOTA_INIT_BLOCKS(dquot->dq_sb)); 2823 EXT3_QUOTA_INIT_BLOCKS(dquot->dq_sb));
2834 if (IS_ERR(handle)) 2824 if (IS_ERR(handle))
2835 return PTR_ERR(handle); 2825 return PTR_ERR(handle);
2836 ret = dquot_acquire(dquot); 2826 ret = dquot_acquire(dquot);
2837 err = ext3_journal_stop(handle); 2827 err = ext3_journal_stop(handle);
2838 if (!ret) 2828 if (!ret)
2839 ret = err; 2829 ret = err;
2840 return ret; 2830 return ret;
2841 } 2831 }
2842 2832
2843 static int ext3_release_dquot(struct dquot *dquot) 2833 static int ext3_release_dquot(struct dquot *dquot)
2844 { 2834 {
2845 int ret, err; 2835 int ret, err;
2846 handle_t *handle; 2836 handle_t *handle;
2847 2837
2848 handle = ext3_journal_start(dquot_to_inode(dquot), 2838 handle = ext3_journal_start(dquot_to_inode(dquot),
2849 EXT3_QUOTA_DEL_BLOCKS(dquot->dq_sb)); 2839 EXT3_QUOTA_DEL_BLOCKS(dquot->dq_sb));
2850 if (IS_ERR(handle)) { 2840 if (IS_ERR(handle)) {
2851 /* Release dquot anyway to avoid endless cycle in dqput() */ 2841 /* Release dquot anyway to avoid endless cycle in dqput() */
2852 dquot_release(dquot); 2842 dquot_release(dquot);
2853 return PTR_ERR(handle); 2843 return PTR_ERR(handle);
2854 } 2844 }
2855 ret = dquot_release(dquot); 2845 ret = dquot_release(dquot);
2856 err = ext3_journal_stop(handle); 2846 err = ext3_journal_stop(handle);
2857 if (!ret) 2847 if (!ret)
2858 ret = err; 2848 ret = err;
2859 return ret; 2849 return ret;
2860 } 2850 }
2861 2851
2862 static int ext3_mark_dquot_dirty(struct dquot *dquot) 2852 static int ext3_mark_dquot_dirty(struct dquot *dquot)
2863 { 2853 {
2864 /* Are we journaling quotas? */ 2854 /* Are we journaling quotas? */
2865 if (EXT3_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || 2855 if (EXT3_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
2866 EXT3_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { 2856 EXT3_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
2867 dquot_mark_dquot_dirty(dquot); 2857 dquot_mark_dquot_dirty(dquot);
2868 return ext3_write_dquot(dquot); 2858 return ext3_write_dquot(dquot);
2869 } else { 2859 } else {
2870 return dquot_mark_dquot_dirty(dquot); 2860 return dquot_mark_dquot_dirty(dquot);
2871 } 2861 }
2872 } 2862 }
2873 2863
2874 static int ext3_write_info(struct super_block *sb, int type) 2864 static int ext3_write_info(struct super_block *sb, int type)
2875 { 2865 {
2876 int ret, err; 2866 int ret, err;
2877 handle_t *handle; 2867 handle_t *handle;
2878 2868
2879 /* Data block + inode block */ 2869 /* Data block + inode block */
2880 handle = ext3_journal_start(sb->s_root->d_inode, 2); 2870 handle = ext3_journal_start(sb->s_root->d_inode, 2);
2881 if (IS_ERR(handle)) 2871 if (IS_ERR(handle))
2882 return PTR_ERR(handle); 2872 return PTR_ERR(handle);
2883 ret = dquot_commit_info(sb, type); 2873 ret = dquot_commit_info(sb, type);
2884 err = ext3_journal_stop(handle); 2874 err = ext3_journal_stop(handle);
2885 if (!ret) 2875 if (!ret)
2886 ret = err; 2876 ret = err;
2887 return ret; 2877 return ret;
2888 } 2878 }
2889 2879
2890 /* 2880 /*
2891 * Turn on quotas during mount time - we need to find 2881 * Turn on quotas during mount time - we need to find
2892 * the quota file and such... 2882 * the quota file and such...
2893 */ 2883 */
2894 static int ext3_quota_on_mount(struct super_block *sb, int type) 2884 static int ext3_quota_on_mount(struct super_block *sb, int type)
2895 { 2885 {
2896 return dquot_quota_on_mount(sb, EXT3_SB(sb)->s_qf_names[type], 2886 return dquot_quota_on_mount(sb, EXT3_SB(sb)->s_qf_names[type],
2897 EXT3_SB(sb)->s_jquota_fmt, type); 2887 EXT3_SB(sb)->s_jquota_fmt, type);
2898 } 2888 }
2899 2889
2900 /* 2890 /*
2901 * Standard function to be called on quota_on 2891 * Standard function to be called on quota_on
2902 */ 2892 */
2903 static int ext3_quota_on(struct super_block *sb, int type, int format_id, 2893 static int ext3_quota_on(struct super_block *sb, int type, int format_id,
2904 struct path *path) 2894 struct path *path)
2905 { 2895 {
2906 int err; 2896 int err;
2907 2897
2908 if (!test_opt(sb, QUOTA)) 2898 if (!test_opt(sb, QUOTA))
2909 return -EINVAL; 2899 return -EINVAL;
2910 2900
2911 /* Quotafile not on the same filesystem? */ 2901 /* Quotafile not on the same filesystem? */
2912 if (path->dentry->d_sb != sb) 2902 if (path->dentry->d_sb != sb)
2913 return -EXDEV; 2903 return -EXDEV;
2914 /* Journaling quota? */ 2904 /* Journaling quota? */
2915 if (EXT3_SB(sb)->s_qf_names[type]) { 2905 if (EXT3_SB(sb)->s_qf_names[type]) {
2916 /* Quotafile not of fs root? */ 2906 /* Quotafile not of fs root? */
2917 if (path->dentry->d_parent != sb->s_root) 2907 if (path->dentry->d_parent != sb->s_root)
2918 ext3_msg(sb, KERN_WARNING, 2908 ext3_msg(sb, KERN_WARNING,
2919 "warning: Quota file not on filesystem root. " 2909 "warning: Quota file not on filesystem root. "
2920 "Journaled quota will not work."); 2910 "Journaled quota will not work.");
2921 } 2911 }
2922 2912
2923 /* 2913 /*
2924 * When we journal data on quota file, we have to flush journal to see 2914 * When we journal data on quota file, we have to flush journal to see
2925 * all updates to the file when we bypass pagecache... 2915 * all updates to the file when we bypass pagecache...
2926 */ 2916 */
2927 if (ext3_should_journal_data(path->dentry->d_inode)) { 2917 if (ext3_should_journal_data(path->dentry->d_inode)) {
2928 /* 2918 /*
2929 * We don't need to lock updates but journal_flush() could 2919 * We don't need to lock updates but journal_flush() could
2930 * otherwise be livelocked... 2920 * otherwise be livelocked...
2931 */ 2921 */
2932 journal_lock_updates(EXT3_SB(sb)->s_journal); 2922 journal_lock_updates(EXT3_SB(sb)->s_journal);
2933 err = journal_flush(EXT3_SB(sb)->s_journal); 2923 err = journal_flush(EXT3_SB(sb)->s_journal);
2934 journal_unlock_updates(EXT3_SB(sb)->s_journal); 2924 journal_unlock_updates(EXT3_SB(sb)->s_journal);
2935 if (err) 2925 if (err)
2936 return err; 2926 return err;
2937 } 2927 }
2938 2928
2939 return dquot_quota_on(sb, type, format_id, path); 2929 return dquot_quota_on(sb, type, format_id, path);
2940 } 2930 }
2941 2931
2942 /* Read data from quotafile - avoid pagecache and such because we cannot afford 2932 /* Read data from quotafile - avoid pagecache and such because we cannot afford
2943 * acquiring the locks... As quota files are never truncated and quota code 2933 * acquiring the locks... As quota files are never truncated and quota code
2944 * itself serializes the operations (and no one else should touch the files) 2934 * itself serializes the operations (and no one else should touch the files)
2945 * we don't have to be afraid of races */ 2935 * we don't have to be afraid of races */
2946 static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data, 2936 static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data,
2947 size_t len, loff_t off) 2937 size_t len, loff_t off)
2948 { 2938 {
2949 struct inode *inode = sb_dqopt(sb)->files[type]; 2939 struct inode *inode = sb_dqopt(sb)->files[type];
2950 sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb); 2940 sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb);
2951 int err = 0; 2941 int err = 0;
2952 int offset = off & (sb->s_blocksize - 1); 2942 int offset = off & (sb->s_blocksize - 1);
2953 int tocopy; 2943 int tocopy;
2954 size_t toread; 2944 size_t toread;
2955 struct buffer_head *bh; 2945 struct buffer_head *bh;
2956 loff_t i_size = i_size_read(inode); 2946 loff_t i_size = i_size_read(inode);
2957 2947
2958 if (off > i_size) 2948 if (off > i_size)
2959 return 0; 2949 return 0;
2960 if (off+len > i_size) 2950 if (off+len > i_size)
2961 len = i_size-off; 2951 len = i_size-off;
2962 toread = len; 2952 toread = len;
2963 while (toread > 0) { 2953 while (toread > 0) {
2964 tocopy = sb->s_blocksize - offset < toread ? 2954 tocopy = sb->s_blocksize - offset < toread ?
2965 sb->s_blocksize - offset : toread; 2955 sb->s_blocksize - offset : toread;
2966 bh = ext3_bread(NULL, inode, blk, 0, &err); 2956 bh = ext3_bread(NULL, inode, blk, 0, &err);
2967 if (err) 2957 if (err)
2968 return err; 2958 return err;
2969 if (!bh) /* A hole? */ 2959 if (!bh) /* A hole? */
2970 memset(data, 0, tocopy); 2960 memset(data, 0, tocopy);
2971 else 2961 else
2972 memcpy(data, bh->b_data+offset, tocopy); 2962 memcpy(data, bh->b_data+offset, tocopy);
2973 brelse(bh); 2963 brelse(bh);
2974 offset = 0; 2964 offset = 0;
2975 toread -= tocopy; 2965 toread -= tocopy;
2976 data += tocopy; 2966 data += tocopy;
2977 blk++; 2967 blk++;
2978 } 2968 }
2979 return len; 2969 return len;
2980 } 2970 }
2981 2971
2982 /* Write to quotafile (we know the transaction is already started and has 2972 /* Write to quotafile (we know the transaction is already started and has
2983 * enough credits) */ 2973 * enough credits) */
2984 static ssize_t ext3_quota_write(struct super_block *sb, int type, 2974 static ssize_t ext3_quota_write(struct super_block *sb, int type,
2985 const char *data, size_t len, loff_t off) 2975 const char *data, size_t len, loff_t off)
2986 { 2976 {
2987 struct inode *inode = sb_dqopt(sb)->files[type]; 2977 struct inode *inode = sb_dqopt(sb)->files[type];
2988 sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb); 2978 sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb);
2989 int err = 0; 2979 int err = 0;
2990 int offset = off & (sb->s_blocksize - 1); 2980 int offset = off & (sb->s_blocksize - 1);
2991 int journal_quota = EXT3_SB(sb)->s_qf_names[type] != NULL; 2981 int journal_quota = EXT3_SB(sb)->s_qf_names[type] != NULL;
2992 struct buffer_head *bh; 2982 struct buffer_head *bh;
2993 handle_t *handle = journal_current_handle(); 2983 handle_t *handle = journal_current_handle();
2994 2984
2995 if (!handle) { 2985 if (!handle) {
2996 ext3_msg(sb, KERN_WARNING, 2986 ext3_msg(sb, KERN_WARNING,
2997 "warning: quota write (off=%llu, len=%llu)" 2987 "warning: quota write (off=%llu, len=%llu)"
2998 " cancelled because transaction is not started.", 2988 " cancelled because transaction is not started.",
2999 (unsigned long long)off, (unsigned long long)len); 2989 (unsigned long long)off, (unsigned long long)len);
3000 return -EIO; 2990 return -EIO;
3001 } 2991 }
3002 2992
3003 /* 2993 /*
3004 * Since we account only one data block in transaction credits, 2994 * Since we account only one data block in transaction credits,
3005 * then it is impossible to cross a block boundary. 2995 * then it is impossible to cross a block boundary.
3006 */ 2996 */
3007 if (sb->s_blocksize - offset < len) { 2997 if (sb->s_blocksize - offset < len) {
3008 ext3_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)" 2998 ext3_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
3009 " cancelled because not block aligned", 2999 " cancelled because not block aligned",
3010 (unsigned long long)off, (unsigned long long)len); 3000 (unsigned long long)off, (unsigned long long)len);
3011 return -EIO; 3001 return -EIO;
3012 } 3002 }
3013 mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); 3003 mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
3014 bh = ext3_bread(handle, inode, blk, 1, &err); 3004 bh = ext3_bread(handle, inode, blk, 1, &err);
3015 if (!bh) 3005 if (!bh)
3016 goto out; 3006 goto out;
3017 if (journal_quota) { 3007 if (journal_quota) {
3018 err = ext3_journal_get_write_access(handle, bh); 3008 err = ext3_journal_get_write_access(handle, bh);
3019 if (err) { 3009 if (err) {
3020 brelse(bh); 3010 brelse(bh);
3021 goto out; 3011 goto out;
3022 } 3012 }
3023 } 3013 }
3024 lock_buffer(bh); 3014 lock_buffer(bh);
3025 memcpy(bh->b_data+offset, data, len); 3015 memcpy(bh->b_data+offset, data, len);
3026 flush_dcache_page(bh->b_page); 3016 flush_dcache_page(bh->b_page);
3027 unlock_buffer(bh); 3017 unlock_buffer(bh);
3028 if (journal_quota) 3018 if (journal_quota)
3029 err = ext3_journal_dirty_metadata(handle, bh); 3019 err = ext3_journal_dirty_metadata(handle, bh);
3030 else { 3020 else {
3031 /* Always do at least ordered writes for quotas */ 3021 /* Always do at least ordered writes for quotas */
3032 err = ext3_journal_dirty_data(handle, bh); 3022 err = ext3_journal_dirty_data(handle, bh);
3033 mark_buffer_dirty(bh); 3023 mark_buffer_dirty(bh);
3034 } 3024 }
3035 brelse(bh); 3025 brelse(bh);
3036 out: 3026 out:
3037 if (err) { 3027 if (err) {
3038 mutex_unlock(&inode->i_mutex); 3028 mutex_unlock(&inode->i_mutex);
3039 return err; 3029 return err;
3040 } 3030 }
3041 if (inode->i_size < off + len) { 3031 if (inode->i_size < off + len) {
3042 i_size_write(inode, off + len); 3032 i_size_write(inode, off + len);
3043 EXT3_I(inode)->i_disksize = inode->i_size; 3033 EXT3_I(inode)->i_disksize = inode->i_size;
3044 } 3034 }
3045 inode->i_version++; 3035 inode->i_version++;
3046 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 3036 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
3047 ext3_mark_inode_dirty(handle, inode); 3037 ext3_mark_inode_dirty(handle, inode);
3048 mutex_unlock(&inode->i_mutex); 3038 mutex_unlock(&inode->i_mutex);
3049 return len; 3039 return len;
3050 } 3040 }
3051 3041
3052 #endif 3042 #endif
3053 3043
3054 static struct dentry *ext3_mount(struct file_system_type *fs_type, 3044 static struct dentry *ext3_mount(struct file_system_type *fs_type,
3055 int flags, const char *dev_name, void *data) 3045 int flags, const char *dev_name, void *data)
3056 { 3046 {
3057 return mount_bdev(fs_type, flags, dev_name, data, ext3_fill_super); 3047 return mount_bdev(fs_type, flags, dev_name, data, ext3_fill_super);
3058 } 3048 }
3059 3049
3060 static struct file_system_type ext3_fs_type = { 3050 static struct file_system_type ext3_fs_type = {
3061 .owner = THIS_MODULE, 3051 .owner = THIS_MODULE,
3062 .name = "ext3", 3052 .name = "ext3",
3063 .mount = ext3_mount, 3053 .mount = ext3_mount,
3064 .kill_sb = kill_block_super, 3054 .kill_sb = kill_block_super,
3065 .fs_flags = FS_REQUIRES_DEV, 3055 .fs_flags = FS_REQUIRES_DEV,
3066 }; 3056 };
3067 3057
3068 static int __init init_ext3_fs(void) 3058 static int __init init_ext3_fs(void)
3069 { 3059 {
3070 int err = init_ext3_xattr(); 3060 int err = init_ext3_xattr();
3071 if (err) 3061 if (err)
3072 return err; 3062 return err;
3073 err = init_inodecache(); 3063 err = init_inodecache();
3074 if (err) 3064 if (err)
3075 goto out1; 3065 goto out1;
3076 err = register_filesystem(&ext3_fs_type); 3066 err = register_filesystem(&ext3_fs_type);
3077 if (err) 3067 if (err)
3078 goto out; 3068 goto out;
3079 return 0; 3069 return 0;
3080 out: 3070 out:
3081 destroy_inodecache(); 3071 destroy_inodecache();
3082 out1: 3072 out1:
3083 exit_ext3_xattr(); 3073 exit_ext3_xattr();
3084 return err; 3074 return err;
3085 } 3075 }
3086 3076
3087 static void __exit exit_ext3_fs(void) 3077 static void __exit exit_ext3_fs(void)
3088 { 3078 {
3089 unregister_filesystem(&ext3_fs_type); 3079 unregister_filesystem(&ext3_fs_type);
3090 destroy_inodecache(); 3080 destroy_inodecache();
3091 exit_ext3_xattr(); 3081 exit_ext3_xattr();
3092 } 3082 }
3093 3083
3094 MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); 3084 MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
3095 MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions"); 3085 MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions");
3096 MODULE_LICENSE("GPL"); 3086 MODULE_LICENSE("GPL");
1 /* 1 /*
2 * linux/fs/ext3/symlink.c 2 * linux/fs/ext3/symlink.c
3 * 3 *
4 * Only fast symlinks left here - the rest is done by generic code. AV, 1999 4 * Only fast symlinks left here - the rest is done by generic code. AV, 1999
5 * 5 *
6 * Copyright (C) 1992, 1993, 1994, 1995 6 * Copyright (C) 1992, 1993, 1994, 1995
7 * Remy Card (card@masi.ibp.fr) 7 * Remy Card (card@masi.ibp.fr)
8 * Laboratoire MASI - Institut Blaise Pascal 8 * Laboratoire MASI - Institut Blaise Pascal
9 * Universite Pierre et Marie Curie (Paris VI) 9 * Universite Pierre et Marie Curie (Paris VI)
10 * 10 *
11 * from 11 * from
12 * 12 *
13 * linux/fs/minix/symlink.c 13 * linux/fs/minix/symlink.c
14 * 14 *
15 * Copyright (C) 1991, 1992 Linus Torvalds 15 * Copyright (C) 1991, 1992 Linus Torvalds
16 * 16 *
17 * ext3 symlink handling code 17 * ext3 symlink handling code
18 */ 18 */
19 19
20 #include <linux/fs.h>
21 #include <linux/jbd.h>
22 #include <linux/ext3_fs.h>
23 #include <linux/namei.h> 20 #include <linux/namei.h>
21 #include "ext3.h"
24 #include "xattr.h" 22 #include "xattr.h"
25 23
26 static void * ext3_follow_link(struct dentry *dentry, struct nameidata *nd) 24 static void * ext3_follow_link(struct dentry *dentry, struct nameidata *nd)
27 { 25 {
28 struct ext3_inode_info *ei = EXT3_I(dentry->d_inode); 26 struct ext3_inode_info *ei = EXT3_I(dentry->d_inode);
29 nd_set_link(nd, (char*)ei->i_data); 27 nd_set_link(nd, (char*)ei->i_data);
30 return NULL; 28 return NULL;
31 } 29 }
32 30
33 const struct inode_operations ext3_symlink_inode_operations = { 31 const struct inode_operations ext3_symlink_inode_operations = {
34 .readlink = generic_readlink, 32 .readlink = generic_readlink,
35 .follow_link = page_follow_link_light, 33 .follow_link = page_follow_link_light,
36 .put_link = page_put_link, 34 .put_link = page_put_link,
37 .setattr = ext3_setattr, 35 .setattr = ext3_setattr,
38 #ifdef CONFIG_EXT3_FS_XATTR 36 #ifdef CONFIG_EXT3_FS_XATTR
39 .setxattr = generic_setxattr, 37 .setxattr = generic_setxattr,
40 .getxattr = generic_getxattr, 38 .getxattr = generic_getxattr,
41 .listxattr = ext3_listxattr, 39 .listxattr = ext3_listxattr,
42 .removexattr = generic_removexattr, 40 .removexattr = generic_removexattr,
43 #endif 41 #endif
44 }; 42 };
45 43
46 const struct inode_operations ext3_fast_symlink_inode_operations = { 44 const struct inode_operations ext3_fast_symlink_inode_operations = {
47 .readlink = generic_readlink, 45 .readlink = generic_readlink,
48 .follow_link = ext3_follow_link, 46 .follow_link = ext3_follow_link,
49 .setattr = ext3_setattr, 47 .setattr = ext3_setattr,
50 #ifdef CONFIG_EXT3_FS_XATTR 48 #ifdef CONFIG_EXT3_FS_XATTR
51 .setxattr = generic_setxattr, 49 .setxattr = generic_setxattr,
52 .getxattr = generic_getxattr, 50 .getxattr = generic_getxattr,
53 .listxattr = ext3_listxattr, 51 .listxattr = ext3_listxattr,
54 .removexattr = generic_removexattr, 52 .removexattr = generic_removexattr,
55 #endif 53 #endif
56 }; 54 };
1 /* 1 /*
2 * linux/fs/ext3/xattr.c 2 * linux/fs/ext3/xattr.c
3 * 3 *
4 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> 4 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
5 * 5 *
6 * Fix by Harrison Xing <harrison@mountainviewdata.com>. 6 * Fix by Harrison Xing <harrison@mountainviewdata.com>.
7 * Ext3 code with a lot of help from Eric Jarman <ejarman@acm.org>. 7 * Ext3 code with a lot of help from Eric Jarman <ejarman@acm.org>.
8 * Extended attributes for symlinks and special files added per 8 * Extended attributes for symlinks and special files added per
9 * suggestion of Luka Renko <luka.renko@hermes.si>. 9 * suggestion of Luka Renko <luka.renko@hermes.si>.
10 * xattr consolidation Copyright (c) 2004 James Morris <jmorris@redhat.com>, 10 * xattr consolidation Copyright (c) 2004 James Morris <jmorris@redhat.com>,
11 * Red Hat Inc. 11 * Red Hat Inc.
12 * ea-in-inode support by Alex Tomas <alex@clusterfs.com> aka bzzz 12 * ea-in-inode support by Alex Tomas <alex@clusterfs.com> aka bzzz
13 * and Andreas Gruenbacher <agruen@suse.de>. 13 * and Andreas Gruenbacher <agruen@suse.de>.
14 */ 14 */
15 15
16 /* 16 /*
17 * Extended attributes are stored directly in inodes (on file systems with 17 * Extended attributes are stored directly in inodes (on file systems with
18 * inodes bigger than 128 bytes) and on additional disk blocks. The i_file_acl 18 * inodes bigger than 128 bytes) and on additional disk blocks. The i_file_acl
19 * field contains the block number if an inode uses an additional block. All 19 * field contains the block number if an inode uses an additional block. All
20 * attributes must fit in the inode and one additional block. Blocks that 20 * attributes must fit in the inode and one additional block. Blocks that
21 * contain the identical set of attributes may be shared among several inodes. 21 * contain the identical set of attributes may be shared among several inodes.
22 * Identical blocks are detected by keeping a cache of blocks that have 22 * Identical blocks are detected by keeping a cache of blocks that have
23 * recently been accessed. 23 * recently been accessed.
24 * 24 *
25 * The attributes in inodes and on blocks have a different header; the entries 25 * The attributes in inodes and on blocks have a different header; the entries
26 * are stored in the same format: 26 * are stored in the same format:
27 * 27 *
28 * +------------------+ 28 * +------------------+
29 * | header | 29 * | header |
30 * | entry 1 | | 30 * | entry 1 | |
31 * | entry 2 | | growing downwards 31 * | entry 2 | | growing downwards
32 * | entry 3 | v 32 * | entry 3 | v
33 * | four null bytes | 33 * | four null bytes |
34 * | . . . | 34 * | . . . |
35 * | value 1 | ^ 35 * | value 1 | ^
36 * | value 3 | | growing upwards 36 * | value 3 | | growing upwards
37 * | value 2 | | 37 * | value 2 | |
38 * +------------------+ 38 * +------------------+
39 * 39 *
40 * The header is followed by multiple entry descriptors. In disk blocks, the 40 * The header is followed by multiple entry descriptors. In disk blocks, the
41 * entry descriptors are kept sorted. In inodes, they are unsorted. The 41 * entry descriptors are kept sorted. In inodes, they are unsorted. The
42 * attribute values are aligned to the end of the block in no specific order. 42 * attribute values are aligned to the end of the block in no specific order.
43 * 43 *
44 * Locking strategy 44 * Locking strategy
45 * ---------------- 45 * ----------------
46 * EXT3_I(inode)->i_file_acl is protected by EXT3_I(inode)->xattr_sem. 46 * EXT3_I(inode)->i_file_acl is protected by EXT3_I(inode)->xattr_sem.
47 * EA blocks are only changed if they are exclusive to an inode, so 47 * EA blocks are only changed if they are exclusive to an inode, so
48 * holding xattr_sem also means that nothing but the EA block's reference 48 * holding xattr_sem also means that nothing but the EA block's reference
49 * count can change. Multiple writers to the same block are synchronized 49 * count can change. Multiple writers to the same block are synchronized
50 * by the buffer lock. 50 * by the buffer lock.
51 */ 51 */
52 52
53 #include <linux/init.h> 53 #include "ext3.h"
54 #include <linux/fs.h>
55 #include <linux/slab.h>
56 #include <linux/ext3_jbd.h>
57 #include <linux/ext3_fs.h>
58 #include <linux/mbcache.h> 54 #include <linux/mbcache.h>
59 #include <linux/quotaops.h> 55 #include <linux/quotaops.h>
60 #include <linux/rwsem.h>
61 #include "xattr.h" 56 #include "xattr.h"
62 #include "acl.h" 57 #include "acl.h"
63 58
64 #define BHDR(bh) ((struct ext3_xattr_header *)((bh)->b_data)) 59 #define BHDR(bh) ((struct ext3_xattr_header *)((bh)->b_data))
65 #define ENTRY(ptr) ((struct ext3_xattr_entry *)(ptr)) 60 #define ENTRY(ptr) ((struct ext3_xattr_entry *)(ptr))
66 #define BFIRST(bh) ENTRY(BHDR(bh)+1) 61 #define BFIRST(bh) ENTRY(BHDR(bh)+1)
67 #define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0) 62 #define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0)
68 63
69 #define IHDR(inode, raw_inode) \ 64 #define IHDR(inode, raw_inode) \
70 ((struct ext3_xattr_ibody_header *) \ 65 ((struct ext3_xattr_ibody_header *) \
71 ((void *)raw_inode + \ 66 ((void *)raw_inode + \
72 EXT3_GOOD_OLD_INODE_SIZE + \ 67 EXT3_GOOD_OLD_INODE_SIZE + \
73 EXT3_I(inode)->i_extra_isize)) 68 EXT3_I(inode)->i_extra_isize))
74 #define IFIRST(hdr) ((struct ext3_xattr_entry *)((hdr)+1)) 69 #define IFIRST(hdr) ((struct ext3_xattr_entry *)((hdr)+1))
75 70
76 #ifdef EXT3_XATTR_DEBUG 71 #ifdef EXT3_XATTR_DEBUG
77 # define ea_idebug(inode, f...) do { \ 72 # define ea_idebug(inode, f...) do { \
78 printk(KERN_DEBUG "inode %s:%lu: ", \ 73 printk(KERN_DEBUG "inode %s:%lu: ", \
79 inode->i_sb->s_id, inode->i_ino); \ 74 inode->i_sb->s_id, inode->i_ino); \
80 printk(f); \ 75 printk(f); \
81 printk("\n"); \ 76 printk("\n"); \
82 } while (0) 77 } while (0)
83 # define ea_bdebug(bh, f...) do { \ 78 # define ea_bdebug(bh, f...) do { \
84 char b[BDEVNAME_SIZE]; \ 79 char b[BDEVNAME_SIZE]; \
85 printk(KERN_DEBUG "block %s:%lu: ", \ 80 printk(KERN_DEBUG "block %s:%lu: ", \
86 bdevname(bh->b_bdev, b), \ 81 bdevname(bh->b_bdev, b), \
87 (unsigned long) bh->b_blocknr); \ 82 (unsigned long) bh->b_blocknr); \
88 printk(f); \ 83 printk(f); \
89 printk("\n"); \ 84 printk("\n"); \
90 } while (0) 85 } while (0)
91 #else 86 #else
92 # define ea_idebug(f...) 87 # define ea_idebug(f...)
93 # define ea_bdebug(f...) 88 # define ea_bdebug(f...)
94 #endif 89 #endif
95 90
96 static void ext3_xattr_cache_insert(struct buffer_head *); 91 static void ext3_xattr_cache_insert(struct buffer_head *);
97 static struct buffer_head *ext3_xattr_cache_find(struct inode *, 92 static struct buffer_head *ext3_xattr_cache_find(struct inode *,
98 struct ext3_xattr_header *, 93 struct ext3_xattr_header *,
99 struct mb_cache_entry **); 94 struct mb_cache_entry **);
100 static void ext3_xattr_rehash(struct ext3_xattr_header *, 95 static void ext3_xattr_rehash(struct ext3_xattr_header *,
101 struct ext3_xattr_entry *); 96 struct ext3_xattr_entry *);
102 static int ext3_xattr_list(struct dentry *dentry, char *buffer, 97 static int ext3_xattr_list(struct dentry *dentry, char *buffer,
103 size_t buffer_size); 98 size_t buffer_size);
104 99
105 static struct mb_cache *ext3_xattr_cache; 100 static struct mb_cache *ext3_xattr_cache;
106 101
107 static const struct xattr_handler *ext3_xattr_handler_map[] = { 102 static const struct xattr_handler *ext3_xattr_handler_map[] = {
108 [EXT3_XATTR_INDEX_USER] = &ext3_xattr_user_handler, 103 [EXT3_XATTR_INDEX_USER] = &ext3_xattr_user_handler,
109 #ifdef CONFIG_EXT3_FS_POSIX_ACL 104 #ifdef CONFIG_EXT3_FS_POSIX_ACL
110 [EXT3_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext3_xattr_acl_access_handler, 105 [EXT3_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext3_xattr_acl_access_handler,
111 [EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT] = &ext3_xattr_acl_default_handler, 106 [EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT] = &ext3_xattr_acl_default_handler,
112 #endif 107 #endif
113 [EXT3_XATTR_INDEX_TRUSTED] = &ext3_xattr_trusted_handler, 108 [EXT3_XATTR_INDEX_TRUSTED] = &ext3_xattr_trusted_handler,
114 #ifdef CONFIG_EXT3_FS_SECURITY 109 #ifdef CONFIG_EXT3_FS_SECURITY
115 [EXT3_XATTR_INDEX_SECURITY] = &ext3_xattr_security_handler, 110 [EXT3_XATTR_INDEX_SECURITY] = &ext3_xattr_security_handler,
116 #endif 111 #endif
117 }; 112 };
118 113
119 const struct xattr_handler *ext3_xattr_handlers[] = { 114 const struct xattr_handler *ext3_xattr_handlers[] = {
120 &ext3_xattr_user_handler, 115 &ext3_xattr_user_handler,
121 &ext3_xattr_trusted_handler, 116 &ext3_xattr_trusted_handler,
122 #ifdef CONFIG_EXT3_FS_POSIX_ACL 117 #ifdef CONFIG_EXT3_FS_POSIX_ACL
123 &ext3_xattr_acl_access_handler, 118 &ext3_xattr_acl_access_handler,
124 &ext3_xattr_acl_default_handler, 119 &ext3_xattr_acl_default_handler,
125 #endif 120 #endif
126 #ifdef CONFIG_EXT3_FS_SECURITY 121 #ifdef CONFIG_EXT3_FS_SECURITY
127 &ext3_xattr_security_handler, 122 &ext3_xattr_security_handler,
128 #endif 123 #endif
129 NULL 124 NULL
130 }; 125 };
131 126
132 static inline const struct xattr_handler * 127 static inline const struct xattr_handler *
133 ext3_xattr_handler(int name_index) 128 ext3_xattr_handler(int name_index)
134 { 129 {
135 const struct xattr_handler *handler = NULL; 130 const struct xattr_handler *handler = NULL;
136 131
137 if (name_index > 0 && name_index < ARRAY_SIZE(ext3_xattr_handler_map)) 132 if (name_index > 0 && name_index < ARRAY_SIZE(ext3_xattr_handler_map))
138 handler = ext3_xattr_handler_map[name_index]; 133 handler = ext3_xattr_handler_map[name_index];
139 return handler; 134 return handler;
140 } 135 }
141 136
142 /* 137 /*
143 * Inode operation listxattr() 138 * Inode operation listxattr()
144 * 139 *
145 * dentry->d_inode->i_mutex: don't care 140 * dentry->d_inode->i_mutex: don't care
146 */ 141 */
147 ssize_t 142 ssize_t
148 ext3_listxattr(struct dentry *dentry, char *buffer, size_t size) 143 ext3_listxattr(struct dentry *dentry, char *buffer, size_t size)
149 { 144 {
150 return ext3_xattr_list(dentry, buffer, size); 145 return ext3_xattr_list(dentry, buffer, size);
151 } 146 }
152 147
153 static int 148 static int
154 ext3_xattr_check_names(struct ext3_xattr_entry *entry, void *end) 149 ext3_xattr_check_names(struct ext3_xattr_entry *entry, void *end)
155 { 150 {
156 while (!IS_LAST_ENTRY(entry)) { 151 while (!IS_LAST_ENTRY(entry)) {
157 struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(entry); 152 struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(entry);
158 if ((void *)next >= end) 153 if ((void *)next >= end)
159 return -EIO; 154 return -EIO;
160 entry = next; 155 entry = next;
161 } 156 }
162 return 0; 157 return 0;
163 } 158 }
164 159
165 static inline int 160 static inline int
166 ext3_xattr_check_block(struct buffer_head *bh) 161 ext3_xattr_check_block(struct buffer_head *bh)
167 { 162 {
168 int error; 163 int error;
169 164
170 if (BHDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || 165 if (BHDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
171 BHDR(bh)->h_blocks != cpu_to_le32(1)) 166 BHDR(bh)->h_blocks != cpu_to_le32(1))
172 return -EIO; 167 return -EIO;
173 error = ext3_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size); 168 error = ext3_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size);
174 return error; 169 return error;
175 } 170 }
176 171
177 static inline int 172 static inline int
178 ext3_xattr_check_entry(struct ext3_xattr_entry *entry, size_t size) 173 ext3_xattr_check_entry(struct ext3_xattr_entry *entry, size_t size)
179 { 174 {
180 size_t value_size = le32_to_cpu(entry->e_value_size); 175 size_t value_size = le32_to_cpu(entry->e_value_size);
181 176
182 if (entry->e_value_block != 0 || value_size > size || 177 if (entry->e_value_block != 0 || value_size > size ||
183 le16_to_cpu(entry->e_value_offs) + value_size > size) 178 le16_to_cpu(entry->e_value_offs) + value_size > size)
184 return -EIO; 179 return -EIO;
185 return 0; 180 return 0;
186 } 181 }
187 182
/*
 * Find the entry matching (name_index, name) in the list at *pentry.
 *
 * On return, *pentry points either at the matching entry, or (when
 * @sorted) at the first entry that sorts after the wanted name — the
 * correct insertion point.  @size bounds the storage area for the value
 * sanity check.  Returns 0 on a match, -ENODATA when absent, -EIO when
 * the matching entry's value is corrupt, -EINVAL for a NULL name.
 */
static int
ext3_xattr_find_entry(struct ext3_xattr_entry **pentry, int name_index,
		      const char *name, size_t size, int sorted)
{
	struct ext3_xattr_entry *entry;
	size_t name_len;
	int cmp = 1;	/* stays nonzero if the list is empty */

	if (name == NULL)
		return -EINVAL;
	name_len = strlen(name);
	entry = *pentry;
	for (; !IS_LAST_ENTRY(entry); entry = EXT3_XATTR_NEXT(entry)) {
		/* Order: namespace index, then name length, then name. */
		cmp = name_index - entry->e_name_index;
		if (!cmp)
			cmp = name_len - entry->e_name_len;
		if (!cmp)
			cmp = memcmp(name, entry->e_name, name_len);
		/* In a sorted list we may stop at the first entry that is
		 * not smaller; in an unsorted list only an exact match
		 * terminates the scan. */
		if (cmp <= 0 && (sorted || cmp == 0))
			break;
	}
	*pentry = entry;
	if (!cmp && ext3_xattr_check_entry(entry, size))
		return -EIO;
	return cmp ? -ENODATA : 0;
}
214 209
/*
 * Look up @name in the inode's external xattr block and copy its value
 * into @buffer, or just compute the size when @buffer is NULL.
 *
 * Returns the value size in bytes, -ENODATA when the inode has no xattr
 * block or the attribute is absent, -ERANGE when @buffer is too small,
 * or -EIO on a corrupt block.
 */
static int
ext3_xattr_block_get(struct inode *inode, int name_index, const char *name,
		     void *buffer, size_t buffer_size)
{
	struct buffer_head *bh = NULL;
	struct ext3_xattr_entry *entry;
	size_t size;
	int error;

	ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
		  name_index, name, buffer, (long)buffer_size);

	error = -ENODATA;
	if (!EXT3_I(inode)->i_file_acl)
		goto cleanup;
	ea_idebug(inode, "reading block %u", EXT3_I(inode)->i_file_acl);
	bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
	if (!bh)
		goto cleanup;
	ea_bdebug(bh, "b_count=%d, refcount=%d",
		atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
	if (ext3_xattr_check_block(bh)) {
		/* Reached again below when find_entry detects corruption. */
bad_block:	ext3_error(inode->i_sb, __func__,
			   "inode %lu: bad block "E3FSBLK, inode->i_ino,
			   EXT3_I(inode)->i_file_acl);
		error = -EIO;
		goto cleanup;
	}
	/* Block checked out fine; make it findable by the mbcache. */
	ext3_xattr_cache_insert(bh);
	entry = BFIRST(bh);
	/* Entries in the external block are kept sorted (last arg == 1). */
	error = ext3_xattr_find_entry(&entry, name_index, name, bh->b_size, 1);
	if (error == -EIO)
		goto bad_block;
	if (error)
		goto cleanup;
	size = le32_to_cpu(entry->e_value_size);
	if (buffer) {
		error = -ERANGE;
		if (size > buffer_size)
			goto cleanup;
		memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs),
		       size);
	}
	error = size;

cleanup:
	brelse(bh);
	return error;
}
264 259
/*
 * Look up @name in the in-inode xattr area (the space after the raw
 * inode) and copy its value into @buffer, or just compute the size when
 * @buffer is NULL.
 *
 * Returns the value size, -ENODATA when the inode has no in-body xattrs
 * or the attribute is absent, -ERANGE when @buffer is too small, or a
 * negative error from reading/validating the inode.
 */
static int
ext3_xattr_ibody_get(struct inode *inode, int name_index, const char *name,
		     void *buffer, size_t buffer_size)
{
	struct ext3_xattr_ibody_header *header;
	struct ext3_xattr_entry *entry;
	struct ext3_inode *raw_inode;
	struct ext3_iloc iloc;
	size_t size;
	void *end;
	int error;

	if (!ext3_test_inode_state(inode, EXT3_STATE_XATTR))
		return -ENODATA;
	error = ext3_get_inode_loc(inode, &iloc);
	if (error)
		return error;
	raw_inode = ext3_raw_inode(&iloc);
	header = IHDR(inode, raw_inode);
	entry = IFIRST(header);
	/* The in-body area ends at the on-disk inode record boundary. */
	end = (void *)raw_inode + EXT3_SB(inode->i_sb)->s_inode_size;
	error = ext3_xattr_check_names(entry, end);
	if (error)
		goto cleanup;
	/* In-body entries are not sorted (last arg == 0). */
	error = ext3_xattr_find_entry(&entry, name_index, name,
				      end - (void *)entry, 0);
	if (error)
		goto cleanup;
	size = le32_to_cpu(entry->e_value_size);
	if (buffer) {
		error = -ERANGE;
		if (size > buffer_size)
			goto cleanup;
		memcpy(buffer, (void *)IFIRST(header) +
		       le16_to_cpu(entry->e_value_offs), size);
	}
	error = size;

cleanup:
	brelse(iloc.bh);
	return error;
}
307 302
308 /* 303 /*
309 * ext3_xattr_get() 304 * ext3_xattr_get()
310 * 305 *
311 * Copy an extended attribute into the buffer 306 * Copy an extended attribute into the buffer
312 * provided, or compute the buffer size required. 307 * provided, or compute the buffer size required.
313 * Buffer is NULL to compute the size of the buffer required. 308 * Buffer is NULL to compute the size of the buffer required.
314 * 309 *
315 * Returns a negative error number on failure, or the number of bytes 310 * Returns a negative error number on failure, or the number of bytes
316 * used / required on success. 311 * used / required on success.
317 */ 312 */
318 int 313 int
319 ext3_xattr_get(struct inode *inode, int name_index, const char *name, 314 ext3_xattr_get(struct inode *inode, int name_index, const char *name,
320 void *buffer, size_t buffer_size) 315 void *buffer, size_t buffer_size)
321 { 316 {
322 int error; 317 int error;
323 318
324 down_read(&EXT3_I(inode)->xattr_sem); 319 down_read(&EXT3_I(inode)->xattr_sem);
325 error = ext3_xattr_ibody_get(inode, name_index, name, buffer, 320 error = ext3_xattr_ibody_get(inode, name_index, name, buffer,
326 buffer_size); 321 buffer_size);
327 if (error == -ENODATA) 322 if (error == -ENODATA)
328 error = ext3_xattr_block_get(inode, name_index, name, buffer, 323 error = ext3_xattr_block_get(inode, name_index, name, buffer,
329 buffer_size); 324 buffer_size);
330 up_read(&EXT3_I(inode)->xattr_sem); 325 up_read(&EXT3_I(inode)->xattr_sem);
331 return error; 326 return error;
332 } 327 }
333 328
334 static int 329 static int
335 ext3_xattr_list_entries(struct dentry *dentry, struct ext3_xattr_entry *entry, 330 ext3_xattr_list_entries(struct dentry *dentry, struct ext3_xattr_entry *entry,
336 char *buffer, size_t buffer_size) 331 char *buffer, size_t buffer_size)
337 { 332 {
338 size_t rest = buffer_size; 333 size_t rest = buffer_size;
339 334
340 for (; !IS_LAST_ENTRY(entry); entry = EXT3_XATTR_NEXT(entry)) { 335 for (; !IS_LAST_ENTRY(entry); entry = EXT3_XATTR_NEXT(entry)) {
341 const struct xattr_handler *handler = 336 const struct xattr_handler *handler =
342 ext3_xattr_handler(entry->e_name_index); 337 ext3_xattr_handler(entry->e_name_index);
343 338
344 if (handler) { 339 if (handler) {
345 size_t size = handler->list(dentry, buffer, rest, 340 size_t size = handler->list(dentry, buffer, rest,
346 entry->e_name, 341 entry->e_name,
347 entry->e_name_len, 342 entry->e_name_len,
348 handler->flags); 343 handler->flags);
349 if (buffer) { 344 if (buffer) {
350 if (size > rest) 345 if (size > rest)
351 return -ERANGE; 346 return -ERANGE;
352 buffer += size; 347 buffer += size;
353 } 348 }
354 rest -= size; 349 rest -= size;
355 } 350 }
356 } 351 }
357 return buffer_size - rest; 352 return buffer_size - rest;
358 } 353 }
359 354
/*
 * List the attribute names stored in the inode's external xattr block.
 * Returns bytes used/required (0 when the inode has no xattr block), or
 * -EIO on read failure or corruption.
 */
static int
ext3_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
{
	struct inode *inode = dentry->d_inode;
	struct buffer_head *bh = NULL;
	int error;

	ea_idebug(inode, "buffer=%p, buffer_size=%ld",
		  buffer, (long)buffer_size);

	error = 0;
	if (!EXT3_I(inode)->i_file_acl)
		goto cleanup;
	ea_idebug(inode, "reading block %u", EXT3_I(inode)->i_file_acl);
	bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
	error = -EIO;
	if (!bh)
		goto cleanup;
	ea_bdebug(bh, "b_count=%d, refcount=%d",
		atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
	if (ext3_xattr_check_block(bh)) {
		ext3_error(inode->i_sb, __func__,
			   "inode %lu: bad block "E3FSBLK, inode->i_ino,
			   EXT3_I(inode)->i_file_acl);
		error = -EIO;
		goto cleanup;
	}
	/* Validated block: make it findable by the mbcache. */
	ext3_xattr_cache_insert(bh);
	error = ext3_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size);

cleanup:
	brelse(bh);

	return error;
}
395 390
/*
 * List the attribute names stored in the in-inode xattr area.
 * Returns bytes used/required (0 when the inode carries no in-body
 * xattrs), or a negative error.
 */
static int
ext3_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size)
{
	struct inode *inode = dentry->d_inode;
	struct ext3_xattr_ibody_header *header;
	struct ext3_inode *raw_inode;
	struct ext3_iloc iloc;
	void *end;
	int error;

	if (!ext3_test_inode_state(inode, EXT3_STATE_XATTR))
		return 0;
	error = ext3_get_inode_loc(inode, &iloc);
	if (error)
		return error;
	raw_inode = ext3_raw_inode(&iloc);
	header = IHDR(inode, raw_inode);
	/* The in-body area ends at the on-disk inode record boundary. */
	end = (void *)raw_inode + EXT3_SB(inode->i_sb)->s_inode_size;
	error = ext3_xattr_check_names(IFIRST(header), end);
	if (error)
		goto cleanup;
	error = ext3_xattr_list_entries(dentry, IFIRST(header),
					buffer, buffer_size);

cleanup:
	brelse(iloc.bh);
	return error;
}
424 419
/*
 * ext3_xattr_list()
 *
 * Copy a list of attribute names into the buffer
 * provided, or compute the buffer size required.
 * Buffer is NULL to compute the size of the buffer required.
 *
 * Returns a negative error number on failure, or the number of bytes
 * used / required on success.
 */
static int
ext3_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
{
	int i_error, b_error;

	down_read(&EXT3_I(dentry->d_inode)->xattr_sem);
	i_error = ext3_xattr_ibody_list(dentry, buffer, buffer_size);
	if (i_error < 0) {
		/* In-body listing failed: report that error alone. */
		b_error = 0;
	} else {
		if (buffer) {
			/* In-body names are already in place; append the
			 * block names after them. */
			buffer += i_error;
			buffer_size -= i_error;
		}
		b_error = ext3_xattr_block_list(dentry, buffer, buffer_size);
		if (b_error < 0)
			i_error = 0;
	}
	up_read(&EXT3_I(dentry->d_inode)->xattr_sem);
	/* Exactly one term is nonzero on error, so the sum is either the
	 * combined length or the single errno. */
	return i_error + b_error;
}
456 451
457 /* 452 /*
458 * If the EXT3_FEATURE_COMPAT_EXT_ATTR feature of this file system is 453 * If the EXT3_FEATURE_COMPAT_EXT_ATTR feature of this file system is
459 * not set, set it. 454 * not set, set it.
460 */ 455 */
461 static void ext3_xattr_update_super_block(handle_t *handle, 456 static void ext3_xattr_update_super_block(handle_t *handle,
462 struct super_block *sb) 457 struct super_block *sb)
463 { 458 {
464 if (EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_EXT_ATTR)) 459 if (EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_EXT_ATTR))
465 return; 460 return;
466 461
467 if (ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh) == 0) { 462 if (ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh) == 0) {
468 EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_EXT_ATTR); 463 EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_EXT_ATTR);
469 ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); 464 ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
470 } 465 }
471 } 466 }
472 467
/*
 * Release the xattr block BH: If the reference count is > 1, decrement
 * it; otherwise free the block.
 */
static void
ext3_xattr_release_block(handle_t *handle, struct inode *inode,
			 struct buffer_head *bh)
{
	struct mb_cache_entry *ce = NULL;
	int error = 0;

	/* Grab the mbcache entry (may be NULL) before deciding whether to
	 * free it or merely release it below. */
	ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_bdev, bh->b_blocknr);
	error = ext3_journal_get_write_access(handle, bh);
	if (error)
		goto out;

	/* h_refcount is examined and modified under the buffer lock. */
	lock_buffer(bh);

	if (BHDR(bh)->h_refcount == cpu_to_le32(1)) {
		/* Last reference: drop the cache entry and free the block. */
		ea_bdebug(bh, "refcount now=0; freeing");
		if (ce)
			mb_cache_entry_free(ce);
		ext3_free_blocks(handle, inode, bh->b_blocknr, 1);
		/* Extra reference for ext3_forget(), which consumes one. */
		get_bh(bh);
		ext3_forget(handle, 1, inode, bh, bh->b_blocknr);
	} else {
		/* Shared block: just drop one reference and our quota. */
		le32_add_cpu(&BHDR(bh)->h_refcount, -1);
		error = ext3_journal_dirty_metadata(handle, bh);
		if (IS_SYNC(inode))
			handle->h_sync = 1;
		dquot_free_block(inode, 1);
		ea_bdebug(bh, "refcount now=%d; releasing",
			  le32_to_cpu(BHDR(bh)->h_refcount));
		if (ce)
			mb_cache_entry_release(ce);
	}
	unlock_buffer(bh);
out:
	ext3_std_error(inode->i_sb, error);
	return;
}
514 509
/*
 * Description of one attribute operation: the name to act on and the
 * new value.  A NULL value requests removal (see ext3_xattr_set_entry).
 */
struct ext3_xattr_info {
	int name_index;		/* attribute namespace index */
	const char *name;	/* attribute name, without namespace prefix */
	const void *value;	/* new value, or NULL to delete */
	size_t value_len;	/* length of value in bytes */
};
521 516
/*
 * Cursor over one xattr storage area (in-inode or external block).
 */
struct ext3_xattr_search {
	struct ext3_xattr_entry *first;	/* first entry in the list */
	void *base;			/* start of the storage area */
	void *end;			/* first byte past the storage area */
	struct ext3_xattr_entry *here;	/* match, or insertion point */
	int not_found;			/* nonzero if no matching entry */
};
529 524
/*
 * Apply the change described by @i to the storage area tracked by @s:
 * insert, replace, or (when i->value is NULL) remove an entry.
 *
 * Layout: entries grow downward from the top of the area, values grow
 * upward from the bottom; min_offs tracks the lowest value offset, i.e.
 * the boundary between free space and values.
 *
 * Returns 0 on success or -ENOSPC if the new value does not fit.  The
 * caller is responsible for journalling and for rehashing the block.
 */
static int
ext3_xattr_set_entry(struct ext3_xattr_info *i, struct ext3_xattr_search *s)
{
	struct ext3_xattr_entry *last;
	size_t free, min_offs = s->end - s->base, name_len = strlen(i->name);

	/* Compute min_offs and last. */
	last = s->first;
	for (; !IS_LAST_ENTRY(last); last = EXT3_XATTR_NEXT(last)) {
		if (!last->e_value_block && last->e_value_size) {
			size_t offs = le16_to_cpu(last->e_value_offs);
			if (offs < min_offs)
				min_offs = offs;
		}
	}
	/* Free space between the entry list terminator and the values. */
	free = min_offs - ((void *)last - s->base) - sizeof(__u32);
	if (!s->not_found) {
		/* Replacing: the existing name and value will be reclaimed. */
		if (!s->here->e_value_block && s->here->e_value_size) {
			size_t size = le32_to_cpu(s->here->e_value_size);
			free += EXT3_XATTR_SIZE(size);
		}
		free += EXT3_XATTR_LEN(name_len);
	}
	if (i->value) {
		/* Both checks guard against arithmetic wrap as well as a
		 * plain shortage of space. */
		if (free < EXT3_XATTR_SIZE(i->value_len) ||
		    free < EXT3_XATTR_LEN(name_len) +
			   EXT3_XATTR_SIZE(i->value_len))
			return -ENOSPC;
	}

	if (i->value && s->not_found) {
		/* Insert the new name. */
		size_t size = EXT3_XATTR_LEN(name_len);
		size_t rest = (void *)last - (void *)s->here + sizeof(__u32);
		memmove((void *)s->here + size, s->here, rest);
		memset(s->here, 0, size);
		s->here->e_name_index = i->name_index;
		s->here->e_name_len = name_len;
		memcpy(s->here->e_name, i->name, name_len);
	} else {
		if (!s->here->e_value_block && s->here->e_value_size) {
			void *first_val = s->base + min_offs;
			size_t offs = le16_to_cpu(s->here->e_value_offs);
			void *val = s->base + offs;
			size_t size = EXT3_XATTR_SIZE(
				le32_to_cpu(s->here->e_value_size));

			if (i->value && size == EXT3_XATTR_SIZE(i->value_len)) {
				/* The old and the new value have the same
				   size. Just replace. */
				s->here->e_value_size =
					cpu_to_le32(i->value_len);
				memset(val + size - EXT3_XATTR_PAD, 0,
				       EXT3_XATTR_PAD); /* Clear pad bytes. */
				memcpy(val, i->value, i->value_len);
				return 0;
			}

			/* Remove the old value: slide all lower-addressed
			 * values up over it, then zero the vacated space. */
			memmove(first_val + size, first_val, val - first_val);
			memset(first_val, 0, size);
			s->here->e_value_size = 0;
			s->here->e_value_offs = 0;
			min_offs += size;

			/* Adjust all value offsets. */
			last = s->first;
			while (!IS_LAST_ENTRY(last)) {
				size_t o = le16_to_cpu(last->e_value_offs);
				if (!last->e_value_block &&
				    last->e_value_size && o < offs)
					last->e_value_offs =
						cpu_to_le16(o + size);
				last = EXT3_XATTR_NEXT(last);
			}
		}
		if (!i->value) {
			/* Remove the old name. */
			size_t size = EXT3_XATTR_LEN(name_len);
			last = ENTRY((void *)last - size);
			memmove(s->here, (void *)s->here + size,
				(void *)last - (void *)s->here + sizeof(__u32));
			memset(last, 0, size);
		}
	}

	if (i->value) {
		/* Insert the new value at the bottom of the free space. */
		s->here->e_value_size = cpu_to_le32(i->value_len);
		if (i->value_len) {
			size_t size = EXT3_XATTR_SIZE(i->value_len);
			void *val = s->base + min_offs - size;
			s->here->e_value_offs = cpu_to_le16(min_offs - size);
			memset(val + size - EXT3_XATTR_PAD, 0,
			       EXT3_XATTR_PAD); /* Clear the pad bytes. */
			memcpy(val, i->value, i->value_len);
		}
	}
	return 0;
}
630 625
/* Search state for an attribute held in the external xattr block. */
struct ext3_xattr_block_find {
	struct ext3_xattr_search s;	/* cursor within the block */
	struct buffer_head *bh;		/* buffer holding the xattr block */
};
635 630
/*
 * Locate attribute @i in the inode's external xattr block, filling in
 * @bs (bh, search cursor, not_found).  When the inode has no xattr
 * block, bs->bh stays NULL and 0 is returned; the caller checks
 * bs->s.not_found before using bs->s.here.  On success the caller owns
 * the reference on bs->bh.
 */
static int
ext3_xattr_block_find(struct inode *inode, struct ext3_xattr_info *i,
		      struct ext3_xattr_block_find *bs)
{
	struct super_block *sb = inode->i_sb;
	int error;

	ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld",
		  i->name_index, i->name, i->value, (long)i->value_len);

	if (EXT3_I(inode)->i_file_acl) {
		/* The inode already has an extended attribute block. */
		bs->bh = sb_bread(sb, EXT3_I(inode)->i_file_acl);
		error = -EIO;
		if (!bs->bh)
			goto cleanup;
		ea_bdebug(bs->bh, "b_count=%d, refcount=%d",
			atomic_read(&(bs->bh->b_count)),
			le32_to_cpu(BHDR(bs->bh)->h_refcount));
		if (ext3_xattr_check_block(bs->bh)) {
			ext3_error(sb, __func__,
				"inode %lu: bad block "E3FSBLK, inode->i_ino,
				EXT3_I(inode)->i_file_acl);
			error = -EIO;
			goto cleanup;
		}
		/* Find the named attribute. */
		bs->s.base = BHDR(bs->bh);
		bs->s.first = BFIRST(bs->bh);
		bs->s.end = bs->bh->b_data + bs->bh->b_size;
		bs->s.here = bs->s.first;
		/* Block entries are sorted (last arg == 1). */
		error = ext3_xattr_find_entry(&bs->s.here, i->name_index,
					      i->name, bs->bh->b_size, 1);
		if (error && error != -ENODATA)
			goto cleanup;
		/* -ENODATA is not fatal here; record it for the caller. */
		bs->s.not_found = error;
	}
	error = 0;

cleanup:
	return error;
}
678 673
679 static int 674 static int
680 ext3_xattr_block_set(handle_t *handle, struct inode *inode, 675 ext3_xattr_block_set(handle_t *handle, struct inode *inode,
681 struct ext3_xattr_info *i, 676 struct ext3_xattr_info *i,
682 struct ext3_xattr_block_find *bs) 677 struct ext3_xattr_block_find *bs)
683 { 678 {
684 struct super_block *sb = inode->i_sb; 679 struct super_block *sb = inode->i_sb;
685 struct buffer_head *new_bh = NULL; 680 struct buffer_head *new_bh = NULL;
686 struct ext3_xattr_search *s = &bs->s; 681 struct ext3_xattr_search *s = &bs->s;
687 struct mb_cache_entry *ce = NULL; 682 struct mb_cache_entry *ce = NULL;
688 int error = 0; 683 int error = 0;
689 684
690 #define header(x) ((struct ext3_xattr_header *)(x)) 685 #define header(x) ((struct ext3_xattr_header *)(x))
691 686
692 if (i->value && i->value_len > sb->s_blocksize) 687 if (i->value && i->value_len > sb->s_blocksize)
693 return -ENOSPC; 688 return -ENOSPC;
694 if (s->base) { 689 if (s->base) {
695 ce = mb_cache_entry_get(ext3_xattr_cache, bs->bh->b_bdev, 690 ce = mb_cache_entry_get(ext3_xattr_cache, bs->bh->b_bdev,
696 bs->bh->b_blocknr); 691 bs->bh->b_blocknr);
697 error = ext3_journal_get_write_access(handle, bs->bh); 692 error = ext3_journal_get_write_access(handle, bs->bh);
698 if (error) 693 if (error)
699 goto cleanup; 694 goto cleanup;
700 lock_buffer(bs->bh); 695 lock_buffer(bs->bh);
701 696
702 if (header(s->base)->h_refcount == cpu_to_le32(1)) { 697 if (header(s->base)->h_refcount == cpu_to_le32(1)) {
703 if (ce) { 698 if (ce) {
704 mb_cache_entry_free(ce); 699 mb_cache_entry_free(ce);
705 ce = NULL; 700 ce = NULL;
706 } 701 }
707 ea_bdebug(bs->bh, "modifying in-place"); 702 ea_bdebug(bs->bh, "modifying in-place");
708 error = ext3_xattr_set_entry(i, s); 703 error = ext3_xattr_set_entry(i, s);
709 if (!error) { 704 if (!error) {
710 if (!IS_LAST_ENTRY(s->first)) 705 if (!IS_LAST_ENTRY(s->first))
711 ext3_xattr_rehash(header(s->base), 706 ext3_xattr_rehash(header(s->base),
712 s->here); 707 s->here);
713 ext3_xattr_cache_insert(bs->bh); 708 ext3_xattr_cache_insert(bs->bh);
714 } 709 }
715 unlock_buffer(bs->bh); 710 unlock_buffer(bs->bh);
716 if (error == -EIO) 711 if (error == -EIO)
717 goto bad_block; 712 goto bad_block;
718 if (!error) 713 if (!error)
719 error = ext3_journal_dirty_metadata(handle, 714 error = ext3_journal_dirty_metadata(handle,
720 bs->bh); 715 bs->bh);
721 if (error) 716 if (error)
722 goto cleanup; 717 goto cleanup;
723 goto inserted; 718 goto inserted;
724 } else { 719 } else {
725 int offset = (char *)s->here - bs->bh->b_data; 720 int offset = (char *)s->here - bs->bh->b_data;
726 721
727 unlock_buffer(bs->bh); 722 unlock_buffer(bs->bh);
728 journal_release_buffer(handle, bs->bh); 723 journal_release_buffer(handle, bs->bh);
729 724
730 if (ce) { 725 if (ce) {
731 mb_cache_entry_release(ce); 726 mb_cache_entry_release(ce);
732 ce = NULL; 727 ce = NULL;
733 } 728 }
734 ea_bdebug(bs->bh, "cloning"); 729 ea_bdebug(bs->bh, "cloning");
735 s->base = kmalloc(bs->bh->b_size, GFP_NOFS); 730 s->base = kmalloc(bs->bh->b_size, GFP_NOFS);
736 error = -ENOMEM; 731 error = -ENOMEM;
737 if (s->base == NULL) 732 if (s->base == NULL)
738 goto cleanup; 733 goto cleanup;
739 memcpy(s->base, BHDR(bs->bh), bs->bh->b_size); 734 memcpy(s->base, BHDR(bs->bh), bs->bh->b_size);
740 s->first = ENTRY(header(s->base)+1); 735 s->first = ENTRY(header(s->base)+1);
741 header(s->base)->h_refcount = cpu_to_le32(1); 736 header(s->base)->h_refcount = cpu_to_le32(1);
742 s->here = ENTRY(s->base + offset); 737 s->here = ENTRY(s->base + offset);
743 s->end = s->base + bs->bh->b_size; 738 s->end = s->base + bs->bh->b_size;
744 } 739 }
745 } else { 740 } else {
746 /* Allocate a buffer where we construct the new block. */ 741 /* Allocate a buffer where we construct the new block. */
747 s->base = kzalloc(sb->s_blocksize, GFP_NOFS); 742 s->base = kzalloc(sb->s_blocksize, GFP_NOFS);
748 /* assert(header == s->base) */ 743 /* assert(header == s->base) */
749 error = -ENOMEM; 744 error = -ENOMEM;
750 if (s->base == NULL) 745 if (s->base == NULL)
751 goto cleanup; 746 goto cleanup;
752 header(s->base)->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC); 747 header(s->base)->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC);
753 header(s->base)->h_blocks = cpu_to_le32(1); 748 header(s->base)->h_blocks = cpu_to_le32(1);
754 header(s->base)->h_refcount = cpu_to_le32(1); 749 header(s->base)->h_refcount = cpu_to_le32(1);
755 s->first = ENTRY(header(s->base)+1); 750 s->first = ENTRY(header(s->base)+1);
756 s->here = ENTRY(header(s->base)+1); 751 s->here = ENTRY(header(s->base)+1);
757 s->end = s->base + sb->s_blocksize; 752 s->end = s->base + sb->s_blocksize;
758 } 753 }
759 754
760 error = ext3_xattr_set_entry(i, s); 755 error = ext3_xattr_set_entry(i, s);
761 if (error == -EIO) 756 if (error == -EIO)
762 goto bad_block; 757 goto bad_block;
763 if (error) 758 if (error)
764 goto cleanup; 759 goto cleanup;
765 if (!IS_LAST_ENTRY(s->first)) 760 if (!IS_LAST_ENTRY(s->first))
766 ext3_xattr_rehash(header(s->base), s->here); 761 ext3_xattr_rehash(header(s->base), s->here);
767 762
768 inserted: 763 inserted:
769 if (!IS_LAST_ENTRY(s->first)) { 764 if (!IS_LAST_ENTRY(s->first)) {
770 new_bh = ext3_xattr_cache_find(inode, header(s->base), &ce); 765 new_bh = ext3_xattr_cache_find(inode, header(s->base), &ce);
771 if (new_bh) { 766 if (new_bh) {
772 /* We found an identical block in the cache. */ 767 /* We found an identical block in the cache. */
773 if (new_bh == bs->bh) 768 if (new_bh == bs->bh)
774 ea_bdebug(new_bh, "keeping"); 769 ea_bdebug(new_bh, "keeping");
775 else { 770 else {
776 /* The old block is released after updating 771 /* The old block is released after updating
777 the inode. */ 772 the inode. */
778 error = dquot_alloc_block(inode, 1); 773 error = dquot_alloc_block(inode, 1);
779 if (error) 774 if (error)
780 goto cleanup; 775 goto cleanup;
781 error = ext3_journal_get_write_access(handle, 776 error = ext3_journal_get_write_access(handle,
782 new_bh); 777 new_bh);
783 if (error) 778 if (error)
784 goto cleanup_dquot; 779 goto cleanup_dquot;
785 lock_buffer(new_bh); 780 lock_buffer(new_bh);
786 le32_add_cpu(&BHDR(new_bh)->h_refcount, 1); 781 le32_add_cpu(&BHDR(new_bh)->h_refcount, 1);
787 ea_bdebug(new_bh, "reusing; refcount now=%d", 782 ea_bdebug(new_bh, "reusing; refcount now=%d",
788 le32_to_cpu(BHDR(new_bh)->h_refcount)); 783 le32_to_cpu(BHDR(new_bh)->h_refcount));
789 unlock_buffer(new_bh); 784 unlock_buffer(new_bh);
790 error = ext3_journal_dirty_metadata(handle, 785 error = ext3_journal_dirty_metadata(handle,
791 new_bh); 786 new_bh);
792 if (error) 787 if (error)
793 goto cleanup_dquot; 788 goto cleanup_dquot;
794 } 789 }
795 mb_cache_entry_release(ce); 790 mb_cache_entry_release(ce);
796 ce = NULL; 791 ce = NULL;
797 } else if (bs->bh && s->base == bs->bh->b_data) { 792 } else if (bs->bh && s->base == bs->bh->b_data) {
798 /* We were modifying this block in-place. */ 793 /* We were modifying this block in-place. */
799 ea_bdebug(bs->bh, "keeping this block"); 794 ea_bdebug(bs->bh, "keeping this block");
800 new_bh = bs->bh; 795 new_bh = bs->bh;
801 get_bh(new_bh); 796 get_bh(new_bh);
802 } else { 797 } else {
803 /* We need to allocate a new block */ 798 /* We need to allocate a new block */
804 ext3_fsblk_t goal = ext3_group_first_block_no(sb, 799 ext3_fsblk_t goal = ext3_group_first_block_no(sb,
805 EXT3_I(inode)->i_block_group); 800 EXT3_I(inode)->i_block_group);
806 ext3_fsblk_t block; 801 ext3_fsblk_t block;
807 802
808 /* 803 /*
809 * Protect us agaist concurrent allocations to the 804 * Protect us agaist concurrent allocations to the
810 * same inode from ext3_..._writepage(). Reservation 805 * same inode from ext3_..._writepage(). Reservation
811 * code does not expect racing allocations. 806 * code does not expect racing allocations.
812 */ 807 */
813 mutex_lock(&EXT3_I(inode)->truncate_mutex); 808 mutex_lock(&EXT3_I(inode)->truncate_mutex);
814 block = ext3_new_block(handle, inode, goal, &error); 809 block = ext3_new_block(handle, inode, goal, &error);
815 mutex_unlock(&EXT3_I(inode)->truncate_mutex); 810 mutex_unlock(&EXT3_I(inode)->truncate_mutex);
816 if (error) 811 if (error)
817 goto cleanup; 812 goto cleanup;
818 ea_idebug(inode, "creating block %d", block); 813 ea_idebug(inode, "creating block %d", block);
819 814
820 new_bh = sb_getblk(sb, block); 815 new_bh = sb_getblk(sb, block);
821 if (!new_bh) { 816 if (!new_bh) {
822 getblk_failed: 817 getblk_failed:
823 ext3_free_blocks(handle, inode, block, 1); 818 ext3_free_blocks(handle, inode, block, 1);
824 error = -EIO; 819 error = -EIO;
825 goto cleanup; 820 goto cleanup;
826 } 821 }
827 lock_buffer(new_bh); 822 lock_buffer(new_bh);
828 error = ext3_journal_get_create_access(handle, new_bh); 823 error = ext3_journal_get_create_access(handle, new_bh);
829 if (error) { 824 if (error) {
830 unlock_buffer(new_bh); 825 unlock_buffer(new_bh);
831 goto getblk_failed; 826 goto getblk_failed;
832 } 827 }
833 memcpy(new_bh->b_data, s->base, new_bh->b_size); 828 memcpy(new_bh->b_data, s->base, new_bh->b_size);
834 set_buffer_uptodate(new_bh); 829 set_buffer_uptodate(new_bh);
835 unlock_buffer(new_bh); 830 unlock_buffer(new_bh);
836 ext3_xattr_cache_insert(new_bh); 831 ext3_xattr_cache_insert(new_bh);
837 error = ext3_journal_dirty_metadata(handle, new_bh); 832 error = ext3_journal_dirty_metadata(handle, new_bh);
838 if (error) 833 if (error)
839 goto cleanup; 834 goto cleanup;
840 } 835 }
841 } 836 }
842 837
843 /* Update the inode. */ 838 /* Update the inode. */
844 EXT3_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0; 839 EXT3_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
845 840
846 /* Drop the previous xattr block. */ 841 /* Drop the previous xattr block. */
847 if (bs->bh && bs->bh != new_bh) 842 if (bs->bh && bs->bh != new_bh)
848 ext3_xattr_release_block(handle, inode, bs->bh); 843 ext3_xattr_release_block(handle, inode, bs->bh);
849 error = 0; 844 error = 0;
850 845
851 cleanup: 846 cleanup:
852 if (ce) 847 if (ce)
853 mb_cache_entry_release(ce); 848 mb_cache_entry_release(ce);
854 brelse(new_bh); 849 brelse(new_bh);
855 if (!(bs->bh && s->base == bs->bh->b_data)) 850 if (!(bs->bh && s->base == bs->bh->b_data))
856 kfree(s->base); 851 kfree(s->base);
857 852
858 return error; 853 return error;
859 854
860 cleanup_dquot: 855 cleanup_dquot:
861 dquot_free_block(inode, 1); 856 dquot_free_block(inode, 1);
862 goto cleanup; 857 goto cleanup;
863 858
864 bad_block: 859 bad_block:
865 ext3_error(inode->i_sb, __func__, 860 ext3_error(inode->i_sb, __func__,
866 "inode %lu: bad block "E3FSBLK, inode->i_ino, 861 "inode %lu: bad block "E3FSBLK, inode->i_ino,
867 EXT3_I(inode)->i_file_acl); 862 EXT3_I(inode)->i_file_acl);
868 goto cleanup; 863 goto cleanup;
869 864
870 #undef header 865 #undef header
871 } 866 }
872 867
/*
 * State for locating an extended attribute stored in the inode body
 * (the extra space after the fixed part of the on-disk inode).
 */
struct ext3_xattr_ibody_find {
	struct ext3_xattr_search s;	/* generic xattr search cursor */
	struct ext3_iloc iloc;		/* location of the raw on-disk inode */
};
877 872
/*
 * ext3_xattr_ibody_find()
 *
 * Point @is at the in-inode xattr area of @inode and, if the inode
 * carries in-body attributes, look up the attribute described by @i.
 * On success is->s.not_found is 0 when the entry was found and
 * -ENODATA when it was not.
 *
 * Returns 0, or a negative error number on failure.
 */
static int
ext3_xattr_ibody_find(struct inode *inode, struct ext3_xattr_info *i,
		      struct ext3_xattr_ibody_find *is)
{
	struct ext3_xattr_ibody_header *header;
	struct ext3_inode *raw_inode;
	int error;

	/* No extra inode space means there can be no in-body xattrs. */
	if (EXT3_I(inode)->i_extra_isize == 0)
		return 0;
	raw_inode = ext3_raw_inode(&is->iloc);
	header = IHDR(inode, raw_inode);
	is->s.base = is->s.first = IFIRST(header);
	is->s.here = is->s.first;
	/* The in-body area runs to the end of the on-disk inode. */
	is->s.end = (void *)raw_inode + EXT3_SB(inode->i_sb)->s_inode_size;
	if (ext3_test_inode_state(inode, EXT3_STATE_XATTR)) {
		/* Validate the entry list before walking it. */
		error = ext3_xattr_check_names(IFIRST(header), is->s.end);
		if (error)
			return error;
		/* Find the named attribute. */
		error = ext3_xattr_find_entry(&is->s.here, i->name_index,
					      i->name, is->s.end -
					      (void *)is->s.base, 0);
		if (error && error != -ENODATA)
			return error;
		is->s.not_found = error;
	}
	return 0;
}
907 902
/*
 * ext3_xattr_ibody_set()
 *
 * Apply the change described by @i to the in-inode xattr area located
 * by a prior ext3_xattr_ibody_find(), then update the in-body header
 * magic and the inode's EXT3_STATE_XATTR flag to reflect whether any
 * in-body attributes remain.
 *
 * Returns 0, -ENOSPC when the inode has no in-body xattr space (or the
 * change does not fit), or another negative error number on failure.
 */
static int
ext3_xattr_ibody_set(handle_t *handle, struct inode *inode,
		     struct ext3_xattr_info *i,
		     struct ext3_xattr_ibody_find *is)
{
	struct ext3_xattr_ibody_header *header;
	struct ext3_xattr_search *s = &is->s;
	int error;

	if (EXT3_I(inode)->i_extra_isize == 0)
		return -ENOSPC;
	error = ext3_xattr_set_entry(i, s);
	if (error)
		return error;
	header = IHDR(inode, ext3_raw_inode(&is->iloc));
	if (!IS_LAST_ENTRY(s->first)) {
		header->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC);
		ext3_set_inode_state(inode, EXT3_STATE_XATTR);
	} else {
		/* Last entry removed: clear the magic so the area is
		 * no longer parsed as an xattr list. */
		header->h_magic = cpu_to_le32(0);
		ext3_clear_inode_state(inode, EXT3_STATE_XATTR);
	}
	return 0;
}
932 927
/*
 * ext3_xattr_set_handle()
 *
 * Create, replace or remove an extended attribute for this inode.  Value
 * is NULL to remove an existing extended attribute, and non-NULL to
 * either replace an existing extended attribute, or create a new extended
 * attribute. The flags XATTR_REPLACE and XATTR_CREATE
 * specify that an extended attribute must exist and must not exist
 * previous to the call, respectively.
 *
 * Attributes are preferentially stored in the inode body; when that is
 * full they migrate to a separate xattr block.  The caller supplies a
 * running journal handle with enough credits for both updates.
 *
 * Returns 0, or a negative error number on failure.
 */
int
ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
		      const char *name, const void *value, size_t value_len,
		      int flags)
{
	struct ext3_xattr_info i = {
		.name_index = name_index,
		.name = name,
		.value = value,
		.value_len = value_len,

	};
	struct ext3_xattr_ibody_find is = {
		.s = { .not_found = -ENODATA, },
	};
	struct ext3_xattr_block_find bs = {
		.s = { .not_found = -ENODATA, },
	};
	int error;

	if (!name)
		return -EINVAL;
	/* xattr names are limited to 255 bytes on disk (e_name_len is u8). */
	if (strlen(name) > 255)
		return -ERANGE;
	down_write(&EXT3_I(inode)->xattr_sem);
	error = ext3_get_inode_loc(inode, &is.iloc);
	if (error)
		goto cleanup;

	error = ext3_journal_get_write_access(handle, is.iloc.bh);
	if (error)
		goto cleanup;

	if (ext3_test_inode_state(inode, EXT3_STATE_NEW)) {
		/* A brand-new inode has never been written; zero it so
		 * stale disk contents are not parsed as attributes. */
		struct ext3_inode *raw_inode = ext3_raw_inode(&is.iloc);
		memset(raw_inode, 0, EXT3_SB(inode->i_sb)->s_inode_size);
		ext3_clear_inode_state(inode, EXT3_STATE_NEW);
	}

	/* Look for the attribute in the inode body first, then in the
	 * external xattr block. */
	error = ext3_xattr_ibody_find(inode, &i, &is);
	if (error)
		goto cleanup;
	if (is.s.not_found)
		error = ext3_xattr_block_find(inode, &i, &bs);
	if (error)
		goto cleanup;
	if (is.s.not_found && bs.s.not_found) {
		error = -ENODATA;
		if (flags & XATTR_REPLACE)
			goto cleanup;
		error = 0;
		if (!value)
			goto cleanup;	/* removing a non-existent attr is a no-op */
	} else {
		error = -EEXIST;
		if (flags & XATTR_CREATE)
			goto cleanup;
	}
	if (!value) {
		/* Removal: delete the entry from wherever it lives. */
		if (!is.s.not_found)
			error = ext3_xattr_ibody_set(handle, inode, &i, &is);
		else if (!bs.s.not_found)
			error = ext3_xattr_block_set(handle, inode, &i, &bs);
	} else {
		error = ext3_xattr_ibody_set(handle, inode, &i, &is);
		if (!error && !bs.s.not_found) {
			/* New value fit in the inode body; remove the old
			 * copy from the external block. */
			i.value = NULL;
			error = ext3_xattr_block_set(handle, inode, &i, &bs);
		} else if (error == -ENOSPC) {
			/* No room in the body: migrate to the block. */
			if (EXT3_I(inode)->i_file_acl && !bs.s.base) {
				error = ext3_xattr_block_find(inode, &i, &bs);
				if (error)
					goto cleanup;
			}
			error = ext3_xattr_block_set(handle, inode, &i, &bs);
			if (error)
				goto cleanup;
			if (!is.s.not_found) {
				/* Drop the stale in-body copy. */
				i.value = NULL;
				error = ext3_xattr_ibody_set(handle, inode, &i,
							     &is);
			}
		}
	}
	if (!error) {
		ext3_xattr_update_super_block(handle, inode->i_sb);
		inode->i_ctime = CURRENT_TIME_SEC;
		error = ext3_mark_iloc_dirty(handle, inode, &is.iloc);
		/*
		 * The bh is consumed by ext3_mark_iloc_dirty, even with
		 * error != 0.
		 */
		is.iloc.bh = NULL;
		if (IS_SYNC(inode))
			handle->h_sync = 1;
	}

cleanup:
	brelse(is.iloc.bh);
	brelse(bs.bh);
	up_write(&EXT3_I(inode)->xattr_sem);
	return error;
}
1048 1043
1049 /* 1044 /*
1050 * ext3_xattr_set() 1045 * ext3_xattr_set()
1051 * 1046 *
1052 * Like ext3_xattr_set_handle, but start from an inode. This extended 1047 * Like ext3_xattr_set_handle, but start from an inode. This extended
1053 * attribute modification is a filesystem transaction by itself. 1048 * attribute modification is a filesystem transaction by itself.
1054 * 1049 *
1055 * Returns 0, or a negative error number on failure. 1050 * Returns 0, or a negative error number on failure.
1056 */ 1051 */
1057 int 1052 int
1058 ext3_xattr_set(struct inode *inode, int name_index, const char *name, 1053 ext3_xattr_set(struct inode *inode, int name_index, const char *name,
1059 const void *value, size_t value_len, int flags) 1054 const void *value, size_t value_len, int flags)
1060 { 1055 {
1061 handle_t *handle; 1056 handle_t *handle;
1062 int error, retries = 0; 1057 int error, retries = 0;
1063 1058
1064 retry: 1059 retry:
1065 handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS(inode->i_sb)); 1060 handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS(inode->i_sb));
1066 if (IS_ERR(handle)) { 1061 if (IS_ERR(handle)) {
1067 error = PTR_ERR(handle); 1062 error = PTR_ERR(handle);
1068 } else { 1063 } else {
1069 int error2; 1064 int error2;
1070 1065
1071 error = ext3_xattr_set_handle(handle, inode, name_index, name, 1066 error = ext3_xattr_set_handle(handle, inode, name_index, name,
1072 value, value_len, flags); 1067 value, value_len, flags);
1073 error2 = ext3_journal_stop(handle); 1068 error2 = ext3_journal_stop(handle);
1074 if (error == -ENOSPC && 1069 if (error == -ENOSPC &&
1075 ext3_should_retry_alloc(inode->i_sb, &retries)) 1070 ext3_should_retry_alloc(inode->i_sb, &retries))
1076 goto retry; 1071 goto retry;
1077 if (error == 0) 1072 if (error == 0)
1078 error = error2; 1073 error = error2;
1079 } 1074 }
1080 1075
1081 return error; 1076 return error;
1082 } 1077 }
1083 1078
/*
 * ext3_xattr_delete_inode()
 *
 * Free extended attribute resources associated with this inode. This
 * is called immediately before an inode is freed. We have exclusive
 * access to the inode.
 */
void
ext3_xattr_delete_inode(handle_t *handle, struct inode *inode)
{
	struct buffer_head *bh = NULL;

	/* Nothing to do if the inode has no external xattr block. */
	if (!EXT3_I(inode)->i_file_acl)
		goto cleanup;
	bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
	if (!bh) {
		ext3_error(inode->i_sb, __func__,
			"inode %lu: block "E3FSBLK" read error", inode->i_ino,
			EXT3_I(inode)->i_file_acl);
		goto cleanup;
	}
	/* Sanity-check the block before releasing it. */
	if (BHDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
	    BHDR(bh)->h_blocks != cpu_to_le32(1)) {
		ext3_error(inode->i_sb, __func__,
			"inode %lu: bad block "E3FSBLK, inode->i_ino,
			EXT3_I(inode)->i_file_acl);
		goto cleanup;
	}
	/* Drops one reference; frees the block if it was the last user. */
	ext3_xattr_release_block(handle, inode, bh);
	EXT3_I(inode)->i_file_acl = 0;

cleanup:
	brelse(bh);
}
1118 1113
/*
 * ext3_xattr_put_super()
 *
 * This is called when a file system is unmounted.  Evict all xattr
 * cache entries referring to this device.
 */
void
ext3_xattr_put_super(struct super_block *sb)
{
	mb_cache_shrink(sb->s_bdev);
}
1129 1124
/*
 * ext3_xattr_cache_insert()
 *
 * Create a new entry in the extended attribute cache, and insert
 * it unless such an entry is already in the cache.
 *
 * Best-effort: failures only cost a future sharing opportunity, so
 * they are logged (in debug builds) and otherwise ignored.
 */
static void
ext3_xattr_cache_insert(struct buffer_head *bh)
{
	__u32 hash = le32_to_cpu(BHDR(bh)->h_hash);
	struct mb_cache_entry *ce;
	int error;

	ce = mb_cache_entry_alloc(ext3_xattr_cache, GFP_NOFS);
	if (!ce) {
		ea_bdebug(bh, "out of memory");
		return;
	}
	error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, hash);
	if (error) {
		mb_cache_entry_free(ce);
		if (error == -EBUSY) {
			/* An equivalent entry already exists; fine. */
			ea_bdebug(bh, "already in cache");
			error = 0;
		}
	} else {
		ea_bdebug(bh, "inserting [%x]", (int)hash);
		mb_cache_entry_release(ce);
	}
}
1162 1157
1163 /* 1158 /*
1164 * ext3_xattr_cmp() 1159 * ext3_xattr_cmp()
1165 * 1160 *
1166 * Compare two extended attribute blocks for equality. 1161 * Compare two extended attribute blocks for equality.
1167 * 1162 *
1168 * Returns 0 if the blocks are equal, 1 if they differ, and 1163 * Returns 0 if the blocks are equal, 1 if they differ, and
1169 * a negative error number on errors. 1164 * a negative error number on errors.
1170 */ 1165 */
1171 static int 1166 static int
1172 ext3_xattr_cmp(struct ext3_xattr_header *header1, 1167 ext3_xattr_cmp(struct ext3_xattr_header *header1,
1173 struct ext3_xattr_header *header2) 1168 struct ext3_xattr_header *header2)
1174 { 1169 {
1175 struct ext3_xattr_entry *entry1, *entry2; 1170 struct ext3_xattr_entry *entry1, *entry2;
1176 1171
1177 entry1 = ENTRY(header1+1); 1172 entry1 = ENTRY(header1+1);
1178 entry2 = ENTRY(header2+1); 1173 entry2 = ENTRY(header2+1);
1179 while (!IS_LAST_ENTRY(entry1)) { 1174 while (!IS_LAST_ENTRY(entry1)) {
1180 if (IS_LAST_ENTRY(entry2)) 1175 if (IS_LAST_ENTRY(entry2))
1181 return 1; 1176 return 1;
1182 if (entry1->e_hash != entry2->e_hash || 1177 if (entry1->e_hash != entry2->e_hash ||
1183 entry1->e_name_index != entry2->e_name_index || 1178 entry1->e_name_index != entry2->e_name_index ||
1184 entry1->e_name_len != entry2->e_name_len || 1179 entry1->e_name_len != entry2->e_name_len ||
1185 entry1->e_value_size != entry2->e_value_size || 1180 entry1->e_value_size != entry2->e_value_size ||
1186 memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len)) 1181 memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len))
1187 return 1; 1182 return 1;
1188 if (entry1->e_value_block != 0 || entry2->e_value_block != 0) 1183 if (entry1->e_value_block != 0 || entry2->e_value_block != 0)
1189 return -EIO; 1184 return -EIO;
1190 if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs), 1185 if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs),
1191 (char *)header2 + le16_to_cpu(entry2->e_value_offs), 1186 (char *)header2 + le16_to_cpu(entry2->e_value_offs),
1192 le32_to_cpu(entry1->e_value_size))) 1187 le32_to_cpu(entry1->e_value_size)))
1193 return 1; 1188 return 1;
1194 1189
1195 entry1 = EXT3_XATTR_NEXT(entry1); 1190 entry1 = EXT3_XATTR_NEXT(entry1);
1196 entry2 = EXT3_XATTR_NEXT(entry2); 1191 entry2 = EXT3_XATTR_NEXT(entry2);
1197 } 1192 }
1198 if (!IS_LAST_ENTRY(entry2)) 1193 if (!IS_LAST_ENTRY(entry2))
1199 return 1; 1194 return 1;
1200 return 0; 1195 return 0;
1201 } 1196 }
1202 1197
/*
 * ext3_xattr_cache_find()
 *
 * Find an identical extended attribute block.
 *
 * Returns a pointer to the block found, or NULL if such a block was
 * not found or an error occurred.  On success *pce holds the matching
 * cache entry, which the caller must release.
 */
static struct buffer_head *
ext3_xattr_cache_find(struct inode *inode, struct ext3_xattr_header *header,
		      struct mb_cache_entry **pce)
{
	__u32 hash = le32_to_cpu(header->h_hash);
	struct mb_cache_entry *ce;

	if (!header->h_hash)
		return NULL;  /* never share */
	ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
again:
	ce = mb_cache_entry_find_first(ext3_xattr_cache, inode->i_sb->s_bdev,
				       hash);
	while (ce) {
		struct buffer_head *bh;

		if (IS_ERR(ce)) {
			/* -EAGAIN: the cache changed underneath us;
			 * restart the scan from the beginning. */
			if (PTR_ERR(ce) == -EAGAIN)
				goto again;
			break;
		}
		bh = sb_bread(inode->i_sb, ce->e_block);
		if (!bh) {
			ext3_error(inode->i_sb, __func__,
				"inode %lu: block %lu read error",
				inode->i_ino, (unsigned long) ce->e_block);
		} else if (le32_to_cpu(BHDR(bh)->h_refcount) >=
				EXT3_XATTR_REFCOUNT_MAX) {
			/* Refcount is saturated; can't share this block. */
			ea_idebug(inode, "block %lu refcount %d>=%d",
				  (unsigned long) ce->e_block,
				  le32_to_cpu(BHDR(bh)->h_refcount),
				  EXT3_XATTR_REFCOUNT_MAX);
		} else if (ext3_xattr_cmp(header, BHDR(bh)) == 0) {
			/* Hash matched and contents are identical. */
			*pce = ce;
			return bh;
		}
		brelse(bh);
		ce = mb_cache_entry_find_next(ce, inode->i_sb->s_bdev, hash);
	}
	return NULL;
}
1252 1247
#define NAME_HASH_SHIFT 5
#define VALUE_HASH_SHIFT 16

/*
 * ext3_xattr_hash_entry()
 *
 * Compute the hash of an extended attribute.  The name and (in-block)
 * value are folded into entry->e_hash using rotate-and-xor mixing; the
 * result must match what other implementations compute, since it is
 * stored on disk.
 */
static inline void ext3_xattr_hash_entry(struct ext3_xattr_header *header,
					 struct ext3_xattr_entry *entry)
{
	__u32 hash = 0;
	char *name = entry->e_name;
	int n;

	/* Mix in the attribute name, one byte at a time. */
	for (n=0; n < entry->e_name_len; n++) {
		hash = (hash << NAME_HASH_SHIFT) ^
		       (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^
		       *name++;
	}

	if (entry->e_value_block == 0 && entry->e_value_size != 0) {
		/* Mix in the value, one 32-bit word at a time (the value
		 * is padded to a multiple of 4 bytes on disk). */
		__le32 *value = (__le32 *)((char *)header +
			le16_to_cpu(entry->e_value_offs));
		for (n = (le32_to_cpu(entry->e_value_size) +
		     EXT3_XATTR_ROUND) >> EXT3_XATTR_PAD_BITS; n; n--) {
			hash = (hash << VALUE_HASH_SHIFT) ^
			       (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^
			       le32_to_cpu(*value++);
		}
	}
	entry->e_hash = cpu_to_le32(hash);
}

#undef NAME_HASH_SHIFT
#undef VALUE_HASH_SHIFT
1289 1284
1290 #define BLOCK_HASH_SHIFT 16 1285 #define BLOCK_HASH_SHIFT 16
1291 1286
1292 /* 1287 /*
1293 * ext3_xattr_rehash() 1288 * ext3_xattr_rehash()
1294 * 1289 *
1295 * Re-compute the extended attribute hash value after an entry has changed. 1290 * Re-compute the extended attribute hash value after an entry has changed.
1296 */ 1291 */
1297 static void ext3_xattr_rehash(struct ext3_xattr_header *header, 1292 static void ext3_xattr_rehash(struct ext3_xattr_header *header,
1298 struct ext3_xattr_entry *entry) 1293 struct ext3_xattr_entry *entry)
1299 { 1294 {
1300 struct ext3_xattr_entry *here; 1295 struct ext3_xattr_entry *here;
1301 __u32 hash = 0; 1296 __u32 hash = 0;
1302 1297
1303 ext3_xattr_hash_entry(header, entry); 1298 ext3_xattr_hash_entry(header, entry);
1304 here = ENTRY(header+1); 1299 here = ENTRY(header+1);
1305 while (!IS_LAST_ENTRY(here)) { 1300 while (!IS_LAST_ENTRY(here)) {
1306 if (!here->e_hash) { 1301 if (!here->e_hash) {
1307 /* Block is not shared if an entry's hash value == 0 */ 1302 /* Block is not shared if an entry's hash value == 0 */
1308 hash = 0; 1303 hash = 0;
1309 break; 1304 break;
1310 } 1305 }
1311 hash = (hash << BLOCK_HASH_SHIFT) ^ 1306 hash = (hash << BLOCK_HASH_SHIFT) ^
1312 (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^ 1307 (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^
1313 le32_to_cpu(here->e_hash); 1308 le32_to_cpu(here->e_hash);
1314 here = EXT3_XATTR_NEXT(here); 1309 here = EXT3_XATTR_NEXT(here);
1315 } 1310 }
1316 header->h_hash = cpu_to_le32(hash); 1311 header->h_hash = cpu_to_le32(hash);
1317 } 1312 }
1318 1313
1319 #undef BLOCK_HASH_SHIFT 1314 #undef BLOCK_HASH_SHIFT
1320 1315
1321 int __init 1316 int __init
1322 init_ext3_xattr(void) 1317 init_ext3_xattr(void)
1323 { 1318 {
1324 ext3_xattr_cache = mb_cache_create("ext3_xattr", 6); 1319 ext3_xattr_cache = mb_cache_create("ext3_xattr", 6);
1325 if (!ext3_xattr_cache) 1320 if (!ext3_xattr_cache)
1326 return -ENOMEM; 1321 return -ENOMEM;
1327 return 0; 1322 return 0;
1328 } 1323 }
1329 1324
1330 void 1325 void
1331 exit_ext3_xattr(void) 1326 exit_ext3_xattr(void)
1332 { 1327 {
1333 if (ext3_xattr_cache) 1328 if (ext3_xattr_cache)
1334 mb_cache_destroy(ext3_xattr_cache); 1329 mb_cache_destroy(ext3_xattr_cache);
1335 ext3_xattr_cache = NULL; 1330 ext3_xattr_cache = NULL;
1336 } 1331 }
1337 1332
fs/ext3/xattr_security.c
1 /* 1 /*
2 * linux/fs/ext3/xattr_security.c 2 * linux/fs/ext3/xattr_security.c
3 * Handler for storing security labels as extended attributes. 3 * Handler for storing security labels as extended attributes.
4 */ 4 */
5 5
6 #include <linux/slab.h>
7 #include <linux/string.h>
8 #include <linux/fs.h>
9 #include <linux/ext3_jbd.h>
10 #include <linux/ext3_fs.h>
11 #include <linux/security.h> 6 #include <linux/security.h>
7 #include "ext3.h"
12 #include "xattr.h" 8 #include "xattr.h"
13 9
14 static size_t 10 static size_t
15 ext3_xattr_security_list(struct dentry *dentry, char *list, size_t list_size, 11 ext3_xattr_security_list(struct dentry *dentry, char *list, size_t list_size,
16 const char *name, size_t name_len, int type) 12 const char *name, size_t name_len, int type)
17 { 13 {
18 const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN; 14 const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
19 const size_t total_len = prefix_len + name_len + 1; 15 const size_t total_len = prefix_len + name_len + 1;
20 16
21 17
22 if (list && total_len <= list_size) { 18 if (list && total_len <= list_size) {
23 memcpy(list, XATTR_SECURITY_PREFIX, prefix_len); 19 memcpy(list, XATTR_SECURITY_PREFIX, prefix_len);
24 memcpy(list+prefix_len, name, name_len); 20 memcpy(list+prefix_len, name, name_len);
25 list[prefix_len + name_len] = '\0'; 21 list[prefix_len + name_len] = '\0';
26 } 22 }
27 return total_len; 23 return total_len;
28 } 24 }
29 25
30 static int 26 static int
31 ext3_xattr_security_get(struct dentry *dentry, const char *name, 27 ext3_xattr_security_get(struct dentry *dentry, const char *name,
32 void *buffer, size_t size, int type) 28 void *buffer, size_t size, int type)
33 { 29 {
34 if (strcmp(name, "") == 0) 30 if (strcmp(name, "") == 0)
35 return -EINVAL; 31 return -EINVAL;
36 return ext3_xattr_get(dentry->d_inode, EXT3_XATTR_INDEX_SECURITY, 32 return ext3_xattr_get(dentry->d_inode, EXT3_XATTR_INDEX_SECURITY,
37 name, buffer, size); 33 name, buffer, size);
38 } 34 }
39 35
40 static int 36 static int
41 ext3_xattr_security_set(struct dentry *dentry, const char *name, 37 ext3_xattr_security_set(struct dentry *dentry, const char *name,
42 const void *value, size_t size, int flags, int type) 38 const void *value, size_t size, int flags, int type)
43 { 39 {
44 if (strcmp(name, "") == 0) 40 if (strcmp(name, "") == 0)
45 return -EINVAL; 41 return -EINVAL;
46 return ext3_xattr_set(dentry->d_inode, EXT3_XATTR_INDEX_SECURITY, 42 return ext3_xattr_set(dentry->d_inode, EXT3_XATTR_INDEX_SECURITY,
47 name, value, size, flags); 43 name, value, size, flags);
48 } 44 }
49 45
50 int ext3_initxattrs(struct inode *inode, const struct xattr *xattr_array, 46 int ext3_initxattrs(struct inode *inode, const struct xattr *xattr_array,
51 void *fs_info) 47 void *fs_info)
52 { 48 {
53 const struct xattr *xattr; 49 const struct xattr *xattr;
54 handle_t *handle = fs_info; 50 handle_t *handle = fs_info;
55 int err = 0; 51 int err = 0;
56 52
57 for (xattr = xattr_array; xattr->name != NULL; xattr++) { 53 for (xattr = xattr_array; xattr->name != NULL; xattr++) {
58 err = ext3_xattr_set_handle(handle, inode, 54 err = ext3_xattr_set_handle(handle, inode,
59 EXT3_XATTR_INDEX_SECURITY, 55 EXT3_XATTR_INDEX_SECURITY,
60 xattr->name, xattr->value, 56 xattr->name, xattr->value,
61 xattr->value_len, 0); 57 xattr->value_len, 0);
62 if (err < 0) 58 if (err < 0)
63 break; 59 break;
64 } 60 }
65 return err; 61 return err;
66 } 62 }
67 63
68 int 64 int
69 ext3_init_security(handle_t *handle, struct inode *inode, struct inode *dir, 65 ext3_init_security(handle_t *handle, struct inode *inode, struct inode *dir,
70 const struct qstr *qstr) 66 const struct qstr *qstr)
71 { 67 {
72 return security_inode_init_security(inode, dir, qstr, 68 return security_inode_init_security(inode, dir, qstr,
73 &ext3_initxattrs, handle); 69 &ext3_initxattrs, handle);
74 } 70 }
75 71
76 const struct xattr_handler ext3_xattr_security_handler = { 72 const struct xattr_handler ext3_xattr_security_handler = {
77 .prefix = XATTR_SECURITY_PREFIX, 73 .prefix = XATTR_SECURITY_PREFIX,
78 .list = ext3_xattr_security_list, 74 .list = ext3_xattr_security_list,
79 .get = ext3_xattr_security_get, 75 .get = ext3_xattr_security_get,
80 .set = ext3_xattr_security_set, 76 .set = ext3_xattr_security_set,
81 }; 77 };
fs/ext3/xattr_trusted.c
1 /* 1 /*
2 * linux/fs/ext3/xattr_trusted.c 2 * linux/fs/ext3/xattr_trusted.c
3 * Handler for trusted extended attributes. 3 * Handler for trusted extended attributes.
4 * 4 *
5 * Copyright (C) 2003 by Andreas Gruenbacher, <a.gruenbacher@computer.org> 5 * Copyright (C) 2003 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
6 */ 6 */
7 7
8 #include <linux/string.h> 8 #include "ext3.h"
9 #include <linux/capability.h>
10 #include <linux/fs.h>
11 #include <linux/ext3_jbd.h>
12 #include <linux/ext3_fs.h>
13 #include "xattr.h" 9 #include "xattr.h"
14 10
15 static size_t 11 static size_t
16 ext3_xattr_trusted_list(struct dentry *dentry, char *list, size_t list_size, 12 ext3_xattr_trusted_list(struct dentry *dentry, char *list, size_t list_size,
17 const char *name, size_t name_len, int type) 13 const char *name, size_t name_len, int type)
18 { 14 {
19 const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; 15 const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
20 const size_t total_len = prefix_len + name_len + 1; 16 const size_t total_len = prefix_len + name_len + 1;
21 17
22 if (!capable(CAP_SYS_ADMIN)) 18 if (!capable(CAP_SYS_ADMIN))
23 return 0; 19 return 0;
24 20
25 if (list && total_len <= list_size) { 21 if (list && total_len <= list_size) {
26 memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len); 22 memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len);
27 memcpy(list+prefix_len, name, name_len); 23 memcpy(list+prefix_len, name, name_len);
28 list[prefix_len + name_len] = '\0'; 24 list[prefix_len + name_len] = '\0';
29 } 25 }
30 return total_len; 26 return total_len;
31 } 27 }
32 28
33 static int 29 static int
34 ext3_xattr_trusted_get(struct dentry *dentry, const char *name, 30 ext3_xattr_trusted_get(struct dentry *dentry, const char *name,
35 void *buffer, size_t size, int type) 31 void *buffer, size_t size, int type)
36 { 32 {
37 if (strcmp(name, "") == 0) 33 if (strcmp(name, "") == 0)
38 return -EINVAL; 34 return -EINVAL;
39 return ext3_xattr_get(dentry->d_inode, EXT3_XATTR_INDEX_TRUSTED, 35 return ext3_xattr_get(dentry->d_inode, EXT3_XATTR_INDEX_TRUSTED,
40 name, buffer, size); 36 name, buffer, size);
41 } 37 }
42 38
43 static int 39 static int
44 ext3_xattr_trusted_set(struct dentry *dentry, const char *name, 40 ext3_xattr_trusted_set(struct dentry *dentry, const char *name,
45 const void *value, size_t size, int flags, int type) 41 const void *value, size_t size, int flags, int type)
46 { 42 {
47 if (strcmp(name, "") == 0) 43 if (strcmp(name, "") == 0)
48 return -EINVAL; 44 return -EINVAL;
49 return ext3_xattr_set(dentry->d_inode, EXT3_XATTR_INDEX_TRUSTED, name, 45 return ext3_xattr_set(dentry->d_inode, EXT3_XATTR_INDEX_TRUSTED, name,
50 value, size, flags); 46 value, size, flags);
51 } 47 }
52 48
53 const struct xattr_handler ext3_xattr_trusted_handler = { 49 const struct xattr_handler ext3_xattr_trusted_handler = {
54 .prefix = XATTR_TRUSTED_PREFIX, 50 .prefix = XATTR_TRUSTED_PREFIX,
55 .list = ext3_xattr_trusted_list, 51 .list = ext3_xattr_trusted_list,
56 .get = ext3_xattr_trusted_get, 52 .get = ext3_xattr_trusted_get,
57 .set = ext3_xattr_trusted_set, 53 .set = ext3_xattr_trusted_set,
58 }; 54 };
59 55
fs/ext3/xattr_user.c
1 /* 1 /*
2 * linux/fs/ext3/xattr_user.c 2 * linux/fs/ext3/xattr_user.c
3 * Handler for extended user attributes. 3 * Handler for extended user attributes.
4 * 4 *
5 * Copyright (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org> 5 * Copyright (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
6 */ 6 */
7 7
8 #include <linux/string.h> 8 #include "ext3.h"
9 #include <linux/fs.h>
10 #include <linux/ext3_jbd.h>
11 #include <linux/ext3_fs.h>
12 #include "xattr.h" 9 #include "xattr.h"
13 10
14 static size_t 11 static size_t
15 ext3_xattr_user_list(struct dentry *dentry, char *list, size_t list_size, 12 ext3_xattr_user_list(struct dentry *dentry, char *list, size_t list_size,
16 const char *name, size_t name_len, int type) 13 const char *name, size_t name_len, int type)
17 { 14 {
18 const size_t prefix_len = XATTR_USER_PREFIX_LEN; 15 const size_t prefix_len = XATTR_USER_PREFIX_LEN;
19 const size_t total_len = prefix_len + name_len + 1; 16 const size_t total_len = prefix_len + name_len + 1;
20 17
21 if (!test_opt(dentry->d_sb, XATTR_USER)) 18 if (!test_opt(dentry->d_sb, XATTR_USER))
22 return 0; 19 return 0;
23 20
24 if (list && total_len <= list_size) { 21 if (list && total_len <= list_size) {
25 memcpy(list, XATTR_USER_PREFIX, prefix_len); 22 memcpy(list, XATTR_USER_PREFIX, prefix_len);
26 memcpy(list+prefix_len, name, name_len); 23 memcpy(list+prefix_len, name, name_len);
27 list[prefix_len + name_len] = '\0'; 24 list[prefix_len + name_len] = '\0';
28 } 25 }
29 return total_len; 26 return total_len;
30 } 27 }
31 28
32 static int 29 static int
33 ext3_xattr_user_get(struct dentry *dentry, const char *name, void *buffer, 30 ext3_xattr_user_get(struct dentry *dentry, const char *name, void *buffer,
34 size_t size, int type) 31 size_t size, int type)
35 { 32 {
36 if (strcmp(name, "") == 0) 33 if (strcmp(name, "") == 0)
37 return -EINVAL; 34 return -EINVAL;
38 if (!test_opt(dentry->d_sb, XATTR_USER)) 35 if (!test_opt(dentry->d_sb, XATTR_USER))
39 return -EOPNOTSUPP; 36 return -EOPNOTSUPP;
40 return ext3_xattr_get(dentry->d_inode, EXT3_XATTR_INDEX_USER, 37 return ext3_xattr_get(dentry->d_inode, EXT3_XATTR_INDEX_USER,
41 name, buffer, size); 38 name, buffer, size);
42 } 39 }
43 40
44 static int 41 static int
45 ext3_xattr_user_set(struct dentry *dentry, const char *name, 42 ext3_xattr_user_set(struct dentry *dentry, const char *name,
46 const void *value, size_t size, int flags, int type) 43 const void *value, size_t size, int flags, int type)
47 { 44 {
48 if (strcmp(name, "") == 0) 45 if (strcmp(name, "") == 0)
49 return -EINVAL; 46 return -EINVAL;
50 if (!test_opt(dentry->d_sb, XATTR_USER)) 47 if (!test_opt(dentry->d_sb, XATTR_USER))
51 return -EOPNOTSUPP; 48 return -EOPNOTSUPP;
52 return ext3_xattr_set(dentry->d_inode, EXT3_XATTR_INDEX_USER, 49 return ext3_xattr_set(dentry->d_inode, EXT3_XATTR_INDEX_USER,
53 name, value, size, flags); 50 name, value, size, flags);
54 } 51 }
55 52
56 const struct xattr_handler ext3_xattr_user_handler = { 53 const struct xattr_handler ext3_xattr_user_handler = {
57 .prefix = XATTR_USER_PREFIX, 54 .prefix = XATTR_USER_PREFIX,
58 .list = ext3_xattr_user_list, 55 .list = ext3_xattr_user_list,
59 .get = ext3_xattr_user_get, 56 .get = ext3_xattr_user_get,
60 .set = ext3_xattr_user_set, 57 .set = ext3_xattr_user_set,
61 }; 58 };
62 59
include/linux/ext3_fs.h
1 /* File was deleted
2 * linux/include/linux/ext3_fs.h
3 *
4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI)
8 *
9 * from
10 *
11 * linux/include/linux/minix_fs.h
12 *
13 * Copyright (C) 1991, 1992 Linus Torvalds
14 */
15
16 #ifndef _LINUX_EXT3_FS_H
17 #define _LINUX_EXT3_FS_H
18
19 #include <linux/types.h>
20 #include <linux/magic.h>
21 #include <linux/bug.h>
22
23 /*
24 * The second extended filesystem constants/structures
25 */
26
27 /*
28 * Define EXT3FS_DEBUG to produce debug messages
29 */
30 #undef EXT3FS_DEBUG
31
32 /*
33 * Define EXT3_RESERVATION to reserve data blocks for expanding files
34 */
35 #define EXT3_DEFAULT_RESERVE_BLOCKS 8
36 /*max window size: 1024(direct blocks) + 3([t,d]indirect blocks) */
37 #define EXT3_MAX_RESERVE_BLOCKS 1027
38 #define EXT3_RESERVE_WINDOW_NOT_ALLOCATED 0
39
40 /*
41 * Debug code
42 */
43 #ifdef EXT3FS_DEBUG
44 #define ext3_debug(f, a...) \
45 do { \
46 printk (KERN_DEBUG "EXT3-fs DEBUG (%s, %d): %s:", \
47 __FILE__, __LINE__, __func__); \
48 printk (KERN_DEBUG f, ## a); \
49 } while (0)
50 #else
51 #define ext3_debug(f, a...) do {} while (0)
52 #endif
53
54 /*
55 * Special inodes numbers
56 */
57 #define EXT3_BAD_INO 1 /* Bad blocks inode */
58 #define EXT3_ROOT_INO 2 /* Root inode */
59 #define EXT3_BOOT_LOADER_INO 5 /* Boot loader inode */
60 #define EXT3_UNDEL_DIR_INO 6 /* Undelete directory inode */
61 #define EXT3_RESIZE_INO 7 /* Reserved group descriptors inode */
62 #define EXT3_JOURNAL_INO 8 /* Journal inode */
63
64 /* First non-reserved inode for old ext3 filesystems */
65 #define EXT3_GOOD_OLD_FIRST_INO 11
66
67 /*
68 * Maximal count of links to a file
69 */
70 #define EXT3_LINK_MAX 32000
71
72 /*
73 * Macro-instructions used to manage several block sizes
74 */
75 #define EXT3_MIN_BLOCK_SIZE 1024
76 #define EXT3_MAX_BLOCK_SIZE 65536
77 #define EXT3_MIN_BLOCK_LOG_SIZE 10
78 #ifdef __KERNEL__
79 # define EXT3_BLOCK_SIZE(s) ((s)->s_blocksize)
80 #else
81 # define EXT3_BLOCK_SIZE(s) (EXT3_MIN_BLOCK_SIZE << (s)->s_log_block_size)
82 #endif
83 #define EXT3_ADDR_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (__u32))
84 #ifdef __KERNEL__
85 # define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits)
86 #else
87 # define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10)
88 #endif
89 #ifdef __KERNEL__
90 #define EXT3_ADDR_PER_BLOCK_BITS(s) (EXT3_SB(s)->s_addr_per_block_bits)
91 #define EXT3_INODE_SIZE(s) (EXT3_SB(s)->s_inode_size)
92 #define EXT3_FIRST_INO(s) (EXT3_SB(s)->s_first_ino)
93 #else
94 #define EXT3_INODE_SIZE(s) (((s)->s_rev_level == EXT3_GOOD_OLD_REV) ? \
95 EXT3_GOOD_OLD_INODE_SIZE : \
96 (s)->s_inode_size)
97 #define EXT3_FIRST_INO(s) (((s)->s_rev_level == EXT3_GOOD_OLD_REV) ? \
98 EXT3_GOOD_OLD_FIRST_INO : \
99 (s)->s_first_ino)
100 #endif
101
102 /*
103 * Macro-instructions used to manage fragments
104 */
105 #define EXT3_MIN_FRAG_SIZE 1024
106 #define EXT3_MAX_FRAG_SIZE 4096
107 #define EXT3_MIN_FRAG_LOG_SIZE 10
108 #ifdef __KERNEL__
109 # define EXT3_FRAG_SIZE(s) (EXT3_SB(s)->s_frag_size)
110 # define EXT3_FRAGS_PER_BLOCK(s) (EXT3_SB(s)->s_frags_per_block)
111 #else
112 # define EXT3_FRAG_SIZE(s) (EXT3_MIN_FRAG_SIZE << (s)->s_log_frag_size)
113 # define EXT3_FRAGS_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / EXT3_FRAG_SIZE(s))
114 #endif
115
116 /*
117 * Structure of a blocks group descriptor
118 */
119 struct ext3_group_desc
120 {
121 __le32 bg_block_bitmap; /* Blocks bitmap block */
122 __le32 bg_inode_bitmap; /* Inodes bitmap block */
123 __le32 bg_inode_table; /* Inodes table block */
124 __le16 bg_free_blocks_count; /* Free blocks count */
125 __le16 bg_free_inodes_count; /* Free inodes count */
126 __le16 bg_used_dirs_count; /* Directories count */
127 __u16 bg_pad;
128 __le32 bg_reserved[3];
129 };
130
131 /*
132 * Macro-instructions used to manage group descriptors
133 */
134 #ifdef __KERNEL__
135 # define EXT3_BLOCKS_PER_GROUP(s) (EXT3_SB(s)->s_blocks_per_group)
136 # define EXT3_DESC_PER_BLOCK(s) (EXT3_SB(s)->s_desc_per_block)
137 # define EXT3_INODES_PER_GROUP(s) (EXT3_SB(s)->s_inodes_per_group)
138 # define EXT3_DESC_PER_BLOCK_BITS(s) (EXT3_SB(s)->s_desc_per_block_bits)
139 #else
140 # define EXT3_BLOCKS_PER_GROUP(s) ((s)->s_blocks_per_group)
141 # define EXT3_DESC_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (struct ext3_group_desc))
142 # define EXT3_INODES_PER_GROUP(s) ((s)->s_inodes_per_group)
143 #endif
144
145 /*
146 * Constants relative to the data blocks
147 */
148 #define EXT3_NDIR_BLOCKS 12
149 #define EXT3_IND_BLOCK EXT3_NDIR_BLOCKS
150 #define EXT3_DIND_BLOCK (EXT3_IND_BLOCK + 1)
151 #define EXT3_TIND_BLOCK (EXT3_DIND_BLOCK + 1)
152 #define EXT3_N_BLOCKS (EXT3_TIND_BLOCK + 1)
153
154 /*
155 * Inode flags
156 */
157 #define EXT3_SECRM_FL 0x00000001 /* Secure deletion */
158 #define EXT3_UNRM_FL 0x00000002 /* Undelete */
159 #define EXT3_COMPR_FL 0x00000004 /* Compress file */
160 #define EXT3_SYNC_FL 0x00000008 /* Synchronous updates */
161 #define EXT3_IMMUTABLE_FL 0x00000010 /* Immutable file */
162 #define EXT3_APPEND_FL 0x00000020 /* writes to file may only append */
163 #define EXT3_NODUMP_FL 0x00000040 /* do not dump file */
164 #define EXT3_NOATIME_FL 0x00000080 /* do not update atime */
165 /* Reserved for compression usage... */
166 #define EXT3_DIRTY_FL 0x00000100
167 #define EXT3_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */
168 #define EXT3_NOCOMPR_FL 0x00000400 /* Don't compress */
169 #define EXT3_ECOMPR_FL 0x00000800 /* Compression error */
170 /* End compression flags --- maybe not all used */
171 #define EXT3_INDEX_FL 0x00001000 /* hash-indexed directory */
172 #define EXT3_IMAGIC_FL 0x00002000 /* AFS directory */
173 #define EXT3_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */
174 #define EXT3_NOTAIL_FL 0x00008000 /* file tail should not be merged */
175 #define EXT3_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */
176 #define EXT3_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
177 #define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */
178
179 #define EXT3_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */
180 #define EXT3_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */
181
182 /* Flags that should be inherited by new inodes from their parent. */
183 #define EXT3_FL_INHERITED (EXT3_SECRM_FL | EXT3_UNRM_FL | EXT3_COMPR_FL |\
184 EXT3_SYNC_FL | EXT3_NODUMP_FL |\
185 EXT3_NOATIME_FL | EXT3_COMPRBLK_FL |\
186 EXT3_NOCOMPR_FL | EXT3_JOURNAL_DATA_FL |\
187 EXT3_NOTAIL_FL | EXT3_DIRSYNC_FL)
188
189 /* Flags that are appropriate for regular files (all but dir-specific ones). */
190 #define EXT3_REG_FLMASK (~(EXT3_DIRSYNC_FL | EXT3_TOPDIR_FL))
191
192 /* Flags that are appropriate for non-directories/regular files. */
193 #define EXT3_OTHER_FLMASK (EXT3_NODUMP_FL | EXT3_NOATIME_FL)
194
195 /* Mask out flags that are inappropriate for the given type of inode. */
196 static inline __u32 ext3_mask_flags(umode_t mode, __u32 flags)
197 {
198 if (S_ISDIR(mode))
199 return flags;
200 else if (S_ISREG(mode))
201 return flags & EXT3_REG_FLMASK;
202 else
203 return flags & EXT3_OTHER_FLMASK;
204 }
205
206 /* Used to pass group descriptor data when online resize is done */
207 struct ext3_new_group_input {
208 __u32 group; /* Group number for this data */
209 __u32 block_bitmap; /* Absolute block number of block bitmap */
210 __u32 inode_bitmap; /* Absolute block number of inode bitmap */
211 __u32 inode_table; /* Absolute block number of inode table start */
212 __u32 blocks_count; /* Total number of blocks in this group */
213 __u16 reserved_blocks; /* Number of reserved blocks in this group */
214 __u16 unused;
215 };
216
217 /* The struct ext3_new_group_input in kernel space, with free_blocks_count */
218 struct ext3_new_group_data {
219 __u32 group;
220 __u32 block_bitmap;
221 __u32 inode_bitmap;
222 __u32 inode_table;
223 __u32 blocks_count;
224 __u16 reserved_blocks;
225 __u16 unused;
226 __u32 free_blocks_count;
227 };
228
229
230 /*
231 * ioctl commands
232 */
233 #define EXT3_IOC_GETFLAGS FS_IOC_GETFLAGS
234 #define EXT3_IOC_SETFLAGS FS_IOC_SETFLAGS
235 #define EXT3_IOC_GETVERSION _IOR('f', 3, long)
236 #define EXT3_IOC_SETVERSION _IOW('f', 4, long)
237 #define EXT3_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long)
238 #define EXT3_IOC_GROUP_ADD _IOW('f', 8,struct ext3_new_group_input)
239 #define EXT3_IOC_GETVERSION_OLD FS_IOC_GETVERSION
240 #define EXT3_IOC_SETVERSION_OLD FS_IOC_SETVERSION
241 #ifdef CONFIG_JBD_DEBUG
242 #define EXT3_IOC_WAIT_FOR_READONLY _IOR('f', 99, long)
243 #endif
244 #define EXT3_IOC_GETRSVSZ _IOR('f', 5, long)
245 #define EXT3_IOC_SETRSVSZ _IOW('f', 6, long)
246
247 /*
248 * ioctl commands in 32 bit emulation
249 */
250 #define EXT3_IOC32_GETFLAGS FS_IOC32_GETFLAGS
251 #define EXT3_IOC32_SETFLAGS FS_IOC32_SETFLAGS
252 #define EXT3_IOC32_GETVERSION _IOR('f', 3, int)
253 #define EXT3_IOC32_SETVERSION _IOW('f', 4, int)
254 #define EXT3_IOC32_GETRSVSZ _IOR('f', 5, int)
255 #define EXT3_IOC32_SETRSVSZ _IOW('f', 6, int)
256 #define EXT3_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int)
257 #ifdef CONFIG_JBD_DEBUG
258 #define EXT3_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int)
259 #endif
260 #define EXT3_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION
261 #define EXT3_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION
262
263
264 /*
265 * Mount options
266 */
267 struct ext3_mount_options {
268 unsigned long s_mount_opt;
269 uid_t s_resuid;
270 gid_t s_resgid;
271 unsigned long s_commit_interval;
272 #ifdef CONFIG_QUOTA
273 int s_jquota_fmt;
274 char *s_qf_names[MAXQUOTAS];
275 #endif
276 };
277
278 /*
279 * Structure of an inode on the disk
280 */
281 struct ext3_inode {
282 __le16 i_mode; /* File mode */
283 __le16 i_uid; /* Low 16 bits of Owner Uid */
284 __le32 i_size; /* Size in bytes */
285 __le32 i_atime; /* Access time */
286 __le32 i_ctime; /* Creation time */
287 __le32 i_mtime; /* Modification time */
288 __le32 i_dtime; /* Deletion Time */
289 __le16 i_gid; /* Low 16 bits of Group Id */
290 __le16 i_links_count; /* Links count */
291 __le32 i_blocks; /* Blocks count */
292 __le32 i_flags; /* File flags */
293 union {
294 struct {
295 __u32 l_i_reserved1;
296 } linux1;
297 struct {
298 __u32 h_i_translator;
299 } hurd1;
300 struct {
301 __u32 m_i_reserved1;
302 } masix1;
303 } osd1; /* OS dependent 1 */
304 __le32 i_block[EXT3_N_BLOCKS];/* Pointers to blocks */
305 __le32 i_generation; /* File version (for NFS) */
306 __le32 i_file_acl; /* File ACL */
307 __le32 i_dir_acl; /* Directory ACL */
308 __le32 i_faddr; /* Fragment address */
309 union {
310 struct {
311 __u8 l_i_frag; /* Fragment number */
312 __u8 l_i_fsize; /* Fragment size */
313 __u16 i_pad1;
314 __le16 l_i_uid_high; /* these 2 fields */
315 __le16 l_i_gid_high; /* were reserved2[0] */
316 __u32 l_i_reserved2;
317 } linux2;
318 struct {
319 __u8 h_i_frag; /* Fragment number */
320 __u8 h_i_fsize; /* Fragment size */
321 __u16 h_i_mode_high;
322 __u16 h_i_uid_high;
323 __u16 h_i_gid_high;
324 __u32 h_i_author;
325 } hurd2;
326 struct {
327 __u8 m_i_frag; /* Fragment number */
328 __u8 m_i_fsize; /* Fragment size */
329 __u16 m_pad1;
330 __u32 m_i_reserved2[2];
331 } masix2;
332 } osd2; /* OS dependent 2 */
333 __le16 i_extra_isize;
334 __le16 i_pad1;
335 };
336
337 #define i_size_high i_dir_acl
338
339 #if defined(__KERNEL__) || defined(__linux__)
340 #define i_reserved1 osd1.linux1.l_i_reserved1
341 #define i_frag osd2.linux2.l_i_frag
342 #define i_fsize osd2.linux2.l_i_fsize
343 #define i_uid_low i_uid
344 #define i_gid_low i_gid
345 #define i_uid_high osd2.linux2.l_i_uid_high
346 #define i_gid_high osd2.linux2.l_i_gid_high
347 #define i_reserved2 osd2.linux2.l_i_reserved2
348
349 #elif defined(__GNU__)
350
351 #define i_translator osd1.hurd1.h_i_translator
352 #define i_frag osd2.hurd2.h_i_frag;
353 #define i_fsize osd2.hurd2.h_i_fsize;
354 #define i_uid_high osd2.hurd2.h_i_uid_high
355 #define i_gid_high osd2.hurd2.h_i_gid_high
356 #define i_author osd2.hurd2.h_i_author
357
358 #elif defined(__masix__)
359
360 #define i_reserved1 osd1.masix1.m_i_reserved1
361 #define i_frag osd2.masix2.m_i_frag
362 #define i_fsize osd2.masix2.m_i_fsize
363 #define i_reserved2 osd2.masix2.m_i_reserved2
364
365 #endif /* defined(__KERNEL__) || defined(__linux__) */
366
367 /*
368 * File system states
369 */
370 #define EXT3_VALID_FS 0x0001 /* Unmounted cleanly */
371 #define EXT3_ERROR_FS 0x0002 /* Errors detected */
372 #define EXT3_ORPHAN_FS 0x0004 /* Orphans being recovered */
373
374 /*
375 * Misc. filesystem flags
376 */
377 #define EXT2_FLAGS_SIGNED_HASH 0x0001 /* Signed dirhash in use */
378 #define EXT2_FLAGS_UNSIGNED_HASH 0x0002 /* Unsigned dirhash in use */
379 #define EXT2_FLAGS_TEST_FILESYS 0x0004 /* to test development code */
380
381 /*
382 * Mount flags
383 */
384 #define EXT3_MOUNT_CHECK 0x00001 /* Do mount-time checks */
385 /* EXT3_MOUNT_OLDALLOC was there */
386 #define EXT3_MOUNT_GRPID 0x00004 /* Create files with directory's group */
387 #define EXT3_MOUNT_DEBUG 0x00008 /* Some debugging messages */
388 #define EXT3_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */
389 #define EXT3_MOUNT_ERRORS_RO 0x00020 /* Remount fs ro on errors */
390 #define EXT3_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */
391 #define EXT3_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */
392 #define EXT3_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/
393 #define EXT3_MOUNT_ABORT 0x00200 /* Fatal error detected */
394 #define EXT3_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */
395 #define EXT3_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */
396 #define EXT3_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */
397 #define EXT3_MOUNT_WRITEBACK_DATA 0x00C00 /* No data ordering */
398 #define EXT3_MOUNT_UPDATE_JOURNAL 0x01000 /* Update the journal format */
399 #define EXT3_MOUNT_NO_UID32 0x02000 /* Disable 32-bit UIDs */
400 #define EXT3_MOUNT_XATTR_USER 0x04000 /* Extended user attributes */
401 #define EXT3_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */
402 #define EXT3_MOUNT_RESERVATION 0x10000 /* Preallocation */
403 #define EXT3_MOUNT_BARRIER 0x20000 /* Use block barriers */
404 #define EXT3_MOUNT_QUOTA 0x80000 /* Some quota option set */
405 #define EXT3_MOUNT_USRQUOTA 0x100000 /* "old" user quota */
406 #define EXT3_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */
407 #define EXT3_MOUNT_DATA_ERR_ABORT 0x400000 /* Abort on file data write
408 * error in ordered mode */
409
410 /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
411 #ifndef _LINUX_EXT2_FS_H
412 #define clear_opt(o, opt) o &= ~EXT3_MOUNT_##opt
413 #define set_opt(o, opt) o |= EXT3_MOUNT_##opt
414 #define test_opt(sb, opt) (EXT3_SB(sb)->s_mount_opt & \
415 EXT3_MOUNT_##opt)
416 #else
417 #define EXT2_MOUNT_NOLOAD EXT3_MOUNT_NOLOAD
418 #define EXT2_MOUNT_ABORT EXT3_MOUNT_ABORT
419 #define EXT2_MOUNT_DATA_FLAGS EXT3_MOUNT_DATA_FLAGS
420 #endif
421
422 #define ext3_set_bit __set_bit_le
423 #define ext3_set_bit_atomic ext2_set_bit_atomic
424 #define ext3_clear_bit __clear_bit_le
425 #define ext3_clear_bit_atomic ext2_clear_bit_atomic
426 #define ext3_test_bit test_bit_le
427 #define ext3_find_next_zero_bit find_next_zero_bit_le
428
429 /*
430 * Maximal mount counts between two filesystem checks
431 */
432 #define EXT3_DFL_MAX_MNT_COUNT 20 /* Allow 20 mounts */
433 #define EXT3_DFL_CHECKINTERVAL 0 /* Don't use interval check */
434
435 /*
436 * Behaviour when detecting errors
437 */
438 #define EXT3_ERRORS_CONTINUE 1 /* Continue execution */
439 #define EXT3_ERRORS_RO 2 /* Remount fs read-only */
440 #define EXT3_ERRORS_PANIC 3 /* Panic */
441 #define EXT3_ERRORS_DEFAULT EXT3_ERRORS_CONTINUE
442
443 /*
444 * Structure of the super block
445 */
446 struct ext3_super_block {
447 /*00*/ __le32 s_inodes_count; /* Inodes count */
448 __le32 s_blocks_count; /* Blocks count */
449 __le32 s_r_blocks_count; /* Reserved blocks count */
450 __le32 s_free_blocks_count; /* Free blocks count */
451 /*10*/ __le32 s_free_inodes_count; /* Free inodes count */
452 __le32 s_first_data_block; /* First Data Block */
453 __le32 s_log_block_size; /* Block size */
454 __le32 s_log_frag_size; /* Fragment size */
455 /*20*/ __le32 s_blocks_per_group; /* # Blocks per group */
456 __le32 s_frags_per_group; /* # Fragments per group */
457 __le32 s_inodes_per_group; /* # Inodes per group */
458 __le32 s_mtime; /* Mount time */
459 /*30*/ __le32 s_wtime; /* Write time */
460 __le16 s_mnt_count; /* Mount count */
461 __le16 s_max_mnt_count; /* Maximal mount count */
462 __le16 s_magic; /* Magic signature */
463 __le16 s_state; /* File system state */
464 __le16 s_errors; /* Behaviour when detecting errors */
465 __le16 s_minor_rev_level; /* minor revision level */
466 /*40*/ __le32 s_lastcheck; /* time of last check */
467 __le32 s_checkinterval; /* max. time between checks */
468 __le32 s_creator_os; /* OS */
469 __le32 s_rev_level; /* Revision level */
470 /*50*/ __le16 s_def_resuid; /* Default uid for reserved blocks */
471 __le16 s_def_resgid; /* Default gid for reserved blocks */
472 /*
473 * These fields are for EXT3_DYNAMIC_REV superblocks only.
474 *
475 * Note: the difference between the compatible feature set and
476 * the incompatible feature set is that if there is a bit set
477 * in the incompatible feature set that the kernel doesn't
478 * know about, it should refuse to mount the filesystem.
479 *
480 * e2fsck's requirements are more strict; if it doesn't know
481 * about a feature in either the compatible or incompatible
482 * feature set, it must abort and not try to meddle with
483 * things it doesn't understand...
484 */
485 __le32 s_first_ino; /* First non-reserved inode */
486 __le16 s_inode_size; /* size of inode structure */
487 __le16 s_block_group_nr; /* block group # of this superblock */
488 __le32 s_feature_compat; /* compatible feature set */
489 /*60*/ __le32 s_feature_incompat; /* incompatible feature set */
490 __le32 s_feature_ro_compat; /* readonly-compatible feature set */
491 /*68*/ __u8 s_uuid[16]; /* 128-bit uuid for volume */
492 /*78*/ char s_volume_name[16]; /* volume name */
493 /*88*/ char s_last_mounted[64]; /* directory where last mounted */
494 /*C8*/ __le32 s_algorithm_usage_bitmap; /* For compression */
495 /*
496 * Performance hints. Directory preallocation should only
497 * happen if the EXT3_FEATURE_COMPAT_DIR_PREALLOC flag is on.
498 */
499 __u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate*/
500 __u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */
501 __le16 s_reserved_gdt_blocks; /* Per group desc for online growth */
502 /*
503 * Journaling support valid if EXT3_FEATURE_COMPAT_HAS_JOURNAL set.
504 */
505 /*D0*/ __u8 s_journal_uuid[16]; /* uuid of journal superblock */
506 /*E0*/ __le32 s_journal_inum; /* inode number of journal file */
507 __le32 s_journal_dev; /* device number of journal file */
508 __le32 s_last_orphan; /* start of list of inodes to delete */
509 __le32 s_hash_seed[4]; /* HTREE hash seed */
510 __u8 s_def_hash_version; /* Default hash version to use */
511 __u8 s_reserved_char_pad;
512 __u16 s_reserved_word_pad;
513 __le32 s_default_mount_opts;
514 __le32 s_first_meta_bg; /* First metablock block group */
515 __le32 s_mkfs_time; /* When the filesystem was created */
516 __le32 s_jnl_blocks[17]; /* Backup of the journal inode */
517 /* 64bit support valid if EXT4_FEATURE_COMPAT_64BIT */
518 /*150*/ __le32 s_blocks_count_hi; /* Blocks count */
519 __le32 s_r_blocks_count_hi; /* Reserved blocks count */
520 __le32 s_free_blocks_count_hi; /* Free blocks count */
521 __le16 s_min_extra_isize; /* All inodes have at least # bytes */
522 __le16 s_want_extra_isize; /* New inodes should reserve # bytes */
523 __le32 s_flags; /* Miscellaneous flags */
524 __le16 s_raid_stride; /* RAID stride */
525 __le16 s_mmp_interval; /* # seconds to wait in MMP checking */
526 __le64 s_mmp_block; /* Block for multi-mount protection */
527 __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/
528 __u8 s_log_groups_per_flex; /* FLEX_BG group size */
529 __u8 s_reserved_char_pad2;
530 __le16 s_reserved_pad;
531 __u32 s_reserved[162]; /* Padding to the end of the block */
532 };
533
534 #ifdef __KERNEL__
535 #include <linux/ext3_fs_i.h>
536 #include <linux/ext3_fs_sb.h>
537 static inline struct ext3_sb_info * EXT3_SB(struct super_block *sb)
538 {
539 return sb->s_fs_info;
540 }
/* Map a VFS inode back to the ext3_inode_info that embeds it. */
static inline struct ext3_inode_info *EXT3_I(struct inode *inode)
{
	return container_of(inode, struct ext3_inode_info, vfs_inode);
}
545
546 static inline int ext3_valid_inum(struct super_block *sb, unsigned long ino)
547 {
548 return ino == EXT3_ROOT_INO ||
549 ino == EXT3_JOURNAL_INO ||
550 ino == EXT3_RESIZE_INO ||
551 (ino >= EXT3_FIRST_INO(sb) &&
552 ino <= le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count));
553 }
554
/*
 * Inode dynamic state flags - bit numbers used with
 * ext3_{test,set,clear}_inode_state() on ext3_inode_info.i_state_flags.
 */
enum {
	EXT3_STATE_JDATA,		/* journaled data exists */
	EXT3_STATE_NEW,			/* inode is newly created */
	EXT3_STATE_XATTR,		/* has in-inode xattrs */
	EXT3_STATE_FLUSH_ON_CLOSE,	/* flush dirty pages on close */
};
564
/* Nonzero if dynamic-state @bit (EXT3_STATE_*) is set on @inode. */
static inline int ext3_test_inode_state(struct inode *inode, int bit)
{
	return test_bit(bit, &EXT3_I(inode)->i_state_flags);
}
569
/* Set dynamic-state @bit (EXT3_STATE_*) on @inode. */
static inline void ext3_set_inode_state(struct inode *inode, int bit)
{
	set_bit(bit, &EXT3_I(inode)->i_state_flags);
}
574
/* Clear dynamic-state @bit (EXT3_STATE_*) on @inode. */
static inline void ext3_clear_inode_state(struct inode *inode, int bit)
{
	clear_bit(bit, &EXT3_I(inode)->i_state_flags);
}
579 #else
580 /* Assume that user mode programs are passing in an ext3fs superblock, not
581 * a kernel struct super_block. This will allow us to call the feature-test
582 * macros from user land. */
583 #define EXT3_SB(sb) (sb)
584 #endif
585
586 #define NEXT_ORPHAN(inode) EXT3_I(inode)->i_dtime
587
588 /*
589 * Codes for operating systems
590 */
591 #define EXT3_OS_LINUX 0
592 #define EXT3_OS_HURD 1
593 #define EXT3_OS_MASIX 2
594 #define EXT3_OS_FREEBSD 3
595 #define EXT3_OS_LITES 4
596
597 /*
598 * Revision levels
599 */
600 #define EXT3_GOOD_OLD_REV 0 /* The good old (original) format */
601 #define EXT3_DYNAMIC_REV 1 /* V2 format w/ dynamic inode sizes */
602
603 #define EXT3_CURRENT_REV EXT3_GOOD_OLD_REV
604 #define EXT3_MAX_SUPP_REV EXT3_DYNAMIC_REV
605
606 #define EXT3_GOOD_OLD_INODE_SIZE 128
607
608 /*
609 * Feature set definitions
610 */
611
612 #define EXT3_HAS_COMPAT_FEATURE(sb,mask) \
613 ( EXT3_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask) )
614 #define EXT3_HAS_RO_COMPAT_FEATURE(sb,mask) \
615 ( EXT3_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask) )
616 #define EXT3_HAS_INCOMPAT_FEATURE(sb,mask) \
617 ( EXT3_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask) )
618 #define EXT3_SET_COMPAT_FEATURE(sb,mask) \
619 EXT3_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask)
620 #define EXT3_SET_RO_COMPAT_FEATURE(sb,mask) \
621 EXT3_SB(sb)->s_es->s_feature_ro_compat |= cpu_to_le32(mask)
622 #define EXT3_SET_INCOMPAT_FEATURE(sb,mask) \
623 EXT3_SB(sb)->s_es->s_feature_incompat |= cpu_to_le32(mask)
624 #define EXT3_CLEAR_COMPAT_FEATURE(sb,mask) \
625 EXT3_SB(sb)->s_es->s_feature_compat &= ~cpu_to_le32(mask)
626 #define EXT3_CLEAR_RO_COMPAT_FEATURE(sb,mask) \
627 EXT3_SB(sb)->s_es->s_feature_ro_compat &= ~cpu_to_le32(mask)
628 #define EXT3_CLEAR_INCOMPAT_FEATURE(sb,mask) \
629 EXT3_SB(sb)->s_es->s_feature_incompat &= ~cpu_to_le32(mask)
630
631 #define EXT3_FEATURE_COMPAT_DIR_PREALLOC 0x0001
632 #define EXT3_FEATURE_COMPAT_IMAGIC_INODES 0x0002
633 #define EXT3_FEATURE_COMPAT_HAS_JOURNAL 0x0004
634 #define EXT3_FEATURE_COMPAT_EXT_ATTR 0x0008
635 #define EXT3_FEATURE_COMPAT_RESIZE_INODE 0x0010
636 #define EXT3_FEATURE_COMPAT_DIR_INDEX 0x0020
637
638 #define EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001
639 #define EXT3_FEATURE_RO_COMPAT_LARGE_FILE 0x0002
640 #define EXT3_FEATURE_RO_COMPAT_BTREE_DIR 0x0004
641
642 #define EXT3_FEATURE_INCOMPAT_COMPRESSION 0x0001
643 #define EXT3_FEATURE_INCOMPAT_FILETYPE 0x0002
644 #define EXT3_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */
645 #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */
646 #define EXT3_FEATURE_INCOMPAT_META_BG 0x0010
647
648 #define EXT3_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR
649 #define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \
650 EXT3_FEATURE_INCOMPAT_RECOVER| \
651 EXT3_FEATURE_INCOMPAT_META_BG)
652 #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \
653 EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \
654 EXT3_FEATURE_RO_COMPAT_BTREE_DIR)
655
656 /*
657 * Default values for user and/or group using reserved blocks
658 */
659 #define EXT3_DEF_RESUID 0
660 #define EXT3_DEF_RESGID 0
661
662 /*
663 * Default mount options
664 */
665 #define EXT3_DEFM_DEBUG 0x0001
666 #define EXT3_DEFM_BSDGROUPS 0x0002
667 #define EXT3_DEFM_XATTR_USER 0x0004
668 #define EXT3_DEFM_ACL 0x0008
669 #define EXT3_DEFM_UID16 0x0010
670 #define EXT3_DEFM_JMODE 0x0060
671 #define EXT3_DEFM_JMODE_DATA 0x0020
672 #define EXT3_DEFM_JMODE_ORDERED 0x0040
673 #define EXT3_DEFM_JMODE_WBACK 0x0060
674
/*
 * Structure of a directory entry
 */
#define EXT3_NAME_LEN 255

/*
 * Original on-disk directory entry format, with a 16-bit name_len.
 * See ext3_dir_entry_2 below, which reclaims the high name_len byte
 * for a file_type field.
 */
struct ext3_dir_entry {
	__le32	inode;			/* Inode number */
	__le16	rec_len;		/* Directory entry length */
	__le16	name_len;		/* Name length */
	char	name[EXT3_NAME_LEN];	/* File name */
};
686
/*
 * The new version of the directory entry.  Since EXT3 structures are
 * stored in intel byte order, and the name_len field could never be
 * bigger than 255 chars, it's safe to reclaim the extra byte for the
 * file_type field.
 */
struct ext3_dir_entry_2 {
	__le32	inode;			/* Inode number */
	__le16	rec_len;		/* Directory entry length */
	__u8	name_len;		/* Name length */
	__u8	file_type;		/* EXT3_FT_* value */
	char	name[EXT3_NAME_LEN];	/* File name */
};
700
701 /*
702 * Ext3 directory file types. Only the low 3 bits are used. The
703 * other bits are reserved for now.
704 */
705 #define EXT3_FT_UNKNOWN 0
706 #define EXT3_FT_REG_FILE 1
707 #define EXT3_FT_DIR 2
708 #define EXT3_FT_CHRDEV 3
709 #define EXT3_FT_BLKDEV 4
710 #define EXT3_FT_FIFO 5
711 #define EXT3_FT_SOCK 6
712 #define EXT3_FT_SYMLINK 7
713
714 #define EXT3_FT_MAX 8
715
716 /*
717 * EXT3_DIR_PAD defines the directory entries boundaries
718 *
719 * NOTE: It must be a multiple of 4
720 */
721 #define EXT3_DIR_PAD 4
722 #define EXT3_DIR_ROUND (EXT3_DIR_PAD - 1)
723 #define EXT3_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT3_DIR_ROUND) & \
724 ~EXT3_DIR_ROUND)
725 #define EXT3_MAX_REC_LEN ((1<<16)-1)
726
727 /*
728 * Tests against MAX_REC_LEN etc were put in place for 64k block
729 * sizes; if that is not possible on this arch, we can skip
730 * those tests and speed things up.
731 */
732 static inline unsigned ext3_rec_len_from_disk(__le16 dlen)
733 {
734 unsigned len = le16_to_cpu(dlen);
735
736 #if (PAGE_CACHE_SIZE >= 65536)
737 if (len == EXT3_MAX_REC_LEN)
738 return 1 << 16;
739 #endif
740 return len;
741 }
742
743 static inline __le16 ext3_rec_len_to_disk(unsigned len)
744 {
745 #if (PAGE_CACHE_SIZE >= 65536)
746 if (len == (1 << 16))
747 return cpu_to_le16(EXT3_MAX_REC_LEN);
748 else if (len > (1 << 16))
749 BUG();
750 #endif
751 return cpu_to_le16(len);
752 }
753
754 /*
755 * Hash Tree Directory indexing
756 * (c) Daniel Phillips, 2001
757 */
758
759 #define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \
760 EXT3_FEATURE_COMPAT_DIR_INDEX) && \
761 (EXT3_I(dir)->i_flags & EXT3_INDEX_FL))
762 #define EXT3_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT3_LINK_MAX)
763 #define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1)
764
765 /* Legal values for the dx_root hash_version field: */
766
767 #define DX_HASH_LEGACY 0
768 #define DX_HASH_HALF_MD4 1
769 #define DX_HASH_TEA 2
770 #define DX_HASH_LEGACY_UNSIGNED 3
771 #define DX_HASH_HALF_MD4_UNSIGNED 4
772 #define DX_HASH_TEA_UNSIGNED 5
773
774 #ifdef __KERNEL__
775
/* hash info structure used by the directory hash */
struct dx_hash_info
{
	u32		hash;		/* major hash value */
	u32		minor_hash;	/* minor hash (collision tie-breaker) */
	int		hash_version;	/* DX_HASH_* algorithm selector */
	u32		*seed;		/* per-fs hash seed (s_hash_seed),
					 * may be NULL -- TODO confirm in hash.c */
};
784
785 #define EXT3_HTREE_EOF 0x7fffffff
786
787 /*
788 * Control parameters used by ext3_htree_next_block
789 */
790 #define HASH_NB_ALWAYS 1
791
792
/*
 * Describe an inode's exact location on disk and in memory
 */
struct ext3_iloc
{
	struct buffer_head *bh;		/* buffer holding the inode-table block */
	unsigned long offset;		/* byte offset of the inode within bh */
	unsigned long block_group;	/* block group the inode lives in */
};
802
803 static inline struct ext3_inode *ext3_raw_inode(struct ext3_iloc *iloc)
804 {
805 return (struct ext3_inode *) (iloc->bh->b_data + iloc->offset);
806 }
807
/*
 * This structure is stuffed into the struct file's private_data field
 * for directories.  It is where we put information so that we can do
 * readdir operations in hash tree order.
 */
struct dir_private_info {
	struct rb_root	root;		/* red-black tree of cached dirents */
	struct rb_node	*curr_node;	/* current node in the traversal */
	struct fname	*extra_fname;	/* NOTE(review): presumably leftover
					 * same-hash entries -- confirm in dir.c */
	loff_t		last_pos;	/* file position at the last readdir */
	__u32		curr_hash;	/* major hash of current position */
	__u32		curr_minor_hash;/* minor hash of current position */
	__u32		next_hash;	/* hash at which to resume the walk */
};
822
823 /* calculate the first block number of the group */
824 static inline ext3_fsblk_t
825 ext3_group_first_block_no(struct super_block *sb, unsigned long group_no)
826 {
827 return group_no * (ext3_fsblk_t)EXT3_BLOCKS_PER_GROUP(sb) +
828 le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block);
829 }
830
831 /*
832 * Special error return code only used by dx_probe() and its callers.
833 */
834 #define ERR_BAD_DX_DIR -75000
835
836 /*
837 * Function prototypes
838 */
839
840 /*
841 * Ok, these declarations are also in <linux/kernel.h> but none of the
842 * ext3 source programs needs to include it so they are duplicated here.
843 */
844 # define NORET_TYPE /**/
845 # define ATTRIB_NORET __attribute__((noreturn))
846 # define NORET_AND noreturn,
847
848 /* balloc.c */
849 extern int ext3_bg_has_super(struct super_block *sb, int group);
850 extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group);
851 extern ext3_fsblk_t ext3_new_block (handle_t *handle, struct inode *inode,
852 ext3_fsblk_t goal, int *errp);
853 extern ext3_fsblk_t ext3_new_blocks (handle_t *handle, struct inode *inode,
854 ext3_fsblk_t goal, unsigned long *count, int *errp);
855 extern void ext3_free_blocks (handle_t *handle, struct inode *inode,
856 ext3_fsblk_t block, unsigned long count);
857 extern void ext3_free_blocks_sb (handle_t *handle, struct super_block *sb,
858 ext3_fsblk_t block, unsigned long count,
859 unsigned long *pdquot_freed_blocks);
860 extern ext3_fsblk_t ext3_count_free_blocks (struct super_block *);
861 extern void ext3_check_blocks_bitmap (struct super_block *);
862 extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
863 unsigned int block_group,
864 struct buffer_head ** bh);
865 extern int ext3_should_retry_alloc(struct super_block *sb, int *retries);
866 extern void ext3_init_block_alloc_info(struct inode *);
867 extern void ext3_rsv_window_add(struct super_block *sb, struct ext3_reserve_window_node *rsv);
868 extern int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range);
869
870 /* dir.c */
871 extern int ext3_check_dir_entry(const char *, struct inode *,
872 struct ext3_dir_entry_2 *,
873 struct buffer_head *, unsigned long);
874 extern int ext3_htree_store_dirent(struct file *dir_file, __u32 hash,
875 __u32 minor_hash,
876 struct ext3_dir_entry_2 *dirent);
877 extern void ext3_htree_free_dir_info(struct dir_private_info *p);
878
879 /* fsync.c */
880 extern int ext3_sync_file(struct file *, loff_t, loff_t, int);
881
882 /* hash.c */
883 extern int ext3fs_dirhash(const char *name, int len, struct
884 dx_hash_info *hinfo);
885
886 /* ialloc.c */
887 extern struct inode * ext3_new_inode (handle_t *, struct inode *,
888 const struct qstr *, umode_t);
889 extern void ext3_free_inode (handle_t *, struct inode *);
890 extern struct inode * ext3_orphan_get (struct super_block *, unsigned long);
891 extern unsigned long ext3_count_free_inodes (struct super_block *);
892 extern unsigned long ext3_count_dirs (struct super_block *);
893 extern void ext3_check_inodes_bitmap (struct super_block *);
894 extern unsigned long ext3_count_free (struct buffer_head *, unsigned);
895
896
897 /* inode.c */
898 int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode,
899 struct buffer_head *bh, ext3_fsblk_t blocknr);
900 struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
901 struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
902 int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
903 sector_t iblock, unsigned long maxblocks, struct buffer_head *bh_result,
904 int create);
905
906 extern struct inode *ext3_iget(struct super_block *, unsigned long);
907 extern int ext3_write_inode (struct inode *, struct writeback_control *);
908 extern int ext3_setattr (struct dentry *, struct iattr *);
909 extern void ext3_evict_inode (struct inode *);
910 extern int ext3_sync_inode (handle_t *, struct inode *);
911 extern void ext3_discard_reservation (struct inode *);
912 extern void ext3_dirty_inode(struct inode *, int);
913 extern int ext3_change_inode_journal_flag(struct inode *, int);
914 extern int ext3_get_inode_loc(struct inode *, struct ext3_iloc *);
915 extern int ext3_can_truncate(struct inode *inode);
916 extern void ext3_truncate(struct inode *inode);
917 extern void ext3_set_inode_flags(struct inode *);
918 extern void ext3_get_inode_flags(struct ext3_inode_info *);
919 extern void ext3_set_aops(struct inode *inode);
920 extern int ext3_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
921 u64 start, u64 len);
922
923 /* ioctl.c */
924 extern long ext3_ioctl(struct file *, unsigned int, unsigned long);
925 extern long ext3_compat_ioctl(struct file *, unsigned int, unsigned long);
926
927 /* namei.c */
928 extern int ext3_orphan_add(handle_t *, struct inode *);
929 extern int ext3_orphan_del(handle_t *, struct inode *);
930 extern int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
931 __u32 start_minor_hash, __u32 *next_hash);
932
933 /* resize.c */
934 extern int ext3_group_add(struct super_block *sb,
935 struct ext3_new_group_data *input);
936 extern int ext3_group_extend(struct super_block *sb,
937 struct ext3_super_block *es,
938 ext3_fsblk_t n_blocks_count);
939
940 /* super.c */
941 extern __printf(3, 4)
942 void ext3_error(struct super_block *, const char *, const char *, ...);
943 extern void __ext3_std_error (struct super_block *, const char *, int);
944 extern __printf(3, 4)
945 void ext3_abort(struct super_block *, const char *, const char *, ...);
946 extern __printf(3, 4)
947 void ext3_warning(struct super_block *, const char *, const char *, ...);
948 extern __printf(3, 4)
949 void ext3_msg(struct super_block *, const char *, const char *, ...);
950 extern void ext3_update_dynamic_rev (struct super_block *sb);
951
952 #define ext3_std_error(sb, errno) \
953 do { \
954 if ((errno)) \
955 __ext3_std_error((sb), __func__, (errno)); \
956 } while (0)
957
958 /*
959 * Inodes and files operations
960 */
961
962 /* dir.c */
963 extern const struct file_operations ext3_dir_operations;
964
965 /* file.c */
966 extern const struct inode_operations ext3_file_inode_operations;
967 extern const struct file_operations ext3_file_operations;
968
969 /* namei.c */
970 extern const struct inode_operations ext3_dir_inode_operations;
971 extern const struct inode_operations ext3_special_inode_operations;
972
973 /* symlink.c */
974 extern const struct inode_operations ext3_symlink_inode_operations;
975 extern const struct inode_operations ext3_fast_symlink_inode_operations;
976
977
978 #endif /* __KERNEL__ */
979
980 #endif /* _LINUX_EXT3_FS_H */
981 1 /*
include/linux/ext3_fs_i.h
1 /* File was deleted
2 * linux/include/linux/ext3_fs_i.h
3 *
4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI)
8 *
9 * from
10 *
11 * linux/include/linux/minix_fs_i.h
12 *
13 * Copyright (C) 1991, 1992 Linus Torvalds
14 */
15
16 #ifndef _LINUX_EXT3_FS_I
17 #define _LINUX_EXT3_FS_I
18
19 #include <linux/rwsem.h>
20 #include <linux/rbtree.h>
21 #include <linux/seqlock.h>
22 #include <linux/mutex.h>
23
24 /* data type for block offset of block group */
25 typedef int ext3_grpblk_t;
26
27 /* data type for filesystem-wide blocks number */
28 typedef unsigned long ext3_fsblk_t;
29
30 #define E3FSBLK "%lu"
31
/* A contiguous range of reserved filesystem blocks. */
struct ext3_reserve_window {
	ext3_fsblk_t	_rsv_start;	/* First block reserved */
	ext3_fsblk_t	_rsv_end;	/* Last block reserved or 0 */
};
36
/* A reservation window linked into the per-sb s_rsv_window_root tree. */
struct ext3_reserve_window_node {
	struct rb_node		rsv_node;	/* node in the rb-tree */
	__u32			rsv_goal_size;	/* desired window size, blocks */
	__u32			rsv_alloc_hit;	/* hits inside this window */
	struct ext3_reserve_window	rsv_window;	/* the reserved range */
};
43
struct ext3_block_alloc_info {
	/* information about reservation window */
	struct ext3_reserve_window_node	rsv_window_node;
	/*
	 * was i_next_alloc_block in ext3_inode_info
	 * is the logical (file-relative) number of the
	 * most-recently-allocated block in this file.
	 * We use this for detecting linearly ascending allocation requests.
	 */
	__u32                   last_alloc_logical_block;
	/*
	 * Was i_next_alloc_goal in ext3_inode_info
	 * is the *physical* companion to i_next_alloc_block.
	 * it is the physical block number of the block which was most-recently
	 * allocated to this file.  This gives us the goal (target) for the next
	 * allocation when we detect linearly ascending requests.
	 */
	ext3_fsblk_t		last_alloc_physical_block;
};
63
64 #define rsv_start rsv_window._rsv_start
65 #define rsv_end rsv_window._rsv_end
66
/*
 * third extended file system inode data in memory
 */
struct ext3_inode_info {
	__le32	i_data[15];	/* unconverted */
	__u32	i_flags;
#ifdef EXT3_FRAGMENTS
	__u32	i_faddr;
	__u8	i_frag_no;
	__u8	i_frag_size;
#endif
	ext3_fsblk_t	i_file_acl;
	__u32	i_dir_acl;
	__u32	i_dtime;

	/*
	 * i_block_group is the number of the block group which contains
	 * this file's inode.  Constant across the lifetime of the inode,
	 * it is used for making block allocation decisions - we try to
	 * place a file's data blocks near its inode block, and new inodes
	 * near to their parent directory's inode.
	 */
	__u32	i_block_group;
	unsigned long	i_state_flags;	/* Dynamic state flags for ext3 */

	/* block reservation info */
	struct ext3_block_alloc_info *i_block_alloc_info;

	__u32	i_dir_start_lookup;
#ifdef CONFIG_EXT3_FS_XATTR
	/*
	 * Extended attributes can be read independently of the main file
	 * data. Taking i_mutex even when reading would cause contention
	 * between readers of EAs and writers of regular file data, so
	 * instead we synchronize on xattr_sem when reading or changing
	 * EAs.
	 */
	struct rw_semaphore xattr_sem;
#endif

	struct list_head i_orphan;	/* unlinked but open inodes */

	/*
	 * i_disksize keeps track of what the inode size is ON DISK, not
	 * in memory.  During truncate, i_size is set to the new size by
	 * the VFS prior to calling ext3_truncate(), but the filesystem won't
	 * set i_disksize to 0 until the truncate is actually under way.
	 *
	 * The intent is that i_disksize always represents the blocks which
	 * are used by this file.  This allows recovery to restart truncate
	 * on orphans if we crash during truncate.  We actually write i_disksize
	 * into the on-disk inode when writing inodes out, instead of i_size.
	 *
	 * The only time when i_disksize and i_size may be different is when
	 * a truncate is in progress.  The only things which change i_disksize
	 * are ext3_get_block (growth) and ext3_truncate (shrinkage).
	 */
	loff_t	i_disksize;

	/* on-disk additional length */
	__u16 i_extra_isize;

	/*
	 * truncate_mutex is for serialising ext3_truncate() against
	 * ext3_getblock(). In the 2.4 ext2 design, great chunks of inode's
	 * data tree are chopped off during truncate. We can't do that in
	 * ext3 because whenever we perform intermediate commits during
	 * truncate, the inode and all the metadata blocks *must* be in a
	 * consistent state which allows truncation of the orphans to restart
	 * during recovery.  Hence we must fix the get_block-vs-truncate race
	 * by other means, so we have truncate_mutex.
	 */
	struct mutex truncate_mutex;

	/*
	 * Transactions that contain inode's metadata needed to complete
	 * fsync and fdatasync, respectively.
	 */
	atomic_t i_sync_tid;
	atomic_t i_datasync_tid;

	struct inode vfs_inode;
};
150
151 #endif /* _LINUX_EXT3_FS_I */
152 1 /*
include/linux/ext3_fs_sb.h
1 /* File was deleted
2 * linux/include/linux/ext3_fs_sb.h
3 *
4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI)
8 *
9 * from
10 *
11 * linux/include/linux/minix_fs_sb.h
12 *
13 * Copyright (C) 1991, 1992 Linus Torvalds
14 */
15
16 #ifndef _LINUX_EXT3_FS_SB
17 #define _LINUX_EXT3_FS_SB
18
19 #ifdef __KERNEL__
20 #include <linux/timer.h>
21 #include <linux/wait.h>
22 #include <linux/blockgroup_lock.h>
23 #include <linux/percpu_counter.h>
24 #endif
25 #include <linux/rbtree.h>
26
/*
 * third extended-fs super-block data in memory
 */
struct ext3_sb_info {
	unsigned long s_frag_size;	/* Size of a fragment in bytes */
	unsigned long s_frags_per_block;/* Number of fragments per block */
	unsigned long s_inodes_per_block;/* Number of inodes per block */
	unsigned long s_frags_per_group;/* Number of fragments in a group */
	unsigned long s_blocks_per_group;/* Number of blocks in a group */
	unsigned long s_inodes_per_group;/* Number of inodes in a group */
	unsigned long s_itb_per_group;	/* Number of inode table blocks per group */
	unsigned long s_gdb_count;	/* Number of group descriptor blocks */
	unsigned long s_desc_per_block;	/* Number of group descriptors per block */
	unsigned long s_groups_count;	/* Number of groups in the fs */
	unsigned long s_overhead_last;  /* Last calculated overhead */
	unsigned long s_blocks_last;    /* Last seen block count */
	struct buffer_head * s_sbh;	/* Buffer containing the super block */
	struct ext3_super_block * s_es;	/* Pointer to the super block in the buffer */
	struct buffer_head ** s_group_desc;
	unsigned long  s_mount_opt;
	ext3_fsblk_t s_sb_block;
	uid_t s_resuid;
	gid_t s_resgid;
	unsigned short s_mount_state;
	unsigned short s_pad;
	int s_addr_per_block_bits;
	int s_desc_per_block_bits;
	int s_inode_size;
	int s_first_ino;
	spinlock_t s_next_gen_lock;
	u32 s_next_generation;
	u32 s_hash_seed[4];
	int s_def_hash_version;
	int s_hash_unsigned;	/* 3 if hash should be unsigned, 0 if not */
	struct percpu_counter s_freeblocks_counter;
	struct percpu_counter s_freeinodes_counter;
	struct percpu_counter s_dirs_counter;
	struct blockgroup_lock *s_blockgroup_lock;

	/* root of the per fs reservation window tree */
	spinlock_t s_rsv_window_lock;
	struct rb_root s_rsv_window_root;
	struct ext3_reserve_window_node s_rsv_window_head;

	/* Journaling */
	struct inode * s_journal_inode;
	struct journal_s * s_journal;
	struct list_head s_orphan;
	struct mutex s_orphan_lock;
	struct mutex s_resize_lock;
	unsigned long s_commit_interval;
	struct block_device *journal_bdev;
#ifdef CONFIG_QUOTA
	char *s_qf_names[MAXQUOTAS];	/* Names of quota files with journalled quota */
	int s_jquota_fmt;		/* Format of quota to use */
#endif
};
84
85 static inline spinlock_t *
86 sb_bgl_lock(struct ext3_sb_info *sbi, unsigned int block_group)
87 {
88 return bgl_lock_ptr(sbi->s_blockgroup_lock, block_group);
89 }
90
91 #endif /* _LINUX_EXT3_FS_SB */
92 1 /*
include/linux/ext3_jbd.h
1 /* File was deleted
2 * linux/include/linux/ext3_jbd.h
3 *
4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
5 *
6 * Copyright 1998--1999 Red Hat corp --- All Rights Reserved
7 *
8 * This file is part of the Linux kernel and is made available under
9 * the terms of the GNU General Public License, version 2, or at your
10 * option, any later version, incorporated herein by reference.
11 *
12 * Ext3-specific journaling extensions.
13 */
14
15 #ifndef _LINUX_EXT3_JBD_H
16 #define _LINUX_EXT3_JBD_H
17
18 #include <linux/fs.h>
19 #include <linux/jbd.h>
20 #include <linux/ext3_fs.h>
21
22 #define EXT3_JOURNAL(inode) (EXT3_SB((inode)->i_sb)->s_journal)
23
24 /* Define the number of blocks we need to account to a transaction to
25 * modify one block of data.
26 *
27 * We may have to touch one inode, one bitmap buffer, up to three
28 * indirection blocks, the group and superblock summaries, and the data
29 * block to complete the transaction. */
30
31 #define EXT3_SINGLEDATA_TRANS_BLOCKS 8U
32
33 /* Extended attribute operations touch at most two data buffers,
34 * two bitmap buffers, and two group summaries, in addition to the inode
35 * and the superblock, which are already accounted for. */
36
37 #define EXT3_XATTR_TRANS_BLOCKS 6U
38
39 /* Define the minimum size for a transaction which modifies data. This
40 * needs to take into account the fact that we may end up modifying two
41 * quota files too (one for the group, one for the user quota). The
42 * superblock only gets updated once, of course, so don't bother
43 * counting that again for the quota updates. */
44
45 #define EXT3_DATA_TRANS_BLOCKS(sb) (EXT3_SINGLEDATA_TRANS_BLOCKS + \
46 EXT3_XATTR_TRANS_BLOCKS - 2 + \
47 EXT3_MAXQUOTAS_TRANS_BLOCKS(sb))
48
49 /* Delete operations potentially hit one directory's namespace plus an
50 * entire inode, plus arbitrary amounts of bitmap/indirection data. Be
51 * generous. We can grow the delete transaction later if necessary. */
52
53 #define EXT3_DELETE_TRANS_BLOCKS(sb) (EXT3_MAXQUOTAS_TRANS_BLOCKS(sb) + 64)
54
55 /* Define an arbitrary limit for the amount of data we will anticipate
56 * writing to any given transaction. For unbounded transactions such as
57 * write(2) and truncate(2) we can write more than this, but we always
58 * start off at the maximum transaction size and grow the transaction
59 * optimistically as we go. */
60
61 #define EXT3_MAX_TRANS_DATA 64U
62
63 /* We break up a large truncate or write transaction once the handle's
64 * buffer credits gets this low, we need either to extend the
65 * transaction or to start a new one. Reserve enough space here for
66 * inode, bitmap, superblock, group and indirection updates for at least
67 * one block, plus two quota updates. Quota allocations are not
68 * needed. */
69
70 #define EXT3_RESERVE_TRANS_BLOCKS 12U
71
72 #define EXT3_INDEX_EXTRA_TRANS_BLOCKS 8
73
74 #ifdef CONFIG_QUOTA
75 /* Amount of blocks needed for quota update - we know that the structure was
76 * allocated so we need to update only inode+data */
77 #define EXT3_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 2 : 0)
78 /* Amount of blocks needed for quota insert/delete - we do some block writes
79 * but inode, sb and group updates are done only once */
80 #define EXT3_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\
81 (EXT3_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_INIT_REWRITE) : 0)
82 #define EXT3_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\
83 (EXT3_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_DEL_REWRITE) : 0)
84 #else
85 #define EXT3_QUOTA_TRANS_BLOCKS(sb) 0
86 #define EXT3_QUOTA_INIT_BLOCKS(sb) 0
87 #define EXT3_QUOTA_DEL_BLOCKS(sb) 0
88 #endif
89 #define EXT3_MAXQUOTAS_TRANS_BLOCKS(sb) (MAXQUOTAS*EXT3_QUOTA_TRANS_BLOCKS(sb))
90 #define EXT3_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT3_QUOTA_INIT_BLOCKS(sb))
91 #define EXT3_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT3_QUOTA_DEL_BLOCKS(sb))
92
93 int
94 ext3_mark_iloc_dirty(handle_t *handle,
95 struct inode *inode,
96 struct ext3_iloc *iloc);
97
98 /*
99 * On success, We end up with an outstanding reference count against
100 * iloc->bh. This _must_ be cleaned up later.
101 */
102
103 int ext3_reserve_inode_write(handle_t *handle, struct inode *inode,
104 struct ext3_iloc *iloc);
105
106 int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode);
107
108 /*
109 * Wrapper functions with which ext3 calls into JBD. The intent here is
110 * to allow these to be turned into appropriate stubs so ext3 can control
 * ext2 filesystems, so ext2+ext3 systems only need one fs. This work hasn't
112 * been done yet.
113 */
114
/* Thin wrapper around journal_release_buffer(). */
static inline void ext3_journal_release_buffer(handle_t *handle,
						struct buffer_head *bh)
{
	journal_release_buffer(handle, bh);
}
120
121 void ext3_journal_abort_handle(const char *caller, const char *err_fn,
122 struct buffer_head *bh, handle_t *handle, int err);
123
124 int __ext3_journal_get_undo_access(const char *where, handle_t *handle,
125 struct buffer_head *bh);
126
127 int __ext3_journal_get_write_access(const char *where, handle_t *handle,
128 struct buffer_head *bh);
129
130 int __ext3_journal_forget(const char *where, handle_t *handle,
131 struct buffer_head *bh);
132
133 int __ext3_journal_revoke(const char *where, handle_t *handle,
134 unsigned long blocknr, struct buffer_head *bh);
135
136 int __ext3_journal_get_create_access(const char *where,
137 handle_t *handle, struct buffer_head *bh);
138
139 int __ext3_journal_dirty_metadata(const char *where,
140 handle_t *handle, struct buffer_head *bh);
141
142 #define ext3_journal_get_undo_access(handle, bh) \
143 __ext3_journal_get_undo_access(__func__, (handle), (bh))
144 #define ext3_journal_get_write_access(handle, bh) \
145 __ext3_journal_get_write_access(__func__, (handle), (bh))
146 #define ext3_journal_revoke(handle, blocknr, bh) \
147 __ext3_journal_revoke(__func__, (handle), (blocknr), (bh))
148 #define ext3_journal_get_create_access(handle, bh) \
149 __ext3_journal_get_create_access(__func__, (handle), (bh))
150 #define ext3_journal_dirty_metadata(handle, bh) \
151 __ext3_journal_dirty_metadata(__func__, (handle), (bh))
152 #define ext3_journal_forget(handle, bh) \
153 __ext3_journal_forget(__func__, (handle), (bh))
154
155 int ext3_journal_dirty_data(handle_t *handle, struct buffer_head *bh);
156
157 handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks);
158 int __ext3_journal_stop(const char *where, handle_t *handle);
159
/* Start a transaction on @inode's filesystem; convenience wrapper
 * around ext3_journal_start_sb(). */
static inline handle_t *ext3_journal_start(struct inode *inode, int nblocks)
{
	return ext3_journal_start_sb(inode->i_sb, nblocks);
}
164
165 #define ext3_journal_stop(handle) \
166 __ext3_journal_stop(__func__, (handle))
167
/* Handle of the transaction the current task is running, if any. */
static inline handle_t *ext3_journal_current_handle(void)
{
	return journal_current_handle();
}

/* Thin wrapper around journal_extend(): ask for @nblocks more credits. */
static inline int ext3_journal_extend(handle_t *handle, int nblocks)
{
	return journal_extend(handle, nblocks);
}

/* Thin wrapper around journal_restart(). */
static inline int ext3_journal_restart(handle_t *handle, int nblocks)
{
	return journal_restart(handle, nblocks);
}

/* Thin wrapper around journal_blocks_per_page(). */
static inline int ext3_journal_blocks_per_page(struct inode *inode)
{
	return journal_blocks_per_page(inode);
}

/* Thin wrapper around journal_force_commit(). */
static inline int ext3_journal_force_commit(journal_t *journal)
{
	return journal_force_commit(journal);
}
192
193 /* super.c */
194 int ext3_force_commit(struct super_block *sb);
195
196 static inline int ext3_should_journal_data(struct inode *inode)
197 {
198 if (!S_ISREG(inode->i_mode))
199 return 1;
200 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA)
201 return 1;
202 if (EXT3_I(inode)->i_flags & EXT3_JOURNAL_DATA_FL)
203 return 1;
204 return 0;
205 }
206
207 static inline int ext3_should_order_data(struct inode *inode)
208 {
209 if (!S_ISREG(inode->i_mode))
210 return 0;
211 if (EXT3_I(inode)->i_flags & EXT3_JOURNAL_DATA_FL)
212 return 0;
213 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA)
214 return 1;
215 return 0;
216 }
217
218 static inline int ext3_should_writeback_data(struct inode *inode)
219 {
220 if (!S_ISREG(inode->i_mode))
221 return 0;
222 if (EXT3_I(inode)->i_flags & EXT3_JOURNAL_DATA_FL)
223 return 0;
224 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)
225 return 1;
226 return 0;
227 }
228
229 #endif /* _LINUX_EXT3_JBD_H */
230 1 /*