Commit dd1f723bf56bd96efc9d90e9e60dc511c79de48f

Authored by Theodore Ts'o
1 parent f275411440

ext4: use prandom_u32() instead of get_random_bytes()

Many of the uses of get_random_bytes() do not actually need
cryptographically secure random numbers.  Replace those uses with a
call to prandom_u32(), which is faster and which doesn't consume
entropy from the /dev/random driver.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

Showing 3 changed files with 4 additions and 7 deletions Inline Diff

/*
 *  linux/fs/ext4/ialloc.c
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 * BSD ufs-inspired inode and directory allocation by
 * Stephen Tweedie (sct@redhat.com), 1993
 * Big-endian to little-endian byte-swapping/bitmaps by
 * David S. Miller (davem@caip.rutgers.edu), 1995
 */
14 14
15 #include <linux/time.h> 15 #include <linux/time.h>
16 #include <linux/fs.h> 16 #include <linux/fs.h>
17 #include <linux/jbd2.h> 17 #include <linux/jbd2.h>
18 #include <linux/stat.h> 18 #include <linux/stat.h>
19 #include <linux/string.h> 19 #include <linux/string.h>
20 #include <linux/quotaops.h> 20 #include <linux/quotaops.h>
21 #include <linux/buffer_head.h> 21 #include <linux/buffer_head.h>
22 #include <linux/random.h> 22 #include <linux/random.h>
23 #include <linux/bitops.h> 23 #include <linux/bitops.h>
24 #include <linux/blkdev.h> 24 #include <linux/blkdev.h>
25 #include <asm/byteorder.h> 25 #include <asm/byteorder.h>
26 26
27 #include "ext4.h" 27 #include "ext4.h"
28 #include "ext4_jbd2.h" 28 #include "ext4_jbd2.h"
29 #include "xattr.h" 29 #include "xattr.h"
30 #include "acl.h" 30 #include "acl.h"
31 31
32 #include <trace/events/ext4.h> 32 #include <trace/events/ext4.h>
33 33
/*
 * ialloc.c contains the inodes allocation and deallocation routines
 */

/*
 * The free inodes are managed by bitmaps.  A file system contains several
 * blocks groups.  Each group contains 1 bitmap block for blocks, 1 bitmap
 * block for inodes, N blocks for the inode table and data blocks.
 *
 * The file system contains group descriptors which are located after the
 * super block.  Each descriptor contains the number of the bitmap block and
 * the free blocks count in the block.
 */
47 47
/*
 * Mark the bitmap bits from start_bit up to (but not including) end_bit
 * as in use.  To avoid calling the atomic setbit hundreds or thousands
 * of times, bits are set one at a time only up to the next byte
 * boundary (which keeps endianness right); the remaining whole bytes
 * are filled with memset, as there are no other users of that region.
 */
void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
{
	int bit;

	if (start_bit >= end_bit)
		return;

	ext4_debug("mark end bits +%d through +%d used\n", start_bit, end_bit);
	/* Set bit-by-bit up to the first byte boundary at or after start_bit. */
	for (bit = start_bit; bit < ((start_bit + 7) & ~7UL); bit++)
		ext4_set_bit(bit, bitmap);
	/* Fill the remaining whole bytes in one go. */
	if (bit < end_bit)
		memset(bitmap + (bit >> 3), 0xff, (end_bit - bit) >> 3);
}
66 66
67 /* Initializes an uninitialized inode bitmap */ 67 /* Initializes an uninitialized inode bitmap */
68 static unsigned ext4_init_inode_bitmap(struct super_block *sb, 68 static unsigned ext4_init_inode_bitmap(struct super_block *sb,
69 struct buffer_head *bh, 69 struct buffer_head *bh,
70 ext4_group_t block_group, 70 ext4_group_t block_group,
71 struct ext4_group_desc *gdp) 71 struct ext4_group_desc *gdp)
72 { 72 {
73 struct ext4_group_info *grp; 73 struct ext4_group_info *grp;
74 J_ASSERT_BH(bh, buffer_locked(bh)); 74 J_ASSERT_BH(bh, buffer_locked(bh));
75 75
76 /* If checksum is bad mark all blocks and inodes use to prevent 76 /* If checksum is bad mark all blocks and inodes use to prevent
77 * allocation, essentially implementing a per-group read-only flag. */ 77 * allocation, essentially implementing a per-group read-only flag. */
78 if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { 78 if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
79 ext4_error(sb, "Checksum bad for group %u", block_group); 79 ext4_error(sb, "Checksum bad for group %u", block_group);
80 grp = ext4_get_group_info(sb, block_group); 80 grp = ext4_get_group_info(sb, block_group);
81 set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state); 81 set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
82 set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state); 82 set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
83 return 0; 83 return 0;
84 } 84 }
85 85
86 memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8); 86 memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8);
87 ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, 87 ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
88 bh->b_data); 88 bh->b_data);
89 ext4_inode_bitmap_csum_set(sb, block_group, gdp, bh, 89 ext4_inode_bitmap_csum_set(sb, block_group, gdp, bh,
90 EXT4_INODES_PER_GROUP(sb) / 8); 90 EXT4_INODES_PER_GROUP(sb) / 8);
91 ext4_group_desc_csum_set(sb, block_group, gdp); 91 ext4_group_desc_csum_set(sb, block_group, gdp);
92 92
93 return EXT4_INODES_PER_GROUP(sb); 93 return EXT4_INODES_PER_GROUP(sb);
94 } 94 }
95 95
/*
 * I/O completion handler for bitmap reads: on success, flag both the
 * buffer and the bitmap as uptodate, then unlock and drop the extra
 * reference taken before submission.
 */
void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate)
{
	if (uptodate) {
		set_buffer_uptodate(bh);
		set_bitmap_uptodate(bh);
	}
	unlock_buffer(bh);
	put_bh(bh);
}
105 105
/*
 * Read the inode allocation bitmap for a given block_group, reading
 * into the specified slot in the superblock's bitmap cache.
 *
 * Return buffer_head of bitmap on success or NULL.
 */
static struct buffer_head *
ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
{
	struct ext4_group_desc *desc;
	struct buffer_head *bh = NULL;
	ext4_fsblk_t bitmap_blk;
	struct ext4_group_info *grp;

	desc = ext4_get_group_desc(sb, block_group, NULL);
	if (!desc)
		return NULL;

	bitmap_blk = ext4_inode_bitmap(sb, desc);
	bh = sb_getblk(sb, bitmap_blk);
	if (unlikely(!bh)) {
		ext4_error(sb, "Cannot read inode bitmap - "
			   "block_group = %u, inode_bitmap = %llu",
			   block_group, bitmap_blk);
		return NULL;
	}
	/* Fast path: bitmap already cached; only the checksum check remains. */
	if (bitmap_uptodate(bh))
		goto verify;

	lock_buffer(bh);
	/* Re-check under the buffer lock: someone may have filled it in. */
	if (bitmap_uptodate(bh)) {
		unlock_buffer(bh);
		goto verify;
	}

	/*
	 * An uninitialized group gets its bitmap constructed in memory
	 * under the group lock, rather than read from disk.
	 */
	ext4_lock_group(sb, block_group);
	if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
		ext4_init_inode_bitmap(sb, bh, block_group, desc);
		set_bitmap_uptodate(bh);
		set_buffer_uptodate(bh);
		set_buffer_verified(bh);
		ext4_unlock_group(sb, block_group);
		unlock_buffer(bh);
		return bh;
	}
	ext4_unlock_group(sb, block_group);

	if (buffer_uptodate(bh)) {
		/*
		 * if not uninit if bh is uptodate,
		 * bitmap is also uptodate
		 */
		set_bitmap_uptodate(bh);
		unlock_buffer(bh);
		goto verify;
	}
	/*
	 * submit the buffer_head for reading
	 */
	trace_ext4_load_inode_bitmap(sb, block_group);
	bh->b_end_io = ext4_end_bitmap_read;
	get_bh(bh);	/* extra ref released by ext4_end_bitmap_read() */
	submit_bh(READ | REQ_META | REQ_PRIO, bh);
	wait_on_buffer(bh);
	if (!buffer_uptodate(bh)) {
		put_bh(bh);
		ext4_error(sb, "Cannot read inode bitmap - "
			   "block_group = %u, inode_bitmap = %llu",
			   block_group, bitmap_blk);
		return NULL;
	}

verify:
	/* Verify the bitmap checksum once per buffer lifetime. */
	ext4_lock_group(sb, block_group);
	if (!buffer_verified(bh) &&
	    !ext4_inode_bitmap_csum_verify(sb, block_group, desc, bh,
					   EXT4_INODES_PER_GROUP(sb) / 8)) {
		ext4_unlock_group(sb, block_group);
		put_bh(bh);
		ext4_error(sb, "Corrupt inode bitmap - block_group = %u, "
			   "inode_bitmap = %llu", block_group, bitmap_blk);
		/* Poison the group so future allocations skip it. */
		grp = ext4_get_group_info(sb, block_group);
		set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
		return NULL;
	}
	ext4_unlock_group(sb, block_group);
	set_buffer_verified(bh);
	return bh;
}
195 195
/*
 * NOTE! When we get the inode, we're the only people
 * that have access to it, and as such there are no
 * race conditions we have to worry about. The inode
 * is not on the hash-lists, and it cannot be reached
 * through the filesystem because the directory entry
 * has been deleted earlier.
 *
 * HOWEVER: we must make sure that we get no aliases,
 * which means that we have to call "clear_inode()"
 * _before_ we mark the inode not in use in the inode
 * bitmaps. Otherwise a newly created file might use
 * the same inode number (not actually the same pointer
 * though), and then we'd have two inodes sharing the
 * same inode number and space on the harddisk.
 */
void ext4_free_inode(handle_t *handle, struct inode *inode)
{
	struct super_block *sb = inode->i_sb;
	int is_directory;
	unsigned long ino;
	struct buffer_head *bitmap_bh = NULL;
	struct buffer_head *bh2;
	ext4_group_t block_group;
	unsigned long bit;
	struct ext4_group_desc *gdp;
	struct ext4_super_block *es;
	struct ext4_sb_info *sbi;
	int fatal = 0, err, count, cleared;
	struct ext4_group_info *grp;

	/* Sanity checks: live superblock, last reference, zero link count. */
	if (!sb) {
		printk(KERN_ERR "EXT4-fs: %s:%d: inode on "
		       "nonexistent device\n", __func__, __LINE__);
		return;
	}
	if (atomic_read(&inode->i_count) > 1) {
		ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: count=%d",
			 __func__, __LINE__, inode->i_ino,
			 atomic_read(&inode->i_count));
		return;
	}
	if (inode->i_nlink) {
		ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: nlink=%d\n",
			 __func__, __LINE__, inode->i_ino, inode->i_nlink);
		return;
	}
	sbi = EXT4_SB(sb);

	ino = inode->i_ino;
	ext4_debug("freeing inode %lu\n", ino);
	trace_ext4_free_inode(inode);

	/*
	 * Note: we must free any quota before locking the superblock,
	 * as writing the quota to disk may need the lock as well.
	 */
	dquot_initialize(inode);
	ext4_xattr_delete_inode(handle, inode);
	dquot_free_inode(inode);
	dquot_drop(inode);

	is_directory = S_ISDIR(inode->i_mode);

	/* Do this BEFORE marking the inode not in use or returning an error */
	ext4_clear_inode(inode);

	es = EXT4_SB(sb)->s_es;
	if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
		ext4_error(sb, "reserved or nonexistent inode %lu", ino);
		goto error_return;
	}
	/* Map the inode number to its block group and bit within the bitmap. */
	block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
	bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
	bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
	/* Don't bother if the inode bitmap is corrupt. */
	grp = ext4_get_group_info(sb, block_group);
	if (unlikely(EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) || !bitmap_bh)
		goto error_return;

	/* Journal write access is needed before modifying the bitmap block. */
	BUFFER_TRACE(bitmap_bh, "get_write_access");
	fatal = ext4_journal_get_write_access(handle, bitmap_bh);
	if (fatal)
		goto error_return;

	fatal = -ESRCH;
	gdp = ext4_get_group_desc(sb, block_group, &bh2);
	if (gdp) {
		BUFFER_TRACE(bh2, "get_write_access");
		fatal = ext4_journal_get_write_access(handle, bh2);
	}
	/* Clear the bit and update the on-disk counters under the group lock. */
	ext4_lock_group(sb, block_group);
	cleared = ext4_test_and_clear_bit(bit, bitmap_bh->b_data);
	if (fatal || !cleared) {
		ext4_unlock_group(sb, block_group);
		goto out;
	}

	count = ext4_free_inodes_count(sb, gdp) + 1;
	ext4_free_inodes_set(sb, gdp, count);
	if (is_directory) {
		count = ext4_used_dirs_count(sb, gdp) - 1;
		ext4_used_dirs_set(sb, gdp, count);
		percpu_counter_dec(&sbi->s_dirs_counter);
	}
	/* Checksums must be recomputed after the bitmap/descriptor changed. */
	ext4_inode_bitmap_csum_set(sb, block_group, gdp, bitmap_bh,
				   EXT4_INODES_PER_GROUP(sb) / 8);
	ext4_group_desc_csum_set(sb, block_group, gdp);
	ext4_unlock_group(sb, block_group);

	/* Update the in-core (per-cpu and flex-group) counters as well. */
	percpu_counter_inc(&sbi->s_freeinodes_counter);
	if (sbi->s_log_groups_per_flex) {
		ext4_group_t f = ext4_flex_group(sbi, block_group);

		atomic_inc(&sbi->s_flex_groups[f].free_inodes);
		if (is_directory)
			atomic_dec(&sbi->s_flex_groups[f].used_dirs);
	}
	BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata");
	fatal = ext4_handle_dirty_metadata(handle, NULL, bh2);
out:
	if (cleared) {
		BUFFER_TRACE(bitmap_bh, "call ext4_handle_dirty_metadata");
		err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
		if (!fatal)
			fatal = err;
	} else {
		/* Bit was already clear: double free — poison the group. */
		ext4_error(sb, "bit already cleared for inode %lu", ino);
		set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
	}

error_return:
	brelse(bitmap_bh);
	ext4_std_error(sb, fatal);
}
331 331
/*
 * Per-group (or per-flex_bg) usage summary consumed by Orlov's
 * directory allocator; filled in by get_orlov_stats().
 */
struct orlov_stats {
	__u64 free_clusters;	/* free clusters in the group */
	__u32 free_inodes;	/* free inodes in the group */
	__u32 used_dirs;	/* directories allocated in the group */
};
337 337
338 /* 338 /*
339 * Helper function for Orlov's allocator; returns critical information 339 * Helper function for Orlov's allocator; returns critical information
340 * for a particular block group or flex_bg. If flex_size is 1, then g 340 * for a particular block group or flex_bg. If flex_size is 1, then g
341 * is a block group number; otherwise it is flex_bg number. 341 * is a block group number; otherwise it is flex_bg number.
342 */ 342 */
343 static void get_orlov_stats(struct super_block *sb, ext4_group_t g, 343 static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
344 int flex_size, struct orlov_stats *stats) 344 int flex_size, struct orlov_stats *stats)
345 { 345 {
346 struct ext4_group_desc *desc; 346 struct ext4_group_desc *desc;
347 struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups; 347 struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups;
348 348
349 if (flex_size > 1) { 349 if (flex_size > 1) {
350 stats->free_inodes = atomic_read(&flex_group[g].free_inodes); 350 stats->free_inodes = atomic_read(&flex_group[g].free_inodes);
351 stats->free_clusters = atomic64_read(&flex_group[g].free_clusters); 351 stats->free_clusters = atomic64_read(&flex_group[g].free_clusters);
352 stats->used_dirs = atomic_read(&flex_group[g].used_dirs); 352 stats->used_dirs = atomic_read(&flex_group[g].used_dirs);
353 return; 353 return;
354 } 354 }
355 355
356 desc = ext4_get_group_desc(sb, g, NULL); 356 desc = ext4_get_group_desc(sb, g, NULL);
357 if (desc) { 357 if (desc) {
358 stats->free_inodes = ext4_free_inodes_count(sb, desc); 358 stats->free_inodes = ext4_free_inodes_count(sb, desc);
359 stats->free_clusters = ext4_free_group_clusters(sb, desc); 359 stats->free_clusters = ext4_free_group_clusters(sb, desc);
360 stats->used_dirs = ext4_used_dirs_count(sb, desc); 360 stats->used_dirs = ext4_used_dirs_count(sb, desc);
361 } else { 361 } else {
362 stats->free_inodes = 0; 362 stats->free_inodes = 0;
363 stats->free_clusters = 0; 363 stats->free_clusters = 0;
364 stats->used_dirs = 0; 364 stats->used_dirs = 0;
365 } 365 }
366 } 366 }
367 367
/*
 * Orlov's allocator for directories.
 *
 * We always try to spread first-level directories.
 *
 * If there are blockgroups with both free inodes and free blocks counts
 * not worse than average we return one with smallest directory count.
 * Otherwise we simply return a random group.
 *
 * For the rest rules look so:
 *
 * It's OK to put directory into a group unless
 * it has too many directories already (max_dirs) or
 * it has too few free inodes left (min_inodes) or
 * it has too few free blocks left (min_blocks) or
 * Parent's group is preferred, if it doesn't satisfy these
 * conditions we search cyclically through the rest. If none
 * of the groups look good we just look for a group with more
 * free inodes than average (starting at parent's group).
 */

static int find_group_orlov(struct super_block *sb, struct inode *parent,
			    ext4_group_t *group, umode_t mode,
			    const struct qstr *qstr)
{
	ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	ext4_group_t real_ngroups = ext4_get_groups_count(sb);
	int inodes_per_group = EXT4_INODES_PER_GROUP(sb);
	unsigned int freei, avefreei, grp_free;
	ext4_fsblk_t freeb, avefreec;
	unsigned int ndirs;
	int max_dirs, min_inodes;
	ext4_grpblk_t min_clusters;
	ext4_group_t i, grp, g, ngroups;
	struct ext4_group_desc *desc;
	struct orlov_stats stats;
	int flex_size = ext4_flex_bg_size(sbi);
	struct dx_hash_info hinfo;

	/* With flex_bg, decisions are made per flex group, not per group. */
	ngroups = real_ngroups;
	if (flex_size > 1) {
		ngroups = (real_ngroups + flex_size - 1) >>
			sbi->s_log_groups_per_flex;
		parent_group >>= sbi->s_log_groups_per_flex;
	}

	/* Filesystem-wide averages used to judge candidate groups. */
	freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter);
	avefreei = freei / ngroups;
	freeb = EXT4_C2B(sbi,
		percpu_counter_read_positive(&sbi->s_freeclusters_counter));
	avefreec = freeb;
	do_div(avefreec, ngroups);
	ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);

	if (S_ISDIR(mode) &&
	    ((parent == sb->s_root->d_inode) ||
	     (ext4_test_inode_flag(parent, EXT4_INODE_TOPDIR)))) {
		int best_ndir = inodes_per_group;
		int ret = -1;

		/*
		 * Top-level directories: start from a hashed (or, lacking
		 * a name, pseudo-random) group and pick the least-loaded
		 * acceptable one.  prandom_u32() suffices here — this
		 * choice needs no cryptographic strength.
		 */
		if (qstr) {
			hinfo.hash_version = DX_HASH_HALF_MD4;
			hinfo.seed = sbi->s_hash_seed;
			ext4fs_dirhash(qstr->name, qstr->len, &hinfo);
			grp = hinfo.hash;
		} else
			grp = prandom_u32();
		parent_group = (unsigned)grp % ngroups;
		for (i = 0; i < ngroups; i++) {
			g = (parent_group + i) % ngroups;
			get_orlov_stats(sb, g, flex_size, &stats);
			if (!stats.free_inodes)
				continue;
			if (stats.used_dirs >= best_ndir)
				continue;
			if (stats.free_inodes < avefreei)
				continue;
			if (stats.free_clusters < avefreec)
				continue;
			grp = g;
			ret = 0;
			best_ndir = stats.used_dirs;
		}
		if (ret)
			goto fallback;
	found_flex_bg:
		if (flex_size == 1) {
			*group = grp;
			return 0;
		}

		/*
		 * We pack inodes at the beginning of the flexgroup's
		 * inode tables.  Block allocation decisions will do
		 * something similar, although regular files will
		 * start at 2nd block group of the flexgroup. See
		 * ext4_ext_find_goal() and ext4_find_near().
		 */
		grp *= flex_size;
		for (i = 0; i < flex_size; i++) {
			if (grp+i >= real_ngroups)
				break;
			desc = ext4_get_group_desc(sb, grp+i, NULL);
			if (desc && ext4_free_inodes_count(sb, desc)) {
				*group = grp+i;
				return 0;
			}
		}
		goto fallback;
	}

	/* Non-top-level directory: compute acceptance thresholds. */
	max_dirs = ndirs / ngroups + inodes_per_group / 16;
	min_inodes = avefreei - inodes_per_group*flex_size / 4;
	if (min_inodes < 1)
		min_inodes = 1;
	min_clusters = avefreec - EXT4_CLUSTERS_PER_GROUP(sb)*flex_size / 4;

	/*
	 * Start looking in the flex group where we last allocated an
	 * inode for this parent directory
	 */
	if (EXT4_I(parent)->i_last_alloc_group != ~0) {
		parent_group = EXT4_I(parent)->i_last_alloc_group;
		if (flex_size > 1)
			parent_group >>= sbi->s_log_groups_per_flex;
	}

	for (i = 0; i < ngroups; i++) {
		grp = (parent_group + i) % ngroups;
		get_orlov_stats(sb, grp, flex_size, &stats);
		if (stats.used_dirs >= max_dirs)
			continue;
		if (stats.free_inodes < min_inodes)
			continue;
		if (stats.free_clusters < min_clusters)
			continue;
		goto found_flex_bg;
	}

fallback:
	/* No group met the thresholds: take any group with >= average free inodes. */
	ngroups = real_ngroups;
	avefreei = freei / ngroups;
fallback_retry:
	parent_group = EXT4_I(parent)->i_block_group;
	for (i = 0; i < ngroups; i++) {
		grp = (parent_group + i) % ngroups;
		desc = ext4_get_group_desc(sb, grp, NULL);
		if (desc) {
			grp_free = ext4_free_inodes_count(sb, desc);
			if (grp_free && grp_free >= avefreei) {
				*group = grp;
				return 0;
			}
		}
	}

	if (avefreei) {
		/*
		 * The free-inodes counter is approximate, and for really small
		 * filesystems the above test can fail to find any blockgroups
		 */
		avefreei = 0;
		goto fallback_retry;
	}

	return -1;
}
536 536
/*
 * find_group_other - pick a block group for a non-directory inode
 * @sb:     superblock of the filesystem
 * @parent: directory the new inode will be linked into
 * @group:  on success, set to the chosen block group number
 * @mode:   mode of the new inode; forwarded to the Orlov allocator on
 *          the flex_bg fallback path so topdir heuristics are avoided
 *
 * Search order: the parent's flex group (or block group), then the
 * last-used flex group, then a quadratic hash over the groups, and
 * finally a linear scan.  Returns 0 with *group set, or -1 if no group
 * with a free inode exists.
 */
static int find_group_other(struct super_block *sb, struct inode *parent,
			    ext4_group_t *group, umode_t mode)
{
	ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
	ext4_group_t i, last, ngroups = ext4_get_groups_count(sb);
	struct ext4_group_desc *desc;
	int flex_size = ext4_flex_bg_size(EXT4_SB(sb));

	/*
	 * Try to place the inode in the same flex group as its
	 * parent.  If we can't find space, use the Orlov algorithm to
	 * find another flex group, and store that information in the
	 * parent directory's inode information so that we use that
	 * flex group for future allocations.
	 */
	if (flex_size > 1) {
		int retry = 0;

	try_again:
		/* round down to the first group of this flex group */
		parent_group &= ~(flex_size-1);
		last = parent_group + flex_size;
		if (last > ngroups)
			last = ngroups;
		for (i = parent_group; i < last; i++) {
			desc = ext4_get_group_desc(sb, i, NULL);
			if (desc && ext4_free_inodes_count(sb, desc)) {
				*group = i;
				return 0;
			}
		}
		/*
		 * Nothing free in the parent's flex group; retry once
		 * starting from the flex group we last allocated from,
		 * if one was recorded (~0 means "unknown").
		 */
		if (!retry && EXT4_I(parent)->i_last_alloc_group != ~0) {
			retry = 1;
			parent_group = EXT4_I(parent)->i_last_alloc_group;
			goto try_again;
		}
		/*
		 * If this didn't work, use the Orlov search algorithm
		 * to find a new flex group; we pass in the mode to
		 * avoid the topdir algorithms.
		 */
		*group = parent_group + flex_size;
		if (*group > ngroups)
			*group = 0;
		return find_group_orlov(sb, parent, group, mode, NULL);
	}

	/*
	 * Try to place the inode in its parent directory
	 */
	*group = parent_group;
	desc = ext4_get_group_desc(sb, *group, NULL);
	if (desc && ext4_free_inodes_count(sb, desc) &&
	    ext4_free_group_clusters(sb, desc))
		return 0;

	/*
	 * We're going to place this inode in a different blockgroup from its
	 * parent.  We want to cause files in a common directory to all land in
	 * the same blockgroup.  But we want files which are in a different
	 * directory which shares a blockgroup with our parent to land in a
	 * different blockgroup.
	 *
	 * So add our directory's i_ino into the starting point for the hash.
	 */
	*group = (*group + parent->i_ino) % ngroups;

	/*
	 * Use a quadratic hash to find a group with a free inode and some free
	 * blocks.
	 */
	for (i = 1; i < ngroups; i <<= 1) {
		*group += i;
		if (*group >= ngroups)
			*group -= ngroups;
		desc = ext4_get_group_desc(sb, *group, NULL);
		if (desc && ext4_free_inodes_count(sb, desc) &&
		    ext4_free_group_clusters(sb, desc))
			return 0;
	}

	/*
	 * That failed: try linear search for a free inode, even if that group
	 * has no free blocks.
	 */
	*group = parent_group;
	for (i = 0; i < ngroups; i++) {
		if (++*group >= ngroups)
			*group = 0;
		desc = ext4_get_group_desc(sb, *group, NULL);
		if (desc && ext4_free_inodes_count(sb, desc))
			return 0;
	}

	return -1;
}
632 632
/*
 * In no journal mode, if an inode has recently been deleted, we want
 * to avoid reusing it until we're reasonably sure the inode table
 * block has been written back to disk.  (Yes, these values are
 * somewhat arbitrary...)
 */
#define RECENTCY_MIN	5
#define RECENTCY_DIRTY	30

/*
 * recently_deleted - was this inode slot freed too recently to reuse?
 * @sb:    superblock
 * @group: block group the candidate inode lives in
 * @ino:   zero-based inode index within @group
 *
 * Returns 1 if the on-disk dtime of the inode indicates it was deleted
 * within the last RECENTCY_MIN seconds (RECENTCY_MIN + RECENTCY_DIRTY
 * if its inode table block is still dirty in the buffer cache), 0
 * otherwise.  Only meaningful in no-journal mode (see caller).
 */
static int recently_deleted(struct super_block *sb, ext4_group_t group, int ino)
{
	struct ext4_group_desc *gdp;
	struct ext4_inode *raw_inode;
	struct buffer_head *bh;
	unsigned long dtime, now;
	int	inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
	int	offset, ret = 0, recentcy = RECENTCY_MIN;

	gdp = ext4_get_group_desc(sb, group, NULL);
	if (unlikely(!gdp))
		return 0;

	/*
	 * sb_getblk() only looks up / creates the buffer; it does not
	 * read from disk, so an !uptodate buffer means the block is not
	 * cached in memory.
	 */
	bh = sb_getblk(sb, ext4_inode_table(sb, gdp) +
		       (ino / inodes_per_block));
	if (unlikely(!bh) || !buffer_uptodate(bh))
		/*
		 * If the block is not in the buffer cache, then it
		 * must have been written out.
		 */
		goto out;

	/* Locate the raw inode within the inode table block */
	offset = (ino % inodes_per_block) * EXT4_INODE_SIZE(sb);
	raw_inode = (struct ext4_inode *) (bh->b_data + offset);
	dtime = le32_to_cpu(raw_inode->i_dtime);
	now = get_seconds();
	/* A dirty block hasn't hit the disk yet; wait longer */
	if (buffer_dirty(bh))
		recentcy += RECENTCY_DIRTY;

	if (dtime && (dtime < now) && (now < dtime + recentcy))
		ret = 1;
out:
	brelse(bh);
	return ret;
}
677 677
/*
 * There are two policies for allocating an inode.  If the new inode is
 * a directory, then a forward search is made for a block group with both
 * free space and a low directory-to-inode ratio; if that fails, then of
 * the groups with above-average free space, that group with the fewest
 * directories already is chosen.
 *
 * For other inodes, search forward from the parent directory's block
 * group to find a free inode.
 *
 * __ext4_new_inode - allocate and initialize a new in-core + on-disk inode
 * @handle:      running journal handle, or NULL to have one started lazily
 *               (using @handle_type/@nblocks/@line_no) once a candidate
 *               inode bit is found
 * @dir:         parent directory
 * @mode:        mode of the new inode
 * @qstr:        name of the new entry (passed to the Orlov allocator and
 *               to ext4_init_security())
 * @goal:        preferred inode number, or 0 to use sbi->s_inode_goal
 * @owner:       optional {uid, gid} pair overriding the usual ownership
 * @handle_type: journal handle type for the lazy-start case
 * @line_no:     caller's line number, for journal debugging
 * @nblocks:     credits for the lazily started handle
 *
 * Returns the locked new inode on success, or an ERR_PTR() on failure.
 */
struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
			       umode_t mode, const struct qstr *qstr,
			       __u32 goal, uid_t *owner, int handle_type,
			       unsigned int line_no, int nblocks)
{
	struct super_block *sb;
	struct buffer_head *inode_bitmap_bh = NULL;
	struct buffer_head *group_desc_bh;
	ext4_group_t ngroups, group = 0;
	unsigned long ino = 0;
	struct inode *inode;
	struct ext4_group_desc *gdp = NULL;
	struct ext4_inode_info *ei;
	struct ext4_sb_info *sbi;
	int ret2, err = 0;
	struct inode *ret;
	ext4_group_t i;
	ext4_group_t flex_group;
	struct ext4_group_info *grp;

	/* Cannot create files in a deleted directory */
	if (!dir || !dir->i_nlink)
		return ERR_PTR(-EPERM);

	sb = dir->i_sb;
	ngroups = ext4_get_groups_count(sb);
	trace_ext4_request_inode(dir, mode);
	inode = new_inode(sb);
	if (!inode)
		return ERR_PTR(-ENOMEM);
	ei = EXT4_I(inode);
	sbi = EXT4_SB(sb);

	/*
	 * Initialize owners and quota early so that we don't have to account
	 * for quota initialization worst case in standard inode creating
	 * transaction
	 */
	if (owner) {
		inode->i_mode = mode;
		i_uid_write(inode, owner[0]);
		i_gid_write(inode, owner[1]);
	} else if (test_opt(sb, GRPID)) {
		inode->i_mode = mode;
		inode->i_uid = current_fsuid();
		inode->i_gid = dir->i_gid;
	} else
		inode_init_owner(inode, dir, mode);
	dquot_initialize(inode);

	if (!goal)
		goal = sbi->s_inode_goal;

	/* An explicit (valid) goal bypasses the group-selection policies */
	if (goal && goal <= le32_to_cpu(sbi->s_es->s_inodes_count)) {
		group = (goal - 1) / EXT4_INODES_PER_GROUP(sb);
		ino = (goal - 1) % EXT4_INODES_PER_GROUP(sb);
		ret2 = 0;
		goto got_group;
	}

	if (S_ISDIR(mode))
		ret2 = find_group_orlov(sb, dir, &group, mode, qstr);
	else
		ret2 = find_group_other(sb, dir, &group, mode);

got_group:
	EXT4_I(dir)->i_last_alloc_group = group;
	err = -ENOSPC;
	if (ret2 == -1)
		goto out;

	/*
	 * Normally we will only go through one pass of this loop,
	 * unless we get unlucky and it turns out the group we selected
	 * had its last inode grabbed by someone else.
	 */
	for (i = 0; i < ngroups; i++, ino = 0) {
		err = -EIO;

		gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
		if (!gdp)
			goto out;

		/*
		 * Check free inodes count before loading bitmap.
		 */
		if (ext4_free_inodes_count(sb, gdp) == 0) {
			if (++group == ngroups)
				group = 0;
			continue;
		}

		grp = ext4_get_group_info(sb, group);
		/* Skip groups with already-known suspicious inode tables */
		if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
			if (++group == ngroups)
				group = 0;
			continue;
		}

		brelse(inode_bitmap_bh);
		inode_bitmap_bh = ext4_read_inode_bitmap(sb, group);
		/* Skip groups with suspicious inode tables */
		if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp) || !inode_bitmap_bh) {
			if (++group == ngroups)
				group = 0;
			continue;
		}

repeat_in_this_group:
		ino = ext4_find_next_zero_bit((unsigned long *)
					      inode_bitmap_bh->b_data,
					      EXT4_INODES_PER_GROUP(sb), ino);
		if (ino >= EXT4_INODES_PER_GROUP(sb))
			goto next_group;
		if (group == 0 && (ino+1) < EXT4_FIRST_INO(sb)) {
			ext4_error(sb, "reserved inode found cleared - "
				   "inode=%lu", ino + 1);
			continue;
		}
		/*
		 * Without a journal we must not reuse an inode whose
		 * table block may not have reached the disk yet; see
		 * recently_deleted().
		 */
		if ((EXT4_SB(sb)->s_journal == NULL) &&
		    recently_deleted(sb, group, ino)) {
			ino++;
			goto next_inode;
		}
		/*
		 * Start the journal handle lazily, only once we have a
		 * candidate bit, so the handle isn't held during the
		 * group search above.
		 */
		if (!handle) {
			BUG_ON(nblocks <= 0);
			handle = __ext4_journal_start_sb(dir->i_sb, line_no,
							 handle_type, nblocks,
							 0);
			if (IS_ERR(handle)) {
				err = PTR_ERR(handle);
				ext4_std_error(sb, err);
				goto out;
			}
		}
		BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
		err = ext4_journal_get_write_access(handle, inode_bitmap_bh);
		if (err) {
			ext4_std_error(sb, err);
			goto out;
		}
		/*
		 * The test-and-set under the group lock is what actually
		 * claims the inode against concurrent allocators; losing
		 * the race just loops back to scan for the next zero bit.
		 */
		ext4_lock_group(sb, group);
		ret2 = ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data);
		ext4_unlock_group(sb, group);
		ino++;		/* the inode bitmap is zero-based */
		if (!ret2)
			goto got; /* we grabbed the inode! */
next_inode:
		if (ino < EXT4_INODES_PER_GROUP(sb))
			goto repeat_in_this_group;
next_group:
		if (++group == ngroups)
			group = 0;
	}
	err = -ENOSPC;
	goto out;

got:
	BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata");
	err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh);
	if (err) {
		ext4_std_error(sb, err);
		goto out;
	}

	/* We may have to initialize the block bitmap if it isn't already */
	if (ext4_has_group_desc_csum(sb) &&
	    gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
		struct buffer_head *block_bitmap_bh;

		block_bitmap_bh = ext4_read_block_bitmap(sb, group);
		BUFFER_TRACE(block_bitmap_bh, "get block bitmap access");
		err = ext4_journal_get_write_access(handle, block_bitmap_bh);
		if (err) {
			brelse(block_bitmap_bh);
			ext4_std_error(sb, err);
			goto out;
		}

		BUFFER_TRACE(block_bitmap_bh, "dirty block bitmap");
		err = ext4_handle_dirty_metadata(handle, NULL, block_bitmap_bh);

		/* recheck and clear flag under lock if we still need to */
		ext4_lock_group(sb, group);
		if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
			gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
			ext4_free_group_clusters_set(sb, gdp,
				ext4_free_clusters_after_init(sb, group, gdp));
			ext4_block_bitmap_csum_set(sb, group, gdp,
						   block_bitmap_bh);
			ext4_group_desc_csum_set(sb, group, gdp);
		}
		ext4_unlock_group(sb, group);
		brelse(block_bitmap_bh);

		if (err) {
			ext4_std_error(sb, err);
			goto out;
		}
	}

	BUFFER_TRACE(group_desc_bh, "get_write_access");
	err = ext4_journal_get_write_access(handle, group_desc_bh);
	if (err) {
		ext4_std_error(sb, err);
		goto out;
	}

	/* Update the relevant bg descriptor fields */
	if (ext4_has_group_desc_csum(sb)) {
		int free;
		struct ext4_group_info *grp = ext4_get_group_info(sb, group);

		down_read(&grp->alloc_sem); /* protect vs itable lazyinit */
		ext4_lock_group(sb, group); /* while we modify the bg desc */
		free = EXT4_INODES_PER_GROUP(sb) -
			ext4_itable_unused_count(sb, gdp);
		if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
			gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
			free = 0;
		}
		/*
		 * Check the relative inode number against the last used
		 * relative inode number in this group. if it is greater
		 * we need to update the bg_itable_unused count
		 */
		if (ino > free)
			ext4_itable_unused_set(sb, gdp,
					(EXT4_INODES_PER_GROUP(sb) - ino));
		up_read(&grp->alloc_sem);
	} else {
		ext4_lock_group(sb, group);
	}

	ext4_free_inodes_set(sb, gdp, ext4_free_inodes_count(sb, gdp) - 1);
	if (S_ISDIR(mode)) {
		ext4_used_dirs_set(sb, gdp, ext4_used_dirs_count(sb, gdp) + 1);
		if (sbi->s_log_groups_per_flex) {
			ext4_group_t f = ext4_flex_group(sbi, group);

			atomic_inc(&sbi->s_flex_groups[f].used_dirs);
		}
	}
	if (ext4_has_group_desc_csum(sb)) {
		ext4_inode_bitmap_csum_set(sb, group, gdp, inode_bitmap_bh,
					   EXT4_INODES_PER_GROUP(sb) / 8);
		ext4_group_desc_csum_set(sb, group, gdp);
	}
	ext4_unlock_group(sb, group);

	BUFFER_TRACE(group_desc_bh, "call ext4_handle_dirty_metadata");
	err = ext4_handle_dirty_metadata(handle, NULL, group_desc_bh);
	if (err) {
		ext4_std_error(sb, err);
		goto out;
	}

	percpu_counter_dec(&sbi->s_freeinodes_counter);
	if (S_ISDIR(mode))
		percpu_counter_inc(&sbi->s_dirs_counter);

	if (sbi->s_log_groups_per_flex) {
		flex_group = ext4_flex_group(sbi, group);
		atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes);
	}

	/* ino was post-incremented above, so it is 1-based here */
	inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb);
	/* This is the optimal IO size (for stat), not the fs block size */
	inode->i_blocks = 0;
	inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime =
						       ext4_current_time(inode);

	memset(ei->i_data, 0, sizeof(ei->i_data));
	ei->i_dir_start_lookup = 0;
	ei->i_disksize = 0;

	/* Don't inherit extent flag from directory, amongst others. */
	ei->i_flags =
		ext4_mask_flags(mode, EXT4_I(dir)->i_flags & EXT4_FL_INHERITED);
	ei->i_file_acl = 0;
	ei->i_dtime = 0;
	ei->i_block_group = group;
	ei->i_last_alloc_group = ~0;

	ext4_set_inode_flags(inode);
	if (IS_DIRSYNC(inode))
		ext4_handle_sync(handle);
	if (insert_inode_locked(inode) < 0) {
		/*
		 * Likely a bitmap corruption causing inode to be allocated
		 * twice.
		 */
		err = -EIO;
		ext4_error(sb, "failed to insert inode %lu: doubly allocated?",
			   inode->i_ino);
		goto out;
	}
	spin_lock(&sbi->s_next_gen_lock);
	inode->i_generation = sbi->s_next_generation++;
	spin_unlock(&sbi->s_next_gen_lock);

	/* Precompute checksum seed for inode metadata */
	if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
			EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
		__u32 csum;
		__le32 inum = cpu_to_le32(inode->i_ino);
		__le32 gen = cpu_to_le32(inode->i_generation);
		csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&inum,
				   sizeof(inum));
		ei->i_csum_seed = ext4_chksum(sbi, csum, (__u8 *)&gen,
					      sizeof(gen));
	}

	ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */
	ext4_set_inode_state(inode, EXT4_STATE_NEW);

	ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize;

	ei->i_inline_off = 0;
	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_INLINE_DATA))
		ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);

	ret = inode;
	err = dquot_alloc_inode(inode);
	if (err)
		goto fail_drop;

	err = ext4_init_acl(handle, inode, dir);
	if (err)
		goto fail_free_drop;

	err = ext4_init_security(handle, inode, dir, qstr);
	if (err)
		goto fail_free_drop;

	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
		/* set extent flag only for directory, file and normal symlink*/
		if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) {
			ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS);
			ext4_ext_tree_init(handle, inode);
		}
	}

	if (ext4_handle_valid(handle)) {
		ei->i_sync_tid = handle->h_transaction->t_tid;
		ei->i_datasync_tid = handle->h_transaction->t_tid;
	}

	err = ext4_mark_inode_dirty(handle, inode);
	if (err) {
		ext4_std_error(sb, err);
		goto fail_free_drop;
	}

	ext4_debug("allocating inode %lu\n", inode->i_ino);
	trace_ext4_allocate_inode(inode, dir, mode);
	brelse(inode_bitmap_bh);
	return ret;

fail_free_drop:
	dquot_free_inode(inode);
fail_drop:
	clear_nlink(inode);
	unlock_new_inode(inode);
out:
	dquot_drop(inode);
	inode->i_flags |= S_NOQUOTA;
	iput(inode);
	brelse(inode_bitmap_bh);
	return ERR_PTR(err);
}
1060 1060
1061 /* Verify that we are loading a valid orphan from disk */ 1061 /* Verify that we are loading a valid orphan from disk */
1062 struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino) 1062 struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
1063 { 1063 {
1064 unsigned long max_ino = le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count); 1064 unsigned long max_ino = le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count);
1065 ext4_group_t block_group; 1065 ext4_group_t block_group;
1066 int bit; 1066 int bit;
1067 struct buffer_head *bitmap_bh; 1067 struct buffer_head *bitmap_bh;
1068 struct inode *inode = NULL; 1068 struct inode *inode = NULL;
1069 long err = -EIO; 1069 long err = -EIO;
1070 1070
1071 /* Error cases - e2fsck has already cleaned up for us */ 1071 /* Error cases - e2fsck has already cleaned up for us */
1072 if (ino > max_ino) { 1072 if (ino > max_ino) {
1073 ext4_warning(sb, "bad orphan ino %lu! e2fsck was run?", ino); 1073 ext4_warning(sb, "bad orphan ino %lu! e2fsck was run?", ino);
1074 goto error; 1074 goto error;
1075 } 1075 }
1076 1076
1077 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); 1077 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
1078 bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); 1078 bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
1079 bitmap_bh = ext4_read_inode_bitmap(sb, block_group); 1079 bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
1080 if (!bitmap_bh) { 1080 if (!bitmap_bh) {
1081 ext4_warning(sb, "inode bitmap error for orphan %lu", ino); 1081 ext4_warning(sb, "inode bitmap error for orphan %lu", ino);
1082 goto error; 1082 goto error;
1083 } 1083 }
1084 1084
1085 /* Having the inode bit set should be a 100% indicator that this 1085 /* Having the inode bit set should be a 100% indicator that this
1086 * is a valid orphan (no e2fsck run on fs). Orphans also include 1086 * is a valid orphan (no e2fsck run on fs). Orphans also include
1087 * inodes that were being truncated, so we can't check i_nlink==0. 1087 * inodes that were being truncated, so we can't check i_nlink==0.
1088 */ 1088 */
1089 if (!ext4_test_bit(bit, bitmap_bh->b_data)) 1089 if (!ext4_test_bit(bit, bitmap_bh->b_data))
1090 goto bad_orphan; 1090 goto bad_orphan;
1091 1091
1092 inode = ext4_iget(sb, ino); 1092 inode = ext4_iget(sb, ino);
1093 if (IS_ERR(inode)) 1093 if (IS_ERR(inode))
1094 goto iget_failed; 1094 goto iget_failed;
1095 1095
1096 /* 1096 /*
1097 * If the orphans has i_nlinks > 0 then it should be able to be 1097 * If the orphans has i_nlinks > 0 then it should be able to be
1098 * truncated, otherwise it won't be removed from the orphan list 1098 * truncated, otherwise it won't be removed from the orphan list
1099 * during processing and an infinite loop will result. 1099 * during processing and an infinite loop will result.
1100 */ 1100 */
1101 if (inode->i_nlink && !ext4_can_truncate(inode)) 1101 if (inode->i_nlink && !ext4_can_truncate(inode))
1102 goto bad_orphan; 1102 goto bad_orphan;
1103 1103
1104 if (NEXT_ORPHAN(inode) > max_ino) 1104 if (NEXT_ORPHAN(inode) > max_ino)
1105 goto bad_orphan; 1105 goto bad_orphan;
1106 brelse(bitmap_bh); 1106 brelse(bitmap_bh);
1107 return inode; 1107 return inode;
1108 1108
1109 iget_failed: 1109 iget_failed:
1110 err = PTR_ERR(inode); 1110 err = PTR_ERR(inode);
1111 inode = NULL; 1111 inode = NULL;
1112 bad_orphan: 1112 bad_orphan:
1113 ext4_warning(sb, "bad orphan inode %lu! e2fsck was run?", ino); 1113 ext4_warning(sb, "bad orphan inode %lu! e2fsck was run?", ino);
1114 printk(KERN_WARNING "ext4_test_bit(bit=%d, block=%llu) = %d\n", 1114 printk(KERN_WARNING "ext4_test_bit(bit=%d, block=%llu) = %d\n",
1115 bit, (unsigned long long)bitmap_bh->b_blocknr, 1115 bit, (unsigned long long)bitmap_bh->b_blocknr,
1116 ext4_test_bit(bit, bitmap_bh->b_data)); 1116 ext4_test_bit(bit, bitmap_bh->b_data));
1117 printk(KERN_WARNING "inode=%p\n", inode); 1117 printk(KERN_WARNING "inode=%p\n", inode);
1118 if (inode) { 1118 if (inode) {
1119 printk(KERN_WARNING "is_bad_inode(inode)=%d\n", 1119 printk(KERN_WARNING "is_bad_inode(inode)=%d\n",
1120 is_bad_inode(inode)); 1120 is_bad_inode(inode));
1121 printk(KERN_WARNING "NEXT_ORPHAN(inode)=%u\n", 1121 printk(KERN_WARNING "NEXT_ORPHAN(inode)=%u\n",
1122 NEXT_ORPHAN(inode)); 1122 NEXT_ORPHAN(inode));
1123 printk(KERN_WARNING "max_ino=%lu\n", max_ino); 1123 printk(KERN_WARNING "max_ino=%lu\n", max_ino);
1124 printk(KERN_WARNING "i_nlink=%u\n", inode->i_nlink); 1124 printk(KERN_WARNING "i_nlink=%u\n", inode->i_nlink);
1125 /* Avoid freeing blocks if we got a bad deleted inode */ 1125 /* Avoid freeing blocks if we got a bad deleted inode */
1126 if (inode->i_nlink == 0) 1126 if (inode->i_nlink == 0)
1127 inode->i_blocks = 0; 1127 inode->i_blocks = 0;
1128 iput(inode); 1128 iput(inode);
1129 } 1129 }
1130 brelse(bitmap_bh); 1130 brelse(bitmap_bh);
1131 error: 1131 error:
1132 return ERR_PTR(err); 1132 return ERR_PTR(err);
1133 } 1133 }
1134 1134
1135 unsigned long ext4_count_free_inodes(struct super_block *sb) 1135 unsigned long ext4_count_free_inodes(struct super_block *sb)
1136 { 1136 {
1137 unsigned long desc_count; 1137 unsigned long desc_count;
1138 struct ext4_group_desc *gdp; 1138 struct ext4_group_desc *gdp;
1139 ext4_group_t i, ngroups = ext4_get_groups_count(sb); 1139 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
1140 #ifdef EXT4FS_DEBUG 1140 #ifdef EXT4FS_DEBUG
1141 struct ext4_super_block *es; 1141 struct ext4_super_block *es;
1142 unsigned long bitmap_count, x; 1142 unsigned long bitmap_count, x;
1143 struct buffer_head *bitmap_bh = NULL; 1143 struct buffer_head *bitmap_bh = NULL;
1144 1144
1145 es = EXT4_SB(sb)->s_es; 1145 es = EXT4_SB(sb)->s_es;
1146 desc_count = 0; 1146 desc_count = 0;
1147 bitmap_count = 0; 1147 bitmap_count = 0;
1148 gdp = NULL; 1148 gdp = NULL;
1149 for (i = 0; i < ngroups; i++) { 1149 for (i = 0; i < ngroups; i++) {
1150 gdp = ext4_get_group_desc(sb, i, NULL); 1150 gdp = ext4_get_group_desc(sb, i, NULL);
1151 if (!gdp) 1151 if (!gdp)
1152 continue; 1152 continue;
1153 desc_count += ext4_free_inodes_count(sb, gdp); 1153 desc_count += ext4_free_inodes_count(sb, gdp);
1154 brelse(bitmap_bh); 1154 brelse(bitmap_bh);
1155 bitmap_bh = ext4_read_inode_bitmap(sb, i); 1155 bitmap_bh = ext4_read_inode_bitmap(sb, i);
1156 if (!bitmap_bh) 1156 if (!bitmap_bh)
1157 continue; 1157 continue;
1158 1158
1159 x = ext4_count_free(bitmap_bh->b_data, 1159 x = ext4_count_free(bitmap_bh->b_data,
1160 EXT4_INODES_PER_GROUP(sb) / 8); 1160 EXT4_INODES_PER_GROUP(sb) / 8);
1161 printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n", 1161 printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n",
1162 (unsigned long) i, ext4_free_inodes_count(sb, gdp), x); 1162 (unsigned long) i, ext4_free_inodes_count(sb, gdp), x);
1163 bitmap_count += x; 1163 bitmap_count += x;
1164 } 1164 }
1165 brelse(bitmap_bh); 1165 brelse(bitmap_bh);
1166 printk(KERN_DEBUG "ext4_count_free_inodes: " 1166 printk(KERN_DEBUG "ext4_count_free_inodes: "
1167 "stored = %u, computed = %lu, %lu\n", 1167 "stored = %u, computed = %lu, %lu\n",
1168 le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count); 1168 le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count);
1169 return desc_count; 1169 return desc_count;
1170 #else 1170 #else
1171 desc_count = 0; 1171 desc_count = 0;
1172 for (i = 0; i < ngroups; i++) { 1172 for (i = 0; i < ngroups; i++) {
1173 gdp = ext4_get_group_desc(sb, i, NULL); 1173 gdp = ext4_get_group_desc(sb, i, NULL);
1174 if (!gdp) 1174 if (!gdp)
1175 continue; 1175 continue;
1176 desc_count += ext4_free_inodes_count(sb, gdp); 1176 desc_count += ext4_free_inodes_count(sb, gdp);
1177 cond_resched(); 1177 cond_resched();
1178 } 1178 }
1179 return desc_count; 1179 return desc_count;
1180 #endif 1180 #endif
1181 } 1181 }
1182 1182
1183 /* Called at mount-time, super-block is locked */ 1183 /* Called at mount-time, super-block is locked */
1184 unsigned long ext4_count_dirs(struct super_block * sb) 1184 unsigned long ext4_count_dirs(struct super_block * sb)
1185 { 1185 {
1186 unsigned long count = 0; 1186 unsigned long count = 0;
1187 ext4_group_t i, ngroups = ext4_get_groups_count(sb); 1187 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
1188 1188
1189 for (i = 0; i < ngroups; i++) { 1189 for (i = 0; i < ngroups; i++) {
1190 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); 1190 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
1191 if (!gdp) 1191 if (!gdp)
1192 continue; 1192 continue;
1193 count += ext4_used_dirs_count(sb, gdp); 1193 count += ext4_used_dirs_count(sb, gdp);
1194 } 1194 }
1195 return count; 1195 return count;
1196 } 1196 }
1197 1197
1198 /* 1198 /*
1199 * Zeroes not yet zeroed inode table - just write zeroes through the whole 1199 * Zeroes not yet zeroed inode table - just write zeroes through the whole
1200 * inode table. Must be called without any spinlock held. The only place 1200 * inode table. Must be called without any spinlock held. The only place
1201 * where it is called from on active part of filesystem is ext4lazyinit 1201 * where it is called from on active part of filesystem is ext4lazyinit
1202 * thread, so we do not need any special locks, however we have to prevent 1202 * thread, so we do not need any special locks, however we have to prevent
1203 * inode allocation from the current group, so we take alloc_sem lock, to 1203 * inode allocation from the current group, so we take alloc_sem lock, to
1204 * block ext4_new_inode() until we are finished. 1204 * block ext4_new_inode() until we are finished.
1205 */ 1205 */
1206 int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, 1206 int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
1207 int barrier) 1207 int barrier)
1208 { 1208 {
1209 struct ext4_group_info *grp = ext4_get_group_info(sb, group); 1209 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
1210 struct ext4_sb_info *sbi = EXT4_SB(sb); 1210 struct ext4_sb_info *sbi = EXT4_SB(sb);
1211 struct ext4_group_desc *gdp = NULL; 1211 struct ext4_group_desc *gdp = NULL;
1212 struct buffer_head *group_desc_bh; 1212 struct buffer_head *group_desc_bh;
1213 handle_t *handle; 1213 handle_t *handle;
1214 ext4_fsblk_t blk; 1214 ext4_fsblk_t blk;
1215 int num, ret = 0, used_blks = 0; 1215 int num, ret = 0, used_blks = 0;
1216 1216
1217 /* This should not happen, but just to be sure check this */ 1217 /* This should not happen, but just to be sure check this */
1218 if (sb->s_flags & MS_RDONLY) { 1218 if (sb->s_flags & MS_RDONLY) {
1219 ret = 1; 1219 ret = 1;
1220 goto out; 1220 goto out;
1221 } 1221 }
1222 1222
1223 gdp = ext4_get_group_desc(sb, group, &group_desc_bh); 1223 gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
1224 if (!gdp) 1224 if (!gdp)
1225 goto out; 1225 goto out;
1226 1226
1227 /* 1227 /*
1228 * We do not need to lock this, because we are the only one 1228 * We do not need to lock this, because we are the only one
1229 * handling this flag. 1229 * handling this flag.
1230 */ 1230 */
1231 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)) 1231 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))
1232 goto out; 1232 goto out;
1233 1233
1234 handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 1); 1234 handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 1);
1235 if (IS_ERR(handle)) { 1235 if (IS_ERR(handle)) {
1236 ret = PTR_ERR(handle); 1236 ret = PTR_ERR(handle);
1237 goto out; 1237 goto out;
1238 } 1238 }
1239 1239
1240 down_write(&grp->alloc_sem); 1240 down_write(&grp->alloc_sem);
1241 /* 1241 /*
1242 * If inode bitmap was already initialized there may be some 1242 * If inode bitmap was already initialized there may be some
1243 * used inodes so we need to skip blocks with used inodes in 1243 * used inodes so we need to skip blocks with used inodes in
1244 * inode table. 1244 * inode table.
1245 */ 1245 */
1246 if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT))) 1246 if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)))
1247 used_blks = DIV_ROUND_UP((EXT4_INODES_PER_GROUP(sb) - 1247 used_blks = DIV_ROUND_UP((EXT4_INODES_PER_GROUP(sb) -
1248 ext4_itable_unused_count(sb, gdp)), 1248 ext4_itable_unused_count(sb, gdp)),
1249 sbi->s_inodes_per_block); 1249 sbi->s_inodes_per_block);
1250 1250
1251 if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) { 1251 if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) {
1252 ext4_error(sb, "Something is wrong with group %u: " 1252 ext4_error(sb, "Something is wrong with group %u: "
1253 "used itable blocks: %d; " 1253 "used itable blocks: %d; "
1254 "itable unused count: %u", 1254 "itable unused count: %u",
1255 group, used_blks, 1255 group, used_blks,
1256 ext4_itable_unused_count(sb, gdp)); 1256 ext4_itable_unused_count(sb, gdp));
1257 ret = 1; 1257 ret = 1;
1258 goto err_out; 1258 goto err_out;
1259 } 1259 }
1260 1260
1261 blk = ext4_inode_table(sb, gdp) + used_blks; 1261 blk = ext4_inode_table(sb, gdp) + used_blks;
1262 num = sbi->s_itb_per_group - used_blks; 1262 num = sbi->s_itb_per_group - used_blks;
1263 1263
1264 BUFFER_TRACE(group_desc_bh, "get_write_access"); 1264 BUFFER_TRACE(group_desc_bh, "get_write_access");
1265 ret = ext4_journal_get_write_access(handle, 1265 ret = ext4_journal_get_write_access(handle,
1266 group_desc_bh); 1266 group_desc_bh);
1267 if (ret) 1267 if (ret)
1268 goto err_out; 1268 goto err_out;
1269 1269
1270 /* 1270 /*
1271 * Skip zeroout if the inode table is full. But we set the ZEROED 1271 * Skip zeroout if the inode table is full. But we set the ZEROED
1272 * flag anyway, because obviously, when it is full it does not need 1272 * flag anyway, because obviously, when it is full it does not need
1273 * further zeroing. 1273 * further zeroing.
1274 */ 1274 */
1275 if (unlikely(num == 0)) 1275 if (unlikely(num == 0))
1276 goto skip_zeroout; 1276 goto skip_zeroout;
1277 1277
1278 ext4_debug("going to zero out inode table in group %d\n", 1278 ext4_debug("going to zero out inode table in group %d\n",
1279 group); 1279 group);
1280 ret = sb_issue_zeroout(sb, blk, num, GFP_NOFS); 1280 ret = sb_issue_zeroout(sb, blk, num, GFP_NOFS);
1281 if (ret < 0) 1281 if (ret < 0)
1282 goto err_out; 1282 goto err_out;
1283 if (barrier) 1283 if (barrier)
1284 blkdev_issue_flush(sb->s_bdev, GFP_NOFS, NULL); 1284 blkdev_issue_flush(sb->s_bdev, GFP_NOFS, NULL);
1285 1285
1286 skip_zeroout: 1286 skip_zeroout:
1287 ext4_lock_group(sb, group); 1287 ext4_lock_group(sb, group);
1288 gdp->bg_flags |= cpu_to_le16(EXT4_BG_INODE_ZEROED); 1288 gdp->bg_flags |= cpu_to_le16(EXT4_BG_INODE_ZEROED);
1289 ext4_group_desc_csum_set(sb, group, gdp); 1289 ext4_group_desc_csum_set(sb, group, gdp);
1290 ext4_unlock_group(sb, group); 1290 ext4_unlock_group(sb, group);
1291 1291
1292 BUFFER_TRACE(group_desc_bh, 1292 BUFFER_TRACE(group_desc_bh,
1293 "call ext4_handle_dirty_metadata"); 1293 "call ext4_handle_dirty_metadata");
1294 ret = ext4_handle_dirty_metadata(handle, NULL, 1294 ret = ext4_handle_dirty_metadata(handle, NULL,
1295 group_desc_bh); 1295 group_desc_bh);
1296 1296
1297 err_out: 1297 err_out:
1298 up_write(&grp->alloc_sem); 1298 up_write(&grp->alloc_sem);
1299 ext4_journal_stop(handle); 1299 ext4_journal_stop(handle);
1300 out: 1300 out:
1301 return ret; 1301 return ret;
1302 } 1302 }
1303 1303
1 #include <linux/fs.h> 1 #include <linux/fs.h>
2 #include <linux/random.h> 2 #include <linux/random.h>
3 #include <linux/buffer_head.h> 3 #include <linux/buffer_head.h>
4 #include <linux/utsname.h> 4 #include <linux/utsname.h>
5 #include <linux/kthread.h> 5 #include <linux/kthread.h>
6 6
7 #include "ext4.h" 7 #include "ext4.h"
8 8
9 /* Checksumming functions */ 9 /* Checksumming functions */
10 static __le32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp) 10 static __le32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp)
11 { 11 {
12 struct ext4_sb_info *sbi = EXT4_SB(sb); 12 struct ext4_sb_info *sbi = EXT4_SB(sb);
13 int offset = offsetof(struct mmp_struct, mmp_checksum); 13 int offset = offsetof(struct mmp_struct, mmp_checksum);
14 __u32 csum; 14 __u32 csum;
15 15
16 csum = ext4_chksum(sbi, sbi->s_csum_seed, (char *)mmp, offset); 16 csum = ext4_chksum(sbi, sbi->s_csum_seed, (char *)mmp, offset);
17 17
18 return cpu_to_le32(csum); 18 return cpu_to_le32(csum);
19 } 19 }
20 20
21 int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp) 21 int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp)
22 { 22 {
23 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, 23 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
24 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 24 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
25 return 1; 25 return 1;
26 26
27 return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp); 27 return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp);
28 } 28 }
29 29
30 void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp) 30 void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp)
31 { 31 {
32 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, 32 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
33 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 33 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
34 return; 34 return;
35 35
36 mmp->mmp_checksum = ext4_mmp_csum(sb, mmp); 36 mmp->mmp_checksum = ext4_mmp_csum(sb, mmp);
37 } 37 }
38 38
39 /* 39 /*
40 * Write the MMP block using WRITE_SYNC to try to get the block on-disk 40 * Write the MMP block using WRITE_SYNC to try to get the block on-disk
41 * faster. 41 * faster.
42 */ 42 */
43 static int write_mmp_block(struct super_block *sb, struct buffer_head *bh) 43 static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
44 { 44 {
45 struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data); 45 struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data);
46 46
47 /* 47 /*
48 * We protect against freezing so that we don't create dirty buffers 48 * We protect against freezing so that we don't create dirty buffers
49 * on frozen filesystem. 49 * on frozen filesystem.
50 */ 50 */
51 sb_start_write(sb); 51 sb_start_write(sb);
52 ext4_mmp_csum_set(sb, mmp); 52 ext4_mmp_csum_set(sb, mmp);
53 mark_buffer_dirty(bh); 53 mark_buffer_dirty(bh);
54 lock_buffer(bh); 54 lock_buffer(bh);
55 bh->b_end_io = end_buffer_write_sync; 55 bh->b_end_io = end_buffer_write_sync;
56 get_bh(bh); 56 get_bh(bh);
57 submit_bh(WRITE_SYNC | REQ_META | REQ_PRIO, bh); 57 submit_bh(WRITE_SYNC | REQ_META | REQ_PRIO, bh);
58 wait_on_buffer(bh); 58 wait_on_buffer(bh);
59 sb_end_write(sb); 59 sb_end_write(sb);
60 if (unlikely(!buffer_uptodate(bh))) 60 if (unlikely(!buffer_uptodate(bh)))
61 return 1; 61 return 1;
62 62
63 return 0; 63 return 0;
64 } 64 }
65 65
66 /* 66 /*
67 * Read the MMP block. It _must_ be read from disk and hence we clear the 67 * Read the MMP block. It _must_ be read from disk and hence we clear the
68 * uptodate flag on the buffer. 68 * uptodate flag on the buffer.
69 */ 69 */
70 static int read_mmp_block(struct super_block *sb, struct buffer_head **bh, 70 static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
71 ext4_fsblk_t mmp_block) 71 ext4_fsblk_t mmp_block)
72 { 72 {
73 struct mmp_struct *mmp; 73 struct mmp_struct *mmp;
74 74
75 if (*bh) 75 if (*bh)
76 clear_buffer_uptodate(*bh); 76 clear_buffer_uptodate(*bh);
77 77
78 /* This would be sb_bread(sb, mmp_block), except we need to be sure 78 /* This would be sb_bread(sb, mmp_block), except we need to be sure
79 * that the MD RAID device cache has been bypassed, and that the read 79 * that the MD RAID device cache has been bypassed, and that the read
80 * is not blocked in the elevator. */ 80 * is not blocked in the elevator. */
81 if (!*bh) 81 if (!*bh)
82 *bh = sb_getblk(sb, mmp_block); 82 *bh = sb_getblk(sb, mmp_block);
83 if (!*bh) 83 if (!*bh)
84 return -ENOMEM; 84 return -ENOMEM;
85 if (*bh) { 85 if (*bh) {
86 get_bh(*bh); 86 get_bh(*bh);
87 lock_buffer(*bh); 87 lock_buffer(*bh);
88 (*bh)->b_end_io = end_buffer_read_sync; 88 (*bh)->b_end_io = end_buffer_read_sync;
89 submit_bh(READ_SYNC | REQ_META | REQ_PRIO, *bh); 89 submit_bh(READ_SYNC | REQ_META | REQ_PRIO, *bh);
90 wait_on_buffer(*bh); 90 wait_on_buffer(*bh);
91 if (!buffer_uptodate(*bh)) { 91 if (!buffer_uptodate(*bh)) {
92 brelse(*bh); 92 brelse(*bh);
93 *bh = NULL; 93 *bh = NULL;
94 } 94 }
95 } 95 }
96 if (unlikely(!*bh)) { 96 if (unlikely(!*bh)) {
97 ext4_warning(sb, "Error while reading MMP block %llu", 97 ext4_warning(sb, "Error while reading MMP block %llu",
98 mmp_block); 98 mmp_block);
99 return -EIO; 99 return -EIO;
100 } 100 }
101 101
102 mmp = (struct mmp_struct *)((*bh)->b_data); 102 mmp = (struct mmp_struct *)((*bh)->b_data);
103 if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC || 103 if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC ||
104 !ext4_mmp_csum_verify(sb, mmp)) 104 !ext4_mmp_csum_verify(sb, mmp))
105 return -EINVAL; 105 return -EINVAL;
106 106
107 return 0; 107 return 0;
108 } 108 }
109 109
110 /* 110 /*
111 * Dump as much information as possible to help the admin. 111 * Dump as much information as possible to help the admin.
112 */ 112 */
113 void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp, 113 void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp,
114 const char *function, unsigned int line, const char *msg) 114 const char *function, unsigned int line, const char *msg)
115 { 115 {
116 __ext4_warning(sb, function, line, msg); 116 __ext4_warning(sb, function, line, msg);
117 __ext4_warning(sb, function, line, 117 __ext4_warning(sb, function, line,
118 "MMP failure info: last update time: %llu, last update " 118 "MMP failure info: last update time: %llu, last update "
119 "node: %s, last update device: %s\n", 119 "node: %s, last update device: %s\n",
120 (long long unsigned int) le64_to_cpu(mmp->mmp_time), 120 (long long unsigned int) le64_to_cpu(mmp->mmp_time),
121 mmp->mmp_nodename, mmp->mmp_bdevname); 121 mmp->mmp_nodename, mmp->mmp_bdevname);
122 } 122 }
123 123
/*
 * kmmpd will update the MMP sequence every s_mmp_update_interval seconds
 *
 * Kernel thread implementing multi-mount protection: it periodically
 * bumps the sequence number in the MMP block and rereads the block to
 * detect another node mounting the same filesystem.  Runs until
 * kthread_should_stop() or until an error/feature change forces exit.
 */
static int kmmpd(void *data)
{
	struct super_block *sb = ((struct mmpd_data *) data)->sb;
	struct buffer_head *bh = ((struct mmpd_data *) data)->bh;
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
	struct mmp_struct *mmp;
	ext4_fsblk_t mmp_block;
	u32 seq = 0;
	unsigned long failed_writes = 0;
	int mmp_update_interval = le16_to_cpu(es->s_mmp_update_interval);
	unsigned mmp_check_interval;
	unsigned long last_update_time;
	unsigned long diff;
	int retval;

	mmp_block = le64_to_cpu(es->s_mmp_block);
	mmp = (struct mmp_struct *)(bh->b_data);
	mmp->mmp_time = cpu_to_le64(get_seconds());
	/*
	 * Start with the higher mmp_check_interval and reduce it if
	 * the MMP block is being updated on time.
	 */
	mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval,
				 EXT4_MMP_MIN_CHECK_INTERVAL);
	mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
	/* Record who last touched the block: device name and node name. */
	bdevname(bh->b_bdev, mmp->mmp_bdevname);

	memcpy(mmp->mmp_nodename, init_utsname()->nodename,
	       sizeof(mmp->mmp_nodename));

	while (!kthread_should_stop()) {
		/* Sequence cycles within [1, EXT4_MMP_SEQ_MAX]. */
		if (++seq > EXT4_MMP_SEQ_MAX)
			seq = 1;

		mmp->mmp_seq = cpu_to_le32(seq);
		mmp->mmp_time = cpu_to_le64(get_seconds());
		last_update_time = jiffies;

		retval = write_mmp_block(sb, bh);
		/*
		 * Don't spew too many error messages. Print one every
		 * (s_mmp_update_interval * 60) seconds.
		 */
		if (retval) {
			if ((failed_writes % 60) == 0)
				ext4_error(sb, "Error writing to MMP block");
			failed_writes++;
		}

		/* Stop cleanly if the MMP feature was cleared under us. */
		if (!(le32_to_cpu(es->s_feature_incompat) &
		    EXT4_FEATURE_INCOMPAT_MMP)) {
			ext4_warning(sb, "kmmpd being stopped since MMP feature"
				     " has been disabled.");
			EXT4_SB(sb)->s_mmp_tsk = NULL;
			goto failed;
		}

		/* A read-only fs cannot be multiply mounted read-write. */
		if (sb->s_flags & MS_RDONLY) {
			ext4_warning(sb, "kmmpd being stopped since filesystem "
				     "has been remounted as readonly.");
			EXT4_SB(sb)->s_mmp_tsk = NULL;
			goto failed;
		}

		/* Sleep out the remainder of the update interval. */
		diff = jiffies - last_update_time;
		if (diff < mmp_update_interval * HZ)
			schedule_timeout_interruptible(mmp_update_interval *
						       HZ - diff);

		/*
		 * We need to make sure that more than mmp_check_interval
		 * seconds have not passed since writing. If that has happened
		 * we need to check if the MMP block is as we left it.
		 */
		diff = jiffies - last_update_time;
		if (diff > mmp_check_interval * HZ) {
			struct buffer_head *bh_check = NULL;
			struct mmp_struct *mmp_check;

			retval = read_mmp_block(sb, &bh_check, mmp_block);
			if (retval) {
				ext4_error(sb, "error reading MMP data: %d",
					   retval);

				EXT4_SB(sb)->s_mmp_tsk = NULL;
				goto failed;
			}

			/*
			 * A changed seq or nodename means another node wrote
			 * the block: the fs appears to be multiply mounted.
			 */
			mmp_check = (struct mmp_struct *)(bh_check->b_data);
			if (mmp->mmp_seq != mmp_check->mmp_seq ||
			    memcmp(mmp->mmp_nodename, mmp_check->mmp_nodename,
				   sizeof(mmp->mmp_nodename))) {
				dump_mmp_msg(sb, mmp_check,
					     "Error while updating MMP info. "
					     "The filesystem seems to have been"
					     " multiply mounted.");
				ext4_error(sb, "abort");
				goto failed;
			}
			put_bh(bh_check);
		}

		 /*
		 * Adjust the mmp_check_interval depending on how much time
		 * it took for the MMP block to be written.
		 */
		mmp_check_interval = max(min(EXT4_MMP_CHECK_MULT * diff / HZ,
					     EXT4_MMP_MAX_CHECK_INTERVAL),
					 EXT4_MMP_MIN_CHECK_INTERVAL);
		mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
	}

	/*
	 * Unmount seems to be clean.
	 */
	mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN);
	mmp->mmp_time = cpu_to_le64(get_seconds());

	retval = write_mmp_block(sb, bh);

failed:
	/* The thread owns both the mmpd_data and the buffer reference. */
	kfree(data);
	brelse(bh);
	return retval;
}
252 252
253 /* 253 /*
254 * Get a random new sequence number but make sure it is not greater than 254 * Get a random new sequence number but make sure it is not greater than
255 * EXT4_MMP_SEQ_MAX. 255 * EXT4_MMP_SEQ_MAX.
256 */ 256 */
257 static unsigned int mmp_new_seq(void) 257 static unsigned int mmp_new_seq(void)
258 { 258 {
259 u32 new_seq; 259 u32 new_seq;
260 260
261 do { 261 do {
262 get_random_bytes(&new_seq, sizeof(u32)); 262 new_seq = prandom_u32();
263 } while (new_seq > EXT4_MMP_SEQ_MAX); 263 } while (new_seq > EXT4_MMP_SEQ_MAX);
264 264
265 return new_seq; 265 return new_seq;
266 } 266 }
267 267
268 /* 268 /*
269 * Protect the filesystem from being mounted more than once. 269 * Protect the filesystem from being mounted more than once.
270 */ 270 */
271 int ext4_multi_mount_protect(struct super_block *sb, 271 int ext4_multi_mount_protect(struct super_block *sb,
272 ext4_fsblk_t mmp_block) 272 ext4_fsblk_t mmp_block)
273 { 273 {
274 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 274 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
275 struct buffer_head *bh = NULL; 275 struct buffer_head *bh = NULL;
276 struct mmp_struct *mmp = NULL; 276 struct mmp_struct *mmp = NULL;
277 struct mmpd_data *mmpd_data; 277 struct mmpd_data *mmpd_data;
278 u32 seq; 278 u32 seq;
279 unsigned int mmp_check_interval = le16_to_cpu(es->s_mmp_update_interval); 279 unsigned int mmp_check_interval = le16_to_cpu(es->s_mmp_update_interval);
280 unsigned int wait_time = 0; 280 unsigned int wait_time = 0;
281 int retval; 281 int retval;
282 282
283 if (mmp_block < le32_to_cpu(es->s_first_data_block) || 283 if (mmp_block < le32_to_cpu(es->s_first_data_block) ||
284 mmp_block >= ext4_blocks_count(es)) { 284 mmp_block >= ext4_blocks_count(es)) {
285 ext4_warning(sb, "Invalid MMP block in superblock"); 285 ext4_warning(sb, "Invalid MMP block in superblock");
286 goto failed; 286 goto failed;
287 } 287 }
288 288
289 retval = read_mmp_block(sb, &bh, mmp_block); 289 retval = read_mmp_block(sb, &bh, mmp_block);
290 if (retval) 290 if (retval)
291 goto failed; 291 goto failed;
292 292
293 mmp = (struct mmp_struct *)(bh->b_data); 293 mmp = (struct mmp_struct *)(bh->b_data);
294 294
295 if (mmp_check_interval < EXT4_MMP_MIN_CHECK_INTERVAL) 295 if (mmp_check_interval < EXT4_MMP_MIN_CHECK_INTERVAL)
296 mmp_check_interval = EXT4_MMP_MIN_CHECK_INTERVAL; 296 mmp_check_interval = EXT4_MMP_MIN_CHECK_INTERVAL;
297 297
298 /* 298 /*
299 * If check_interval in MMP block is larger, use that instead of 299 * If check_interval in MMP block is larger, use that instead of
300 * update_interval from the superblock. 300 * update_interval from the superblock.
301 */ 301 */
302 if (le16_to_cpu(mmp->mmp_check_interval) > mmp_check_interval) 302 if (le16_to_cpu(mmp->mmp_check_interval) > mmp_check_interval)
303 mmp_check_interval = le16_to_cpu(mmp->mmp_check_interval); 303 mmp_check_interval = le16_to_cpu(mmp->mmp_check_interval);
304 304
305 seq = le32_to_cpu(mmp->mmp_seq); 305 seq = le32_to_cpu(mmp->mmp_seq);
306 if (seq == EXT4_MMP_SEQ_CLEAN) 306 if (seq == EXT4_MMP_SEQ_CLEAN)
307 goto skip; 307 goto skip;
308 308
309 if (seq == EXT4_MMP_SEQ_FSCK) { 309 if (seq == EXT4_MMP_SEQ_FSCK) {
310 dump_mmp_msg(sb, mmp, "fsck is running on the filesystem"); 310 dump_mmp_msg(sb, mmp, "fsck is running on the filesystem");
311 goto failed; 311 goto failed;
312 } 312 }
313 313
314 wait_time = min(mmp_check_interval * 2 + 1, 314 wait_time = min(mmp_check_interval * 2 + 1,
315 mmp_check_interval + 60); 315 mmp_check_interval + 60);
316 316
317 /* Print MMP interval if more than 20 secs. */ 317 /* Print MMP interval if more than 20 secs. */
318 if (wait_time > EXT4_MMP_MIN_CHECK_INTERVAL * 4) 318 if (wait_time > EXT4_MMP_MIN_CHECK_INTERVAL * 4)
319 ext4_warning(sb, "MMP interval %u higher than expected, please" 319 ext4_warning(sb, "MMP interval %u higher than expected, please"
320 " wait.\n", wait_time * 2); 320 " wait.\n", wait_time * 2);
321 321
322 if (schedule_timeout_interruptible(HZ * wait_time) != 0) { 322 if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
323 ext4_warning(sb, "MMP startup interrupted, failing mount\n"); 323 ext4_warning(sb, "MMP startup interrupted, failing mount\n");
324 goto failed; 324 goto failed;
325 } 325 }
326 326
327 retval = read_mmp_block(sb, &bh, mmp_block); 327 retval = read_mmp_block(sb, &bh, mmp_block);
328 if (retval) 328 if (retval)
329 goto failed; 329 goto failed;
330 mmp = (struct mmp_struct *)(bh->b_data); 330 mmp = (struct mmp_struct *)(bh->b_data);
331 if (seq != le32_to_cpu(mmp->mmp_seq)) { 331 if (seq != le32_to_cpu(mmp->mmp_seq)) {
332 dump_mmp_msg(sb, mmp, 332 dump_mmp_msg(sb, mmp,
333 "Device is already active on another node."); 333 "Device is already active on another node.");
334 goto failed; 334 goto failed;
335 } 335 }
336 336
337 skip: 337 skip:
338 /* 338 /*
339 * write a new random sequence number. 339 * write a new random sequence number.
340 */ 340 */
341 seq = mmp_new_seq(); 341 seq = mmp_new_seq();
342 mmp->mmp_seq = cpu_to_le32(seq); 342 mmp->mmp_seq = cpu_to_le32(seq);
343 343
344 retval = write_mmp_block(sb, bh); 344 retval = write_mmp_block(sb, bh);
345 if (retval) 345 if (retval)
346 goto failed; 346 goto failed;
347 347
348 /* 348 /*
349 * wait for MMP interval and check mmp_seq. 349 * wait for MMP interval and check mmp_seq.
350 */ 350 */
351 if (schedule_timeout_interruptible(HZ * wait_time) != 0) { 351 if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
352 ext4_warning(sb, "MMP startup interrupted, failing mount\n"); 352 ext4_warning(sb, "MMP startup interrupted, failing mount\n");
353 goto failed; 353 goto failed;
354 } 354 }
355 355
356 retval = read_mmp_block(sb, &bh, mmp_block); 356 retval = read_mmp_block(sb, &bh, mmp_block);
357 if (retval) 357 if (retval)
358 goto failed; 358 goto failed;
359 mmp = (struct mmp_struct *)(bh->b_data); 359 mmp = (struct mmp_struct *)(bh->b_data);
360 if (seq != le32_to_cpu(mmp->mmp_seq)) { 360 if (seq != le32_to_cpu(mmp->mmp_seq)) {
361 dump_mmp_msg(sb, mmp, 361 dump_mmp_msg(sb, mmp,
362 "Device is already active on another node."); 362 "Device is already active on another node.");
363 goto failed; 363 goto failed;
364 } 364 }
365 365
366 mmpd_data = kmalloc(sizeof(struct mmpd_data), GFP_KERNEL); 366 mmpd_data = kmalloc(sizeof(struct mmpd_data), GFP_KERNEL);
367 if (!mmpd_data) { 367 if (!mmpd_data) {
368 ext4_warning(sb, "not enough memory for mmpd_data"); 368 ext4_warning(sb, "not enough memory for mmpd_data");
369 goto failed; 369 goto failed;
370 } 370 }
371 mmpd_data->sb = sb; 371 mmpd_data->sb = sb;
372 mmpd_data->bh = bh; 372 mmpd_data->bh = bh;
373 373
374 /* 374 /*
375 * Start a kernel thread to update the MMP block periodically. 375 * Start a kernel thread to update the MMP block periodically.
376 */ 376 */
377 EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%s", 377 EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%s",
378 bdevname(bh->b_bdev, 378 bdevname(bh->b_bdev,
379 mmp->mmp_bdevname)); 379 mmp->mmp_bdevname));
380 if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) { 380 if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) {
381 EXT4_SB(sb)->s_mmp_tsk = NULL; 381 EXT4_SB(sb)->s_mmp_tsk = NULL;
382 kfree(mmpd_data); 382 kfree(mmpd_data);
383 ext4_warning(sb, "Unable to create kmmpd thread for %s.", 383 ext4_warning(sb, "Unable to create kmmpd thread for %s.",
384 sb->s_id); 384 sb->s_id);
385 goto failed; 385 goto failed;
386 } 386 }
387 387
388 return 0; 388 return 0;
389 389
390 failed: 390 failed:
391 brelse(bh); 391 brelse(bh);
392 return 1; 392 return 1;
393 } 393 }
394 394
395 395
396 396
1 /* 1 /*
2 * linux/fs/ext4/super.c 2 * linux/fs/ext4/super.c
3 * 3 *
4 * Copyright (C) 1992, 1993, 1994, 1995 4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr) 5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal 6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI) 7 * Universite Pierre et Marie Curie (Paris VI)
8 * 8 *
9 * from 9 * from
10 * 10 *
11 * linux/fs/minix/inode.c 11 * linux/fs/minix/inode.c
12 * 12 *
13 * Copyright (C) 1991, 1992 Linus Torvalds 13 * Copyright (C) 1991, 1992 Linus Torvalds
14 * 14 *
15 * Big-endian to little-endian byte-swapping/bitmaps by 15 * Big-endian to little-endian byte-swapping/bitmaps by
16 * David S. Miller (davem@caip.rutgers.edu), 1995 16 * David S. Miller (davem@caip.rutgers.edu), 1995
17 */ 17 */
18 18
19 #include <linux/module.h> 19 #include <linux/module.h>
20 #include <linux/string.h> 20 #include <linux/string.h>
21 #include <linux/fs.h> 21 #include <linux/fs.h>
22 #include <linux/time.h> 22 #include <linux/time.h>
23 #include <linux/vmalloc.h> 23 #include <linux/vmalloc.h>
24 #include <linux/jbd2.h> 24 #include <linux/jbd2.h>
25 #include <linux/slab.h> 25 #include <linux/slab.h>
26 #include <linux/init.h> 26 #include <linux/init.h>
27 #include <linux/blkdev.h> 27 #include <linux/blkdev.h>
28 #include <linux/parser.h> 28 #include <linux/parser.h>
29 #include <linux/buffer_head.h> 29 #include <linux/buffer_head.h>
30 #include <linux/exportfs.h> 30 #include <linux/exportfs.h>
31 #include <linux/vfs.h> 31 #include <linux/vfs.h>
32 #include <linux/random.h> 32 #include <linux/random.h>
33 #include <linux/mount.h> 33 #include <linux/mount.h>
34 #include <linux/namei.h> 34 #include <linux/namei.h>
35 #include <linux/quotaops.h> 35 #include <linux/quotaops.h>
36 #include <linux/seq_file.h> 36 #include <linux/seq_file.h>
37 #include <linux/proc_fs.h> 37 #include <linux/proc_fs.h>
38 #include <linux/ctype.h> 38 #include <linux/ctype.h>
39 #include <linux/log2.h> 39 #include <linux/log2.h>
40 #include <linux/crc16.h> 40 #include <linux/crc16.h>
41 #include <linux/cleancache.h> 41 #include <linux/cleancache.h>
42 #include <asm/uaccess.h> 42 #include <asm/uaccess.h>
43 43
44 #include <linux/kthread.h> 44 #include <linux/kthread.h>
45 #include <linux/freezer.h> 45 #include <linux/freezer.h>
46 46
47 #include "ext4.h" 47 #include "ext4.h"
48 #include "ext4_extents.h" /* Needed for trace points definition */ 48 #include "ext4_extents.h" /* Needed for trace points definition */
49 #include "ext4_jbd2.h" 49 #include "ext4_jbd2.h"
50 #include "xattr.h" 50 #include "xattr.h"
51 #include "acl.h" 51 #include "acl.h"
52 #include "mballoc.h" 52 #include "mballoc.h"
53 53
54 #define CREATE_TRACE_POINTS 54 #define CREATE_TRACE_POINTS
55 #include <trace/events/ext4.h> 55 #include <trace/events/ext4.h>
56 56
57 static struct proc_dir_entry *ext4_proc_root; 57 static struct proc_dir_entry *ext4_proc_root;
58 static struct kset *ext4_kset; 58 static struct kset *ext4_kset;
59 static struct ext4_lazy_init *ext4_li_info; 59 static struct ext4_lazy_init *ext4_li_info;
60 static struct mutex ext4_li_mtx; 60 static struct mutex ext4_li_mtx;
61 static struct ext4_features *ext4_feat; 61 static struct ext4_features *ext4_feat;
62 62
63 static int ext4_load_journal(struct super_block *, struct ext4_super_block *, 63 static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
64 unsigned long journal_devnum); 64 unsigned long journal_devnum);
65 static int ext4_show_options(struct seq_file *seq, struct dentry *root); 65 static int ext4_show_options(struct seq_file *seq, struct dentry *root);
66 static int ext4_commit_super(struct super_block *sb, int sync); 66 static int ext4_commit_super(struct super_block *sb, int sync);
67 static void ext4_mark_recovery_complete(struct super_block *sb, 67 static void ext4_mark_recovery_complete(struct super_block *sb,
68 struct ext4_super_block *es); 68 struct ext4_super_block *es);
69 static void ext4_clear_journal_err(struct super_block *sb, 69 static void ext4_clear_journal_err(struct super_block *sb,
70 struct ext4_super_block *es); 70 struct ext4_super_block *es);
71 static int ext4_sync_fs(struct super_block *sb, int wait); 71 static int ext4_sync_fs(struct super_block *sb, int wait);
72 static int ext4_sync_fs_nojournal(struct super_block *sb, int wait); 72 static int ext4_sync_fs_nojournal(struct super_block *sb, int wait);
73 static int ext4_remount(struct super_block *sb, int *flags, char *data); 73 static int ext4_remount(struct super_block *sb, int *flags, char *data);
74 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); 74 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
75 static int ext4_unfreeze(struct super_block *sb); 75 static int ext4_unfreeze(struct super_block *sb);
76 static int ext4_freeze(struct super_block *sb); 76 static int ext4_freeze(struct super_block *sb);
77 static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, 77 static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
78 const char *dev_name, void *data); 78 const char *dev_name, void *data);
79 static inline int ext2_feature_set_ok(struct super_block *sb); 79 static inline int ext2_feature_set_ok(struct super_block *sb);
80 static inline int ext3_feature_set_ok(struct super_block *sb); 80 static inline int ext3_feature_set_ok(struct super_block *sb);
81 static int ext4_feature_set_ok(struct super_block *sb, int readonly); 81 static int ext4_feature_set_ok(struct super_block *sb, int readonly);
82 static void ext4_destroy_lazyinit_thread(void); 82 static void ext4_destroy_lazyinit_thread(void);
83 static void ext4_unregister_li_request(struct super_block *sb); 83 static void ext4_unregister_li_request(struct super_block *sb);
84 static void ext4_clear_request_list(void); 84 static void ext4_clear_request_list(void);
85 static int ext4_reserve_clusters(struct ext4_sb_info *, ext4_fsblk_t); 85 static int ext4_reserve_clusters(struct ext4_sb_info *, ext4_fsblk_t);
86 86
87 #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) 87 #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
88 static struct file_system_type ext2_fs_type = { 88 static struct file_system_type ext2_fs_type = {
89 .owner = THIS_MODULE, 89 .owner = THIS_MODULE,
90 .name = "ext2", 90 .name = "ext2",
91 .mount = ext4_mount, 91 .mount = ext4_mount,
92 .kill_sb = kill_block_super, 92 .kill_sb = kill_block_super,
93 .fs_flags = FS_REQUIRES_DEV, 93 .fs_flags = FS_REQUIRES_DEV,
94 }; 94 };
95 MODULE_ALIAS_FS("ext2"); 95 MODULE_ALIAS_FS("ext2");
96 MODULE_ALIAS("ext2"); 96 MODULE_ALIAS("ext2");
97 #define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type) 97 #define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type)
98 #else 98 #else
99 #define IS_EXT2_SB(sb) (0) 99 #define IS_EXT2_SB(sb) (0)
100 #endif 100 #endif
101 101
102 102
103 #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) 103 #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
104 static struct file_system_type ext3_fs_type = { 104 static struct file_system_type ext3_fs_type = {
105 .owner = THIS_MODULE, 105 .owner = THIS_MODULE,
106 .name = "ext3", 106 .name = "ext3",
107 .mount = ext4_mount, 107 .mount = ext4_mount,
108 .kill_sb = kill_block_super, 108 .kill_sb = kill_block_super,
109 .fs_flags = FS_REQUIRES_DEV, 109 .fs_flags = FS_REQUIRES_DEV,
110 }; 110 };
111 MODULE_ALIAS_FS("ext3"); 111 MODULE_ALIAS_FS("ext3");
112 MODULE_ALIAS("ext3"); 112 MODULE_ALIAS("ext3");
113 #define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type) 113 #define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type)
114 #else 114 #else
115 #define IS_EXT3_SB(sb) (0) 115 #define IS_EXT3_SB(sb) (0)
116 #endif 116 #endif
117 117
118 static int ext4_verify_csum_type(struct super_block *sb, 118 static int ext4_verify_csum_type(struct super_block *sb,
119 struct ext4_super_block *es) 119 struct ext4_super_block *es)
120 { 120 {
121 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, 121 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
122 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 122 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
123 return 1; 123 return 1;
124 124
125 return es->s_checksum_type == EXT4_CRC32C_CHKSUM; 125 return es->s_checksum_type == EXT4_CRC32C_CHKSUM;
126 } 126 }
127 127
128 static __le32 ext4_superblock_csum(struct super_block *sb, 128 static __le32 ext4_superblock_csum(struct super_block *sb,
129 struct ext4_super_block *es) 129 struct ext4_super_block *es)
130 { 130 {
131 struct ext4_sb_info *sbi = EXT4_SB(sb); 131 struct ext4_sb_info *sbi = EXT4_SB(sb);
132 int offset = offsetof(struct ext4_super_block, s_checksum); 132 int offset = offsetof(struct ext4_super_block, s_checksum);
133 __u32 csum; 133 __u32 csum;
134 134
135 csum = ext4_chksum(sbi, ~0, (char *)es, offset); 135 csum = ext4_chksum(sbi, ~0, (char *)es, offset);
136 136
137 return cpu_to_le32(csum); 137 return cpu_to_le32(csum);
138 } 138 }
139 139
140 int ext4_superblock_csum_verify(struct super_block *sb, 140 int ext4_superblock_csum_verify(struct super_block *sb,
141 struct ext4_super_block *es) 141 struct ext4_super_block *es)
142 { 142 {
143 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, 143 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
144 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 144 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
145 return 1; 145 return 1;
146 146
147 return es->s_checksum == ext4_superblock_csum(sb, es); 147 return es->s_checksum == ext4_superblock_csum(sb, es);
148 } 148 }
149 149
150 void ext4_superblock_csum_set(struct super_block *sb) 150 void ext4_superblock_csum_set(struct super_block *sb)
151 { 151 {
152 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 152 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
153 153
154 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, 154 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
155 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 155 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
156 return; 156 return;
157 157
158 es->s_checksum = ext4_superblock_csum(sb, es); 158 es->s_checksum = ext4_superblock_csum(sb, es);
159 } 159 }
160 160
161 void *ext4_kvmalloc(size_t size, gfp_t flags) 161 void *ext4_kvmalloc(size_t size, gfp_t flags)
162 { 162 {
163 void *ret; 163 void *ret;
164 164
165 ret = kmalloc(size, flags | __GFP_NOWARN); 165 ret = kmalloc(size, flags | __GFP_NOWARN);
166 if (!ret) 166 if (!ret)
167 ret = __vmalloc(size, flags, PAGE_KERNEL); 167 ret = __vmalloc(size, flags, PAGE_KERNEL);
168 return ret; 168 return ret;
169 } 169 }
170 170
171 void *ext4_kvzalloc(size_t size, gfp_t flags) 171 void *ext4_kvzalloc(size_t size, gfp_t flags)
172 { 172 {
173 void *ret; 173 void *ret;
174 174
175 ret = kzalloc(size, flags | __GFP_NOWARN); 175 ret = kzalloc(size, flags | __GFP_NOWARN);
176 if (!ret) 176 if (!ret)
177 ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL); 177 ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL);
178 return ret; 178 return ret;
179 } 179 }
180 180
181 void ext4_kvfree(void *ptr) 181 void ext4_kvfree(void *ptr)
182 { 182 {
183 if (is_vmalloc_addr(ptr)) 183 if (is_vmalloc_addr(ptr))
184 vfree(ptr); 184 vfree(ptr);
185 else 185 else
186 kfree(ptr); 186 kfree(ptr);
187 187
188 } 188 }
189 189
190 ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, 190 ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
191 struct ext4_group_desc *bg) 191 struct ext4_group_desc *bg)
192 { 192 {
193 return le32_to_cpu(bg->bg_block_bitmap_lo) | 193 return le32_to_cpu(bg->bg_block_bitmap_lo) |
194 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 194 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
195 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0); 195 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
196 } 196 }
197 197
198 ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, 198 ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
199 struct ext4_group_desc *bg) 199 struct ext4_group_desc *bg)
200 { 200 {
201 return le32_to_cpu(bg->bg_inode_bitmap_lo) | 201 return le32_to_cpu(bg->bg_inode_bitmap_lo) |
202 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 202 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
203 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0); 203 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
204 } 204 }
205 205
206 ext4_fsblk_t ext4_inode_table(struct super_block *sb, 206 ext4_fsblk_t ext4_inode_table(struct super_block *sb,
207 struct ext4_group_desc *bg) 207 struct ext4_group_desc *bg)
208 { 208 {
209 return le32_to_cpu(bg->bg_inode_table_lo) | 209 return le32_to_cpu(bg->bg_inode_table_lo) |
210 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 210 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
211 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0); 211 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
212 } 212 }
213 213
214 __u32 ext4_free_group_clusters(struct super_block *sb, 214 __u32 ext4_free_group_clusters(struct super_block *sb,
215 struct ext4_group_desc *bg) 215 struct ext4_group_desc *bg)
216 { 216 {
217 return le16_to_cpu(bg->bg_free_blocks_count_lo) | 217 return le16_to_cpu(bg->bg_free_blocks_count_lo) |
218 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 218 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
219 (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0); 219 (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
220 } 220 }
221 221
222 __u32 ext4_free_inodes_count(struct super_block *sb, 222 __u32 ext4_free_inodes_count(struct super_block *sb,
223 struct ext4_group_desc *bg) 223 struct ext4_group_desc *bg)
224 { 224 {
225 return le16_to_cpu(bg->bg_free_inodes_count_lo) | 225 return le16_to_cpu(bg->bg_free_inodes_count_lo) |
226 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 226 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
227 (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0); 227 (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
228 } 228 }
229 229
230 __u32 ext4_used_dirs_count(struct super_block *sb, 230 __u32 ext4_used_dirs_count(struct super_block *sb,
231 struct ext4_group_desc *bg) 231 struct ext4_group_desc *bg)
232 { 232 {
233 return le16_to_cpu(bg->bg_used_dirs_count_lo) | 233 return le16_to_cpu(bg->bg_used_dirs_count_lo) |
234 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 234 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
235 (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0); 235 (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
236 } 236 }
237 237
238 __u32 ext4_itable_unused_count(struct super_block *sb, 238 __u32 ext4_itable_unused_count(struct super_block *sb,
239 struct ext4_group_desc *bg) 239 struct ext4_group_desc *bg)
240 { 240 {
241 return le16_to_cpu(bg->bg_itable_unused_lo) | 241 return le16_to_cpu(bg->bg_itable_unused_lo) |
242 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 242 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
243 (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0); 243 (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
244 } 244 }
245 245
246 void ext4_block_bitmap_set(struct super_block *sb, 246 void ext4_block_bitmap_set(struct super_block *sb,
247 struct ext4_group_desc *bg, ext4_fsblk_t blk) 247 struct ext4_group_desc *bg, ext4_fsblk_t blk)
248 { 248 {
249 bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk); 249 bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk);
250 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 250 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
251 bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32); 251 bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32);
252 } 252 }
253 253
254 void ext4_inode_bitmap_set(struct super_block *sb, 254 void ext4_inode_bitmap_set(struct super_block *sb,
255 struct ext4_group_desc *bg, ext4_fsblk_t blk) 255 struct ext4_group_desc *bg, ext4_fsblk_t blk)
256 { 256 {
257 bg->bg_inode_bitmap_lo = cpu_to_le32((u32)blk); 257 bg->bg_inode_bitmap_lo = cpu_to_le32((u32)blk);
258 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 258 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
259 bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32); 259 bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32);
260 } 260 }
261 261
262 void ext4_inode_table_set(struct super_block *sb, 262 void ext4_inode_table_set(struct super_block *sb,
263 struct ext4_group_desc *bg, ext4_fsblk_t blk) 263 struct ext4_group_desc *bg, ext4_fsblk_t blk)
264 { 264 {
265 bg->bg_inode_table_lo = cpu_to_le32((u32)blk); 265 bg->bg_inode_table_lo = cpu_to_le32((u32)blk);
266 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 266 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
267 bg->bg_inode_table_hi = cpu_to_le32(blk >> 32); 267 bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
268 } 268 }
269 269
270 void ext4_free_group_clusters_set(struct super_block *sb, 270 void ext4_free_group_clusters_set(struct super_block *sb,
271 struct ext4_group_desc *bg, __u32 count) 271 struct ext4_group_desc *bg, __u32 count)
272 { 272 {
273 bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count); 273 bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);
274 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 274 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
275 bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16); 275 bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16);
276 } 276 }
277 277
278 void ext4_free_inodes_set(struct super_block *sb, 278 void ext4_free_inodes_set(struct super_block *sb,
279 struct ext4_group_desc *bg, __u32 count) 279 struct ext4_group_desc *bg, __u32 count)
280 { 280 {
281 bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count); 281 bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count);
282 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 282 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
283 bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16); 283 bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16);
284 } 284 }
285 285
286 void ext4_used_dirs_set(struct super_block *sb, 286 void ext4_used_dirs_set(struct super_block *sb,
287 struct ext4_group_desc *bg, __u32 count) 287 struct ext4_group_desc *bg, __u32 count)
288 { 288 {
289 bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count); 289 bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count);
290 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 290 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
291 bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16); 291 bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16);
292 } 292 }
293 293
294 void ext4_itable_unused_set(struct super_block *sb, 294 void ext4_itable_unused_set(struct super_block *sb,
295 struct ext4_group_desc *bg, __u32 count) 295 struct ext4_group_desc *bg, __u32 count)
296 { 296 {
297 bg->bg_itable_unused_lo = cpu_to_le16((__u16)count); 297 bg->bg_itable_unused_lo = cpu_to_le16((__u16)count);
298 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 298 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
299 bg->bg_itable_unused_hi = cpu_to_le16(count >> 16); 299 bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
300 } 300 }
301 301
302 302
303 static void __save_error_info(struct super_block *sb, const char *func, 303 static void __save_error_info(struct super_block *sb, const char *func,
304 unsigned int line) 304 unsigned int line)
305 { 305 {
306 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 306 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
307 307
308 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 308 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
309 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 309 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
310 es->s_last_error_time = cpu_to_le32(get_seconds()); 310 es->s_last_error_time = cpu_to_le32(get_seconds());
311 strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func)); 311 strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
312 es->s_last_error_line = cpu_to_le32(line); 312 es->s_last_error_line = cpu_to_le32(line);
313 if (!es->s_first_error_time) { 313 if (!es->s_first_error_time) {
314 es->s_first_error_time = es->s_last_error_time; 314 es->s_first_error_time = es->s_last_error_time;
315 strncpy(es->s_first_error_func, func, 315 strncpy(es->s_first_error_func, func,
316 sizeof(es->s_first_error_func)); 316 sizeof(es->s_first_error_func));
317 es->s_first_error_line = cpu_to_le32(line); 317 es->s_first_error_line = cpu_to_le32(line);
318 es->s_first_error_ino = es->s_last_error_ino; 318 es->s_first_error_ino = es->s_last_error_ino;
319 es->s_first_error_block = es->s_last_error_block; 319 es->s_first_error_block = es->s_last_error_block;
320 } 320 }
321 /* 321 /*
322 * Start the daily error reporting function if it hasn't been 322 * Start the daily error reporting function if it hasn't been
323 * started already 323 * started already
324 */ 324 */
325 if (!es->s_error_count) 325 if (!es->s_error_count)
326 mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ); 326 mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ);
327 le32_add_cpu(&es->s_error_count, 1); 327 le32_add_cpu(&es->s_error_count, 1);
328 } 328 }
329 329
330 static void save_error_info(struct super_block *sb, const char *func, 330 static void save_error_info(struct super_block *sb, const char *func,
331 unsigned int line) 331 unsigned int line)
332 { 332 {
333 __save_error_info(sb, func, line); 333 __save_error_info(sb, func, line);
334 ext4_commit_super(sb, 1); 334 ext4_commit_super(sb, 1);
335 } 335 }
336 336
337 /* 337 /*
338 * The del_gendisk() function uninitializes the disk-specific data 338 * The del_gendisk() function uninitializes the disk-specific data
339 * structures, including the bdi structure, without telling anyone 339 * structures, including the bdi structure, without telling anyone
340 * else. Once this happens, any attempt to call mark_buffer_dirty() 340 * else. Once this happens, any attempt to call mark_buffer_dirty()
341 * (for example, by ext4_commit_super), will cause a kernel OOPS. 341 * (for example, by ext4_commit_super), will cause a kernel OOPS.
342 * This is a kludge to prevent these oops until we can put in a proper 342 * This is a kludge to prevent these oops until we can put in a proper
343 * hook in del_gendisk() to inform the VFS and file system layers. 343 * hook in del_gendisk() to inform the VFS and file system layers.
344 */ 344 */
345 static int block_device_ejected(struct super_block *sb) 345 static int block_device_ejected(struct super_block *sb)
346 { 346 {
347 struct inode *bd_inode = sb->s_bdev->bd_inode; 347 struct inode *bd_inode = sb->s_bdev->bd_inode;
348 struct backing_dev_info *bdi = bd_inode->i_mapping->backing_dev_info; 348 struct backing_dev_info *bdi = bd_inode->i_mapping->backing_dev_info;
349 349
350 return bdi->dev == NULL; 350 return bdi->dev == NULL;
351 } 351 }
352 352
353 static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn) 353 static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
354 { 354 {
355 struct super_block *sb = journal->j_private; 355 struct super_block *sb = journal->j_private;
356 struct ext4_sb_info *sbi = EXT4_SB(sb); 356 struct ext4_sb_info *sbi = EXT4_SB(sb);
357 int error = is_journal_aborted(journal); 357 int error = is_journal_aborted(journal);
358 struct ext4_journal_cb_entry *jce; 358 struct ext4_journal_cb_entry *jce;
359 359
360 BUG_ON(txn->t_state == T_FINISHED); 360 BUG_ON(txn->t_state == T_FINISHED);
361 spin_lock(&sbi->s_md_lock); 361 spin_lock(&sbi->s_md_lock);
362 while (!list_empty(&txn->t_private_list)) { 362 while (!list_empty(&txn->t_private_list)) {
363 jce = list_entry(txn->t_private_list.next, 363 jce = list_entry(txn->t_private_list.next,
364 struct ext4_journal_cb_entry, jce_list); 364 struct ext4_journal_cb_entry, jce_list);
365 list_del_init(&jce->jce_list); 365 list_del_init(&jce->jce_list);
366 spin_unlock(&sbi->s_md_lock); 366 spin_unlock(&sbi->s_md_lock);
367 jce->jce_func(sb, jce, error); 367 jce->jce_func(sb, jce, error);
368 spin_lock(&sbi->s_md_lock); 368 spin_lock(&sbi->s_md_lock);
369 } 369 }
370 spin_unlock(&sbi->s_md_lock); 370 spin_unlock(&sbi->s_md_lock);
371 } 371 }
372 372
373 /* Deal with the reporting of failure conditions on a filesystem such as 373 /* Deal with the reporting of failure conditions on a filesystem such as
374 * inconsistencies detected or read IO failures. 374 * inconsistencies detected or read IO failures.
375 * 375 *
376 * On ext2, we can store the error state of the filesystem in the 376 * On ext2, we can store the error state of the filesystem in the
377 * superblock. That is not possible on ext4, because we may have other 377 * superblock. That is not possible on ext4, because we may have other
378 * write ordering constraints on the superblock which prevent us from 378 * write ordering constraints on the superblock which prevent us from
379 * writing it out straight away; and given that the journal is about to 379 * writing it out straight away; and given that the journal is about to
380 * be aborted, we can't rely on the current, or future, transactions to 380 * be aborted, we can't rely on the current, or future, transactions to
381 * write out the superblock safely. 381 * write out the superblock safely.
382 * 382 *
383 * We'll just use the jbd2_journal_abort() error code to record an error in 383 * We'll just use the jbd2_journal_abort() error code to record an error in
384 * the journal instead. On recovery, the journal will complain about 384 * the journal instead. On recovery, the journal will complain about
385 * that error until we've noted it down and cleared it. 385 * that error until we've noted it down and cleared it.
386 */ 386 */
387 387
388 static void ext4_handle_error(struct super_block *sb) 388 static void ext4_handle_error(struct super_block *sb)
389 { 389 {
390 if (sb->s_flags & MS_RDONLY) 390 if (sb->s_flags & MS_RDONLY)
391 return; 391 return;
392 392
393 if (!test_opt(sb, ERRORS_CONT)) { 393 if (!test_opt(sb, ERRORS_CONT)) {
394 journal_t *journal = EXT4_SB(sb)->s_journal; 394 journal_t *journal = EXT4_SB(sb)->s_journal;
395 395
396 EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED; 396 EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
397 if (journal) 397 if (journal)
398 jbd2_journal_abort(journal, -EIO); 398 jbd2_journal_abort(journal, -EIO);
399 } 399 }
400 if (test_opt(sb, ERRORS_RO)) { 400 if (test_opt(sb, ERRORS_RO)) {
401 ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); 401 ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
402 /* 402 /*
403 * Make sure updated value of ->s_mount_flags will be visible 403 * Make sure updated value of ->s_mount_flags will be visible
404 * before ->s_flags update 404 * before ->s_flags update
405 */ 405 */
406 smp_wmb(); 406 smp_wmb();
407 sb->s_flags |= MS_RDONLY; 407 sb->s_flags |= MS_RDONLY;
408 } 408 }
409 if (test_opt(sb, ERRORS_PANIC)) 409 if (test_opt(sb, ERRORS_PANIC))
410 panic("EXT4-fs (device %s): panic forced after error\n", 410 panic("EXT4-fs (device %s): panic forced after error\n",
411 sb->s_id); 411 sb->s_id);
412 } 412 }
413 413
/*
 * Returns true when another "EXT4-fs error" message may be emitted for
 * this superblock, per its per-sb ratelimit state; suppressed messages
 * are accounted by ___ratelimit() itself.
 */
#define ext4_error_ratelimit(sb)					\
		___ratelimit(&(EXT4_SB(sb)->s_err_ratelimit_state),	\
			     "EXT4-fs error")
417 417
418 void __ext4_error(struct super_block *sb, const char *function, 418 void __ext4_error(struct super_block *sb, const char *function,
419 unsigned int line, const char *fmt, ...) 419 unsigned int line, const char *fmt, ...)
420 { 420 {
421 struct va_format vaf; 421 struct va_format vaf;
422 va_list args; 422 va_list args;
423 423
424 if (ext4_error_ratelimit(sb)) { 424 if (ext4_error_ratelimit(sb)) {
425 va_start(args, fmt); 425 va_start(args, fmt);
426 vaf.fmt = fmt; 426 vaf.fmt = fmt;
427 vaf.va = &args; 427 vaf.va = &args;
428 printk(KERN_CRIT 428 printk(KERN_CRIT
429 "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n", 429 "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
430 sb->s_id, function, line, current->comm, &vaf); 430 sb->s_id, function, line, current->comm, &vaf);
431 va_end(args); 431 va_end(args);
432 } 432 }
433 save_error_info(sb, function, line); 433 save_error_info(sb, function, line);
434 ext4_handle_error(sb); 434 ext4_handle_error(sb);
435 } 435 }
436 436
437 void __ext4_error_inode(struct inode *inode, const char *function, 437 void __ext4_error_inode(struct inode *inode, const char *function,
438 unsigned int line, ext4_fsblk_t block, 438 unsigned int line, ext4_fsblk_t block,
439 const char *fmt, ...) 439 const char *fmt, ...)
440 { 440 {
441 va_list args; 441 va_list args;
442 struct va_format vaf; 442 struct va_format vaf;
443 struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; 443 struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
444 444
445 es->s_last_error_ino = cpu_to_le32(inode->i_ino); 445 es->s_last_error_ino = cpu_to_le32(inode->i_ino);
446 es->s_last_error_block = cpu_to_le64(block); 446 es->s_last_error_block = cpu_to_le64(block);
447 if (ext4_error_ratelimit(inode->i_sb)) { 447 if (ext4_error_ratelimit(inode->i_sb)) {
448 va_start(args, fmt); 448 va_start(args, fmt);
449 vaf.fmt = fmt; 449 vaf.fmt = fmt;
450 vaf.va = &args; 450 vaf.va = &args;
451 if (block) 451 if (block)
452 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: " 452 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
453 "inode #%lu: block %llu: comm %s: %pV\n", 453 "inode #%lu: block %llu: comm %s: %pV\n",
454 inode->i_sb->s_id, function, line, inode->i_ino, 454 inode->i_sb->s_id, function, line, inode->i_ino,
455 block, current->comm, &vaf); 455 block, current->comm, &vaf);
456 else 456 else
457 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: " 457 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
458 "inode #%lu: comm %s: %pV\n", 458 "inode #%lu: comm %s: %pV\n",
459 inode->i_sb->s_id, function, line, inode->i_ino, 459 inode->i_sb->s_id, function, line, inode->i_ino,
460 current->comm, &vaf); 460 current->comm, &vaf);
461 va_end(args); 461 va_end(args);
462 } 462 }
463 save_error_info(inode->i_sb, function, line); 463 save_error_info(inode->i_sb, function, line);
464 ext4_handle_error(inode->i_sb); 464 ext4_handle_error(inode->i_sb);
465 } 465 }
466 466
467 void __ext4_error_file(struct file *file, const char *function, 467 void __ext4_error_file(struct file *file, const char *function,
468 unsigned int line, ext4_fsblk_t block, 468 unsigned int line, ext4_fsblk_t block,
469 const char *fmt, ...) 469 const char *fmt, ...)
470 { 470 {
471 va_list args; 471 va_list args;
472 struct va_format vaf; 472 struct va_format vaf;
473 struct ext4_super_block *es; 473 struct ext4_super_block *es;
474 struct inode *inode = file_inode(file); 474 struct inode *inode = file_inode(file);
475 char pathname[80], *path; 475 char pathname[80], *path;
476 476
477 es = EXT4_SB(inode->i_sb)->s_es; 477 es = EXT4_SB(inode->i_sb)->s_es;
478 es->s_last_error_ino = cpu_to_le32(inode->i_ino); 478 es->s_last_error_ino = cpu_to_le32(inode->i_ino);
479 if (ext4_error_ratelimit(inode->i_sb)) { 479 if (ext4_error_ratelimit(inode->i_sb)) {
480 path = d_path(&(file->f_path), pathname, sizeof(pathname)); 480 path = d_path(&(file->f_path), pathname, sizeof(pathname));
481 if (IS_ERR(path)) 481 if (IS_ERR(path))
482 path = "(unknown)"; 482 path = "(unknown)";
483 va_start(args, fmt); 483 va_start(args, fmt);
484 vaf.fmt = fmt; 484 vaf.fmt = fmt;
485 vaf.va = &args; 485 vaf.va = &args;
486 if (block) 486 if (block)
487 printk(KERN_CRIT 487 printk(KERN_CRIT
488 "EXT4-fs error (device %s): %s:%d: inode #%lu: " 488 "EXT4-fs error (device %s): %s:%d: inode #%lu: "
489 "block %llu: comm %s: path %s: %pV\n", 489 "block %llu: comm %s: path %s: %pV\n",
490 inode->i_sb->s_id, function, line, inode->i_ino, 490 inode->i_sb->s_id, function, line, inode->i_ino,
491 block, current->comm, path, &vaf); 491 block, current->comm, path, &vaf);
492 else 492 else
493 printk(KERN_CRIT 493 printk(KERN_CRIT
494 "EXT4-fs error (device %s): %s:%d: inode #%lu: " 494 "EXT4-fs error (device %s): %s:%d: inode #%lu: "
495 "comm %s: path %s: %pV\n", 495 "comm %s: path %s: %pV\n",
496 inode->i_sb->s_id, function, line, inode->i_ino, 496 inode->i_sb->s_id, function, line, inode->i_ino,
497 current->comm, path, &vaf); 497 current->comm, path, &vaf);
498 va_end(args); 498 va_end(args);
499 } 499 }
500 save_error_info(inode->i_sb, function, line); 500 save_error_info(inode->i_sb, function, line);
501 ext4_handle_error(inode->i_sb); 501 ext4_handle_error(inode->i_sb);
502 } 502 }
503 503
504 const char *ext4_decode_error(struct super_block *sb, int errno, 504 const char *ext4_decode_error(struct super_block *sb, int errno,
505 char nbuf[16]) 505 char nbuf[16])
506 { 506 {
507 char *errstr = NULL; 507 char *errstr = NULL;
508 508
509 switch (errno) { 509 switch (errno) {
510 case -EIO: 510 case -EIO:
511 errstr = "IO failure"; 511 errstr = "IO failure";
512 break; 512 break;
513 case -ENOMEM: 513 case -ENOMEM:
514 errstr = "Out of memory"; 514 errstr = "Out of memory";
515 break; 515 break;
516 case -EROFS: 516 case -EROFS:
517 if (!sb || (EXT4_SB(sb)->s_journal && 517 if (!sb || (EXT4_SB(sb)->s_journal &&
518 EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT)) 518 EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT))
519 errstr = "Journal has aborted"; 519 errstr = "Journal has aborted";
520 else 520 else
521 errstr = "Readonly filesystem"; 521 errstr = "Readonly filesystem";
522 break; 522 break;
523 default: 523 default:
524 /* If the caller passed in an extra buffer for unknown 524 /* If the caller passed in an extra buffer for unknown
525 * errors, textualise them now. Else we just return 525 * errors, textualise them now. Else we just return
526 * NULL. */ 526 * NULL. */
527 if (nbuf) { 527 if (nbuf) {
528 /* Check for truncated error codes... */ 528 /* Check for truncated error codes... */
529 if (snprintf(nbuf, 16, "error %d", -errno) >= 0) 529 if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
530 errstr = nbuf; 530 errstr = nbuf;
531 } 531 }
532 break; 532 break;
533 } 533 }
534 534
535 return errstr; 535 return errstr;
536 } 536 }
537 537
538 /* __ext4_std_error decodes expected errors from journaling functions 538 /* __ext4_std_error decodes expected errors from journaling functions
539 * automatically and invokes the appropriate error response. */ 539 * automatically and invokes the appropriate error response. */
540 540
541 void __ext4_std_error(struct super_block *sb, const char *function, 541 void __ext4_std_error(struct super_block *sb, const char *function,
542 unsigned int line, int errno) 542 unsigned int line, int errno)
543 { 543 {
544 char nbuf[16]; 544 char nbuf[16];
545 const char *errstr; 545 const char *errstr;
546 546
547 /* Special case: if the error is EROFS, and we're not already 547 /* Special case: if the error is EROFS, and we're not already
548 * inside a transaction, then there's really no point in logging 548 * inside a transaction, then there's really no point in logging
549 * an error. */ 549 * an error. */
550 if (errno == -EROFS && journal_current_handle() == NULL && 550 if (errno == -EROFS && journal_current_handle() == NULL &&
551 (sb->s_flags & MS_RDONLY)) 551 (sb->s_flags & MS_RDONLY))
552 return; 552 return;
553 553
554 if (ext4_error_ratelimit(sb)) { 554 if (ext4_error_ratelimit(sb)) {
555 errstr = ext4_decode_error(sb, errno, nbuf); 555 errstr = ext4_decode_error(sb, errno, nbuf);
556 printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n", 556 printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
557 sb->s_id, function, line, errstr); 557 sb->s_id, function, line, errstr);
558 } 558 }
559 559
560 save_error_info(sb, function, line); 560 save_error_info(sb, function, line);
561 ext4_handle_error(sb); 561 ext4_handle_error(sb);
562 } 562 }
563 563
564 /* 564 /*
565 * ext4_abort is a much stronger failure handler than ext4_error. The 565 * ext4_abort is a much stronger failure handler than ext4_error. The
566 * abort function may be used to deal with unrecoverable failures such 566 * abort function may be used to deal with unrecoverable failures such
567 * as journal IO errors or ENOMEM at a critical moment in log management. 567 * as journal IO errors or ENOMEM at a critical moment in log management.
568 * 568 *
569 * We unconditionally force the filesystem into an ABORT|READONLY state, 569 * We unconditionally force the filesystem into an ABORT|READONLY state,
570 * unless the error response on the fs has been set to panic in which 570 * unless the error response on the fs has been set to panic in which
571 * case we take the easy way out and panic immediately. 571 * case we take the easy way out and panic immediately.
572 */ 572 */
573 573
/*
 * Stronger failure handler than ext4_error(): unconditionally force the
 * filesystem into an ABORT|READONLY state (or panic if errors=panic).
 * Unlike __ext4_error(), the message is never rate-limited.
 */
void __ext4_abort(struct super_block *sb, const char *function,
		unsigned int line, const char *fmt, ...)
{
	va_list args;

	/* Record the error before doing anything that might itself fail. */
	save_error_info(sb, function, line);
	va_start(args, fmt);
	printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: ", sb->s_id,
	       function, line);
	vprintk(fmt, args);
	printk("\n");
	va_end(args);

	if ((sb->s_flags & MS_RDONLY) == 0) {
		ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
		EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
		/*
		 * Make sure updated value of ->s_mount_flags will be visible
		 * before ->s_flags update
		 */
		smp_wmb();
		sb->s_flags |= MS_RDONLY;
		if (EXT4_SB(sb)->s_journal)
			jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
		/*
		 * Save again: aborting the journal may have updated the
		 * error state that save_error_info() records.
		 */
		save_error_info(sb, function, line);
	}
	if (test_opt(sb, ERRORS_PANIC))
		panic("EXT4-fs panic from previous error\n");
}
603 603
604 void __ext4_msg(struct super_block *sb, 604 void __ext4_msg(struct super_block *sb,
605 const char *prefix, const char *fmt, ...) 605 const char *prefix, const char *fmt, ...)
606 { 606 {
607 struct va_format vaf; 607 struct va_format vaf;
608 va_list args; 608 va_list args;
609 609
610 if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state), "EXT4-fs")) 610 if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state), "EXT4-fs"))
611 return; 611 return;
612 612
613 va_start(args, fmt); 613 va_start(args, fmt);
614 vaf.fmt = fmt; 614 vaf.fmt = fmt;
615 vaf.va = &args; 615 vaf.va = &args;
616 printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf); 616 printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
617 va_end(args); 617 va_end(args);
618 } 618 }
619 619
620 void __ext4_warning(struct super_block *sb, const char *function, 620 void __ext4_warning(struct super_block *sb, const char *function,
621 unsigned int line, const char *fmt, ...) 621 unsigned int line, const char *fmt, ...)
622 { 622 {
623 struct va_format vaf; 623 struct va_format vaf;
624 va_list args; 624 va_list args;
625 625
626 if (!___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state), 626 if (!___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state),
627 "EXT4-fs warning")) 627 "EXT4-fs warning"))
628 return; 628 return;
629 629
630 va_start(args, fmt); 630 va_start(args, fmt);
631 vaf.fmt = fmt; 631 vaf.fmt = fmt;
632 vaf.va = &args; 632 vaf.va = &args;
633 printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: %pV\n", 633 printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: %pV\n",
634 sb->s_id, function, line, &vaf); 634 sb->s_id, function, line, &vaf);
635 va_end(args); 635 va_end(args);
636 } 636 }
637 637
/*
 * Report an error discovered while holding a block-group lock.  Caller
 * holds ext4_lock_group(sb, grp); for the ERRORS_RO/PANIC policies the
 * lock is dropped around ext4_handle_error() and then re-taken, hence
 * the __releases/__acquires annotations.
 */
void __ext4_grp_locked_error(const char *function, unsigned int line,
			     struct super_block *sb, ext4_group_t grp,
			     unsigned long ino, ext4_fsblk_t block,
			     const char *fmt, ...)
__releases(bitlock)
__acquires(bitlock)
{
	struct va_format vaf;
	va_list args;
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;

	/* Always record the error location, even if the printk is
	 * rate-limited away. */
	es->s_last_error_ino = cpu_to_le32(ino);
	es->s_last_error_block = cpu_to_le64(block);
	__save_error_info(sb, function, line);

	if (ext4_error_ratelimit(sb)) {
		va_start(args, fmt);
		vaf.fmt = fmt;
		vaf.va = &args;
		printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
		       sb->s_id, function, line, grp);
		if (ino)
			printk(KERN_CONT "inode %lu: ", ino);
		if (block)
			printk(KERN_CONT "block %llu:",
			       (unsigned long long) block);
		printk(KERN_CONT "%pV\n", &vaf);
		va_end(args);
	}

	/* errors=continue: just push the superblock out and keep going,
	 * still holding the group lock. */
	if (test_opt(sb, ERRORS_CONT)) {
		ext4_commit_super(sb, 0);
		return;
	}

	ext4_unlock_group(sb, grp);
	ext4_handle_error(sb);
	/*
	 * We only get here in the ERRORS_RO case; relocking the group
	 * may be dangerous, but nothing bad will happen since the
	 * filesystem will have already been marked read/only and the
	 * journal has been aborted.  We return 1 as a hint to callers
	 * who might what to use the return value from
	 * ext4_grp_locked_error() to distinguish between the
	 * ERRORS_CONT and ERRORS_RO case, and perhaps return more
	 * aggressively from the ext4 function in question, with a
	 * more appropriate error code.
	 */
	ext4_lock_group(sb, grp);
	return;
}
689 689
/*
 * Upgrade an old (GOOD_OLD_REV) superblock to EXT4_DYNAMIC_REV so that
 * dynamic feature flags can be stored.  No-op if the filesystem is
 * already at or beyond the dynamic revision.
 */
void ext4_update_dynamic_rev(struct super_block *sb)
{
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;

	if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
		return;

	ext4_warning(sb,
		     "updating to rev %d because of new feature flag, "
		     "running e2fsck is recommended",
		     EXT4_DYNAMIC_REV);

	/* Fields that were implicit in the old revision must now be
	 * stored explicitly. */
	es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO);
	es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE);
	es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV);
	/* leave es->s_feature_*compat flags alone */
	/* es->s_uuid will be set by e2fsck if empty */

	/*
	 * The rest of the superblock fields should be zero, and if not it
	 * means they are likely already in use, so leave them alone.  We
	 * can leave it up to e2fsck to clean up any inconsistencies there.
	 */
}
714 714
715 /* 715 /*
716 * Open the external journal device 716 * Open the external journal device
717 */ 717 */
718 static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb) 718 static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
719 { 719 {
720 struct block_device *bdev; 720 struct block_device *bdev;
721 char b[BDEVNAME_SIZE]; 721 char b[BDEVNAME_SIZE];
722 722
723 bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb); 723 bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb);
724 if (IS_ERR(bdev)) 724 if (IS_ERR(bdev))
725 goto fail; 725 goto fail;
726 return bdev; 726 return bdev;
727 727
728 fail: 728 fail:
729 ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld", 729 ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld",
730 __bdevname(dev, b), PTR_ERR(bdev)); 730 __bdevname(dev, b), PTR_ERR(bdev));
731 return NULL; 731 return NULL;
732 } 732 }
733 733
734 /* 734 /*
735 * Release the journal device 735 * Release the journal device
736 */ 736 */
/*
 * Release the journal device opened by ext4_blkdev_get(); the mode
 * flags must match those used when opening.
 */
static void ext4_blkdev_put(struct block_device *bdev)
{
	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
}
741 741
742 static void ext4_blkdev_remove(struct ext4_sb_info *sbi) 742 static void ext4_blkdev_remove(struct ext4_sb_info *sbi)
743 { 743 {
744 struct block_device *bdev; 744 struct block_device *bdev;
745 bdev = sbi->journal_bdev; 745 bdev = sbi->journal_bdev;
746 if (bdev) { 746 if (bdev) {
747 ext4_blkdev_put(bdev); 747 ext4_blkdev_put(bdev);
748 sbi->journal_bdev = NULL; 748 sbi->journal_bdev = NULL;
749 } 749 }
750 } 750 }
751 751
/* Map an s_orphan list node back to the VFS inode embedding it. */
static inline struct inode *orphan_list_entry(struct list_head *l)
{
	return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode;
}
756 756
757 static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi) 757 static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
758 { 758 {
759 struct list_head *l; 759 struct list_head *l;
760 760
761 ext4_msg(sb, KERN_ERR, "sb orphan head is %d", 761 ext4_msg(sb, KERN_ERR, "sb orphan head is %d",
762 le32_to_cpu(sbi->s_es->s_last_orphan)); 762 le32_to_cpu(sbi->s_es->s_last_orphan));
763 763
764 printk(KERN_ERR "sb_info orphan list:\n"); 764 printk(KERN_ERR "sb_info orphan list:\n");
765 list_for_each(l, &sbi->s_orphan) { 765 list_for_each(l, &sbi->s_orphan) {
766 struct inode *inode = orphan_list_entry(l); 766 struct inode *inode = orphan_list_entry(l);
767 printk(KERN_ERR " " 767 printk(KERN_ERR " "
768 "inode %s:%lu at %p: mode %o, nlink %d, next %d\n", 768 "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
769 inode->i_sb->s_id, inode->i_ino, inode, 769 inode->i_sb->s_id, inode->i_ino, inode,
770 inode->i_mode, inode->i_nlink, 770 inode->i_mode, inode->i_nlink,
771 NEXT_ORPHAN(inode)); 771 NEXT_ORPHAN(inode));
772 } 772 }
773 } 773 }
774 774
/*
 * Tear down a mounted ext4 filesystem.  The order below is deliberate:
 * writers are quiesced, the journal is destroyed, subsystems are
 * released, the superblock is committed, and only then is the memory
 * freed and the kobject destroyed.
 */
static void ext4_put_super(struct super_block *sb)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_super_block *es = sbi->s_es;
	int i, err;

	/* Stop background work: lazy-init thread, quota, and pending
	 * unwritten-extent conversions. */
	ext4_unregister_li_request(sb);
	dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);

	flush_workqueue(sbi->rsv_conversion_wq);
	destroy_workqueue(sbi->rsv_conversion_wq);

	if (sbi->s_journal) {
		err = jbd2_journal_destroy(sbi->s_journal);
		sbi->s_journal = NULL;
		if (err < 0)
			ext4_abort(sb, "Couldn't clean up the journal");
	}

	/* Release per-sb subsystems now that the journal is gone. */
	ext4_es_unregister_shrinker(sbi);
	del_timer(&sbi->s_err_report);
	ext4_release_system_zone(sb);
	ext4_mb_release(sb);
	ext4_ext_release(sb);
	ext4_xattr_put_super(sb);

	/* A clean r/w unmount clears the recovery flag and records the
	 * final mount state before the last superblock commit. */
	if (!(sb->s_flags & MS_RDONLY)) {
		EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
		es->s_state = cpu_to_le16(sbi->s_mount_state);
	}
	if (!(sb->s_flags & MS_RDONLY))
		ext4_commit_super(sb, 1);

	if (sbi->s_proc) {
		remove_proc_entry("options", sbi->s_proc);
		remove_proc_entry(sb->s_id, ext4_proc_root);
	}
	kobject_del(&sbi->s_kobj);

	/* Free in-memory metadata caches and counters. */
	for (i = 0; i < sbi->s_gdb_count; i++)
		brelse(sbi->s_group_desc[i]);
	ext4_kvfree(sbi->s_group_desc);
	ext4_kvfree(sbi->s_flex_groups);
	percpu_counter_destroy(&sbi->s_freeclusters_counter);
	percpu_counter_destroy(&sbi->s_freeinodes_counter);
	percpu_counter_destroy(&sbi->s_dirs_counter);
	percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
	percpu_counter_destroy(&sbi->s_extent_cache_cnt);
	brelse(sbi->s_sbh);
#ifdef CONFIG_QUOTA
	for (i = 0; i < MAXQUOTAS; i++)
		kfree(sbi->s_qf_names[i]);
#endif

	/* Debugging code just in case the in-memory inode orphan list
	 * isn't empty.  The on-disk one can be non-empty if we've
	 * detected an error and taken the fs readonly, but the
	 * in-memory list had better be clean by this point. */
	if (!list_empty(&sbi->s_orphan))
		dump_orphan_list(sb, sbi);
	J_ASSERT(list_empty(&sbi->s_orphan));

	invalidate_bdev(sb->s_bdev);
	if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
		/*
		 * Invalidate the journal device's buffers.  We don't want them
		 * floating about in memory - the physical journal device may
		 * hotswapped, and it breaks the `ro-after' testing code.
		 */
		sync_blockdev(sbi->journal_bdev);
		invalidate_bdev(sbi->journal_bdev);
		ext4_blkdev_remove(sbi);
	}
	if (sbi->s_mmp_tsk)
		kthread_stop(sbi->s_mmp_tsk);
	sb->s_fs_info = NULL;
	/*
	 * Now that we are completely done shutting down the
	 * superblock, we need to actually destroy the kobject.
	 */
	kobject_put(&sbi->s_kobj);
	wait_for_completion(&sbi->s_kobj_unregister);
	if (sbi->s_chksum_driver)
		crypto_free_shash(sbi->s_chksum_driver);
	kfree(sbi->s_blockgroup_lock);
	kfree(sbi);
}
862 862
/* Slab cache for struct ext4_inode_info; created by init_inodecache(). */
static struct kmem_cache *ext4_inode_cachep;
864 864
/*
 * Called inside transaction, so use GFP_NOFS
 */
/*
 * Allocate and initialize the per-inode ext4 state for a new VFS inode.
 * Only the fields not covered by the slab constructor (init_once) are
 * reset here.  Returns NULL on allocation failure.
 */
static struct inode *ext4_alloc_inode(struct super_block *sb)
{
	struct ext4_inode_info *ei;

	ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
	if (!ei)
		return NULL;

	ei->vfs_inode.i_version = 1;
	/* Multi-block preallocation state. */
	INIT_LIST_HEAD(&ei->i_prealloc_list);
	spin_lock_init(&ei->i_prealloc_lock);
	/* Extent-status tree and its LRU bookkeeping. */
	ext4_es_init_tree(&ei->i_es_tree);
	rwlock_init(&ei->i_es_lock);
	INIT_LIST_HEAD(&ei->i_es_lru);
	ei->i_es_lru_nr = 0;
	ei->i_touch_when = 0;
	/* Delayed-allocation reservation accounting. */
	ei->i_reserved_data_blocks = 0;
	ei->i_reserved_meta_blocks = 0;
	ei->i_allocated_meta_blocks = 0;
	ei->i_da_metadata_calc_len = 0;
	ei->i_da_metadata_calc_last_lblock = 0;
	spin_lock_init(&(ei->i_block_reservation_lock));
#ifdef CONFIG_QUOTA
	ei->i_reserved_quota = 0;
#endif
	ei->jinode = NULL;
	/* Unwritten-extent conversion (end-io) state. */
	INIT_LIST_HEAD(&ei->i_rsv_conversion_list);
	spin_lock_init(&ei->i_completed_io_lock);
	ei->i_sync_tid = 0;
	ei->i_datasync_tid = 0;
	atomic_set(&ei->i_ioend_count, 0);
	atomic_set(&ei->i_unwritten, 0);
	INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);

	return &ei->vfs_inode;
}
904 904
/*
 * VFS ->drop_inode hook: defer to generic_drop_inode() and record the
 * decision in the ext4 tracepoint stream.
 */
static int ext4_drop_inode(struct inode *inode)
{
	int ret = generic_drop_inode(inode);

	trace_ext4_drop_inode(inode, ret);
	return ret;
}
912 912
/* RCU callback: free the ext4_inode_info once readers are done. */
static void ext4_i_callback(struct rcu_head *head)
{
	struct inode *inode = container_of(head, struct inode, i_rcu);
	kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
}
918 918
919 static void ext4_destroy_inode(struct inode *inode) 919 static void ext4_destroy_inode(struct inode *inode)
920 { 920 {
921 if (!list_empty(&(EXT4_I(inode)->i_orphan))) { 921 if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
922 ext4_msg(inode->i_sb, KERN_ERR, 922 ext4_msg(inode->i_sb, KERN_ERR,
923 "Inode %lu (%p): orphan list check failed!", 923 "Inode %lu (%p): orphan list check failed!",
924 inode->i_ino, EXT4_I(inode)); 924 inode->i_ino, EXT4_I(inode));
925 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4, 925 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
926 EXT4_I(inode), sizeof(struct ext4_inode_info), 926 EXT4_I(inode), sizeof(struct ext4_inode_info),
927 true); 927 true);
928 dump_stack(); 928 dump_stack();
929 } 929 }
930 call_rcu(&inode->i_rcu, ext4_i_callback); 930 call_rcu(&inode->i_rcu, ext4_i_callback);
931 } 931 }
932 932
933 static void init_once(void *foo) 933 static void init_once(void *foo)
934 { 934 {
935 struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; 935 struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;
936 936
937 INIT_LIST_HEAD(&ei->i_orphan); 937 INIT_LIST_HEAD(&ei->i_orphan);
938 init_rwsem(&ei->xattr_sem); 938 init_rwsem(&ei->xattr_sem);
939 init_rwsem(&ei->i_data_sem); 939 init_rwsem(&ei->i_data_sem);
940 inode_init_once(&ei->vfs_inode); 940 inode_init_once(&ei->vfs_inode);
941 } 941 }
942 942
943 static int init_inodecache(void) 943 static int init_inodecache(void)
944 { 944 {
945 ext4_inode_cachep = kmem_cache_create("ext4_inode_cache", 945 ext4_inode_cachep = kmem_cache_create("ext4_inode_cache",
946 sizeof(struct ext4_inode_info), 946 sizeof(struct ext4_inode_info),
947 0, (SLAB_RECLAIM_ACCOUNT| 947 0, (SLAB_RECLAIM_ACCOUNT|
948 SLAB_MEM_SPREAD), 948 SLAB_MEM_SPREAD),
949 init_once); 949 init_once);
950 if (ext4_inode_cachep == NULL) 950 if (ext4_inode_cachep == NULL)
951 return -ENOMEM; 951 return -ENOMEM;
952 return 0; 952 return 0;
953 } 953 }
954 954
955 static void destroy_inodecache(void) 955 static void destroy_inodecache(void)
956 { 956 {
957 /* 957 /*
958 * Make sure all delayed rcu free inodes are flushed before we 958 * Make sure all delayed rcu free inodes are flushed before we
959 * destroy cache. 959 * destroy cache.
960 */ 960 */
961 rcu_barrier(); 961 rcu_barrier();
962 kmem_cache_destroy(ext4_inode_cachep); 962 kmem_cache_destroy(ext4_inode_cachep);
963 } 963 }
964 964
965 void ext4_clear_inode(struct inode *inode) 965 void ext4_clear_inode(struct inode *inode)
966 { 966 {
967 invalidate_inode_buffers(inode); 967 invalidate_inode_buffers(inode);
968 clear_inode(inode); 968 clear_inode(inode);
969 dquot_drop(inode); 969 dquot_drop(inode);
970 ext4_discard_preallocations(inode); 970 ext4_discard_preallocations(inode);
971 ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); 971 ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
972 ext4_es_lru_del(inode); 972 ext4_es_lru_del(inode);
973 if (EXT4_I(inode)->jinode) { 973 if (EXT4_I(inode)->jinode) {
974 jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode), 974 jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
975 EXT4_I(inode)->jinode); 975 EXT4_I(inode)->jinode);
976 jbd2_free_inode(EXT4_I(inode)->jinode); 976 jbd2_free_inode(EXT4_I(inode)->jinode);
977 EXT4_I(inode)->jinode = NULL; 977 EXT4_I(inode)->jinode = NULL;
978 } 978 }
979 } 979 }
980 980
981 static struct inode *ext4_nfs_get_inode(struct super_block *sb, 981 static struct inode *ext4_nfs_get_inode(struct super_block *sb,
982 u64 ino, u32 generation) 982 u64 ino, u32 generation)
983 { 983 {
984 struct inode *inode; 984 struct inode *inode;
985 985
986 if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO) 986 if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)
987 return ERR_PTR(-ESTALE); 987 return ERR_PTR(-ESTALE);
988 if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)) 988 if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))
989 return ERR_PTR(-ESTALE); 989 return ERR_PTR(-ESTALE);
990 990
991 /* iget isn't really right if the inode is currently unallocated!! 991 /* iget isn't really right if the inode is currently unallocated!!
992 * 992 *
993 * ext4_read_inode will return a bad_inode if the inode had been 993 * ext4_read_inode will return a bad_inode if the inode had been
994 * deleted, so we should be safe. 994 * deleted, so we should be safe.
995 * 995 *
996 * Currently we don't know the generation for parent directory, so 996 * Currently we don't know the generation for parent directory, so
997 * a generation of 0 means "accept any" 997 * a generation of 0 means "accept any"
998 */ 998 */
999 inode = ext4_iget(sb, ino); 999 inode = ext4_iget(sb, ino);
1000 if (IS_ERR(inode)) 1000 if (IS_ERR(inode))
1001 return ERR_CAST(inode); 1001 return ERR_CAST(inode);
1002 if (generation && inode->i_generation != generation) { 1002 if (generation && inode->i_generation != generation) {
1003 iput(inode); 1003 iput(inode);
1004 return ERR_PTR(-ESTALE); 1004 return ERR_PTR(-ESTALE);
1005 } 1005 }
1006 1006
1007 return inode; 1007 return inode;
1008 } 1008 }
1009 1009
1010 static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid, 1010 static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid,
1011 int fh_len, int fh_type) 1011 int fh_len, int fh_type)
1012 { 1012 {
1013 return generic_fh_to_dentry(sb, fid, fh_len, fh_type, 1013 return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
1014 ext4_nfs_get_inode); 1014 ext4_nfs_get_inode);
1015 } 1015 }
1016 1016
1017 static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid, 1017 static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
1018 int fh_len, int fh_type) 1018 int fh_len, int fh_type)
1019 { 1019 {
1020 return generic_fh_to_parent(sb, fid, fh_len, fh_type, 1020 return generic_fh_to_parent(sb, fid, fh_len, fh_type,
1021 ext4_nfs_get_inode); 1021 ext4_nfs_get_inode);
1022 } 1022 }
1023 1023
1024 /* 1024 /*
1025 * Try to release metadata pages (indirect blocks, directories) which are 1025 * Try to release metadata pages (indirect blocks, directories) which are
1026 * mapped via the block device. Since these pages could have journal heads 1026 * mapped via the block device. Since these pages could have journal heads
1027 * which would prevent try_to_free_buffers() from freeing them, we must use 1027 * which would prevent try_to_free_buffers() from freeing them, we must use
1028 * jbd2 layer's try_to_free_buffers() function to release them. 1028 * jbd2 layer's try_to_free_buffers() function to release them.
1029 */ 1029 */
1030 static int bdev_try_to_free_page(struct super_block *sb, struct page *page, 1030 static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
1031 gfp_t wait) 1031 gfp_t wait)
1032 { 1032 {
1033 journal_t *journal = EXT4_SB(sb)->s_journal; 1033 journal_t *journal = EXT4_SB(sb)->s_journal;
1034 1034
1035 WARN_ON(PageChecked(page)); 1035 WARN_ON(PageChecked(page));
1036 if (!page_has_buffers(page)) 1036 if (!page_has_buffers(page))
1037 return 0; 1037 return 0;
1038 if (journal) 1038 if (journal)
1039 return jbd2_journal_try_to_free_buffers(journal, page, 1039 return jbd2_journal_try_to_free_buffers(journal, page,
1040 wait & ~__GFP_WAIT); 1040 wait & ~__GFP_WAIT);
1041 return try_to_free_buffers(page); 1041 return try_to_free_buffers(page);
1042 } 1042 }
1043 1043
#ifdef CONFIG_QUOTA
/* Human-readable quota type name, used in diagnostics. */
#define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group")
/* Token-paste helper: expand to the per-type journaled-quota option name. */
#define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))

/* Forward declarations for the quota hooks implemented later in this file. */
static int ext4_write_dquot(struct dquot *dquot);
static int ext4_acquire_dquot(struct dquot *dquot);
static int ext4_release_dquot(struct dquot *dquot);
static int ext4_mark_dquot_dirty(struct dquot *dquot);
static int ext4_write_info(struct super_block *sb, int type);
static int ext4_quota_on(struct super_block *sb, int type, int format_id,
			 struct path *path);
static int ext4_quota_on_sysfile(struct super_block *sb, int type,
				 int format_id);
static int ext4_quota_off(struct super_block *sb, int type);
static int ext4_quota_off_sysfile(struct super_block *sb, int type);
static int ext4_quota_on_mount(struct super_block *sb, int type);
static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
			       size_t len, loff_t off);
static ssize_t ext4_quota_write(struct super_block *sb, int type,
				const char *data, size_t len, loff_t off);
static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
			     unsigned int flags);
static int ext4_enable_quotas(struct super_block *sb);

/* Per-dquot callbacks; ext4 overrides the write paths, rest is generic. */
static const struct dquot_operations ext4_quota_operations = {
	.get_reserved_space = ext4_get_reserved_space,
	.write_dquot	= ext4_write_dquot,
	.acquire_dquot	= ext4_acquire_dquot,
	.release_dquot	= ext4_release_dquot,
	.mark_dirty	= ext4_mark_dquot_dirty,
	.write_info	= ext4_write_info,
	.alloc_dquot	= dquot_alloc,
	.destroy_dquot	= dquot_destroy,
};

/* quotactl interface when quota files are regular, user-visible files. */
static const struct quotactl_ops ext4_qctl_operations = {
	.quota_on	= ext4_quota_on,
	.quota_off	= ext4_quota_off,
	.quota_sync	= dquot_quota_sync,
	.get_info	= dquot_get_dqinfo,
	.set_info	= dquot_set_dqinfo,
	.get_dqblk	= dquot_get_dqblk,
	.set_dqblk	= dquot_set_dqblk
};

/* quotactl interface when quota is stored in hidden system files. */
static const struct quotactl_ops ext4_qctl_sysfile_operations = {
	.quota_on_meta	= ext4_quota_on_sysfile,
	.quota_off	= ext4_quota_off_sysfile,
	.quota_sync	= dquot_quota_sync,
	.get_info	= dquot_get_dqinfo,
	.set_info	= dquot_set_dqinfo,
	.get_dqblk	= dquot_get_dqblk,
	.set_dqblk	= dquot_set_dqblk
};
#endif
1099 1099
/* Superblock operations used when the filesystem has a journal. */
static const struct super_operations ext4_sops = {
	.alloc_inode	= ext4_alloc_inode,
	.destroy_inode	= ext4_destroy_inode,
	.write_inode	= ext4_write_inode,
	.dirty_inode	= ext4_dirty_inode,
	.drop_inode	= ext4_drop_inode,
	.evict_inode	= ext4_evict_inode,
	.put_super	= ext4_put_super,
	.sync_fs	= ext4_sync_fs,
	.freeze_fs	= ext4_freeze,
	.unfreeze_fs	= ext4_unfreeze,
	.statfs		= ext4_statfs,
	.remount_fs	= ext4_remount,
	.show_options	= ext4_show_options,
#ifdef CONFIG_QUOTA
	.quota_read	= ext4_quota_read,
	.quota_write	= ext4_quota_write,
#endif
	.bdev_try_to_free_page = bdev_try_to_free_page,
};
1120 1120
/*
 * Superblock operations for no-journal mode: sync_fs differs and there
 * are no freeze/unfreeze hooks.
 */
static const struct super_operations ext4_nojournal_sops = {
	.alloc_inode	= ext4_alloc_inode,
	.destroy_inode	= ext4_destroy_inode,
	.write_inode	= ext4_write_inode,
	.dirty_inode	= ext4_dirty_inode,
	.drop_inode	= ext4_drop_inode,
	.evict_inode	= ext4_evict_inode,
	.sync_fs	= ext4_sync_fs_nojournal,
	.put_super	= ext4_put_super,
	.statfs		= ext4_statfs,
	.remount_fs	= ext4_remount,
	.show_options	= ext4_show_options,
#ifdef CONFIG_QUOTA
	.quota_read	= ext4_quota_read,
	.quota_write	= ext4_quota_write,
#endif
	.bdev_try_to_free_page = bdev_try_to_free_page,
};
1139 1139
/* NFS export operations: file-handle to dentry translation. */
static const struct export_operations ext4_export_ops = {
	.fh_to_dentry = ext4_fh_to_dentry,
	.fh_to_parent = ext4_fh_to_parent,
	.get_parent = ext4_get_parent,
};
1145 1145
/*
 * Token values for the mount options matched by the tokens[] table below.
 * Opt_removed marks options recognised only for compatibility; Opt_err is
 * the no-match sentinel.  Do not reorder without updating the table.
 */
enum {
	Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
	Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
	Opt_nouid32, Opt_debug, Opt_removed,
	Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
	Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload,
	Opt_commit, Opt_min_batch_time, Opt_max_batch_time, Opt_journal_dev,
	Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit,
	Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
	Opt_data_err_abort, Opt_data_err_ignore,
	Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
	Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
	Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
	Opt_usrquota, Opt_grpquota, Opt_i_version,
	Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
	Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
	Opt_inode_readahead_blks, Opt_journal_ioprio,
	Opt_dioread_nolock, Opt_dioread_lock,
	Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
	Opt_max_dir_size_kb,
};
1167 1167
/*
 * Mount-option pattern table for match_token().  Entry order matters:
 * more specific patterns (e.g. "usrjquota=") must precede the general
 * ones that take an argument ("usrjquota=%s").  {Opt_err, NULL} ends
 * the table.
 */
static const match_table_t tokens = {
	{Opt_bsd_df, "bsddf"},
	{Opt_minix_df, "minixdf"},
	{Opt_grpid, "grpid"},
	{Opt_grpid, "bsdgroups"},
	{Opt_nogrpid, "nogrpid"},
	{Opt_nogrpid, "sysvgroups"},
	{Opt_resgid, "resgid=%u"},
	{Opt_resuid, "resuid=%u"},
	{Opt_sb, "sb=%u"},
	{Opt_err_cont, "errors=continue"},
	{Opt_err_panic, "errors=panic"},
	{Opt_err_ro, "errors=remount-ro"},
	{Opt_nouid32, "nouid32"},
	{Opt_debug, "debug"},
	{Opt_removed, "oldalloc"},
	{Opt_removed, "orlov"},
	{Opt_user_xattr, "user_xattr"},
	{Opt_nouser_xattr, "nouser_xattr"},
	{Opt_acl, "acl"},
	{Opt_noacl, "noacl"},
	{Opt_noload, "norecovery"},
	{Opt_noload, "noload"},
	{Opt_removed, "nobh"},
	{Opt_removed, "bh"},
	{Opt_commit, "commit=%u"},
	{Opt_min_batch_time, "min_batch_time=%u"},
	{Opt_max_batch_time, "max_batch_time=%u"},
	{Opt_journal_dev, "journal_dev=%u"},
	{Opt_journal_path, "journal_path=%s"},
	{Opt_journal_checksum, "journal_checksum"},
	{Opt_journal_async_commit, "journal_async_commit"},
	{Opt_abort, "abort"},
	{Opt_data_journal, "data=journal"},
	{Opt_data_ordered, "data=ordered"},
	{Opt_data_writeback, "data=writeback"},
	{Opt_data_err_abort, "data_err=abort"},
	{Opt_data_err_ignore, "data_err=ignore"},
	{Opt_offusrjquota, "usrjquota="},
	{Opt_usrjquota, "usrjquota=%s"},
	{Opt_offgrpjquota, "grpjquota="},
	{Opt_grpjquota, "grpjquota=%s"},
	{Opt_jqfmt_vfsold, "jqfmt=vfsold"},
	{Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
	{Opt_jqfmt_vfsv1, "jqfmt=vfsv1"},
	{Opt_grpquota, "grpquota"},
	{Opt_noquota, "noquota"},
	{Opt_quota, "quota"},
	{Opt_usrquota, "usrquota"},
	{Opt_barrier, "barrier=%u"},
	{Opt_barrier, "barrier"},
	{Opt_nobarrier, "nobarrier"},
	{Opt_i_version, "i_version"},
	{Opt_stripe, "stripe=%u"},
	{Opt_delalloc, "delalloc"},
	{Opt_nodelalloc, "nodelalloc"},
	{Opt_removed, "mblk_io_submit"},
	{Opt_removed, "nomblk_io_submit"},
	{Opt_block_validity, "block_validity"},
	{Opt_noblock_validity, "noblock_validity"},
	{Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
	{Opt_journal_ioprio, "journal_ioprio=%u"},
	{Opt_auto_da_alloc, "auto_da_alloc=%u"},
	{Opt_auto_da_alloc, "auto_da_alloc"},
	{Opt_noauto_da_alloc, "noauto_da_alloc"},
	{Opt_dioread_nolock, "dioread_nolock"},
	{Opt_dioread_lock, "dioread_lock"},
	{Opt_discard, "discard"},
	{Opt_nodiscard, "nodiscard"},
	{Opt_init_itable, "init_itable=%u"},
	{Opt_init_itable, "init_itable"},
	{Opt_noinit_itable, "noinit_itable"},
	{Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
	{Opt_removed, "check=none"},	/* mount option from ext2/3 */
	{Opt_removed, "nocheck"},	/* mount option from ext2/3 */
	{Opt_removed, "reservation"},	/* mount option from ext2/3 */
	{Opt_removed, "noreservation"},	/* mount option from ext2/3 */
	{Opt_removed, "journal=%u"},	/* mount option from ext2/3 */
	{Opt_err, NULL},
};
1248 1248
1249 static ext4_fsblk_t get_sb_block(void **data) 1249 static ext4_fsblk_t get_sb_block(void **data)
1250 { 1250 {
1251 ext4_fsblk_t sb_block; 1251 ext4_fsblk_t sb_block;
1252 char *options = (char *) *data; 1252 char *options = (char *) *data;
1253 1253
1254 if (!options || strncmp(options, "sb=", 3) != 0) 1254 if (!options || strncmp(options, "sb=", 3) != 0)
1255 return 1; /* Default location */ 1255 return 1; /* Default location */
1256 1256
1257 options += 3; 1257 options += 3;
1258 /* TODO: use simple_strtoll with >32bit ext4 */ 1258 /* TODO: use simple_strtoll with >32bit ext4 */
1259 sb_block = simple_strtoul(options, &options, 0); 1259 sb_block = simple_strtoul(options, &options, 0);
1260 if (*options && *options != ',') { 1260 if (*options && *options != ',') {
1261 printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n", 1261 printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n",
1262 (char *) *data); 1262 (char *) *data);
1263 return 1; 1263 return 1;
1264 } 1264 }
1265 if (*options == ',') 1265 if (*options == ',')
1266 options++; 1266 options++;
1267 *data = (void *) options; 1267 *data = (void *) options;
1268 1268
1269 return sb_block; 1269 return sb_block;
1270 } 1270 }
1271 1271
/* Default journal I/O priority: best-effort class, level 3. */
#define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
/* Warning template printed when a deprecated mount option is used. */
static char deprecated_msg[] = "Mount option \"%s\" will be removed by %s\n"
	"Contact linux-ext4@vger.kernel.org if you think we should keep it.\n";
1275 1275
1276 #ifdef CONFIG_QUOTA 1276 #ifdef CONFIG_QUOTA
1277 static int set_qf_name(struct super_block *sb, int qtype, substring_t *args) 1277 static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
1278 { 1278 {
1279 struct ext4_sb_info *sbi = EXT4_SB(sb); 1279 struct ext4_sb_info *sbi = EXT4_SB(sb);
1280 char *qname; 1280 char *qname;
1281 int ret = -1; 1281 int ret = -1;
1282 1282
1283 if (sb_any_quota_loaded(sb) && 1283 if (sb_any_quota_loaded(sb) &&
1284 !sbi->s_qf_names[qtype]) { 1284 !sbi->s_qf_names[qtype]) {
1285 ext4_msg(sb, KERN_ERR, 1285 ext4_msg(sb, KERN_ERR,
1286 "Cannot change journaled " 1286 "Cannot change journaled "
1287 "quota options when quota turned on"); 1287 "quota options when quota turned on");
1288 return -1; 1288 return -1;
1289 } 1289 }
1290 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) { 1290 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) {
1291 ext4_msg(sb, KERN_ERR, "Cannot set journaled quota options " 1291 ext4_msg(sb, KERN_ERR, "Cannot set journaled quota options "
1292 "when QUOTA feature is enabled"); 1292 "when QUOTA feature is enabled");
1293 return -1; 1293 return -1;
1294 } 1294 }
1295 qname = match_strdup(args); 1295 qname = match_strdup(args);
1296 if (!qname) { 1296 if (!qname) {
1297 ext4_msg(sb, KERN_ERR, 1297 ext4_msg(sb, KERN_ERR,
1298 "Not enough memory for storing quotafile name"); 1298 "Not enough memory for storing quotafile name");
1299 return -1; 1299 return -1;
1300 } 1300 }
1301 if (sbi->s_qf_names[qtype]) { 1301 if (sbi->s_qf_names[qtype]) {
1302 if (strcmp(sbi->s_qf_names[qtype], qname) == 0) 1302 if (strcmp(sbi->s_qf_names[qtype], qname) == 0)
1303 ret = 1; 1303 ret = 1;
1304 else 1304 else
1305 ext4_msg(sb, KERN_ERR, 1305 ext4_msg(sb, KERN_ERR,
1306 "%s quota file already specified", 1306 "%s quota file already specified",
1307 QTYPE2NAME(qtype)); 1307 QTYPE2NAME(qtype));
1308 goto errout; 1308 goto errout;
1309 } 1309 }
1310 if (strchr(qname, '/')) { 1310 if (strchr(qname, '/')) {
1311 ext4_msg(sb, KERN_ERR, 1311 ext4_msg(sb, KERN_ERR,
1312 "quotafile must be on filesystem root"); 1312 "quotafile must be on filesystem root");
1313 goto errout; 1313 goto errout;
1314 } 1314 }
1315 sbi->s_qf_names[qtype] = qname; 1315 sbi->s_qf_names[qtype] = qname;
1316 set_opt(sb, QUOTA); 1316 set_opt(sb, QUOTA);
1317 return 1; 1317 return 1;
1318 errout: 1318 errout:
1319 kfree(qname); 1319 kfree(qname);
1320 return ret; 1320 return ret;
1321 } 1321 }
1322 1322
1323 static int clear_qf_name(struct super_block *sb, int qtype) 1323 static int clear_qf_name(struct super_block *sb, int qtype)
1324 { 1324 {
1325 1325
1326 struct ext4_sb_info *sbi = EXT4_SB(sb); 1326 struct ext4_sb_info *sbi = EXT4_SB(sb);
1327 1327
1328 if (sb_any_quota_loaded(sb) && 1328 if (sb_any_quota_loaded(sb) &&
1329 sbi->s_qf_names[qtype]) { 1329 sbi->s_qf_names[qtype]) {
1330 ext4_msg(sb, KERN_ERR, "Cannot change journaled quota options" 1330 ext4_msg(sb, KERN_ERR, "Cannot change journaled quota options"
1331 " when quota turned on"); 1331 " when quota turned on");
1332 return -1; 1332 return -1;
1333 } 1333 }
1334 kfree(sbi->s_qf_names[qtype]); 1334 kfree(sbi->s_qf_names[qtype]);
1335 sbi->s_qf_names[qtype] = NULL; 1335 sbi->s_qf_names[qtype] = NULL;
1336 return 1; 1336 return 1;
1337 } 1337 }
1338 #endif 1338 #endif
1339 1339
/*
 * Handling flags for the entries of ext4_mount_opts[] below.  Without
 * CONFIG_QUOTA the quota-related flags collapse to MOPT_NOSUPPORT so
 * those options are recognised but rejected.
 */
#define MOPT_SET	0x0001
#define MOPT_CLEAR	0x0002
#define MOPT_NOSUPPORT	0x0004
#define MOPT_EXPLICIT	0x0008
#define MOPT_CLEAR_ERR	0x0010
#define MOPT_GTE0	0x0020
#ifdef CONFIG_QUOTA
#define MOPT_Q		0
#define MOPT_QFMT	0x0040
#else
#define MOPT_Q		MOPT_NOSUPPORT
#define MOPT_QFMT	MOPT_NOSUPPORT
#endif
#define MOPT_DATAJ	0x0080
#define MOPT_NO_EXT2	0x0100
#define MOPT_NO_EXT3	0x0200
/* valid only when mounted as genuine ext4 */
#define MOPT_EXT4_ONLY	(MOPT_NO_EXT2 | MOPT_NO_EXT3)
#define MOPT_STRING	0x0400
1358 1358
1359 static const struct mount_opts { 1359 static const struct mount_opts {
1360 int token; 1360 int token;
1361 int mount_opt; 1361 int mount_opt;
1362 int flags; 1362 int flags;
1363 } ext4_mount_opts[] = { 1363 } ext4_mount_opts[] = {
1364 {Opt_minix_df, EXT4_MOUNT_MINIX_DF, MOPT_SET}, 1364 {Opt_minix_df, EXT4_MOUNT_MINIX_DF, MOPT_SET},
1365 {Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR}, 1365 {Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR},
1366 {Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET}, 1366 {Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET},
1367 {Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR}, 1367 {Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR},
1368 {Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET}, 1368 {Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET},
1369 {Opt_noblock_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_CLEAR}, 1369 {Opt_noblock_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_CLEAR},
1370 {Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK, 1370 {Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK,
1371 MOPT_EXT4_ONLY | MOPT_SET}, 1371 MOPT_EXT4_ONLY | MOPT_SET},
1372 {Opt_dioread_lock, EXT4_MOUNT_DIOREAD_NOLOCK, 1372 {Opt_dioread_lock, EXT4_MOUNT_DIOREAD_NOLOCK,
1373 MOPT_EXT4_ONLY | MOPT_CLEAR}, 1373 MOPT_EXT4_ONLY | MOPT_CLEAR},
1374 {Opt_discard, EXT4_MOUNT_DISCARD, MOPT_SET}, 1374 {Opt_discard, EXT4_MOUNT_DISCARD, MOPT_SET},
1375 {Opt_nodiscard, EXT4_MOUNT_DISCARD, MOPT_CLEAR}, 1375 {Opt_nodiscard, EXT4_MOUNT_DISCARD, MOPT_CLEAR},
1376 {Opt_delalloc, EXT4_MOUNT_DELALLOC, 1376 {Opt_delalloc, EXT4_MOUNT_DELALLOC,
1377 MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT}, 1377 MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
1378 {Opt_nodelalloc, EXT4_MOUNT_DELALLOC, 1378 {Opt_nodelalloc, EXT4_MOUNT_DELALLOC,
1379 MOPT_EXT4_ONLY | MOPT_CLEAR}, 1379 MOPT_EXT4_ONLY | MOPT_CLEAR},
1380 {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, 1380 {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
1381 MOPT_EXT4_ONLY | MOPT_SET}, 1381 MOPT_EXT4_ONLY | MOPT_SET},
1382 {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT | 1382 {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT |
1383 EXT4_MOUNT_JOURNAL_CHECKSUM), 1383 EXT4_MOUNT_JOURNAL_CHECKSUM),
1384 MOPT_EXT4_ONLY | MOPT_SET}, 1384 MOPT_EXT4_ONLY | MOPT_SET},
1385 {Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_NO_EXT2 | MOPT_SET}, 1385 {Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_NO_EXT2 | MOPT_SET},
1386 {Opt_err_panic, EXT4_MOUNT_ERRORS_PANIC, MOPT_SET | MOPT_CLEAR_ERR}, 1386 {Opt_err_panic, EXT4_MOUNT_ERRORS_PANIC, MOPT_SET | MOPT_CLEAR_ERR},
1387 {Opt_err_ro, EXT4_MOUNT_ERRORS_RO, MOPT_SET | MOPT_CLEAR_ERR}, 1387 {Opt_err_ro, EXT4_MOUNT_ERRORS_RO, MOPT_SET | MOPT_CLEAR_ERR},
1388 {Opt_err_cont, EXT4_MOUNT_ERRORS_CONT, MOPT_SET | MOPT_CLEAR_ERR}, 1388 {Opt_err_cont, EXT4_MOUNT_ERRORS_CONT, MOPT_SET | MOPT_CLEAR_ERR},
1389 {Opt_data_err_abort, EXT4_MOUNT_DATA_ERR_ABORT, 1389 {Opt_data_err_abort, EXT4_MOUNT_DATA_ERR_ABORT,
1390 MOPT_NO_EXT2 | MOPT_SET}, 1390 MOPT_NO_EXT2 | MOPT_SET},
1391 {Opt_data_err_ignore, EXT4_MOUNT_DATA_ERR_ABORT, 1391 {Opt_data_err_ignore, EXT4_MOUNT_DATA_ERR_ABORT,
1392 MOPT_NO_EXT2 | MOPT_CLEAR}, 1392 MOPT_NO_EXT2 | MOPT_CLEAR},
1393 {Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET}, 1393 {Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET},
1394 {Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR}, 1394 {Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR},
1395 {Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET}, 1395 {Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET},
1396 {Opt_auto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_CLEAR}, 1396 {Opt_auto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_CLEAR},
1397 {Opt_noinit_itable, EXT4_MOUNT_INIT_INODE_TABLE, MOPT_CLEAR}, 1397 {Opt_noinit_itable, EXT4_MOUNT_INIT_INODE_TABLE, MOPT_CLEAR},
1398 {Opt_commit, 0, MOPT_GTE0}, 1398 {Opt_commit, 0, MOPT_GTE0},
1399 {Opt_max_batch_time, 0, MOPT_GTE0}, 1399 {Opt_max_batch_time, 0, MOPT_GTE0},
1400 {Opt_min_batch_time, 0, MOPT_GTE0}, 1400 {Opt_min_batch_time, 0, MOPT_GTE0},
1401 {Opt_inode_readahead_blks, 0, MOPT_GTE0}, 1401 {Opt_inode_readahead_blks, 0, MOPT_GTE0},
1402 {Opt_init_itable, 0, MOPT_GTE0}, 1402 {Opt_init_itable, 0, MOPT_GTE0},
1403 {Opt_stripe, 0, MOPT_GTE0}, 1403 {Opt_stripe, 0, MOPT_GTE0},
1404 {Opt_resuid, 0, MOPT_GTE0}, 1404 {Opt_resuid, 0, MOPT_GTE0},
1405 {Opt_resgid, 0, MOPT_GTE0}, 1405 {Opt_resgid, 0, MOPT_GTE0},
1406 {Opt_journal_dev, 0, MOPT_GTE0}, 1406 {Opt_journal_dev, 0, MOPT_GTE0},
1407 {Opt_journal_path, 0, MOPT_STRING}, 1407 {Opt_journal_path, 0, MOPT_STRING},
1408 {Opt_journal_ioprio, 0, MOPT_GTE0}, 1408 {Opt_journal_ioprio, 0, MOPT_GTE0},
1409 {Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_NO_EXT2 | MOPT_DATAJ}, 1409 {Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
1410 {Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_NO_EXT2 | MOPT_DATAJ}, 1410 {Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
1411 {Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA, 1411 {Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA,
1412 MOPT_NO_EXT2 | MOPT_DATAJ}, 1412 MOPT_NO_EXT2 | MOPT_DATAJ},
1413 {Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET}, 1413 {Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET},
1414 {Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR}, 1414 {Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR},
1415 #ifdef CONFIG_EXT4_FS_POSIX_ACL 1415 #ifdef CONFIG_EXT4_FS_POSIX_ACL
1416 {Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET}, 1416 {Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET},
1417 {Opt_noacl, EXT4_MOUNT_POSIX_ACL, MOPT_CLEAR}, 1417 {Opt_noacl, EXT4_MOUNT_POSIX_ACL, MOPT_CLEAR},
1418 #else 1418 #else
1419 {Opt_acl, 0, MOPT_NOSUPPORT}, 1419 {Opt_acl, 0, MOPT_NOSUPPORT},
1420 {Opt_noacl, 0, MOPT_NOSUPPORT}, 1420 {Opt_noacl, 0, MOPT_NOSUPPORT},
1421 #endif 1421 #endif
1422 {Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET}, 1422 {Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET},
1423 {Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET}, 1423 {Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET},
1424 {Opt_quota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, MOPT_SET | MOPT_Q}, 1424 {Opt_quota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, MOPT_SET | MOPT_Q},
1425 {Opt_usrquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, 1425 {Opt_usrquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA,
1426 MOPT_SET | MOPT_Q}, 1426 MOPT_SET | MOPT_Q},
1427 {Opt_grpquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_GRPQUOTA, 1427 {Opt_grpquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_GRPQUOTA,
1428 MOPT_SET | MOPT_Q}, 1428 MOPT_SET | MOPT_Q},
1429 {Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA | 1429 {Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
1430 EXT4_MOUNT_GRPQUOTA), MOPT_CLEAR | MOPT_Q}, 1430 EXT4_MOUNT_GRPQUOTA), MOPT_CLEAR | MOPT_Q},
1431 {Opt_usrjquota, 0, MOPT_Q}, 1431 {Opt_usrjquota, 0, MOPT_Q},
1432 {Opt_grpjquota, 0, MOPT_Q}, 1432 {Opt_grpjquota, 0, MOPT_Q},
1433 {Opt_offusrjquota, 0, MOPT_Q}, 1433 {Opt_offusrjquota, 0, MOPT_Q},
1434 {Opt_offgrpjquota, 0, MOPT_Q}, 1434 {Opt_offgrpjquota, 0, MOPT_Q},
1435 {Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT}, 1435 {Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT},
1436 {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT}, 1436 {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT},
1437 {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT}, 1437 {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
1438 {Opt_max_dir_size_kb, 0, MOPT_GTE0}, 1438 {Opt_max_dir_size_kb, 0, MOPT_GTE0},
1439 {Opt_err, 0, 0} 1439 {Opt_err, 0, 0}
1440 }; 1440 };
1441 1441
1442 static int handle_mount_opt(struct super_block *sb, char *opt, int token, 1442 static int handle_mount_opt(struct super_block *sb, char *opt, int token,
1443 substring_t *args, unsigned long *journal_devnum, 1443 substring_t *args, unsigned long *journal_devnum,
1444 unsigned int *journal_ioprio, int is_remount) 1444 unsigned int *journal_ioprio, int is_remount)
1445 { 1445 {
1446 struct ext4_sb_info *sbi = EXT4_SB(sb); 1446 struct ext4_sb_info *sbi = EXT4_SB(sb);
1447 const struct mount_opts *m; 1447 const struct mount_opts *m;
1448 kuid_t uid; 1448 kuid_t uid;
1449 kgid_t gid; 1449 kgid_t gid;
1450 int arg = 0; 1450 int arg = 0;
1451 1451
1452 #ifdef CONFIG_QUOTA 1452 #ifdef CONFIG_QUOTA
1453 if (token == Opt_usrjquota) 1453 if (token == Opt_usrjquota)
1454 return set_qf_name(sb, USRQUOTA, &args[0]); 1454 return set_qf_name(sb, USRQUOTA, &args[0]);
1455 else if (token == Opt_grpjquota) 1455 else if (token == Opt_grpjquota)
1456 return set_qf_name(sb, GRPQUOTA, &args[0]); 1456 return set_qf_name(sb, GRPQUOTA, &args[0]);
1457 else if (token == Opt_offusrjquota) 1457 else if (token == Opt_offusrjquota)
1458 return clear_qf_name(sb, USRQUOTA); 1458 return clear_qf_name(sb, USRQUOTA);
1459 else if (token == Opt_offgrpjquota) 1459 else if (token == Opt_offgrpjquota)
1460 return clear_qf_name(sb, GRPQUOTA); 1460 return clear_qf_name(sb, GRPQUOTA);
1461 #endif 1461 #endif
1462 switch (token) { 1462 switch (token) {
1463 case Opt_noacl: 1463 case Opt_noacl:
1464 case Opt_nouser_xattr: 1464 case Opt_nouser_xattr:
1465 ext4_msg(sb, KERN_WARNING, deprecated_msg, opt, "3.5"); 1465 ext4_msg(sb, KERN_WARNING, deprecated_msg, opt, "3.5");
1466 break; 1466 break;
1467 case Opt_sb: 1467 case Opt_sb:
1468 return 1; /* handled by get_sb_block() */ 1468 return 1; /* handled by get_sb_block() */
1469 case Opt_removed: 1469 case Opt_removed:
1470 ext4_msg(sb, KERN_WARNING, "Ignoring removed %s option", opt); 1470 ext4_msg(sb, KERN_WARNING, "Ignoring removed %s option", opt);
1471 return 1; 1471 return 1;
1472 case Opt_abort: 1472 case Opt_abort:
1473 sbi->s_mount_flags |= EXT4_MF_FS_ABORTED; 1473 sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
1474 return 1; 1474 return 1;
1475 case Opt_i_version: 1475 case Opt_i_version:
1476 sb->s_flags |= MS_I_VERSION; 1476 sb->s_flags |= MS_I_VERSION;
1477 return 1; 1477 return 1;
1478 } 1478 }
1479 1479
1480 for (m = ext4_mount_opts; m->token != Opt_err; m++) 1480 for (m = ext4_mount_opts; m->token != Opt_err; m++)
1481 if (token == m->token) 1481 if (token == m->token)
1482 break; 1482 break;
1483 1483
1484 if (m->token == Opt_err) { 1484 if (m->token == Opt_err) {
1485 ext4_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" " 1485 ext4_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" "
1486 "or missing value", opt); 1486 "or missing value", opt);
1487 return -1; 1487 return -1;
1488 } 1488 }
1489 1489
1490 if ((m->flags & MOPT_NO_EXT2) && IS_EXT2_SB(sb)) { 1490 if ((m->flags & MOPT_NO_EXT2) && IS_EXT2_SB(sb)) {
1491 ext4_msg(sb, KERN_ERR, 1491 ext4_msg(sb, KERN_ERR,
1492 "Mount option \"%s\" incompatible with ext2", opt); 1492 "Mount option \"%s\" incompatible with ext2", opt);
1493 return -1; 1493 return -1;
1494 } 1494 }
1495 if ((m->flags & MOPT_NO_EXT3) && IS_EXT3_SB(sb)) { 1495 if ((m->flags & MOPT_NO_EXT3) && IS_EXT3_SB(sb)) {
1496 ext4_msg(sb, KERN_ERR, 1496 ext4_msg(sb, KERN_ERR,
1497 "Mount option \"%s\" incompatible with ext3", opt); 1497 "Mount option \"%s\" incompatible with ext3", opt);
1498 return -1; 1498 return -1;
1499 } 1499 }
1500 1500
1501 if (args->from && !(m->flags & MOPT_STRING) && match_int(args, &arg)) 1501 if (args->from && !(m->flags & MOPT_STRING) && match_int(args, &arg))
1502 return -1; 1502 return -1;
1503 if (args->from && (m->flags & MOPT_GTE0) && (arg < 0)) 1503 if (args->from && (m->flags & MOPT_GTE0) && (arg < 0))
1504 return -1; 1504 return -1;
1505 if (m->flags & MOPT_EXPLICIT) 1505 if (m->flags & MOPT_EXPLICIT)
1506 set_opt2(sb, EXPLICIT_DELALLOC); 1506 set_opt2(sb, EXPLICIT_DELALLOC);
1507 if (m->flags & MOPT_CLEAR_ERR) 1507 if (m->flags & MOPT_CLEAR_ERR)
1508 clear_opt(sb, ERRORS_MASK); 1508 clear_opt(sb, ERRORS_MASK);
1509 if (token == Opt_noquota && sb_any_quota_loaded(sb)) { 1509 if (token == Opt_noquota && sb_any_quota_loaded(sb)) {
1510 ext4_msg(sb, KERN_ERR, "Cannot change quota " 1510 ext4_msg(sb, KERN_ERR, "Cannot change quota "
1511 "options when quota turned on"); 1511 "options when quota turned on");
1512 return -1; 1512 return -1;
1513 } 1513 }
1514 1514
1515 if (m->flags & MOPT_NOSUPPORT) { 1515 if (m->flags & MOPT_NOSUPPORT) {
1516 ext4_msg(sb, KERN_ERR, "%s option not supported", opt); 1516 ext4_msg(sb, KERN_ERR, "%s option not supported", opt);
1517 } else if (token == Opt_commit) { 1517 } else if (token == Opt_commit) {
1518 if (arg == 0) 1518 if (arg == 0)
1519 arg = JBD2_DEFAULT_MAX_COMMIT_AGE; 1519 arg = JBD2_DEFAULT_MAX_COMMIT_AGE;
1520 sbi->s_commit_interval = HZ * arg; 1520 sbi->s_commit_interval = HZ * arg;
1521 } else if (token == Opt_max_batch_time) { 1521 } else if (token == Opt_max_batch_time) {
1522 if (arg == 0) 1522 if (arg == 0)
1523 arg = EXT4_DEF_MAX_BATCH_TIME; 1523 arg = EXT4_DEF_MAX_BATCH_TIME;
1524 sbi->s_max_batch_time = arg; 1524 sbi->s_max_batch_time = arg;
1525 } else if (token == Opt_min_batch_time) { 1525 } else if (token == Opt_min_batch_time) {
1526 sbi->s_min_batch_time = arg; 1526 sbi->s_min_batch_time = arg;
1527 } else if (token == Opt_inode_readahead_blks) { 1527 } else if (token == Opt_inode_readahead_blks) {
1528 if (arg && (arg > (1 << 30) || !is_power_of_2(arg))) { 1528 if (arg && (arg > (1 << 30) || !is_power_of_2(arg))) {
1529 ext4_msg(sb, KERN_ERR, 1529 ext4_msg(sb, KERN_ERR,
1530 "EXT4-fs: inode_readahead_blks must be " 1530 "EXT4-fs: inode_readahead_blks must be "
1531 "0 or a power of 2 smaller than 2^31"); 1531 "0 or a power of 2 smaller than 2^31");
1532 return -1; 1532 return -1;
1533 } 1533 }
1534 sbi->s_inode_readahead_blks = arg; 1534 sbi->s_inode_readahead_blks = arg;
1535 } else if (token == Opt_init_itable) { 1535 } else if (token == Opt_init_itable) {
1536 set_opt(sb, INIT_INODE_TABLE); 1536 set_opt(sb, INIT_INODE_TABLE);
1537 if (!args->from) 1537 if (!args->from)
1538 arg = EXT4_DEF_LI_WAIT_MULT; 1538 arg = EXT4_DEF_LI_WAIT_MULT;
1539 sbi->s_li_wait_mult = arg; 1539 sbi->s_li_wait_mult = arg;
1540 } else if (token == Opt_max_dir_size_kb) { 1540 } else if (token == Opt_max_dir_size_kb) {
1541 sbi->s_max_dir_size_kb = arg; 1541 sbi->s_max_dir_size_kb = arg;
1542 } else if (token == Opt_stripe) { 1542 } else if (token == Opt_stripe) {
1543 sbi->s_stripe = arg; 1543 sbi->s_stripe = arg;
1544 } else if (token == Opt_resuid) { 1544 } else if (token == Opt_resuid) {
1545 uid = make_kuid(current_user_ns(), arg); 1545 uid = make_kuid(current_user_ns(), arg);
1546 if (!uid_valid(uid)) { 1546 if (!uid_valid(uid)) {
1547 ext4_msg(sb, KERN_ERR, "Invalid uid value %d", arg); 1547 ext4_msg(sb, KERN_ERR, "Invalid uid value %d", arg);
1548 return -1; 1548 return -1;
1549 } 1549 }
1550 sbi->s_resuid = uid; 1550 sbi->s_resuid = uid;
1551 } else if (token == Opt_resgid) { 1551 } else if (token == Opt_resgid) {
1552 gid = make_kgid(current_user_ns(), arg); 1552 gid = make_kgid(current_user_ns(), arg);
1553 if (!gid_valid(gid)) { 1553 if (!gid_valid(gid)) {
1554 ext4_msg(sb, KERN_ERR, "Invalid gid value %d", arg); 1554 ext4_msg(sb, KERN_ERR, "Invalid gid value %d", arg);
1555 return -1; 1555 return -1;
1556 } 1556 }
1557 sbi->s_resgid = gid; 1557 sbi->s_resgid = gid;
1558 } else if (token == Opt_journal_dev) { 1558 } else if (token == Opt_journal_dev) {
1559 if (is_remount) { 1559 if (is_remount) {
1560 ext4_msg(sb, KERN_ERR, 1560 ext4_msg(sb, KERN_ERR,
1561 "Cannot specify journal on remount"); 1561 "Cannot specify journal on remount");
1562 return -1; 1562 return -1;
1563 } 1563 }
1564 *journal_devnum = arg; 1564 *journal_devnum = arg;
1565 } else if (token == Opt_journal_path) { 1565 } else if (token == Opt_journal_path) {
1566 char *journal_path; 1566 char *journal_path;
1567 struct inode *journal_inode; 1567 struct inode *journal_inode;
1568 struct path path; 1568 struct path path;
1569 int error; 1569 int error;
1570 1570
1571 if (is_remount) { 1571 if (is_remount) {
1572 ext4_msg(sb, KERN_ERR, 1572 ext4_msg(sb, KERN_ERR,
1573 "Cannot specify journal on remount"); 1573 "Cannot specify journal on remount");
1574 return -1; 1574 return -1;
1575 } 1575 }
1576 journal_path = match_strdup(&args[0]); 1576 journal_path = match_strdup(&args[0]);
1577 if (!journal_path) { 1577 if (!journal_path) {
1578 ext4_msg(sb, KERN_ERR, "error: could not dup " 1578 ext4_msg(sb, KERN_ERR, "error: could not dup "
1579 "journal device string"); 1579 "journal device string");
1580 return -1; 1580 return -1;
1581 } 1581 }
1582 1582
1583 error = kern_path(journal_path, LOOKUP_FOLLOW, &path); 1583 error = kern_path(journal_path, LOOKUP_FOLLOW, &path);
1584 if (error) { 1584 if (error) {
1585 ext4_msg(sb, KERN_ERR, "error: could not find " 1585 ext4_msg(sb, KERN_ERR, "error: could not find "
1586 "journal device path: error %d", error); 1586 "journal device path: error %d", error);
1587 kfree(journal_path); 1587 kfree(journal_path);
1588 return -1; 1588 return -1;
1589 } 1589 }
1590 1590
1591 journal_inode = path.dentry->d_inode; 1591 journal_inode = path.dentry->d_inode;
1592 if (!S_ISBLK(journal_inode->i_mode)) { 1592 if (!S_ISBLK(journal_inode->i_mode)) {
1593 ext4_msg(sb, KERN_ERR, "error: journal path %s " 1593 ext4_msg(sb, KERN_ERR, "error: journal path %s "
1594 "is not a block device", journal_path); 1594 "is not a block device", journal_path);
1595 path_put(&path); 1595 path_put(&path);
1596 kfree(journal_path); 1596 kfree(journal_path);
1597 return -1; 1597 return -1;
1598 } 1598 }
1599 1599
1600 *journal_devnum = new_encode_dev(journal_inode->i_rdev); 1600 *journal_devnum = new_encode_dev(journal_inode->i_rdev);
1601 path_put(&path); 1601 path_put(&path);
1602 kfree(journal_path); 1602 kfree(journal_path);
1603 } else if (token == Opt_journal_ioprio) { 1603 } else if (token == Opt_journal_ioprio) {
1604 if (arg > 7) { 1604 if (arg > 7) {
1605 ext4_msg(sb, KERN_ERR, "Invalid journal IO priority" 1605 ext4_msg(sb, KERN_ERR, "Invalid journal IO priority"
1606 " (must be 0-7)"); 1606 " (must be 0-7)");
1607 return -1; 1607 return -1;
1608 } 1608 }
1609 *journal_ioprio = 1609 *journal_ioprio =
1610 IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg); 1610 IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg);
1611 } else if (m->flags & MOPT_DATAJ) { 1611 } else if (m->flags & MOPT_DATAJ) {
1612 if (is_remount) { 1612 if (is_remount) {
1613 if (!sbi->s_journal) 1613 if (!sbi->s_journal)
1614 ext4_msg(sb, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option"); 1614 ext4_msg(sb, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option");
1615 else if (test_opt(sb, DATA_FLAGS) != m->mount_opt) { 1615 else if (test_opt(sb, DATA_FLAGS) != m->mount_opt) {
1616 ext4_msg(sb, KERN_ERR, 1616 ext4_msg(sb, KERN_ERR,
1617 "Cannot change data mode on remount"); 1617 "Cannot change data mode on remount");
1618 return -1; 1618 return -1;
1619 } 1619 }
1620 } else { 1620 } else {
1621 clear_opt(sb, DATA_FLAGS); 1621 clear_opt(sb, DATA_FLAGS);
1622 sbi->s_mount_opt |= m->mount_opt; 1622 sbi->s_mount_opt |= m->mount_opt;
1623 } 1623 }
1624 #ifdef CONFIG_QUOTA 1624 #ifdef CONFIG_QUOTA
1625 } else if (m->flags & MOPT_QFMT) { 1625 } else if (m->flags & MOPT_QFMT) {
1626 if (sb_any_quota_loaded(sb) && 1626 if (sb_any_quota_loaded(sb) &&
1627 sbi->s_jquota_fmt != m->mount_opt) { 1627 sbi->s_jquota_fmt != m->mount_opt) {
1628 ext4_msg(sb, KERN_ERR, "Cannot change journaled " 1628 ext4_msg(sb, KERN_ERR, "Cannot change journaled "
1629 "quota options when quota turned on"); 1629 "quota options when quota turned on");
1630 return -1; 1630 return -1;
1631 } 1631 }
1632 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 1632 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
1633 EXT4_FEATURE_RO_COMPAT_QUOTA)) { 1633 EXT4_FEATURE_RO_COMPAT_QUOTA)) {
1634 ext4_msg(sb, KERN_ERR, 1634 ext4_msg(sb, KERN_ERR,
1635 "Cannot set journaled quota options " 1635 "Cannot set journaled quota options "
1636 "when QUOTA feature is enabled"); 1636 "when QUOTA feature is enabled");
1637 return -1; 1637 return -1;
1638 } 1638 }
1639 sbi->s_jquota_fmt = m->mount_opt; 1639 sbi->s_jquota_fmt = m->mount_opt;
1640 #endif 1640 #endif
1641 } else { 1641 } else {
1642 if (!args->from) 1642 if (!args->from)
1643 arg = 1; 1643 arg = 1;
1644 if (m->flags & MOPT_CLEAR) 1644 if (m->flags & MOPT_CLEAR)
1645 arg = !arg; 1645 arg = !arg;
1646 else if (unlikely(!(m->flags & MOPT_SET))) { 1646 else if (unlikely(!(m->flags & MOPT_SET))) {
1647 ext4_msg(sb, KERN_WARNING, 1647 ext4_msg(sb, KERN_WARNING,
1648 "buggy handling of option %s", opt); 1648 "buggy handling of option %s", opt);
1649 WARN_ON(1); 1649 WARN_ON(1);
1650 return -1; 1650 return -1;
1651 } 1651 }
1652 if (arg != 0) 1652 if (arg != 0)
1653 sbi->s_mount_opt |= m->mount_opt; 1653 sbi->s_mount_opt |= m->mount_opt;
1654 else 1654 else
1655 sbi->s_mount_opt &= ~m->mount_opt; 1655 sbi->s_mount_opt &= ~m->mount_opt;
1656 } 1656 }
1657 return 1; 1657 return 1;
1658 } 1658 }
1659 1659
1660 static int parse_options(char *options, struct super_block *sb, 1660 static int parse_options(char *options, struct super_block *sb,
1661 unsigned long *journal_devnum, 1661 unsigned long *journal_devnum,
1662 unsigned int *journal_ioprio, 1662 unsigned int *journal_ioprio,
1663 int is_remount) 1663 int is_remount)
1664 { 1664 {
1665 struct ext4_sb_info *sbi = EXT4_SB(sb); 1665 struct ext4_sb_info *sbi = EXT4_SB(sb);
1666 char *p; 1666 char *p;
1667 substring_t args[MAX_OPT_ARGS]; 1667 substring_t args[MAX_OPT_ARGS];
1668 int token; 1668 int token;
1669 1669
1670 if (!options) 1670 if (!options)
1671 return 1; 1671 return 1;
1672 1672
1673 while ((p = strsep(&options, ",")) != NULL) { 1673 while ((p = strsep(&options, ",")) != NULL) {
1674 if (!*p) 1674 if (!*p)
1675 continue; 1675 continue;
1676 /* 1676 /*
1677 * Initialize args struct so we know whether arg was 1677 * Initialize args struct so we know whether arg was
1678 * found; some options take optional arguments. 1678 * found; some options take optional arguments.
1679 */ 1679 */
1680 args[0].to = args[0].from = NULL; 1680 args[0].to = args[0].from = NULL;
1681 token = match_token(p, tokens, args); 1681 token = match_token(p, tokens, args);
1682 if (handle_mount_opt(sb, p, token, args, journal_devnum, 1682 if (handle_mount_opt(sb, p, token, args, journal_devnum,
1683 journal_ioprio, is_remount) < 0) 1683 journal_ioprio, is_remount) < 0)
1684 return 0; 1684 return 0;
1685 } 1685 }
1686 #ifdef CONFIG_QUOTA 1686 #ifdef CONFIG_QUOTA
1687 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) && 1687 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) &&
1688 (test_opt(sb, USRQUOTA) || test_opt(sb, GRPQUOTA))) { 1688 (test_opt(sb, USRQUOTA) || test_opt(sb, GRPQUOTA))) {
1689 ext4_msg(sb, KERN_ERR, "Cannot set quota options when QUOTA " 1689 ext4_msg(sb, KERN_ERR, "Cannot set quota options when QUOTA "
1690 "feature is enabled"); 1690 "feature is enabled");
1691 return 0; 1691 return 0;
1692 } 1692 }
1693 if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { 1693 if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
1694 if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA]) 1694 if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA])
1695 clear_opt(sb, USRQUOTA); 1695 clear_opt(sb, USRQUOTA);
1696 1696
1697 if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA]) 1697 if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA])
1698 clear_opt(sb, GRPQUOTA); 1698 clear_opt(sb, GRPQUOTA);
1699 1699
1700 if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) { 1700 if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) {
1701 ext4_msg(sb, KERN_ERR, "old and new quota " 1701 ext4_msg(sb, KERN_ERR, "old and new quota "
1702 "format mixing"); 1702 "format mixing");
1703 return 0; 1703 return 0;
1704 } 1704 }
1705 1705
1706 if (!sbi->s_jquota_fmt) { 1706 if (!sbi->s_jquota_fmt) {
1707 ext4_msg(sb, KERN_ERR, "journaled quota format " 1707 ext4_msg(sb, KERN_ERR, "journaled quota format "
1708 "not specified"); 1708 "not specified");
1709 return 0; 1709 return 0;
1710 } 1710 }
1711 } else { 1711 } else {
1712 if (sbi->s_jquota_fmt) { 1712 if (sbi->s_jquota_fmt) {
1713 ext4_msg(sb, KERN_ERR, "journaled quota format " 1713 ext4_msg(sb, KERN_ERR, "journaled quota format "
1714 "specified with no journaling " 1714 "specified with no journaling "
1715 "enabled"); 1715 "enabled");
1716 return 0; 1716 return 0;
1717 } 1717 }
1718 } 1718 }
1719 #endif 1719 #endif
1720 if (test_opt(sb, DIOREAD_NOLOCK)) { 1720 if (test_opt(sb, DIOREAD_NOLOCK)) {
1721 int blocksize = 1721 int blocksize =
1722 BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size); 1722 BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
1723 1723
1724 if (blocksize < PAGE_CACHE_SIZE) { 1724 if (blocksize < PAGE_CACHE_SIZE) {
1725 ext4_msg(sb, KERN_ERR, "can't mount with " 1725 ext4_msg(sb, KERN_ERR, "can't mount with "
1726 "dioread_nolock if block size != PAGE_SIZE"); 1726 "dioread_nolock if block size != PAGE_SIZE");
1727 return 0; 1727 return 0;
1728 } 1728 }
1729 } 1729 }
1730 return 1; 1730 return 1;
1731 } 1731 }
1732 1732
/*
 * Append the quota-related mount options (jqfmt, usrjquota, grpjquota)
 * to @seq.  Compiles to a no-op when CONFIG_QUOTA is not set.
 */
static inline void ext4_show_quota_options(struct seq_file *seq,
					   struct super_block *sb)
{
#if defined(CONFIG_QUOTA)
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	if (sbi->s_jquota_fmt) {
		char *fmtname = "";

		switch (sbi->s_jquota_fmt) {
		case QFMT_VFS_OLD:
			fmtname = "vfsold";
			break;
		case QFMT_VFS_V0:
			fmtname = "vfsv0";
			break;
		case QFMT_VFS_V1:
			fmtname = "vfsv1";
			break;
		}
		seq_printf(seq, ",jqfmt=%s", fmtname);
	}

	if (sbi->s_qf_names[USRQUOTA])
		seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);

	if (sbi->s_qf_names[GRPQUOTA])
		seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]);
#endif
}
1763 1763
1764 static const char *token2str(int token) 1764 static const char *token2str(int token)
1765 { 1765 {
1766 const struct match_token *t; 1766 const struct match_token *t;
1767 1767
1768 for (t = tokens; t->token != Opt_err; t++) 1768 for (t = tokens; t->token != Opt_err; t++)
1769 if (t->token == token && !strchr(t->pattern, '=')) 1769 if (t->token == token && !strchr(t->pattern, '='))
1770 break; 1770 break;
1771 return t->pattern; 1771 return t->pattern;
1772 } 1772 }
1773 1773
1774 /* 1774 /*
1775 * Show an option if 1775 * Show an option if
1776 * - it's set to a non-default value OR 1776 * - it's set to a non-default value OR
1777 * - if the per-sb default is different from the global default 1777 * - if the per-sb default is different from the global default
1778 */ 1778 */
1779 static int _ext4_show_options(struct seq_file *seq, struct super_block *sb, 1779 static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
1780 int nodefs) 1780 int nodefs)
1781 { 1781 {
1782 struct ext4_sb_info *sbi = EXT4_SB(sb); 1782 struct ext4_sb_info *sbi = EXT4_SB(sb);
1783 struct ext4_super_block *es = sbi->s_es; 1783 struct ext4_super_block *es = sbi->s_es;
1784 int def_errors, def_mount_opt = nodefs ? 0 : sbi->s_def_mount_opt; 1784 int def_errors, def_mount_opt = nodefs ? 0 : sbi->s_def_mount_opt;
1785 const struct mount_opts *m; 1785 const struct mount_opts *m;
1786 char sep = nodefs ? '\n' : ','; 1786 char sep = nodefs ? '\n' : ',';
1787 1787
1788 #define SEQ_OPTS_PUTS(str) seq_printf(seq, "%c" str, sep) 1788 #define SEQ_OPTS_PUTS(str) seq_printf(seq, "%c" str, sep)
1789 #define SEQ_OPTS_PRINT(str, arg) seq_printf(seq, "%c" str, sep, arg) 1789 #define SEQ_OPTS_PRINT(str, arg) seq_printf(seq, "%c" str, sep, arg)
1790 1790
1791 if (sbi->s_sb_block != 1) 1791 if (sbi->s_sb_block != 1)
1792 SEQ_OPTS_PRINT("sb=%llu", sbi->s_sb_block); 1792 SEQ_OPTS_PRINT("sb=%llu", sbi->s_sb_block);
1793 1793
1794 for (m = ext4_mount_opts; m->token != Opt_err; m++) { 1794 for (m = ext4_mount_opts; m->token != Opt_err; m++) {
1795 int want_set = m->flags & MOPT_SET; 1795 int want_set = m->flags & MOPT_SET;
1796 if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) || 1796 if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) ||
1797 (m->flags & MOPT_CLEAR_ERR)) 1797 (m->flags & MOPT_CLEAR_ERR))
1798 continue; 1798 continue;
1799 if (!(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt))) 1799 if (!(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt)))
1800 continue; /* skip if same as the default */ 1800 continue; /* skip if same as the default */
1801 if ((want_set && 1801 if ((want_set &&
1802 (sbi->s_mount_opt & m->mount_opt) != m->mount_opt) || 1802 (sbi->s_mount_opt & m->mount_opt) != m->mount_opt) ||
1803 (!want_set && (sbi->s_mount_opt & m->mount_opt))) 1803 (!want_set && (sbi->s_mount_opt & m->mount_opt)))
1804 continue; /* select Opt_noFoo vs Opt_Foo */ 1804 continue; /* select Opt_noFoo vs Opt_Foo */
1805 SEQ_OPTS_PRINT("%s", token2str(m->token)); 1805 SEQ_OPTS_PRINT("%s", token2str(m->token));
1806 } 1806 }
1807 1807
1808 if (nodefs || !uid_eq(sbi->s_resuid, make_kuid(&init_user_ns, EXT4_DEF_RESUID)) || 1808 if (nodefs || !uid_eq(sbi->s_resuid, make_kuid(&init_user_ns, EXT4_DEF_RESUID)) ||
1809 le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID) 1809 le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID)
1810 SEQ_OPTS_PRINT("resuid=%u", 1810 SEQ_OPTS_PRINT("resuid=%u",
1811 from_kuid_munged(&init_user_ns, sbi->s_resuid)); 1811 from_kuid_munged(&init_user_ns, sbi->s_resuid));
1812 if (nodefs || !gid_eq(sbi->s_resgid, make_kgid(&init_user_ns, EXT4_DEF_RESGID)) || 1812 if (nodefs || !gid_eq(sbi->s_resgid, make_kgid(&init_user_ns, EXT4_DEF_RESGID)) ||
1813 le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID) 1813 le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID)
1814 SEQ_OPTS_PRINT("resgid=%u", 1814 SEQ_OPTS_PRINT("resgid=%u",
1815 from_kgid_munged(&init_user_ns, sbi->s_resgid)); 1815 from_kgid_munged(&init_user_ns, sbi->s_resgid));
1816 def_errors = nodefs ? -1 : le16_to_cpu(es->s_errors); 1816 def_errors = nodefs ? -1 : le16_to_cpu(es->s_errors);
1817 if (test_opt(sb, ERRORS_RO) && def_errors != EXT4_ERRORS_RO) 1817 if (test_opt(sb, ERRORS_RO) && def_errors != EXT4_ERRORS_RO)
1818 SEQ_OPTS_PUTS("errors=remount-ro"); 1818 SEQ_OPTS_PUTS("errors=remount-ro");
1819 if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE) 1819 if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE)
1820 SEQ_OPTS_PUTS("errors=continue"); 1820 SEQ_OPTS_PUTS("errors=continue");
1821 if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC) 1821 if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC)
1822 SEQ_OPTS_PUTS("errors=panic"); 1822 SEQ_OPTS_PUTS("errors=panic");
1823 if (nodefs || sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) 1823 if (nodefs || sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ)
1824 SEQ_OPTS_PRINT("commit=%lu", sbi->s_commit_interval / HZ); 1824 SEQ_OPTS_PRINT("commit=%lu", sbi->s_commit_interval / HZ);
1825 if (nodefs || sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME) 1825 if (nodefs || sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME)
1826 SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time); 1826 SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time);
1827 if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) 1827 if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME)
1828 SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time); 1828 SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time);
1829 if (sb->s_flags & MS_I_VERSION) 1829 if (sb->s_flags & MS_I_VERSION)
1830 SEQ_OPTS_PUTS("i_version"); 1830 SEQ_OPTS_PUTS("i_version");
1831 if (nodefs || sbi->s_stripe) 1831 if (nodefs || sbi->s_stripe)
1832 SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe); 1832 SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe);
1833 if (EXT4_MOUNT_DATA_FLAGS & (sbi->s_mount_opt ^ def_mount_opt)) { 1833 if (EXT4_MOUNT_DATA_FLAGS & (sbi->s_mount_opt ^ def_mount_opt)) {
1834 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) 1834 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
1835 SEQ_OPTS_PUTS("data=journal"); 1835 SEQ_OPTS_PUTS("data=journal");
1836 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) 1836 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
1837 SEQ_OPTS_PUTS("data=ordered"); 1837 SEQ_OPTS_PUTS("data=ordered");
1838 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) 1838 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
1839 SEQ_OPTS_PUTS("data=writeback"); 1839 SEQ_OPTS_PUTS("data=writeback");
1840 } 1840 }
1841 if (nodefs || 1841 if (nodefs ||
1842 sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS) 1842 sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
1843 SEQ_OPTS_PRINT("inode_readahead_blks=%u", 1843 SEQ_OPTS_PRINT("inode_readahead_blks=%u",
1844 sbi->s_inode_readahead_blks); 1844 sbi->s_inode_readahead_blks);
1845 1845
1846 if (nodefs || (test_opt(sb, INIT_INODE_TABLE) && 1846 if (nodefs || (test_opt(sb, INIT_INODE_TABLE) &&
1847 (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT))) 1847 (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)))
1848 SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult); 1848 SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult);
1849 if (nodefs || sbi->s_max_dir_size_kb) 1849 if (nodefs || sbi->s_max_dir_size_kb)
1850 SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb); 1850 SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb);
1851 1851
1852 ext4_show_quota_options(seq, sb); 1852 ext4_show_quota_options(seq, sb);
1853 return 0; 1853 return 0;
1854 } 1854 }
1855 1855
1856 static int ext4_show_options(struct seq_file *seq, struct dentry *root) 1856 static int ext4_show_options(struct seq_file *seq, struct dentry *root)
1857 { 1857 {
1858 return _ext4_show_options(seq, root->d_sb, 0); 1858 return _ext4_show_options(seq, root->d_sb, 0);
1859 } 1859 }
1860 1860
1861 static int options_seq_show(struct seq_file *seq, void *offset) 1861 static int options_seq_show(struct seq_file *seq, void *offset)
1862 { 1862 {
1863 struct super_block *sb = seq->private; 1863 struct super_block *sb = seq->private;
1864 int rc; 1864 int rc;
1865 1865
1866 seq_puts(seq, (sb->s_flags & MS_RDONLY) ? "ro" : "rw"); 1866 seq_puts(seq, (sb->s_flags & MS_RDONLY) ? "ro" : "rw");
1867 rc = _ext4_show_options(seq, sb, 1); 1867 rc = _ext4_show_options(seq, sb, 1);
1868 seq_puts(seq, "\n"); 1868 seq_puts(seq, "\n");
1869 return rc; 1869 return rc;
1870 } 1870 }
1871 1871
1872 static int options_open_fs(struct inode *inode, struct file *file) 1872 static int options_open_fs(struct inode *inode, struct file *file)
1873 { 1873 {
1874 return single_open(file, options_seq_show, PDE_DATA(inode)); 1874 return single_open(file, options_seq_show, PDE_DATA(inode));
1875 } 1875 }
1876 1876
1877 static const struct file_operations ext4_seq_options_fops = { 1877 static const struct file_operations ext4_seq_options_fops = {
1878 .owner = THIS_MODULE, 1878 .owner = THIS_MODULE,
1879 .open = options_open_fs, 1879 .open = options_open_fs,
1880 .read = seq_read, 1880 .read = seq_read,
1881 .llseek = seq_lseek, 1881 .llseek = seq_lseek,
1882 .release = single_release, 1882 .release = single_release,
1883 }; 1883 };
1884 1884
1885 static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, 1885 static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1886 int read_only) 1886 int read_only)
1887 { 1887 {
1888 struct ext4_sb_info *sbi = EXT4_SB(sb); 1888 struct ext4_sb_info *sbi = EXT4_SB(sb);
1889 int res = 0; 1889 int res = 0;
1890 1890
1891 if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) { 1891 if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
1892 ext4_msg(sb, KERN_ERR, "revision level too high, " 1892 ext4_msg(sb, KERN_ERR, "revision level too high, "
1893 "forcing read-only mode"); 1893 "forcing read-only mode");
1894 res = MS_RDONLY; 1894 res = MS_RDONLY;
1895 } 1895 }
1896 if (read_only) 1896 if (read_only)
1897 goto done; 1897 goto done;
1898 if (!(sbi->s_mount_state & EXT4_VALID_FS)) 1898 if (!(sbi->s_mount_state & EXT4_VALID_FS))
1899 ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, " 1899 ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, "
1900 "running e2fsck is recommended"); 1900 "running e2fsck is recommended");
1901 else if ((sbi->s_mount_state & EXT4_ERROR_FS)) 1901 else if ((sbi->s_mount_state & EXT4_ERROR_FS))
1902 ext4_msg(sb, KERN_WARNING, 1902 ext4_msg(sb, KERN_WARNING,
1903 "warning: mounting fs with errors, " 1903 "warning: mounting fs with errors, "
1904 "running e2fsck is recommended"); 1904 "running e2fsck is recommended");
1905 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 && 1905 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 &&
1906 le16_to_cpu(es->s_mnt_count) >= 1906 le16_to_cpu(es->s_mnt_count) >=
1907 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) 1907 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
1908 ext4_msg(sb, KERN_WARNING, 1908 ext4_msg(sb, KERN_WARNING,
1909 "warning: maximal mount count reached, " 1909 "warning: maximal mount count reached, "
1910 "running e2fsck is recommended"); 1910 "running e2fsck is recommended");
1911 else if (le32_to_cpu(es->s_checkinterval) && 1911 else if (le32_to_cpu(es->s_checkinterval) &&
1912 (le32_to_cpu(es->s_lastcheck) + 1912 (le32_to_cpu(es->s_lastcheck) +
1913 le32_to_cpu(es->s_checkinterval) <= get_seconds())) 1913 le32_to_cpu(es->s_checkinterval) <= get_seconds()))
1914 ext4_msg(sb, KERN_WARNING, 1914 ext4_msg(sb, KERN_WARNING,
1915 "warning: checktime reached, " 1915 "warning: checktime reached, "
1916 "running e2fsck is recommended"); 1916 "running e2fsck is recommended");
1917 if (!sbi->s_journal) 1917 if (!sbi->s_journal)
1918 es->s_state &= cpu_to_le16(~EXT4_VALID_FS); 1918 es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
1919 if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) 1919 if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
1920 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT); 1920 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
1921 le16_add_cpu(&es->s_mnt_count, 1); 1921 le16_add_cpu(&es->s_mnt_count, 1);
1922 es->s_mtime = cpu_to_le32(get_seconds()); 1922 es->s_mtime = cpu_to_le32(get_seconds());
1923 ext4_update_dynamic_rev(sb); 1923 ext4_update_dynamic_rev(sb);
1924 if (sbi->s_journal) 1924 if (sbi->s_journal)
1925 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 1925 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
1926 1926
1927 ext4_commit_super(sb, 1); 1927 ext4_commit_super(sb, 1);
1928 done: 1928 done:
1929 if (test_opt(sb, DEBUG)) 1929 if (test_opt(sb, DEBUG))
1930 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, " 1930 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
1931 "bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n", 1931 "bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n",
1932 sb->s_blocksize, 1932 sb->s_blocksize,
1933 sbi->s_groups_count, 1933 sbi->s_groups_count,
1934 EXT4_BLOCKS_PER_GROUP(sb), 1934 EXT4_BLOCKS_PER_GROUP(sb),
1935 EXT4_INODES_PER_GROUP(sb), 1935 EXT4_INODES_PER_GROUP(sb),
1936 sbi->s_mount_opt, sbi->s_mount_opt2); 1936 sbi->s_mount_opt, sbi->s_mount_opt2);
1937 1937
1938 cleancache_init_fs(sb); 1938 cleancache_init_fs(sb);
1939 return res; 1939 return res;
1940 } 1940 }
1941 1941
1942 int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup) 1942 int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)
1943 { 1943 {
1944 struct ext4_sb_info *sbi = EXT4_SB(sb); 1944 struct ext4_sb_info *sbi = EXT4_SB(sb);
1945 struct flex_groups *new_groups; 1945 struct flex_groups *new_groups;
1946 int size; 1946 int size;
1947 1947
1948 if (!sbi->s_log_groups_per_flex) 1948 if (!sbi->s_log_groups_per_flex)
1949 return 0; 1949 return 0;
1950 1950
1951 size = ext4_flex_group(sbi, ngroup - 1) + 1; 1951 size = ext4_flex_group(sbi, ngroup - 1) + 1;
1952 if (size <= sbi->s_flex_groups_allocated) 1952 if (size <= sbi->s_flex_groups_allocated)
1953 return 0; 1953 return 0;
1954 1954
1955 size = roundup_pow_of_two(size * sizeof(struct flex_groups)); 1955 size = roundup_pow_of_two(size * sizeof(struct flex_groups));
1956 new_groups = ext4_kvzalloc(size, GFP_KERNEL); 1956 new_groups = ext4_kvzalloc(size, GFP_KERNEL);
1957 if (!new_groups) { 1957 if (!new_groups) {
1958 ext4_msg(sb, KERN_ERR, "not enough memory for %d flex groups", 1958 ext4_msg(sb, KERN_ERR, "not enough memory for %d flex groups",
1959 size / (int) sizeof(struct flex_groups)); 1959 size / (int) sizeof(struct flex_groups));
1960 return -ENOMEM; 1960 return -ENOMEM;
1961 } 1961 }
1962 1962
1963 if (sbi->s_flex_groups) { 1963 if (sbi->s_flex_groups) {
1964 memcpy(new_groups, sbi->s_flex_groups, 1964 memcpy(new_groups, sbi->s_flex_groups,
1965 (sbi->s_flex_groups_allocated * 1965 (sbi->s_flex_groups_allocated *
1966 sizeof(struct flex_groups))); 1966 sizeof(struct flex_groups)));
1967 ext4_kvfree(sbi->s_flex_groups); 1967 ext4_kvfree(sbi->s_flex_groups);
1968 } 1968 }
1969 sbi->s_flex_groups = new_groups; 1969 sbi->s_flex_groups = new_groups;
1970 sbi->s_flex_groups_allocated = size / sizeof(struct flex_groups); 1970 sbi->s_flex_groups_allocated = size / sizeof(struct flex_groups);
1971 return 0; 1971 return 0;
1972 } 1972 }
1973 1973
1974 static int ext4_fill_flex_info(struct super_block *sb) 1974 static int ext4_fill_flex_info(struct super_block *sb)
1975 { 1975 {
1976 struct ext4_sb_info *sbi = EXT4_SB(sb); 1976 struct ext4_sb_info *sbi = EXT4_SB(sb);
1977 struct ext4_group_desc *gdp = NULL; 1977 struct ext4_group_desc *gdp = NULL;
1978 ext4_group_t flex_group; 1978 ext4_group_t flex_group;
1979 int i, err; 1979 int i, err;
1980 1980
1981 sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; 1981 sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
1982 if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) { 1982 if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) {
1983 sbi->s_log_groups_per_flex = 0; 1983 sbi->s_log_groups_per_flex = 0;
1984 return 1; 1984 return 1;
1985 } 1985 }
1986 1986
1987 err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count); 1987 err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count);
1988 if (err) 1988 if (err)
1989 goto failed; 1989 goto failed;
1990 1990
1991 for (i = 0; i < sbi->s_groups_count; i++) { 1991 for (i = 0; i < sbi->s_groups_count; i++) {
1992 gdp = ext4_get_group_desc(sb, i, NULL); 1992 gdp = ext4_get_group_desc(sb, i, NULL);
1993 1993
1994 flex_group = ext4_flex_group(sbi, i); 1994 flex_group = ext4_flex_group(sbi, i);
1995 atomic_add(ext4_free_inodes_count(sb, gdp), 1995 atomic_add(ext4_free_inodes_count(sb, gdp),
1996 &sbi->s_flex_groups[flex_group].free_inodes); 1996 &sbi->s_flex_groups[flex_group].free_inodes);
1997 atomic64_add(ext4_free_group_clusters(sb, gdp), 1997 atomic64_add(ext4_free_group_clusters(sb, gdp),
1998 &sbi->s_flex_groups[flex_group].free_clusters); 1998 &sbi->s_flex_groups[flex_group].free_clusters);
1999 atomic_add(ext4_used_dirs_count(sb, gdp), 1999 atomic_add(ext4_used_dirs_count(sb, gdp),
2000 &sbi->s_flex_groups[flex_group].used_dirs); 2000 &sbi->s_flex_groups[flex_group].used_dirs);
2001 } 2001 }
2002 2002
2003 return 1; 2003 return 1;
2004 failed: 2004 failed:
2005 return 0; 2005 return 0;
2006 } 2006 }
2007 2007
2008 static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, 2008 static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
2009 struct ext4_group_desc *gdp) 2009 struct ext4_group_desc *gdp)
2010 { 2010 {
2011 int offset; 2011 int offset;
2012 __u16 crc = 0; 2012 __u16 crc = 0;
2013 __le32 le_group = cpu_to_le32(block_group); 2013 __le32 le_group = cpu_to_le32(block_group);
2014 2014
2015 if ((sbi->s_es->s_feature_ro_compat & 2015 if ((sbi->s_es->s_feature_ro_compat &
2016 cpu_to_le32(EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))) { 2016 cpu_to_le32(EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))) {
2017 /* Use new metadata_csum algorithm */ 2017 /* Use new metadata_csum algorithm */
2018 __le16 save_csum; 2018 __le16 save_csum;
2019 __u32 csum32; 2019 __u32 csum32;
2020 2020
2021 save_csum = gdp->bg_checksum; 2021 save_csum = gdp->bg_checksum;
2022 gdp->bg_checksum = 0; 2022 gdp->bg_checksum = 0;
2023 csum32 = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&le_group, 2023 csum32 = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&le_group,
2024 sizeof(le_group)); 2024 sizeof(le_group));
2025 csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp, 2025 csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp,
2026 sbi->s_desc_size); 2026 sbi->s_desc_size);
2027 gdp->bg_checksum = save_csum; 2027 gdp->bg_checksum = save_csum;
2028 2028
2029 crc = csum32 & 0xFFFF; 2029 crc = csum32 & 0xFFFF;
2030 goto out; 2030 goto out;
2031 } 2031 }
2032 2032
2033 /* old crc16 code */ 2033 /* old crc16 code */
2034 offset = offsetof(struct ext4_group_desc, bg_checksum); 2034 offset = offsetof(struct ext4_group_desc, bg_checksum);
2035 2035
2036 crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid)); 2036 crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
2037 crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group)); 2037 crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
2038 crc = crc16(crc, (__u8 *)gdp, offset); 2038 crc = crc16(crc, (__u8 *)gdp, offset);
2039 offset += sizeof(gdp->bg_checksum); /* skip checksum */ 2039 offset += sizeof(gdp->bg_checksum); /* skip checksum */
2040 /* for checksum of struct ext4_group_desc do the rest...*/ 2040 /* for checksum of struct ext4_group_desc do the rest...*/
2041 if ((sbi->s_es->s_feature_incompat & 2041 if ((sbi->s_es->s_feature_incompat &
2042 cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) && 2042 cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) &&
2043 offset < le16_to_cpu(sbi->s_es->s_desc_size)) 2043 offset < le16_to_cpu(sbi->s_es->s_desc_size))
2044 crc = crc16(crc, (__u8 *)gdp + offset, 2044 crc = crc16(crc, (__u8 *)gdp + offset,
2045 le16_to_cpu(sbi->s_es->s_desc_size) - 2045 le16_to_cpu(sbi->s_es->s_desc_size) -
2046 offset); 2046 offset);
2047 2047
2048 out: 2048 out:
2049 return cpu_to_le16(crc); 2049 return cpu_to_le16(crc);
2050 } 2050 }
2051 2051
2052 int ext4_group_desc_csum_verify(struct super_block *sb, __u32 block_group, 2052 int ext4_group_desc_csum_verify(struct super_block *sb, __u32 block_group,
2053 struct ext4_group_desc *gdp) 2053 struct ext4_group_desc *gdp)
2054 { 2054 {
2055 if (ext4_has_group_desc_csum(sb) && 2055 if (ext4_has_group_desc_csum(sb) &&
2056 (gdp->bg_checksum != ext4_group_desc_csum(EXT4_SB(sb), 2056 (gdp->bg_checksum != ext4_group_desc_csum(EXT4_SB(sb),
2057 block_group, gdp))) 2057 block_group, gdp)))
2058 return 0; 2058 return 0;
2059 2059
2060 return 1; 2060 return 1;
2061 } 2061 }
2062 2062
2063 void ext4_group_desc_csum_set(struct super_block *sb, __u32 block_group, 2063 void ext4_group_desc_csum_set(struct super_block *sb, __u32 block_group,
2064 struct ext4_group_desc *gdp) 2064 struct ext4_group_desc *gdp)
2065 { 2065 {
2066 if (!ext4_has_group_desc_csum(sb)) 2066 if (!ext4_has_group_desc_csum(sb))
2067 return; 2067 return;
2068 gdp->bg_checksum = ext4_group_desc_csum(EXT4_SB(sb), block_group, gdp); 2068 gdp->bg_checksum = ext4_group_desc_csum(EXT4_SB(sb), block_group, gdp);
2069 } 2069 }
2070 2070
2071 /* Called at mount-time, super-block is locked */ 2071 /* Called at mount-time, super-block is locked */
2072 static int ext4_check_descriptors(struct super_block *sb, 2072 static int ext4_check_descriptors(struct super_block *sb,
2073 ext4_group_t *first_not_zeroed) 2073 ext4_group_t *first_not_zeroed)
2074 { 2074 {
2075 struct ext4_sb_info *sbi = EXT4_SB(sb); 2075 struct ext4_sb_info *sbi = EXT4_SB(sb);
2076 ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); 2076 ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
2077 ext4_fsblk_t last_block; 2077 ext4_fsblk_t last_block;
2078 ext4_fsblk_t block_bitmap; 2078 ext4_fsblk_t block_bitmap;
2079 ext4_fsblk_t inode_bitmap; 2079 ext4_fsblk_t inode_bitmap;
2080 ext4_fsblk_t inode_table; 2080 ext4_fsblk_t inode_table;
2081 int flexbg_flag = 0; 2081 int flexbg_flag = 0;
2082 ext4_group_t i, grp = sbi->s_groups_count; 2082 ext4_group_t i, grp = sbi->s_groups_count;
2083 2083
2084 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) 2084 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
2085 flexbg_flag = 1; 2085 flexbg_flag = 1;
2086 2086
2087 ext4_debug("Checking group descriptors"); 2087 ext4_debug("Checking group descriptors");
2088 2088
2089 for (i = 0; i < sbi->s_groups_count; i++) { 2089 for (i = 0; i < sbi->s_groups_count; i++) {
2090 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); 2090 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
2091 2091
2092 if (i == sbi->s_groups_count - 1 || flexbg_flag) 2092 if (i == sbi->s_groups_count - 1 || flexbg_flag)
2093 last_block = ext4_blocks_count(sbi->s_es) - 1; 2093 last_block = ext4_blocks_count(sbi->s_es) - 1;
2094 else 2094 else
2095 last_block = first_block + 2095 last_block = first_block +
2096 (EXT4_BLOCKS_PER_GROUP(sb) - 1); 2096 (EXT4_BLOCKS_PER_GROUP(sb) - 1);
2097 2097
2098 if ((grp == sbi->s_groups_count) && 2098 if ((grp == sbi->s_groups_count) &&
2099 !(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))) 2099 !(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
2100 grp = i; 2100 grp = i;
2101 2101
2102 block_bitmap = ext4_block_bitmap(sb, gdp); 2102 block_bitmap = ext4_block_bitmap(sb, gdp);
2103 if (block_bitmap < first_block || block_bitmap > last_block) { 2103 if (block_bitmap < first_block || block_bitmap > last_block) {
2104 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " 2104 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2105 "Block bitmap for group %u not in group " 2105 "Block bitmap for group %u not in group "
2106 "(block %llu)!", i, block_bitmap); 2106 "(block %llu)!", i, block_bitmap);
2107 return 0; 2107 return 0;
2108 } 2108 }
2109 inode_bitmap = ext4_inode_bitmap(sb, gdp); 2109 inode_bitmap = ext4_inode_bitmap(sb, gdp);
2110 if (inode_bitmap < first_block || inode_bitmap > last_block) { 2110 if (inode_bitmap < first_block || inode_bitmap > last_block) {
2111 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " 2111 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2112 "Inode bitmap for group %u not in group " 2112 "Inode bitmap for group %u not in group "
2113 "(block %llu)!", i, inode_bitmap); 2113 "(block %llu)!", i, inode_bitmap);
2114 return 0; 2114 return 0;
2115 } 2115 }
2116 inode_table = ext4_inode_table(sb, gdp); 2116 inode_table = ext4_inode_table(sb, gdp);
2117 if (inode_table < first_block || 2117 if (inode_table < first_block ||
2118 inode_table + sbi->s_itb_per_group - 1 > last_block) { 2118 inode_table + sbi->s_itb_per_group - 1 > last_block) {
2119 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " 2119 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2120 "Inode table for group %u not in group " 2120 "Inode table for group %u not in group "
2121 "(block %llu)!", i, inode_table); 2121 "(block %llu)!", i, inode_table);
2122 return 0; 2122 return 0;
2123 } 2123 }
2124 ext4_lock_group(sb, i); 2124 ext4_lock_group(sb, i);
2125 if (!ext4_group_desc_csum_verify(sb, i, gdp)) { 2125 if (!ext4_group_desc_csum_verify(sb, i, gdp)) {
2126 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " 2126 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2127 "Checksum for group %u failed (%u!=%u)", 2127 "Checksum for group %u failed (%u!=%u)",
2128 i, le16_to_cpu(ext4_group_desc_csum(sbi, i, 2128 i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
2129 gdp)), le16_to_cpu(gdp->bg_checksum)); 2129 gdp)), le16_to_cpu(gdp->bg_checksum));
2130 if (!(sb->s_flags & MS_RDONLY)) { 2130 if (!(sb->s_flags & MS_RDONLY)) {
2131 ext4_unlock_group(sb, i); 2131 ext4_unlock_group(sb, i);
2132 return 0; 2132 return 0;
2133 } 2133 }
2134 } 2134 }
2135 ext4_unlock_group(sb, i); 2135 ext4_unlock_group(sb, i);
2136 if (!flexbg_flag) 2136 if (!flexbg_flag)
2137 first_block += EXT4_BLOCKS_PER_GROUP(sb); 2137 first_block += EXT4_BLOCKS_PER_GROUP(sb);
2138 } 2138 }
2139 if (NULL != first_not_zeroed) 2139 if (NULL != first_not_zeroed)
2140 *first_not_zeroed = grp; 2140 *first_not_zeroed = grp;
2141 2141
2142 ext4_free_blocks_count_set(sbi->s_es, 2142 ext4_free_blocks_count_set(sbi->s_es,
2143 EXT4_C2B(sbi, ext4_count_free_clusters(sb))); 2143 EXT4_C2B(sbi, ext4_count_free_clusters(sb)));
2144 sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb)); 2144 sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb));
2145 return 1; 2145 return 1;
2146 } 2146 }
2147 2147
2148 /* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at 2148 /* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at
2149 * the superblock) which were deleted from all directories, but held open by 2149 * the superblock) which were deleted from all directories, but held open by
2150 * a process at the time of a crash. We walk the list and try to delete these 2150 * a process at the time of a crash. We walk the list and try to delete these
2151 * inodes at recovery time (only with a read-write filesystem). 2151 * inodes at recovery time (only with a read-write filesystem).
2152 * 2152 *
2153 * In order to keep the orphan inode chain consistent during traversal (in 2153 * In order to keep the orphan inode chain consistent during traversal (in
2154 * case of crash during recovery), we link each inode into the superblock 2154 * case of crash during recovery), we link each inode into the superblock
2155 * orphan list_head and handle it the same way as an inode deletion during 2155 * orphan list_head and handle it the same way as an inode deletion during
2156 * normal operation (which journals the operations for us). 2156 * normal operation (which journals the operations for us).
2157 * 2157 *
2158 * We only do an iget() and an iput() on each inode, which is very safe if we 2158 * We only do an iget() and an iput() on each inode, which is very safe if we
2159 * accidentally point at an in-use or already deleted inode. The worst that 2159 * accidentally point at an in-use or already deleted inode. The worst that
2160 * can happen in this case is that we get a "bit already cleared" message from 2160 * can happen in this case is that we get a "bit already cleared" message from
2161 * ext4_free_inode(). The only reason we would point at a wrong inode is if 2161 * ext4_free_inode(). The only reason we would point at a wrong inode is if
2162 * e2fsck was run on this filesystem, and it must have already done the orphan 2162 * e2fsck was run on this filesystem, and it must have already done the orphan
2163 * inode cleanup for us, so we can safely abort without any further action. 2163 * inode cleanup for us, so we can safely abort without any further action.
2164 */ 2164 */
2165 static void ext4_orphan_cleanup(struct super_block *sb, 2165 static void ext4_orphan_cleanup(struct super_block *sb,
2166 struct ext4_super_block *es) 2166 struct ext4_super_block *es)
2167 { 2167 {
2168 unsigned int s_flags = sb->s_flags; 2168 unsigned int s_flags = sb->s_flags;
2169 int nr_orphans = 0, nr_truncates = 0; 2169 int nr_orphans = 0, nr_truncates = 0;
2170 #ifdef CONFIG_QUOTA 2170 #ifdef CONFIG_QUOTA
2171 int i; 2171 int i;
2172 #endif 2172 #endif
2173 if (!es->s_last_orphan) { 2173 if (!es->s_last_orphan) {
2174 jbd_debug(4, "no orphan inodes to clean up\n"); 2174 jbd_debug(4, "no orphan inodes to clean up\n");
2175 return; 2175 return;
2176 } 2176 }
2177 2177
2178 if (bdev_read_only(sb->s_bdev)) { 2178 if (bdev_read_only(sb->s_bdev)) {
2179 ext4_msg(sb, KERN_ERR, "write access " 2179 ext4_msg(sb, KERN_ERR, "write access "
2180 "unavailable, skipping orphan cleanup"); 2180 "unavailable, skipping orphan cleanup");
2181 return; 2181 return;
2182 } 2182 }
2183 2183
2184 /* Check if feature set would not allow a r/w mount */ 2184 /* Check if feature set would not allow a r/w mount */
2185 if (!ext4_feature_set_ok(sb, 0)) { 2185 if (!ext4_feature_set_ok(sb, 0)) {
2186 ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to " 2186 ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to "
2187 "unknown ROCOMPAT features"); 2187 "unknown ROCOMPAT features");
2188 return; 2188 return;
2189 } 2189 }
2190 2190
2191 if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { 2191 if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
2192 /* don't clear list on RO mount w/ errors */ 2192 /* don't clear list on RO mount w/ errors */
2193 if (es->s_last_orphan && !(s_flags & MS_RDONLY)) { 2193 if (es->s_last_orphan && !(s_flags & MS_RDONLY)) {
2194 jbd_debug(1, "Errors on filesystem, " 2194 jbd_debug(1, "Errors on filesystem, "
2195 "clearing orphan list.\n"); 2195 "clearing orphan list.\n");
2196 es->s_last_orphan = 0; 2196 es->s_last_orphan = 0;
2197 } 2197 }
2198 jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); 2198 jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
2199 return; 2199 return;
2200 } 2200 }
2201 2201
2202 if (s_flags & MS_RDONLY) { 2202 if (s_flags & MS_RDONLY) {
2203 ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs"); 2203 ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs");
2204 sb->s_flags &= ~MS_RDONLY; 2204 sb->s_flags &= ~MS_RDONLY;
2205 } 2205 }
2206 #ifdef CONFIG_QUOTA 2206 #ifdef CONFIG_QUOTA
2207 /* Needed for iput() to work correctly and not trash data */ 2207 /* Needed for iput() to work correctly and not trash data */
2208 sb->s_flags |= MS_ACTIVE; 2208 sb->s_flags |= MS_ACTIVE;
2209 /* Turn on quotas so that they are updated correctly */ 2209 /* Turn on quotas so that they are updated correctly */
2210 for (i = 0; i < MAXQUOTAS; i++) { 2210 for (i = 0; i < MAXQUOTAS; i++) {
2211 if (EXT4_SB(sb)->s_qf_names[i]) { 2211 if (EXT4_SB(sb)->s_qf_names[i]) {
2212 int ret = ext4_quota_on_mount(sb, i); 2212 int ret = ext4_quota_on_mount(sb, i);
2213 if (ret < 0) 2213 if (ret < 0)
2214 ext4_msg(sb, KERN_ERR, 2214 ext4_msg(sb, KERN_ERR,
2215 "Cannot turn on journaled " 2215 "Cannot turn on journaled "
2216 "quota: error %d", ret); 2216 "quota: error %d", ret);
2217 } 2217 }
2218 } 2218 }
2219 #endif 2219 #endif
2220 2220
2221 while (es->s_last_orphan) { 2221 while (es->s_last_orphan) {
2222 struct inode *inode; 2222 struct inode *inode;
2223 2223
2224 inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan)); 2224 inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
2225 if (IS_ERR(inode)) { 2225 if (IS_ERR(inode)) {
2226 es->s_last_orphan = 0; 2226 es->s_last_orphan = 0;
2227 break; 2227 break;
2228 } 2228 }
2229 2229
2230 list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); 2230 list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
2231 dquot_initialize(inode); 2231 dquot_initialize(inode);
2232 if (inode->i_nlink) { 2232 if (inode->i_nlink) {
2233 if (test_opt(sb, DEBUG)) 2233 if (test_opt(sb, DEBUG))
2234 ext4_msg(sb, KERN_DEBUG, 2234 ext4_msg(sb, KERN_DEBUG,
2235 "%s: truncating inode %lu to %lld bytes", 2235 "%s: truncating inode %lu to %lld bytes",
2236 __func__, inode->i_ino, inode->i_size); 2236 __func__, inode->i_ino, inode->i_size);
2237 jbd_debug(2, "truncating inode %lu to %lld bytes\n", 2237 jbd_debug(2, "truncating inode %lu to %lld bytes\n",
2238 inode->i_ino, inode->i_size); 2238 inode->i_ino, inode->i_size);
2239 mutex_lock(&inode->i_mutex); 2239 mutex_lock(&inode->i_mutex);
2240 truncate_inode_pages(inode->i_mapping, inode->i_size); 2240 truncate_inode_pages(inode->i_mapping, inode->i_size);
2241 ext4_truncate(inode); 2241 ext4_truncate(inode);
2242 mutex_unlock(&inode->i_mutex); 2242 mutex_unlock(&inode->i_mutex);
2243 nr_truncates++; 2243 nr_truncates++;
2244 } else { 2244 } else {
2245 if (test_opt(sb, DEBUG)) 2245 if (test_opt(sb, DEBUG))
2246 ext4_msg(sb, KERN_DEBUG, 2246 ext4_msg(sb, KERN_DEBUG,
2247 "%s: deleting unreferenced inode %lu", 2247 "%s: deleting unreferenced inode %lu",
2248 __func__, inode->i_ino); 2248 __func__, inode->i_ino);
2249 jbd_debug(2, "deleting unreferenced inode %lu\n", 2249 jbd_debug(2, "deleting unreferenced inode %lu\n",
2250 inode->i_ino); 2250 inode->i_ino);
2251 nr_orphans++; 2251 nr_orphans++;
2252 } 2252 }
2253 iput(inode); /* The delete magic happens here! */ 2253 iput(inode); /* The delete magic happens here! */
2254 } 2254 }
2255 2255
2256 #define PLURAL(x) (x), ((x) == 1) ? "" : "s" 2256 #define PLURAL(x) (x), ((x) == 1) ? "" : "s"
2257 2257
2258 if (nr_orphans) 2258 if (nr_orphans)
2259 ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted", 2259 ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted",
2260 PLURAL(nr_orphans)); 2260 PLURAL(nr_orphans));
2261 if (nr_truncates) 2261 if (nr_truncates)
2262 ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up", 2262 ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
2263 PLURAL(nr_truncates)); 2263 PLURAL(nr_truncates));
2264 #ifdef CONFIG_QUOTA 2264 #ifdef CONFIG_QUOTA
2265 /* Turn quotas off */ 2265 /* Turn quotas off */
2266 for (i = 0; i < MAXQUOTAS; i++) { 2266 for (i = 0; i < MAXQUOTAS; i++) {
2267 if (sb_dqopt(sb)->files[i]) 2267 if (sb_dqopt(sb)->files[i])
2268 dquot_quota_off(sb, i); 2268 dquot_quota_off(sb, i);
2269 } 2269 }
2270 #endif 2270 #endif
2271 sb->s_flags = s_flags; /* Restore MS_RDONLY status */ 2271 sb->s_flags = s_flags; /* Restore MS_RDONLY status */
2272 } 2272 }
2273 2273
2274 /* 2274 /*
2275 * Maximal extent format file size. 2275 * Maximal extent format file size.
2276 * Resulting logical blkno at s_maxbytes must fit in our on-disk 2276 * Resulting logical blkno at s_maxbytes must fit in our on-disk
2277 * extent format containers, within a sector_t, and within i_blocks 2277 * extent format containers, within a sector_t, and within i_blocks
2278 * in the vfs. ext4 inode has 48 bits of i_block in fsblock units, 2278 * in the vfs. ext4 inode has 48 bits of i_block in fsblock units,
2279 * so that won't be a limiting factor. 2279 * so that won't be a limiting factor.
2280 * 2280 *
2281 * However there is other limiting factor. We do store extents in the form 2281 * However there is other limiting factor. We do store extents in the form
2282 * of starting block and length, hence the resulting length of the extent 2282 * of starting block and length, hence the resulting length of the extent
2283 * covering maximum file size must fit into on-disk format containers as 2283 * covering maximum file size must fit into on-disk format containers as
2284 * well. Given that length is always by 1 unit bigger than max unit (because 2284 * well. Given that length is always by 1 unit bigger than max unit (because
2285 * we count 0 as well) we have to lower the s_maxbytes by one fs block. 2285 * we count 0 as well) we have to lower the s_maxbytes by one fs block.
2286 * 2286 *
2287 * Note, this does *not* consider any metadata overhead for vfs i_blocks. 2287 * Note, this does *not* consider any metadata overhead for vfs i_blocks.
2288 */ 2288 */
2289 static loff_t ext4_max_size(int blkbits, int has_huge_files) 2289 static loff_t ext4_max_size(int blkbits, int has_huge_files)
2290 { 2290 {
2291 loff_t res; 2291 loff_t res;
2292 loff_t upper_limit = MAX_LFS_FILESIZE; 2292 loff_t upper_limit = MAX_LFS_FILESIZE;
2293 2293
2294 /* small i_blocks in vfs inode? */ 2294 /* small i_blocks in vfs inode? */
2295 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { 2295 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
2296 /* 2296 /*
2297 * CONFIG_LBDAF is not enabled implies the inode 2297 * CONFIG_LBDAF is not enabled implies the inode
2298 * i_block represent total blocks in 512 bytes 2298 * i_block represent total blocks in 512 bytes
2299 * 32 == size of vfs inode i_blocks * 8 2299 * 32 == size of vfs inode i_blocks * 8
2300 */ 2300 */
2301 upper_limit = (1LL << 32) - 1; 2301 upper_limit = (1LL << 32) - 1;
2302 2302
2303 /* total blocks in file system block size */ 2303 /* total blocks in file system block size */
2304 upper_limit >>= (blkbits - 9); 2304 upper_limit >>= (blkbits - 9);
2305 upper_limit <<= blkbits; 2305 upper_limit <<= blkbits;
2306 } 2306 }
2307 2307
2308 /* 2308 /*
2309 * 32-bit extent-start container, ee_block. We lower the maxbytes 2309 * 32-bit extent-start container, ee_block. We lower the maxbytes
2310 * by one fs block, so ee_len can cover the extent of maximum file 2310 * by one fs block, so ee_len can cover the extent of maximum file
2311 * size 2311 * size
2312 */ 2312 */
2313 res = (1LL << 32) - 1; 2313 res = (1LL << 32) - 1;
2314 res <<= blkbits; 2314 res <<= blkbits;
2315 2315
2316 /* Sanity check against vm- & vfs- imposed limits */ 2316 /* Sanity check against vm- & vfs- imposed limits */
2317 if (res > upper_limit) 2317 if (res > upper_limit)
2318 res = upper_limit; 2318 res = upper_limit;
2319 2319
2320 return res; 2320 return res;
2321 } 2321 }
2322 2322
2323 /* 2323 /*
2324 * Maximal bitmap file size. There is a direct, and {,double-,triple-}indirect 2324 * Maximal bitmap file size. There is a direct, and {,double-,triple-}indirect
2325 * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks. 2325 * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks.
2326 * We need to be 1 filesystem block less than the 2^48 sector limit. 2326 * We need to be 1 filesystem block less than the 2^48 sector limit.
2327 */ 2327 */
2328 static loff_t ext4_max_bitmap_size(int bits, int has_huge_files) 2328 static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
2329 { 2329 {
2330 loff_t res = EXT4_NDIR_BLOCKS; 2330 loff_t res = EXT4_NDIR_BLOCKS;
2331 int meta_blocks; 2331 int meta_blocks;
2332 loff_t upper_limit; 2332 loff_t upper_limit;
2333 /* This is calculated to be the largest file size for a dense, block 2333 /* This is calculated to be the largest file size for a dense, block
2334 * mapped file such that the file's total number of 512-byte sectors, 2334 * mapped file such that the file's total number of 512-byte sectors,
2335 * including data and all indirect blocks, does not exceed (2^48 - 1). 2335 * including data and all indirect blocks, does not exceed (2^48 - 1).
2336 * 2336 *
2337 * __u32 i_blocks_lo and _u16 i_blocks_high represent the total 2337 * __u32 i_blocks_lo and _u16 i_blocks_high represent the total
2338 * number of 512-byte sectors of the file. 2338 * number of 512-byte sectors of the file.
2339 */ 2339 */
2340 2340
2341 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { 2341 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
2342 /* 2342 /*
2343 * !has_huge_files or CONFIG_LBDAF not enabled implies that 2343 * !has_huge_files or CONFIG_LBDAF not enabled implies that
2344 * the inode i_block field represents total file blocks in 2344 * the inode i_block field represents total file blocks in
2345 * 2^32 512-byte sectors == size of vfs inode i_blocks * 8 2345 * 2^32 512-byte sectors == size of vfs inode i_blocks * 8
2346 */ 2346 */
2347 upper_limit = (1LL << 32) - 1; 2347 upper_limit = (1LL << 32) - 1;
2348 2348
2349 /* total blocks in file system block size */ 2349 /* total blocks in file system block size */
2350 upper_limit >>= (bits - 9); 2350 upper_limit >>= (bits - 9);
2351 2351
2352 } else { 2352 } else {
2353 /* 2353 /*
2354 * We use 48 bit ext4_inode i_blocks 2354 * We use 48 bit ext4_inode i_blocks
2355 * With EXT4_HUGE_FILE_FL set the i_blocks 2355 * With EXT4_HUGE_FILE_FL set the i_blocks
2356 * represent total number of blocks in 2356 * represent total number of blocks in
2357 * file system block size 2357 * file system block size
2358 */ 2358 */
2359 upper_limit = (1LL << 48) - 1; 2359 upper_limit = (1LL << 48) - 1;
2360 2360
2361 } 2361 }
2362 2362
2363 /* indirect blocks */ 2363 /* indirect blocks */
2364 meta_blocks = 1; 2364 meta_blocks = 1;
2365 /* double indirect blocks */ 2365 /* double indirect blocks */
2366 meta_blocks += 1 + (1LL << (bits-2)); 2366 meta_blocks += 1 + (1LL << (bits-2));
2367 /* tripple indirect blocks */ 2367 /* tripple indirect blocks */
2368 meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2))); 2368 meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));
2369 2369
2370 upper_limit -= meta_blocks; 2370 upper_limit -= meta_blocks;
2371 upper_limit <<= bits; 2371 upper_limit <<= bits;
2372 2372
2373 res += 1LL << (bits-2); 2373 res += 1LL << (bits-2);
2374 res += 1LL << (2*(bits-2)); 2374 res += 1LL << (2*(bits-2));
2375 res += 1LL << (3*(bits-2)); 2375 res += 1LL << (3*(bits-2));
2376 res <<= bits; 2376 res <<= bits;
2377 if (res > upper_limit) 2377 if (res > upper_limit)
2378 res = upper_limit; 2378 res = upper_limit;
2379 2379
2380 if (res > MAX_LFS_FILESIZE) 2380 if (res > MAX_LFS_FILESIZE)
2381 res = MAX_LFS_FILESIZE; 2381 res = MAX_LFS_FILESIZE;
2382 2382
2383 return res; 2383 return res;
2384 } 2384 }
2385 2385
2386 static ext4_fsblk_t descriptor_loc(struct super_block *sb, 2386 static ext4_fsblk_t descriptor_loc(struct super_block *sb,
2387 ext4_fsblk_t logical_sb_block, int nr) 2387 ext4_fsblk_t logical_sb_block, int nr)
2388 { 2388 {
2389 struct ext4_sb_info *sbi = EXT4_SB(sb); 2389 struct ext4_sb_info *sbi = EXT4_SB(sb);
2390 ext4_group_t bg, first_meta_bg; 2390 ext4_group_t bg, first_meta_bg;
2391 int has_super = 0; 2391 int has_super = 0;
2392 2392
2393 first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); 2393 first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
2394 2394
2395 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) || 2395 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
2396 nr < first_meta_bg) 2396 nr < first_meta_bg)
2397 return logical_sb_block + nr + 1; 2397 return logical_sb_block + nr + 1;
2398 bg = sbi->s_desc_per_block * nr; 2398 bg = sbi->s_desc_per_block * nr;
2399 if (ext4_bg_has_super(sb, bg)) 2399 if (ext4_bg_has_super(sb, bg))
2400 has_super = 1; 2400 has_super = 1;
2401 2401
2402 return (has_super + ext4_group_first_block_no(sb, bg)); 2402 return (has_super + ext4_group_first_block_no(sb, bg));
2403 } 2403 }
2404 2404
2405 /** 2405 /**
2406 * ext4_get_stripe_size: Get the stripe size. 2406 * ext4_get_stripe_size: Get the stripe size.
2407 * @sbi: In memory super block info 2407 * @sbi: In memory super block info
2408 * 2408 *
2409 * If we have specified it via mount option, then 2409 * If we have specified it via mount option, then
2410 * use the mount option value. If the value specified at mount time is 2410 * use the mount option value. If the value specified at mount time is
2411 * greater than the blocks per group use the super block value. 2411 * greater than the blocks per group use the super block value.
2412 * If the super block value is greater than blocks per group return 0. 2412 * If the super block value is greater than blocks per group return 0.
2413 * Allocator needs it be less than blocks per group. 2413 * Allocator needs it be less than blocks per group.
2414 * 2414 *
2415 */ 2415 */
2416 static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi) 2416 static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
2417 { 2417 {
2418 unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride); 2418 unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride);
2419 unsigned long stripe_width = 2419 unsigned long stripe_width =
2420 le32_to_cpu(sbi->s_es->s_raid_stripe_width); 2420 le32_to_cpu(sbi->s_es->s_raid_stripe_width);
2421 int ret; 2421 int ret;
2422 2422
2423 if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group) 2423 if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group)
2424 ret = sbi->s_stripe; 2424 ret = sbi->s_stripe;
2425 else if (stripe_width <= sbi->s_blocks_per_group) 2425 else if (stripe_width <= sbi->s_blocks_per_group)
2426 ret = stripe_width; 2426 ret = stripe_width;
2427 else if (stride <= sbi->s_blocks_per_group) 2427 else if (stride <= sbi->s_blocks_per_group)
2428 ret = stride; 2428 ret = stride;
2429 else 2429 else
2430 ret = 0; 2430 ret = 0;
2431 2431
2432 /* 2432 /*
2433 * If the stripe width is 1, this makes no sense and 2433 * If the stripe width is 1, this makes no sense and
2434 * we set it to 0 to turn off stripe handling code. 2434 * we set it to 0 to turn off stripe handling code.
2435 */ 2435 */
2436 if (ret <= 1) 2436 if (ret <= 1)
2437 ret = 0; 2437 ret = 0;
2438 2438
2439 return ret; 2439 return ret;
2440 } 2440 }
2441 2441
/* sysfs support */

/*
 * One attribute exported under /sys/fs/ext4/<dev>/.  The show/store
 * callbacks receive the owning ext4_sb_info (resolved from the kobject
 * by ext4_attr_show/ext4_attr_store below) rather than a raw kobject.
 */
struct ext4_attr {
	struct attribute attr;
	ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *);
	ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *,
			 const char *, size_t);
	union {
		/* byte offset of the backing field in struct ext4_sb_info */
		int offset;
		/* fixed value reported by deprecated attributes */
		int deprecated_val;
	} u;
};
2454 2454
2455 static int parse_strtoull(const char *buf, 2455 static int parse_strtoull(const char *buf,
2456 unsigned long long max, unsigned long long *value) 2456 unsigned long long max, unsigned long long *value)
2457 { 2457 {
2458 int ret; 2458 int ret;
2459 2459
2460 ret = kstrtoull(skip_spaces(buf), 0, value); 2460 ret = kstrtoull(skip_spaces(buf), 0, value);
2461 if (!ret && *value > max) 2461 if (!ret && *value > max)
2462 ret = -EINVAL; 2462 ret = -EINVAL;
2463 return ret; 2463 return ret;
2464 } 2464 }
2465 2465
2466 static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a, 2466 static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a,
2467 struct ext4_sb_info *sbi, 2467 struct ext4_sb_info *sbi,
2468 char *buf) 2468 char *buf)
2469 { 2469 {
2470 return snprintf(buf, PAGE_SIZE, "%llu\n", 2470 return snprintf(buf, PAGE_SIZE, "%llu\n",
2471 (s64) EXT4_C2B(sbi, 2471 (s64) EXT4_C2B(sbi,
2472 percpu_counter_sum(&sbi->s_dirtyclusters_counter))); 2472 percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
2473 } 2473 }
2474 2474
2475 static ssize_t session_write_kbytes_show(struct ext4_attr *a, 2475 static ssize_t session_write_kbytes_show(struct ext4_attr *a,
2476 struct ext4_sb_info *sbi, char *buf) 2476 struct ext4_sb_info *sbi, char *buf)
2477 { 2477 {
2478 struct super_block *sb = sbi->s_buddy_cache->i_sb; 2478 struct super_block *sb = sbi->s_buddy_cache->i_sb;
2479 2479
2480 if (!sb->s_bdev->bd_part) 2480 if (!sb->s_bdev->bd_part)
2481 return snprintf(buf, PAGE_SIZE, "0\n"); 2481 return snprintf(buf, PAGE_SIZE, "0\n");
2482 return snprintf(buf, PAGE_SIZE, "%lu\n", 2482 return snprintf(buf, PAGE_SIZE, "%lu\n",
2483 (part_stat_read(sb->s_bdev->bd_part, sectors[1]) - 2483 (part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
2484 sbi->s_sectors_written_start) >> 1); 2484 sbi->s_sectors_written_start) >> 1);
2485 } 2485 }
2486 2486
2487 static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a, 2487 static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
2488 struct ext4_sb_info *sbi, char *buf) 2488 struct ext4_sb_info *sbi, char *buf)
2489 { 2489 {
2490 struct super_block *sb = sbi->s_buddy_cache->i_sb; 2490 struct super_block *sb = sbi->s_buddy_cache->i_sb;
2491 2491
2492 if (!sb->s_bdev->bd_part) 2492 if (!sb->s_bdev->bd_part)
2493 return snprintf(buf, PAGE_SIZE, "0\n"); 2493 return snprintf(buf, PAGE_SIZE, "0\n");
2494 return snprintf(buf, PAGE_SIZE, "%llu\n", 2494 return snprintf(buf, PAGE_SIZE, "%llu\n",
2495 (unsigned long long)(sbi->s_kbytes_written + 2495 (unsigned long long)(sbi->s_kbytes_written +
2496 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - 2496 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
2497 EXT4_SB(sb)->s_sectors_written_start) >> 1))); 2497 EXT4_SB(sb)->s_sectors_written_start) >> 1)));
2498 } 2498 }
2499 2499
2500 static ssize_t inode_readahead_blks_store(struct ext4_attr *a, 2500 static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
2501 struct ext4_sb_info *sbi, 2501 struct ext4_sb_info *sbi,
2502 const char *buf, size_t count) 2502 const char *buf, size_t count)
2503 { 2503 {
2504 unsigned long t; 2504 unsigned long t;
2505 int ret; 2505 int ret;
2506 2506
2507 ret = kstrtoul(skip_spaces(buf), 0, &t); 2507 ret = kstrtoul(skip_spaces(buf), 0, &t);
2508 if (ret) 2508 if (ret)
2509 return ret; 2509 return ret;
2510 2510
2511 if (t && (!is_power_of_2(t) || t > 0x40000000)) 2511 if (t && (!is_power_of_2(t) || t > 0x40000000))
2512 return -EINVAL; 2512 return -EINVAL;
2513 2513
2514 sbi->s_inode_readahead_blks = t; 2514 sbi->s_inode_readahead_blks = t;
2515 return count; 2515 return count;
2516 } 2516 }
2517 2517
2518 static ssize_t sbi_ui_show(struct ext4_attr *a, 2518 static ssize_t sbi_ui_show(struct ext4_attr *a,
2519 struct ext4_sb_info *sbi, char *buf) 2519 struct ext4_sb_info *sbi, char *buf)
2520 { 2520 {
2521 unsigned int *ui = (unsigned int *) (((char *) sbi) + a->u.offset); 2521 unsigned int *ui = (unsigned int *) (((char *) sbi) + a->u.offset);
2522 2522
2523 return snprintf(buf, PAGE_SIZE, "%u\n", *ui); 2523 return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
2524 } 2524 }
2525 2525
2526 static ssize_t sbi_ui_store(struct ext4_attr *a, 2526 static ssize_t sbi_ui_store(struct ext4_attr *a,
2527 struct ext4_sb_info *sbi, 2527 struct ext4_sb_info *sbi,
2528 const char *buf, size_t count) 2528 const char *buf, size_t count)
2529 { 2529 {
2530 unsigned int *ui = (unsigned int *) (((char *) sbi) + a->u.offset); 2530 unsigned int *ui = (unsigned int *) (((char *) sbi) + a->u.offset);
2531 unsigned long t; 2531 unsigned long t;
2532 int ret; 2532 int ret;
2533 2533
2534 ret = kstrtoul(skip_spaces(buf), 0, &t); 2534 ret = kstrtoul(skip_spaces(buf), 0, &t);
2535 if (ret) 2535 if (ret)
2536 return ret; 2536 return ret;
2537 *ui = t; 2537 *ui = t;
2538 return count; 2538 return count;
2539 } 2539 }
2540 2540
2541 static ssize_t reserved_clusters_show(struct ext4_attr *a, 2541 static ssize_t reserved_clusters_show(struct ext4_attr *a,
2542 struct ext4_sb_info *sbi, char *buf) 2542 struct ext4_sb_info *sbi, char *buf)
2543 { 2543 {
2544 return snprintf(buf, PAGE_SIZE, "%llu\n", 2544 return snprintf(buf, PAGE_SIZE, "%llu\n",
2545 (unsigned long long) atomic64_read(&sbi->s_resv_clusters)); 2545 (unsigned long long) atomic64_read(&sbi->s_resv_clusters));
2546 } 2546 }
2547 2547
2548 static ssize_t reserved_clusters_store(struct ext4_attr *a, 2548 static ssize_t reserved_clusters_store(struct ext4_attr *a,
2549 struct ext4_sb_info *sbi, 2549 struct ext4_sb_info *sbi,
2550 const char *buf, size_t count) 2550 const char *buf, size_t count)
2551 { 2551 {
2552 unsigned long long val; 2552 unsigned long long val;
2553 int ret; 2553 int ret;
2554 2554
2555 if (parse_strtoull(buf, -1ULL, &val)) 2555 if (parse_strtoull(buf, -1ULL, &val))
2556 return -EINVAL; 2556 return -EINVAL;
2557 ret = ext4_reserve_clusters(sbi, val); 2557 ret = ext4_reserve_clusters(sbi, val);
2558 2558
2559 return ret ? ret : count; 2559 return ret ? ret : count;
2560 } 2560 }
2561 2561
2562 static ssize_t trigger_test_error(struct ext4_attr *a, 2562 static ssize_t trigger_test_error(struct ext4_attr *a,
2563 struct ext4_sb_info *sbi, 2563 struct ext4_sb_info *sbi,
2564 const char *buf, size_t count) 2564 const char *buf, size_t count)
2565 { 2565 {
2566 int len = count; 2566 int len = count;
2567 2567
2568 if (!capable(CAP_SYS_ADMIN)) 2568 if (!capable(CAP_SYS_ADMIN))
2569 return -EPERM; 2569 return -EPERM;
2570 2570
2571 if (len && buf[len-1] == '\n') 2571 if (len && buf[len-1] == '\n')
2572 len--; 2572 len--;
2573 2573
2574 if (len) 2574 if (len)
2575 ext4_error(sbi->s_sb, "%.*s", len, buf); 2575 ext4_error(sbi->s_sb, "%.*s", len, buf);
2576 return count; 2576 return count;
2577 } 2577 }
2578 2578
2579 static ssize_t sbi_deprecated_show(struct ext4_attr *a, 2579 static ssize_t sbi_deprecated_show(struct ext4_attr *a,
2580 struct ext4_sb_info *sbi, char *buf) 2580 struct ext4_sb_info *sbi, char *buf)
2581 { 2581 {
2582 return snprintf(buf, PAGE_SIZE, "%d\n", a->u.deprecated_val); 2582 return snprintf(buf, PAGE_SIZE, "%d\n", a->u.deprecated_val);
2583 } 2583 }
2584 2584
/*
 * EXT4_ATTR_OFFSET - attribute backed by a field of struct ext4_sb_info;
 * u.offset records where the field lives so the generic sbi_ui_show/
 * sbi_ui_store helpers can reach it.
 */
#define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \
static struct ext4_attr ext4_attr_##_name = {			\
	.attr = {.name = __stringify(_name), .mode = _mode },	\
	.show	= _show,					\
	.store	= _store,					\
	.u = {							\
		.offset = offsetof(struct ext4_sb_info, _elname),\
	},							\
}
/* EXT4_ATTR - attribute with explicit show/store callbacks. */
#define EXT4_ATTR(name, mode, show, store) \
static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store)

/* Convenience wrappers for the common permission/callback combinations. */
#define EXT4_INFO_ATTR(name) EXT4_ATTR(name, 0444, NULL, NULL)
#define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL)
#define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store)
#define EXT4_RW_ATTR_SBI_UI(name, elname)	\
	EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname)
#define ATTR_LIST(name) &ext4_attr_##name.attr
/*
 * EXT4_DEPRECATED_ATTR - read-only attribute kept for ABI compatibility;
 * it always reports the fixed value _val via sbi_deprecated_show.
 */
#define EXT4_DEPRECATED_ATTR(_name, _val)	\
static struct ext4_attr ext4_attr_##_name = {			\
	.attr = {.name = __stringify(_name), .mode = 0444 },	\
	.show	= sbi_deprecated_show,				\
	.u = {							\
		.deprecated_val = _val,				\
	},							\
}
2611 2611
/* Per-filesystem tunables and statistics exported via sysfs. */
EXT4_RO_ATTR(delayed_allocation_blocks);
EXT4_RO_ATTR(session_write_kbytes);
EXT4_RO_ATTR(lifetime_write_kbytes);
EXT4_RW_ATTR(reserved_clusters);
EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show,
		 inode_readahead_blks_store, s_inode_readahead_blks);
EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
/* Obsolete knob; always reads back 128. */
EXT4_DEPRECATED_ATTR(max_writeback_mb_bump, 128);
EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb);
EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error);
/* Ratelimit tuning for error/warning/message reporting. */
EXT4_RW_ATTR_SBI_UI(err_ratelimit_interval_ms, s_err_ratelimit_state.interval);
EXT4_RW_ATTR_SBI_UI(err_ratelimit_burst, s_err_ratelimit_state.burst);
EXT4_RW_ATTR_SBI_UI(warning_ratelimit_interval_ms, s_warning_ratelimit_state.interval);
EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst);
EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval);
EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst);
2634 2634
/* NULL-terminated attribute list registered through ext4_ktype. */
static struct attribute *ext4_attrs[] = {
	ATTR_LIST(delayed_allocation_blocks),
	ATTR_LIST(session_write_kbytes),
	ATTR_LIST(lifetime_write_kbytes),
	ATTR_LIST(reserved_clusters),
	ATTR_LIST(inode_readahead_blks),
	ATTR_LIST(inode_goal),
	ATTR_LIST(mb_stats),
	ATTR_LIST(mb_max_to_scan),
	ATTR_LIST(mb_min_to_scan),
	ATTR_LIST(mb_order2_req),
	ATTR_LIST(mb_stream_req),
	ATTR_LIST(mb_group_prealloc),
	ATTR_LIST(max_writeback_mb_bump),
	ATTR_LIST(extent_max_zeroout_kb),
	ATTR_LIST(trigger_fs_error),
	ATTR_LIST(err_ratelimit_interval_ms),
	ATTR_LIST(err_ratelimit_burst),
	ATTR_LIST(warning_ratelimit_interval_ms),
	ATTR_LIST(warning_ratelimit_burst),
	ATTR_LIST(msg_ratelimit_interval_ms),
	ATTR_LIST(msg_ratelimit_burst),
	NULL,
};
2659 2659
/* Features this copy of ext4 supports */
EXT4_INFO_ATTR(lazy_itable_init);
EXT4_INFO_ATTR(batched_discard);
EXT4_INFO_ATTR(meta_bg_resize);

/* NULL-terminated list for the global ext4 features kobject. */
static struct attribute *ext4_feat_attrs[] = {
	ATTR_LIST(lazy_itable_init),
	ATTR_LIST(batched_discard),
	ATTR_LIST(meta_bg_resize),
	NULL,
};
2671 2671
2672 static ssize_t ext4_attr_show(struct kobject *kobj, 2672 static ssize_t ext4_attr_show(struct kobject *kobj,
2673 struct attribute *attr, char *buf) 2673 struct attribute *attr, char *buf)
2674 { 2674 {
2675 struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, 2675 struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
2676 s_kobj); 2676 s_kobj);
2677 struct ext4_attr *a = container_of(attr, struct ext4_attr, attr); 2677 struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
2678 2678
2679 return a->show ? a->show(a, sbi, buf) : 0; 2679 return a->show ? a->show(a, sbi, buf) : 0;
2680 } 2680 }
2681 2681
2682 static ssize_t ext4_attr_store(struct kobject *kobj, 2682 static ssize_t ext4_attr_store(struct kobject *kobj,
2683 struct attribute *attr, 2683 struct attribute *attr,
2684 const char *buf, size_t len) 2684 const char *buf, size_t len)
2685 { 2685 {
2686 struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, 2686 struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
2687 s_kobj); 2687 s_kobj);
2688 struct ext4_attr *a = container_of(attr, struct ext4_attr, attr); 2688 struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
2689 2689
2690 return a->store ? a->store(a, sbi, buf, len) : 0; 2690 return a->store ? a->store(a, sbi, buf, len) : 0;
2691 } 2691 }
2692 2692
/*
 * kobject release callback: signal that the per-filesystem sysfs
 * kobject is fully gone so a waiter on s_kobj_unregister can proceed.
 */
static void ext4_sb_release(struct kobject *kobj)
{
	struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
						s_kobj);
	complete(&sbi->s_kobj_unregister);
}
2699 2699
/* Dispatchers shared by the per-fs and feature kobject types. */
static const struct sysfs_ops ext4_attr_ops = {
	.show	= ext4_attr_show,
	.store	= ext4_attr_store,
};

/* kobject type for the per-filesystem /sys/fs/ext4/<dev> directory. */
static struct kobj_type ext4_ktype = {
	.default_attrs	= ext4_attrs,
	.sysfs_ops	= &ext4_attr_ops,
	.release	= ext4_sb_release,
};
2710 2710
/*
 * Release callback for the global features kobject; wakes whoever is
 * waiting on f_kobj_unregister.  (ext4_feat is defined elsewhere in
 * this file.)
 */
static void ext4_feat_release(struct kobject *kobj)
{
	complete(&ext4_feat->f_kobj_unregister);
}
2715 2715
/* kobject type for the global /sys/fs/ext4/features directory. */
static struct kobj_type ext4_feat_ktype = {
	.default_attrs	= ext4_feat_attrs,
	.sysfs_ops	= &ext4_attr_ops,
	.release	= ext4_feat_release,
};
2721 2721
/*
 * Check whether this filesystem can be mounted based on
 * the features present and the RDONLY/RDWR mount requested.
 * Returns 1 if this filesystem can be mounted as requested,
 * 0 if it cannot be.
 */
static int ext4_feature_set_ok(struct super_block *sb, int readonly)
{
	/* Unknown incompat features make even a read-only mount unsafe. */
	if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP)) {
		ext4_msg(sb, KERN_ERR,
			"Couldn't mount because of "
			"unsupported optional features (%x)",
			(le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
			~EXT4_FEATURE_INCOMPAT_SUPP));
		return 0;
	}

	/* All remaining checks only matter for a writable mount. */
	if (readonly)
		return 1;

	/* Check that feature set is OK for a read-write mount */
	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP)) {
		ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of "
			 "unsupported optional features (%x)",
			 (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
			  ~EXT4_FEATURE_RO_COMPAT_SUPP));
		return 0;
	}
	/*
	 * Large file size enabled file system can only be mounted
	 * read-write on 32-bit systems if kernel is built with CONFIG_LBDAF
	 */
	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) {
		/* blkcnt_t narrower than u64 means CONFIG_LBDAF is off. */
		if (sizeof(blkcnt_t) < sizeof(u64)) {
			ext4_msg(sb, KERN_ERR, "Filesystem with huge files "
				 "cannot be mounted RDWR without "
				 "CONFIG_LBDAF");
			return 0;
		}
	}
	/* bigalloc is only implemented on top of the extents machinery. */
	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC) &&
	    !EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
		ext4_msg(sb, KERN_ERR,
			 "Can't support bigalloc feature without "
			 "extents feature\n");
		return 0;
	}

#ifndef CONFIG_QUOTA
	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) &&
	    !readonly) {
		ext4_msg(sb, KERN_ERR,
			 "Filesystem with quota feature cannot be mounted RDWR "
			 "without CONFIG_QUOTA");
		return 0;
	}
#endif  /* CONFIG_QUOTA */
	return 1;
}
2781 2781
/*
 * This function is called once a day if we have errors logged
 * on the file system
 */
static void print_daily_error_info(unsigned long arg)
{
	struct super_block *sb = (struct super_block *) arg;
	struct ext4_sb_info *sbi;
	struct ext4_super_block *es;

	sbi = EXT4_SB(sb);
	es = sbi->s_es;

	if (es->s_error_count)
		ext4_msg(sb, KERN_NOTICE, "error count: %u",
			 le32_to_cpu(es->s_error_count));
	/*
	 * The bare printk() calls below deliberately omit a log level so
	 * they continue the preceding KERN_NOTICE line on one record.
	 */
	if (es->s_first_error_time) {
		printk(KERN_NOTICE "EXT4-fs (%s): initial error at %u: %.*s:%d",
		       sb->s_id, le32_to_cpu(es->s_first_error_time),
		       (int) sizeof(es->s_first_error_func),
		       es->s_first_error_func,
		       le32_to_cpu(es->s_first_error_line));
		if (es->s_first_error_ino)
			printk(": inode %u",
			       le32_to_cpu(es->s_first_error_ino));
		if (es->s_first_error_block)
			printk(": block %llu", (unsigned long long)
			       le64_to_cpu(es->s_first_error_block));
		printk("\n");
	}
	if (es->s_last_error_time) {
		printk(KERN_NOTICE "EXT4-fs (%s): last error at %u: %.*s:%d",
		       sb->s_id, le32_to_cpu(es->s_last_error_time),
		       (int) sizeof(es->s_last_error_func),
		       es->s_last_error_func,
		       le32_to_cpu(es->s_last_error_line));
		if (es->s_last_error_ino)
			printk(": inode %u",
			       le32_to_cpu(es->s_last_error_ino));
		if (es->s_last_error_block)
			printk(": block %llu", (unsigned long long)
			       le64_to_cpu(es->s_last_error_block));
		printk("\n");
	}
	mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);  /* Once a day */
}
2828 2828
/* Find next suitable group and run ext4_init_inode_table */
static int ext4_run_li_request(struct ext4_li_request *elr)
{
	struct ext4_group_desc *gdp = NULL;
	ext4_group_t group, ngroups;
	struct super_block *sb;
	unsigned long timeout = 0;
	int ret = 0;

	sb = elr->lr_super;
	ngroups = EXT4_SB(sb)->s_groups_count;

	/* Hold a write reference so the fs cannot be frozen under us. */
	sb_start_write(sb);
	/* Scan forward for the first group whose itable is not yet zeroed. */
	for (group = elr->lr_next_group; group < ngroups; group++) {
		gdp = ext4_get_group_desc(sb, group, NULL);
		if (!gdp) {
			/* No descriptor: signal the caller to drop this job. */
			ret = 1;
			break;
		}

		if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
			break;
	}

	/* All groups done: non-zero return removes the request. */
	if (group >= ngroups)
		ret = 1;

	if (!ret) {
		timeout = jiffies;
		ret = ext4_init_inode_table(sb, group,
					    elr->lr_timeout ? 0 : 1);
		if (elr->lr_timeout == 0) {
			/*
			 * First run: derive the per-request delay from how
			 * long zeroing one itable took, scaled by the
			 * li_wait_mult mount option.
			 */
			timeout = (jiffies - timeout) *
				  elr->lr_sbi->s_li_wait_mult;
			elr->lr_timeout = timeout;
		}
		elr->lr_next_sched = jiffies + elr->lr_timeout;
		elr->lr_next_group = group + 1;
	}
	sb_end_write(sb);

	return ret;
}
2872 2872
2873 /* 2873 /*
2874 * Remove lr_request from the list_request and free the 2874 * Remove lr_request from the list_request and free the
2875 * request structure. Should be called with li_list_mtx held 2875 * request structure. Should be called with li_list_mtx held
2876 */ 2876 */
2877 static void ext4_remove_li_request(struct ext4_li_request *elr) 2877 static void ext4_remove_li_request(struct ext4_li_request *elr)
2878 { 2878 {
2879 struct ext4_sb_info *sbi; 2879 struct ext4_sb_info *sbi;
2880 2880
2881 if (!elr) 2881 if (!elr)
2882 return; 2882 return;
2883 2883
2884 sbi = elr->lr_sbi; 2884 sbi = elr->lr_sbi;
2885 2885
2886 list_del(&elr->lr_request); 2886 list_del(&elr->lr_request);
2887 sbi->s_li_request = NULL; 2887 sbi->s_li_request = NULL;
2888 kfree(elr); 2888 kfree(elr);
2889 } 2889 }
2890 2890
/*
 * Drop this superblock's lazy-init request, if any.  ext4_li_mtx is
 * taken first to stabilize ext4_li_info against concurrent thread
 * teardown; li_list_mtx then protects the request list itself.
 */
static void ext4_unregister_li_request(struct super_block *sb)
{
	mutex_lock(&ext4_li_mtx);
	if (!ext4_li_info) {
		/* Lazy-init machinery already gone; nothing to remove. */
		mutex_unlock(&ext4_li_mtx);
		return;
	}

	mutex_lock(&ext4_li_info->li_list_mtx);
	ext4_remove_li_request(EXT4_SB(sb)->s_li_request);
	mutex_unlock(&ext4_li_info->li_list_mtx);
	mutex_unlock(&ext4_li_mtx);
}
2904 2904
static struct task_struct *ext4_lazyinit_task;

/*
 * This is the function where ext4lazyinit thread lives. It walks
 * through the request list searching for next scheduled filesystem.
 * When such a fs is found, run the lazy initialization request
 * (ext4_rn_li_request) and keep track of the time spend in this
 * function. Based on that time we compute next schedule time of
 * the request. When walking through the list is complete, compute
 * next waking time and put itself into sleep.
 */
static int ext4_lazyinit_thread(void *arg)
{
	struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg;
	struct list_head *pos, *n;
	struct ext4_li_request *elr;
	unsigned long next_wakeup, cur;

	BUG_ON(NULL == eli);

cont_thread:
	while (true) {
		next_wakeup = MAX_JIFFY_OFFSET;

		mutex_lock(&eli->li_list_mtx);
		if (list_empty(&eli->li_request_list)) {
			mutex_unlock(&eli->li_list_mtx);
			goto exit_thread;
		}

		/* _safe variant: ext4_remove_li_request() unlinks entries. */
		list_for_each_safe(pos, n, &eli->li_request_list) {
			elr = list_entry(pos, struct ext4_li_request,
					 lr_request);

			if (time_after_eq(jiffies, elr->lr_next_sched)) {
				if (ext4_run_li_request(elr) != 0) {
					/* error, remove the lazy_init job */
					ext4_remove_li_request(elr);
					continue;
				}
			}

			/* Track the earliest deadline among all requests. */
			if (time_before(elr->lr_next_sched, next_wakeup))
				next_wakeup = elr->lr_next_sched;
		}
		mutex_unlock(&eli->li_list_mtx);

		try_to_freeze();

		cur = jiffies;
		if ((time_after_eq(cur, next_wakeup)) ||
		    (MAX_JIFFY_OFFSET == next_wakeup)) {
			/* Deadline already passed (or nothing left): rescan. */
			cond_resched();
			continue;
		}

		schedule_timeout_interruptible(next_wakeup - cur);

		if (kthread_should_stop()) {
			ext4_clear_request_list();
			goto exit_thread;
		}
	}

exit_thread:
	/*
	 * It looks like the request list is empty, but we need
	 * to check it under the li_list_mtx lock, to prevent any
	 * additions into it, and of course we should lock ext4_li_mtx
	 * to atomically free the list and ext4_li_info, because at
	 * this point another ext4 filesystem could be registering
	 * new one.
	 */
	mutex_lock(&ext4_li_mtx);
	mutex_lock(&eli->li_list_mtx);
	if (!list_empty(&eli->li_request_list)) {
		/* A new request raced in; go back to serving the list. */
		mutex_unlock(&eli->li_list_mtx);
		mutex_unlock(&ext4_li_mtx);
		goto cont_thread;
	}
	mutex_unlock(&eli->li_list_mtx);
	kfree(ext4_li_info);
	ext4_li_info = NULL;
	mutex_unlock(&ext4_li_mtx);

	return 0;
}
2992 2992
2993 static void ext4_clear_request_list(void) 2993 static void ext4_clear_request_list(void)
2994 { 2994 {
2995 struct list_head *pos, *n; 2995 struct list_head *pos, *n;
2996 struct ext4_li_request *elr; 2996 struct ext4_li_request *elr;
2997 2997
2998 mutex_lock(&ext4_li_info->li_list_mtx); 2998 mutex_lock(&ext4_li_info->li_list_mtx);
2999 list_for_each_safe(pos, n, &ext4_li_info->li_request_list) { 2999 list_for_each_safe(pos, n, &ext4_li_info->li_request_list) {
3000 elr = list_entry(pos, struct ext4_li_request, 3000 elr = list_entry(pos, struct ext4_li_request,
3001 lr_request); 3001 lr_request);
3002 ext4_remove_li_request(elr); 3002 ext4_remove_li_request(elr);
3003 } 3003 }
3004 mutex_unlock(&ext4_li_info->li_list_mtx); 3004 mutex_unlock(&ext4_li_info->li_list_mtx);
3005 } 3005 }
3006 3006
/*
 * Spawn the ext4lazyinit kthread.  On failure the whole lazy-init
 * state (request list and ext4_li_info) is torn down, since no thread
 * exists to consume it.  Returns 0 or a negative errno.
 */
static int ext4_run_lazyinit_thread(void)
{
	ext4_lazyinit_task = kthread_run(ext4_lazyinit_thread,
					 ext4_li_info, "ext4lazyinit");
	if (IS_ERR(ext4_lazyinit_task)) {
		int err = PTR_ERR(ext4_lazyinit_task);
		ext4_clear_request_list();
		kfree(ext4_li_info);
		ext4_li_info = NULL;
		printk(KERN_CRIT "EXT4-fs: error %d creating inode table "
				 "initialization thread\n",
				 err);
		return err;
	}
	ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING;
	return 0;
}
3024 3024
3025 /* 3025 /*
3026 * Check whether it make sense to run itable init. thread or not. 3026 * Check whether it make sense to run itable init. thread or not.
3027 * If there is at least one uninitialized inode table, return 3027 * If there is at least one uninitialized inode table, return
3028 * corresponding group number, else the loop goes through all 3028 * corresponding group number, else the loop goes through all
3029 * groups and return total number of groups. 3029 * groups and return total number of groups.
3030 */ 3030 */
3031 static ext4_group_t ext4_has_uninit_itable(struct super_block *sb) 3031 static ext4_group_t ext4_has_uninit_itable(struct super_block *sb)
3032 { 3032 {
3033 ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count; 3033 ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count;
3034 struct ext4_group_desc *gdp = NULL; 3034 struct ext4_group_desc *gdp = NULL;
3035 3035
3036 for (group = 0; group < ngroups; group++) { 3036 for (group = 0; group < ngroups; group++) {
3037 gdp = ext4_get_group_desc(sb, group, NULL); 3037 gdp = ext4_get_group_desc(sb, group, NULL);
3038 if (!gdp) 3038 if (!gdp)
3039 continue; 3039 continue;
3040 3040
3041 if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))) 3041 if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
3042 break; 3042 break;
3043 } 3043 }
3044 3044
3045 return group; 3045 return group;
3046 } 3046 }
3047 3047
3048 static int ext4_li_info_new(void) 3048 static int ext4_li_info_new(void)
3049 { 3049 {
3050 struct ext4_lazy_init *eli = NULL; 3050 struct ext4_lazy_init *eli = NULL;
3051 3051
3052 eli = kzalloc(sizeof(*eli), GFP_KERNEL); 3052 eli = kzalloc(sizeof(*eli), GFP_KERNEL);
3053 if (!eli) 3053 if (!eli)
3054 return -ENOMEM; 3054 return -ENOMEM;
3055 3055
3056 INIT_LIST_HEAD(&eli->li_request_list); 3056 INIT_LIST_HEAD(&eli->li_request_list);
3057 mutex_init(&eli->li_list_mtx); 3057 mutex_init(&eli->li_list_mtx);
3058 3058
3059 eli->li_state |= EXT4_LAZYINIT_QUIT; 3059 eli->li_state |= EXT4_LAZYINIT_QUIT;
3060 3060
3061 ext4_li_info = eli; 3061 ext4_li_info = eli;
3062 3062
3063 return 0; 3063 return 0;
3064 } 3064 }
3065 3065
/*
 * Allocate a lazy-init request for @sb, starting the itable scan at
 * group @start.  Returns NULL on allocation failure; the caller owns
 * the result until it is put on the request list.
 */
static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
					    ext4_group_t start)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_li_request *elr;

	elr = kzalloc(sizeof(*elr), GFP_KERNEL);
	if (!elr)
		return NULL;

	elr->lr_super = sb;
	elr->lr_sbi = sbi;
	elr->lr_next_group = start;

	/*
	 * Randomize first schedule time of the request to
	 * spread the inode table initialization requests
	 * better.
	 *
	 * prandom_u32() is sufficient here: the jitter does not need
	 * cryptographic strength, and it avoids draining /dev/random
	 * entropy the way get_random_bytes() would.
	 */
	elr->lr_next_sched = jiffies + (prandom_u32() %
					(EXT4_DEF_LI_MAX_START_DELAY * HZ));
	return elr;
}
3092 3089
/*
 * Register @sb with the lazy inode-table-init machinery, creating the
 * global state and kthread on first use.  @first_not_zeroed is the
 * first group still needing itable zeroing (== ngroups means none).
 * Returns 0 on success or if nothing needs doing, negative errno on
 * failure.
 */
int ext4_register_li_request(struct super_block *sb,
			     ext4_group_t first_not_zeroed)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_li_request *elr = NULL;
	ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
	int ret = 0;

	mutex_lock(&ext4_li_mtx);
	if (sbi->s_li_request != NULL) {
		/*
		 * Reset timeout so it can be computed again, because
		 * s_li_wait_mult might have changed.
		 */
		sbi->s_li_request->lr_timeout = 0;
		goto out;
	}

	/* Nothing to do: all itables zeroed, read-only, or opted out. */
	if (first_not_zeroed == ngroups ||
	    (sb->s_flags & MS_RDONLY) ||
	    !test_opt(sb, INIT_INODE_TABLE))
		goto out;

	elr = ext4_li_request_new(sb, first_not_zeroed);
	if (!elr) {
		ret = -ENOMEM;
		goto out;
	}

	if (NULL == ext4_li_info) {
		ret = ext4_li_info_new();
		if (ret)
			goto out;
	}

	mutex_lock(&ext4_li_info->li_list_mtx);
	list_add(&elr->lr_request, &ext4_li_info->li_request_list);
	mutex_unlock(&ext4_li_info->li_list_mtx);

	sbi->s_li_request = elr;
	/*
	 * set elr to NULL here since it has been inserted to
	 * the request_list and the removal and free of it is
	 * handled by ext4_clear_request_list from now on.
	 */
	elr = NULL;

	if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) {
		ret = ext4_run_lazyinit_thread();
		if (ret)
			goto out;
	}
out:
	mutex_unlock(&ext4_li_mtx);
	/* Only frees a request that never made it onto the list. */
	if (ret)
		kfree(elr);
	return ret;
}
3151 3148
3152 /* 3149 /*
3153 * We do not need to lock anything since this is called on 3150 * We do not need to lock anything since this is called on
3154 * module unload. 3151 * module unload.
3155 */ 3152 */
3156 static void ext4_destroy_lazyinit_thread(void) 3153 static void ext4_destroy_lazyinit_thread(void)
3157 { 3154 {
3158 /* 3155 /*
3159 * If thread exited earlier 3156 * If thread exited earlier
3160 * there's nothing to be done. 3157 * there's nothing to be done.
3161 */ 3158 */
3162 if (!ext4_li_info || !ext4_lazyinit_task) 3159 if (!ext4_li_info || !ext4_lazyinit_task)
3163 return; 3160 return;
3164 3161
3165 kthread_stop(ext4_lazyinit_task); 3162 kthread_stop(ext4_lazyinit_task);
3166 } 3163 }
3167 3164
/*
 * Select the jbd2 checksum feature bits matching this filesystem's
 * features and mount options.  Returns the (non-zero on success)
 * result of jbd2_journal_set_features() when features are being set,
 * or the initial 1 when only clearing.
 */
static int set_journal_csum_feature_set(struct super_block *sb)
{
	int ret = 1;
	int compat, incompat;
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
		/* journal checksum v2 */
		compat = 0;
		incompat = JBD2_FEATURE_INCOMPAT_CSUM_V2;
	} else {
		/* journal checksum v1 */
		compat = JBD2_FEATURE_COMPAT_CHECKSUM;
		incompat = 0;
	}

	if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
		/* async commit implies journal checksumming */
		ret = jbd2_journal_set_features(sbi->s_journal,
				compat, 0,
				JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT |
				incompat);
	} else if (test_opt(sb, JOURNAL_CHECKSUM)) {
		ret = jbd2_journal_set_features(sbi->s_journal,
				compat, 0,
				incompat);
		/* checksumming without async commit: drop the async bit */
		jbd2_journal_clear_features(sbi->s_journal, 0, 0,
				JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
	} else {
		/* neither option set: clear all checksum-related bits */
		jbd2_journal_clear_features(sbi->s_journal,
				JBD2_FEATURE_COMPAT_CHECKSUM, 0,
				JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT |
				JBD2_FEATURE_INCOMPAT_CSUM_V2);
	}

	return ret;
}
3205 3202
/*
 * Note: calculating the overhead so we can be compatible with
 * historical BSD practice is quite difficult in the face of
 * clusters/bigalloc.  This is because multiple metadata blocks from
 * different block group can end up in the same allocation cluster.
 * Calculating the exact overhead in the face of clustered allocation
 * requires either O(all block bitmaps) in memory or O(number of block
 * groups**2) in time.  We will still calculate the superblock for
 * older file systems --- and if we come across with a bigalloc file
 * system with zero in s_overhead_clusters the estimate will be close to
 * correct especially for very large cluster sizes --- but for newer
 * file systems, it's better to calculate this figure once at mkfs
 * time, and store it in the superblock.  If the superblock value is
 * present (even for non-bigalloc file systems), we will use it.
 */
static int count_overhead(struct super_block *sb, ext4_group_t grp,
			  char *buf)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_group_desc *gdp;
	ext4_fsblk_t first_block, last_block, b;
	ext4_group_t i, ngroups = ext4_get_groups_count(sb);
	int s, j, count = 0;

	/* Without bigalloc, per-group overhead is a simple closed form. */
	if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC))
		return (ext4_bg_has_super(sb, grp) + ext4_bg_num_gdb(sb, grp) +
			sbi->s_itb_per_group + 2);

	/* Block range covered by group @grp. */
	first_block = le32_to_cpu(sbi->s_es->s_first_data_block) +
		(grp * EXT4_BLOCKS_PER_GROUP(sb));
	last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1;
	/*
	 * Walk every group: any group's bitmaps or itable may land in
	 * @grp's block range.  @buf collects the touched clusters so
	 * that clusters shared by several metadata blocks count once.
	 */
	for (i = 0; i < ngroups; i++) {
		gdp = ext4_get_group_desc(sb, i, NULL);
		b = ext4_block_bitmap(sb, gdp);
		if (b >= first_block && b <= last_block) {
			ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
			count++;
		}
		b = ext4_inode_bitmap(sb, gdp);
		if (b >= first_block && b <= last_block) {
			ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
			count++;
		}
		b = ext4_inode_table(sb, gdp);
		if (b >= first_block && b + sbi->s_itb_per_group <= last_block)
			for (j = 0; j < sbi->s_itb_per_group; j++, b++) {
				int c = EXT4_B2C(sbi, b - first_block);
				ext4_set_bit(c, buf);
				count++;
			}
		if (i != grp)
			continue;
		/* @grp itself: superblock backup and GDT blocks, if any. */
		s = 0;
		if (ext4_bg_has_super(sb, grp)) {
			ext4_set_bit(s++, buf);
			count++;
		}
		for (j = ext4_bg_num_gdb(sb, grp); j > 0; j--) {
			ext4_set_bit(EXT4_B2C(sbi, s++), buf);
			count++;
		}
	}
	if (!count)
		return 0;
	/* Overhead = clusters in group minus clusters still free in @buf. */
	return EXT4_CLUSTERS_PER_GROUP(sb) -
		ext4_count_free(buf, EXT4_CLUSTERS_PER_GROUP(sb) / 8);
}
3273 3270
3274 /* 3271 /*
3275 * Compute the overhead and stash it in sbi->s_overhead 3272 * Compute the overhead and stash it in sbi->s_overhead
3276 */ 3273 */
3277 int ext4_calculate_overhead(struct super_block *sb) 3274 int ext4_calculate_overhead(struct super_block *sb)
3278 { 3275 {
3279 struct ext4_sb_info *sbi = EXT4_SB(sb); 3276 struct ext4_sb_info *sbi = EXT4_SB(sb);
3280 struct ext4_super_block *es = sbi->s_es; 3277 struct ext4_super_block *es = sbi->s_es;
3281 ext4_group_t i, ngroups = ext4_get_groups_count(sb); 3278 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
3282 ext4_fsblk_t overhead = 0; 3279 ext4_fsblk_t overhead = 0;
3283 char *buf = (char *) get_zeroed_page(GFP_KERNEL); 3280 char *buf = (char *) get_zeroed_page(GFP_KERNEL);
3284 3281
3285 if (!buf) 3282 if (!buf)
3286 return -ENOMEM; 3283 return -ENOMEM;
3287 3284
3288 /* 3285 /*
3289 * Compute the overhead (FS structures). This is constant 3286 * Compute the overhead (FS structures). This is constant
3290 * for a given filesystem unless the number of block groups 3287 * for a given filesystem unless the number of block groups
3291 * changes so we cache the previous value until it does. 3288 * changes so we cache the previous value until it does.
3292 */ 3289 */
3293 3290
3294 /* 3291 /*
3295 * All of the blocks before first_data_block are overhead 3292 * All of the blocks before first_data_block are overhead
3296 */ 3293 */
3297 overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block)); 3294 overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block));
3298 3295
3299 /* 3296 /*
3300 * Add the overhead found in each block group 3297 * Add the overhead found in each block group
3301 */ 3298 */
3302 for (i = 0; i < ngroups; i++) { 3299 for (i = 0; i < ngroups; i++) {
3303 int blks; 3300 int blks;
3304 3301
3305 blks = count_overhead(sb, i, buf); 3302 blks = count_overhead(sb, i, buf);
3306 overhead += blks; 3303 overhead += blks;
3307 if (blks) 3304 if (blks)
3308 memset(buf, 0, PAGE_SIZE); 3305 memset(buf, 0, PAGE_SIZE);
3309 cond_resched(); 3306 cond_resched();
3310 } 3307 }
3311 /* Add the journal blocks as well */ 3308 /* Add the journal blocks as well */
3312 if (sbi->s_journal) 3309 if (sbi->s_journal)
3313 overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen); 3310 overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen);
3314 3311
3315 sbi->s_overhead = overhead; 3312 sbi->s_overhead = overhead;
3316 smp_wmb(); 3313 smp_wmb();
3317 free_page((unsigned long) buf); 3314 free_page((unsigned long) buf);
3318 return 0; 3315 return 0;
3319 } 3316 }
3320 3317
3321 3318
3322 static ext4_fsblk_t ext4_calculate_resv_clusters(struct ext4_sb_info *sbi) 3319 static ext4_fsblk_t ext4_calculate_resv_clusters(struct ext4_sb_info *sbi)
3323 { 3320 {
3324 ext4_fsblk_t resv_clusters; 3321 ext4_fsblk_t resv_clusters;
3325 3322
3326 /* 3323 /*
3327 * By default we reserve 2% or 4096 clusters, whichever is smaller. 3324 * By default we reserve 2% or 4096 clusters, whichever is smaller.
3328 * This should cover the situations where we can not afford to run 3325 * This should cover the situations where we can not afford to run
3329 * out of space like for example punch hole, or converting 3326 * out of space like for example punch hole, or converting
3330 * uninitialized extents in delalloc path. In most cases such 3327 * uninitialized extents in delalloc path. In most cases such
3331 * allocation would require 1, or 2 blocks, higher numbers are 3328 * allocation would require 1, or 2 blocks, higher numbers are
3332 * very rare. 3329 * very rare.
3333 */ 3330 */
3334 resv_clusters = ext4_blocks_count(sbi->s_es) >> sbi->s_cluster_bits; 3331 resv_clusters = ext4_blocks_count(sbi->s_es) >> sbi->s_cluster_bits;
3335 3332
3336 do_div(resv_clusters, 50); 3333 do_div(resv_clusters, 50);
3337 resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096); 3334 resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096);
3338 3335
3339 return resv_clusters; 3336 return resv_clusters;
3340 } 3337 }
3341 3338
3342 3339
3343 static int ext4_reserve_clusters(struct ext4_sb_info *sbi, ext4_fsblk_t count) 3340 static int ext4_reserve_clusters(struct ext4_sb_info *sbi, ext4_fsblk_t count)
3344 { 3341 {
3345 ext4_fsblk_t clusters = ext4_blocks_count(sbi->s_es) >> 3342 ext4_fsblk_t clusters = ext4_blocks_count(sbi->s_es) >>
3346 sbi->s_cluster_bits; 3343 sbi->s_cluster_bits;
3347 3344
3348 if (count >= clusters) 3345 if (count >= clusters)
3349 return -EINVAL; 3346 return -EINVAL;
3350 3347
3351 atomic64_set(&sbi->s_resv_clusters, count); 3348 atomic64_set(&sbi->s_resv_clusters, count);
3352 return 0; 3349 return 0;
3353 } 3350 }
3354 3351
3355 static int ext4_fill_super(struct super_block *sb, void *data, int silent) 3352 static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3356 { 3353 {
3357 char *orig_data = kstrdup(data, GFP_KERNEL); 3354 char *orig_data = kstrdup(data, GFP_KERNEL);
3358 struct buffer_head *bh; 3355 struct buffer_head *bh;
3359 struct ext4_super_block *es = NULL; 3356 struct ext4_super_block *es = NULL;
3360 struct ext4_sb_info *sbi; 3357 struct ext4_sb_info *sbi;
3361 ext4_fsblk_t block; 3358 ext4_fsblk_t block;
3362 ext4_fsblk_t sb_block = get_sb_block(&data); 3359 ext4_fsblk_t sb_block = get_sb_block(&data);
3363 ext4_fsblk_t logical_sb_block; 3360 ext4_fsblk_t logical_sb_block;
3364 unsigned long offset = 0; 3361 unsigned long offset = 0;
3365 unsigned long journal_devnum = 0; 3362 unsigned long journal_devnum = 0;
3366 unsigned long def_mount_opts; 3363 unsigned long def_mount_opts;
3367 struct inode *root; 3364 struct inode *root;
3368 char *cp; 3365 char *cp;
3369 const char *descr; 3366 const char *descr;
3370 int ret = -ENOMEM; 3367 int ret = -ENOMEM;
3371 int blocksize, clustersize; 3368 int blocksize, clustersize;
3372 unsigned int db_count; 3369 unsigned int db_count;
3373 unsigned int i; 3370 unsigned int i;
3374 int needs_recovery, has_huge_files, has_bigalloc; 3371 int needs_recovery, has_huge_files, has_bigalloc;
3375 __u64 blocks_count; 3372 __u64 blocks_count;
3376 int err = 0; 3373 int err = 0;
3377 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; 3374 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
3378 ext4_group_t first_not_zeroed; 3375 ext4_group_t first_not_zeroed;
3379 3376
3380 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 3377 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
3381 if (!sbi) 3378 if (!sbi)
3382 goto out_free_orig; 3379 goto out_free_orig;
3383 3380
3384 sbi->s_blockgroup_lock = 3381 sbi->s_blockgroup_lock =
3385 kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL); 3382 kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
3386 if (!sbi->s_blockgroup_lock) { 3383 if (!sbi->s_blockgroup_lock) {
3387 kfree(sbi); 3384 kfree(sbi);
3388 goto out_free_orig; 3385 goto out_free_orig;
3389 } 3386 }
3390 sb->s_fs_info = sbi; 3387 sb->s_fs_info = sbi;
3391 sbi->s_sb = sb; 3388 sbi->s_sb = sb;
3392 sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; 3389 sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
3393 sbi->s_sb_block = sb_block; 3390 sbi->s_sb_block = sb_block;
3394 if (sb->s_bdev->bd_part) 3391 if (sb->s_bdev->bd_part)
3395 sbi->s_sectors_written_start = 3392 sbi->s_sectors_written_start =
3396 part_stat_read(sb->s_bdev->bd_part, sectors[1]); 3393 part_stat_read(sb->s_bdev->bd_part, sectors[1]);
3397 3394
3398 /* Cleanup superblock name */ 3395 /* Cleanup superblock name */
3399 for (cp = sb->s_id; (cp = strchr(cp, '/'));) 3396 for (cp = sb->s_id; (cp = strchr(cp, '/'));)
3400 *cp = '!'; 3397 *cp = '!';
3401 3398
3402 /* -EINVAL is default */ 3399 /* -EINVAL is default */
3403 ret = -EINVAL; 3400 ret = -EINVAL;
3404 blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); 3401 blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
3405 if (!blocksize) { 3402 if (!blocksize) {
3406 ext4_msg(sb, KERN_ERR, "unable to set blocksize"); 3403 ext4_msg(sb, KERN_ERR, "unable to set blocksize");
3407 goto out_fail; 3404 goto out_fail;
3408 } 3405 }
3409 3406
3410 /* 3407 /*
3411 * The ext4 superblock will not be buffer aligned for other than 1kB 3408 * The ext4 superblock will not be buffer aligned for other than 1kB
3412 * block sizes. We need to calculate the offset from buffer start. 3409 * block sizes. We need to calculate the offset from buffer start.
3413 */ 3410 */
3414 if (blocksize != EXT4_MIN_BLOCK_SIZE) { 3411 if (blocksize != EXT4_MIN_BLOCK_SIZE) {
3415 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; 3412 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
3416 offset = do_div(logical_sb_block, blocksize); 3413 offset = do_div(logical_sb_block, blocksize);
3417 } else { 3414 } else {
3418 logical_sb_block = sb_block; 3415 logical_sb_block = sb_block;
3419 } 3416 }
3420 3417
3421 if (!(bh = sb_bread(sb, logical_sb_block))) { 3418 if (!(bh = sb_bread(sb, logical_sb_block))) {
3422 ext4_msg(sb, KERN_ERR, "unable to read superblock"); 3419 ext4_msg(sb, KERN_ERR, "unable to read superblock");
3423 goto out_fail; 3420 goto out_fail;
3424 } 3421 }
3425 /* 3422 /*
3426 * Note: s_es must be initialized as soon as possible because 3423 * Note: s_es must be initialized as soon as possible because
3427 * some ext4 macro-instructions depend on its value 3424 * some ext4 macro-instructions depend on its value
3428 */ 3425 */
3429 es = (struct ext4_super_block *) (bh->b_data + offset); 3426 es = (struct ext4_super_block *) (bh->b_data + offset);
3430 sbi->s_es = es; 3427 sbi->s_es = es;
3431 sb->s_magic = le16_to_cpu(es->s_magic); 3428 sb->s_magic = le16_to_cpu(es->s_magic);
3432 if (sb->s_magic != EXT4_SUPER_MAGIC) 3429 if (sb->s_magic != EXT4_SUPER_MAGIC)
3433 goto cantfind_ext4; 3430 goto cantfind_ext4;
3434 sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written); 3431 sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written);
3435 3432
3436 /* Warn if metadata_csum and gdt_csum are both set. */ 3433 /* Warn if metadata_csum and gdt_csum are both set. */
3437 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 3434 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
3438 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) && 3435 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
3439 EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) 3436 EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
3440 ext4_warning(sb, KERN_INFO "metadata_csum and uninit_bg are " 3437 ext4_warning(sb, KERN_INFO "metadata_csum and uninit_bg are "
3441 "redundant flags; please run fsck."); 3438 "redundant flags; please run fsck.");
3442 3439
3443 /* Check for a known checksum algorithm */ 3440 /* Check for a known checksum algorithm */
3444 if (!ext4_verify_csum_type(sb, es)) { 3441 if (!ext4_verify_csum_type(sb, es)) {
3445 ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with " 3442 ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
3446 "unknown checksum algorithm."); 3443 "unknown checksum algorithm.");
3447 silent = 1; 3444 silent = 1;
3448 goto cantfind_ext4; 3445 goto cantfind_ext4;
3449 } 3446 }
3450 3447
3451 /* Load the checksum driver */ 3448 /* Load the checksum driver */
3452 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 3449 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
3453 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { 3450 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
3454 sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); 3451 sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
3455 if (IS_ERR(sbi->s_chksum_driver)) { 3452 if (IS_ERR(sbi->s_chksum_driver)) {
3456 ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver."); 3453 ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver.");
3457 ret = PTR_ERR(sbi->s_chksum_driver); 3454 ret = PTR_ERR(sbi->s_chksum_driver);
3458 sbi->s_chksum_driver = NULL; 3455 sbi->s_chksum_driver = NULL;
3459 goto failed_mount; 3456 goto failed_mount;
3460 } 3457 }
3461 } 3458 }
3462 3459
3463 /* Check superblock checksum */ 3460 /* Check superblock checksum */
3464 if (!ext4_superblock_csum_verify(sb, es)) { 3461 if (!ext4_superblock_csum_verify(sb, es)) {
3465 ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with " 3462 ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
3466 "invalid superblock checksum. Run e2fsck?"); 3463 "invalid superblock checksum. Run e2fsck?");
3467 silent = 1; 3464 silent = 1;
3468 goto cantfind_ext4; 3465 goto cantfind_ext4;
3469 } 3466 }
3470 3467
3471 /* Precompute checksum seed for all metadata */ 3468 /* Precompute checksum seed for all metadata */
3472 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 3469 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
3473 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 3470 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
3474 sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid, 3471 sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid,
3475 sizeof(es->s_uuid)); 3472 sizeof(es->s_uuid));
3476 3473
3477 /* Set defaults before we parse the mount options */ 3474 /* Set defaults before we parse the mount options */
3478 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 3475 def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
3479 set_opt(sb, INIT_INODE_TABLE); 3476 set_opt(sb, INIT_INODE_TABLE);
3480 if (def_mount_opts & EXT4_DEFM_DEBUG) 3477 if (def_mount_opts & EXT4_DEFM_DEBUG)
3481 set_opt(sb, DEBUG); 3478 set_opt(sb, DEBUG);
3482 if (def_mount_opts & EXT4_DEFM_BSDGROUPS) 3479 if (def_mount_opts & EXT4_DEFM_BSDGROUPS)
3483 set_opt(sb, GRPID); 3480 set_opt(sb, GRPID);
3484 if (def_mount_opts & EXT4_DEFM_UID16) 3481 if (def_mount_opts & EXT4_DEFM_UID16)
3485 set_opt(sb, NO_UID32); 3482 set_opt(sb, NO_UID32);
3486 /* xattr user namespace & acls are now defaulted on */ 3483 /* xattr user namespace & acls are now defaulted on */
3487 set_opt(sb, XATTR_USER); 3484 set_opt(sb, XATTR_USER);
3488 #ifdef CONFIG_EXT4_FS_POSIX_ACL 3485 #ifdef CONFIG_EXT4_FS_POSIX_ACL
3489 set_opt(sb, POSIX_ACL); 3486 set_opt(sb, POSIX_ACL);
3490 #endif 3487 #endif
3491 if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) 3488 if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
3492 set_opt(sb, JOURNAL_DATA); 3489 set_opt(sb, JOURNAL_DATA);
3493 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) 3490 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
3494 set_opt(sb, ORDERED_DATA); 3491 set_opt(sb, ORDERED_DATA);
3495 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK) 3492 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
3496 set_opt(sb, WRITEBACK_DATA); 3493 set_opt(sb, WRITEBACK_DATA);
3497 3494
3498 if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC) 3495 if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC)
3499 set_opt(sb, ERRORS_PANIC); 3496 set_opt(sb, ERRORS_PANIC);
3500 else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE) 3497 else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE)
3501 set_opt(sb, ERRORS_CONT); 3498 set_opt(sb, ERRORS_CONT);
3502 else 3499 else
3503 set_opt(sb, ERRORS_RO); 3500 set_opt(sb, ERRORS_RO);
3504 if (def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY) 3501 if (def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY)
3505 set_opt(sb, BLOCK_VALIDITY); 3502 set_opt(sb, BLOCK_VALIDITY);
3506 if (def_mount_opts & EXT4_DEFM_DISCARD) 3503 if (def_mount_opts & EXT4_DEFM_DISCARD)
3507 set_opt(sb, DISCARD); 3504 set_opt(sb, DISCARD);
3508 3505
3509 sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid)); 3506 sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid));
3510 sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid)); 3507 sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid));
3511 sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ; 3508 sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
3512 sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; 3509 sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
3513 sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; 3510 sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
3514 3511
3515 if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0) 3512 if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0)
3516 set_opt(sb, BARRIER); 3513 set_opt(sb, BARRIER);
3517 3514
3518 /* 3515 /*
3519 * enable delayed allocation by default 3516 * enable delayed allocation by default
3520 * Use -o nodelalloc to turn it off 3517 * Use -o nodelalloc to turn it off
3521 */ 3518 */
3522 if (!IS_EXT3_SB(sb) && !IS_EXT2_SB(sb) && 3519 if (!IS_EXT3_SB(sb) && !IS_EXT2_SB(sb) &&
3523 ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0)) 3520 ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
3524 set_opt(sb, DELALLOC); 3521 set_opt(sb, DELALLOC);
3525 3522
3526 /* 3523 /*
3527 * set default s_li_wait_mult for lazyinit, for the case there is 3524 * set default s_li_wait_mult for lazyinit, for the case there is
3528 * no mount option specified. 3525 * no mount option specified.
3529 */ 3526 */
3530 sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT; 3527 sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
3531 3528
3532 if (!parse_options((char *) sbi->s_es->s_mount_opts, sb, 3529 if (!parse_options((char *) sbi->s_es->s_mount_opts, sb,
3533 &journal_devnum, &journal_ioprio, 0)) { 3530 &journal_devnum, &journal_ioprio, 0)) {
3534 ext4_msg(sb, KERN_WARNING, 3531 ext4_msg(sb, KERN_WARNING,
3535 "failed to parse options in superblock: %s", 3532 "failed to parse options in superblock: %s",
3536 sbi->s_es->s_mount_opts); 3533 sbi->s_es->s_mount_opts);
3537 } 3534 }
3538 sbi->s_def_mount_opt = sbi->s_mount_opt; 3535 sbi->s_def_mount_opt = sbi->s_mount_opt;
3539 if (!parse_options((char *) data, sb, &journal_devnum, 3536 if (!parse_options((char *) data, sb, &journal_devnum,
3540 &journal_ioprio, 0)) 3537 &journal_ioprio, 0))
3541 goto failed_mount; 3538 goto failed_mount;
3542 3539
3543 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { 3540 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
3544 printk_once(KERN_WARNING "EXT4-fs: Warning: mounting " 3541 printk_once(KERN_WARNING "EXT4-fs: Warning: mounting "
3545 "with data=journal disables delayed " 3542 "with data=journal disables delayed "
3546 "allocation and O_DIRECT support!\n"); 3543 "allocation and O_DIRECT support!\n");
3547 if (test_opt2(sb, EXPLICIT_DELALLOC)) { 3544 if (test_opt2(sb, EXPLICIT_DELALLOC)) {
3548 ext4_msg(sb, KERN_ERR, "can't mount with " 3545 ext4_msg(sb, KERN_ERR, "can't mount with "
3549 "both data=journal and delalloc"); 3546 "both data=journal and delalloc");
3550 goto failed_mount; 3547 goto failed_mount;
3551 } 3548 }
3552 if (test_opt(sb, DIOREAD_NOLOCK)) { 3549 if (test_opt(sb, DIOREAD_NOLOCK)) {
3553 ext4_msg(sb, KERN_ERR, "can't mount with " 3550 ext4_msg(sb, KERN_ERR, "can't mount with "
3554 "both data=journal and dioread_nolock"); 3551 "both data=journal and dioread_nolock");
3555 goto failed_mount; 3552 goto failed_mount;
3556 } 3553 }
3557 if (test_opt(sb, DELALLOC)) 3554 if (test_opt(sb, DELALLOC))
3558 clear_opt(sb, DELALLOC); 3555 clear_opt(sb, DELALLOC);
3559 } 3556 }
3560 3557
3561 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 3558 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
3562 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); 3559 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
3563 3560
3564 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV && 3561 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV &&
3565 (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) || 3562 (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) ||
3566 EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) || 3563 EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
3567 EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U))) 3564 EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U)))
3568 ext4_msg(sb, KERN_WARNING, 3565 ext4_msg(sb, KERN_WARNING,
3569 "feature flags set on rev 0 fs, " 3566 "feature flags set on rev 0 fs, "
3570 "running e2fsck is recommended"); 3567 "running e2fsck is recommended");
3571 3568
3572 if (IS_EXT2_SB(sb)) { 3569 if (IS_EXT2_SB(sb)) {
3573 if (ext2_feature_set_ok(sb)) 3570 if (ext2_feature_set_ok(sb))
3574 ext4_msg(sb, KERN_INFO, "mounting ext2 file system " 3571 ext4_msg(sb, KERN_INFO, "mounting ext2 file system "
3575 "using the ext4 subsystem"); 3572 "using the ext4 subsystem");
3576 else { 3573 else {
3577 ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due " 3574 ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due "
3578 "to feature incompatibilities"); 3575 "to feature incompatibilities");
3579 goto failed_mount; 3576 goto failed_mount;
3580 } 3577 }
3581 } 3578 }
3582 3579
3583 if (IS_EXT3_SB(sb)) { 3580 if (IS_EXT3_SB(sb)) {
3584 if (ext3_feature_set_ok(sb)) 3581 if (ext3_feature_set_ok(sb))
3585 ext4_msg(sb, KERN_INFO, "mounting ext3 file system " 3582 ext4_msg(sb, KERN_INFO, "mounting ext3 file system "
3586 "using the ext4 subsystem"); 3583 "using the ext4 subsystem");
3587 else { 3584 else {
3588 ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due " 3585 ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due "
3589 "to feature incompatibilities"); 3586 "to feature incompatibilities");
3590 goto failed_mount; 3587 goto failed_mount;
3591 } 3588 }
3592 } 3589 }
3593 3590
3594 /* 3591 /*
3595 * Check feature flags regardless of the revision level, since we 3592 * Check feature flags regardless of the revision level, since we
3596 * previously didn't change the revision level when setting the flags, 3593 * previously didn't change the revision level when setting the flags,
3597 * so there is a chance incompat flags are set on a rev 0 filesystem. 3594 * so there is a chance incompat flags are set on a rev 0 filesystem.
3598 */ 3595 */
3599 if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY))) 3596 if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY)))
3600 goto failed_mount; 3597 goto failed_mount;
3601 3598
3602 blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); 3599 blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
3603 if (blocksize < EXT4_MIN_BLOCK_SIZE || 3600 if (blocksize < EXT4_MIN_BLOCK_SIZE ||
3604 blocksize > EXT4_MAX_BLOCK_SIZE) { 3601 blocksize > EXT4_MAX_BLOCK_SIZE) {
3605 ext4_msg(sb, KERN_ERR, 3602 ext4_msg(sb, KERN_ERR,
3606 "Unsupported filesystem blocksize %d", blocksize); 3603 "Unsupported filesystem blocksize %d", blocksize);
3607 goto failed_mount; 3604 goto failed_mount;
3608 } 3605 }
3609 3606
3610 if (sb->s_blocksize != blocksize) { 3607 if (sb->s_blocksize != blocksize) {
3611 /* Validate the filesystem blocksize */ 3608 /* Validate the filesystem blocksize */
3612 if (!sb_set_blocksize(sb, blocksize)) { 3609 if (!sb_set_blocksize(sb, blocksize)) {
3613 ext4_msg(sb, KERN_ERR, "bad block size %d", 3610 ext4_msg(sb, KERN_ERR, "bad block size %d",
3614 blocksize); 3611 blocksize);
3615 goto failed_mount; 3612 goto failed_mount;
3616 } 3613 }
3617 3614
3618 brelse(bh); 3615 brelse(bh);
3619 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; 3616 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
3620 offset = do_div(logical_sb_block, blocksize); 3617 offset = do_div(logical_sb_block, blocksize);
3621 bh = sb_bread(sb, logical_sb_block); 3618 bh = sb_bread(sb, logical_sb_block);
3622 if (!bh) { 3619 if (!bh) {
3623 ext4_msg(sb, KERN_ERR, 3620 ext4_msg(sb, KERN_ERR,
3624 "Can't read superblock on 2nd try"); 3621 "Can't read superblock on 2nd try");
3625 goto failed_mount; 3622 goto failed_mount;
3626 } 3623 }
3627 es = (struct ext4_super_block *)(bh->b_data + offset); 3624 es = (struct ext4_super_block *)(bh->b_data + offset);
3628 sbi->s_es = es; 3625 sbi->s_es = es;
3629 if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) { 3626 if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
3630 ext4_msg(sb, KERN_ERR, 3627 ext4_msg(sb, KERN_ERR,
3631 "Magic mismatch, very weird!"); 3628 "Magic mismatch, very weird!");
3632 goto failed_mount; 3629 goto failed_mount;
3633 } 3630 }
3634 } 3631 }
3635 3632
3636 has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb, 3633 has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb,
3637 EXT4_FEATURE_RO_COMPAT_HUGE_FILE); 3634 EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
3638 sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits, 3635 sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
3639 has_huge_files); 3636 has_huge_files);
3640 sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files); 3637 sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
3641 3638
3642 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { 3639 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
3643 sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE; 3640 sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
3644 sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO; 3641 sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO;
3645 } else { 3642 } else {
3646 sbi->s_inode_size = le16_to_cpu(es->s_inode_size); 3643 sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
3647 sbi->s_first_ino = le32_to_cpu(es->s_first_ino); 3644 sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
3648 if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || 3645 if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
3649 (!is_power_of_2(sbi->s_inode_size)) || 3646 (!is_power_of_2(sbi->s_inode_size)) ||
3650 (sbi->s_inode_size > blocksize)) { 3647 (sbi->s_inode_size > blocksize)) {
3651 ext4_msg(sb, KERN_ERR, 3648 ext4_msg(sb, KERN_ERR,
3652 "unsupported inode size: %d", 3649 "unsupported inode size: %d",
3653 sbi->s_inode_size); 3650 sbi->s_inode_size);
3654 goto failed_mount; 3651 goto failed_mount;
3655 } 3652 }
3656 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) 3653 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
3657 sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2); 3654 sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2);
3658 } 3655 }
3659 3656
3660 sbi->s_desc_size = le16_to_cpu(es->s_desc_size); 3657 sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
3661 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) { 3658 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) {
3662 if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT || 3659 if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
3663 sbi->s_desc_size > EXT4_MAX_DESC_SIZE || 3660 sbi->s_desc_size > EXT4_MAX_DESC_SIZE ||
3664 !is_power_of_2(sbi->s_desc_size)) { 3661 !is_power_of_2(sbi->s_desc_size)) {
3665 ext4_msg(sb, KERN_ERR, 3662 ext4_msg(sb, KERN_ERR,
3666 "unsupported descriptor size %lu", 3663 "unsupported descriptor size %lu",
3667 sbi->s_desc_size); 3664 sbi->s_desc_size);
3668 goto failed_mount; 3665 goto failed_mount;
3669 } 3666 }
3670 } else 3667 } else
3671 sbi->s_desc_size = EXT4_MIN_DESC_SIZE; 3668 sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
3672 3669
3673 sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); 3670 sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
3674 sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); 3671 sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
3675 if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0) 3672 if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0)
3676 goto cantfind_ext4; 3673 goto cantfind_ext4;
3677 3674
3678 sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb); 3675 sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb);
3679 if (sbi->s_inodes_per_block == 0) 3676 if (sbi->s_inodes_per_block == 0)
3680 goto cantfind_ext4; 3677 goto cantfind_ext4;
3681 sbi->s_itb_per_group = sbi->s_inodes_per_group / 3678 sbi->s_itb_per_group = sbi->s_inodes_per_group /
3682 sbi->s_inodes_per_block; 3679 sbi->s_inodes_per_block;
3683 sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb); 3680 sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb);
3684 sbi->s_sbh = bh; 3681 sbi->s_sbh = bh;
3685 sbi->s_mount_state = le16_to_cpu(es->s_state); 3682 sbi->s_mount_state = le16_to_cpu(es->s_state);
3686 sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb)); 3683 sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
3687 sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); 3684 sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
3688 3685
3689 for (i = 0; i < 4; i++) 3686 for (i = 0; i < 4; i++)
3690 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); 3687 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
3691 sbi->s_def_hash_version = es->s_def_hash_version; 3688 sbi->s_def_hash_version = es->s_def_hash_version;
3692 i = le32_to_cpu(es->s_flags); 3689 i = le32_to_cpu(es->s_flags);
3693 if (i & EXT2_FLAGS_UNSIGNED_HASH) 3690 if (i & EXT2_FLAGS_UNSIGNED_HASH)
3694 sbi->s_hash_unsigned = 3; 3691 sbi->s_hash_unsigned = 3;
3695 else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) { 3692 else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
3696 #ifdef __CHAR_UNSIGNED__ 3693 #ifdef __CHAR_UNSIGNED__
3697 es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH); 3694 es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
3698 sbi->s_hash_unsigned = 3; 3695 sbi->s_hash_unsigned = 3;
3699 #else 3696 #else
3700 es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH); 3697 es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
3701 #endif 3698 #endif
3702 } 3699 }
3703 3700
3704 /* Handle clustersize */ 3701 /* Handle clustersize */
3705 clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size); 3702 clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size);
3706 has_bigalloc = EXT4_HAS_RO_COMPAT_FEATURE(sb, 3703 has_bigalloc = EXT4_HAS_RO_COMPAT_FEATURE(sb,
3707 EXT4_FEATURE_RO_COMPAT_BIGALLOC); 3704 EXT4_FEATURE_RO_COMPAT_BIGALLOC);
3708 if (has_bigalloc) { 3705 if (has_bigalloc) {
3709 if (clustersize < blocksize) { 3706 if (clustersize < blocksize) {
3710 ext4_msg(sb, KERN_ERR, 3707 ext4_msg(sb, KERN_ERR,
3711 "cluster size (%d) smaller than " 3708 "cluster size (%d) smaller than "
3712 "block size (%d)", clustersize, blocksize); 3709 "block size (%d)", clustersize, blocksize);
3713 goto failed_mount; 3710 goto failed_mount;
3714 } 3711 }
3715 sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) - 3712 sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) -
3716 le32_to_cpu(es->s_log_block_size); 3713 le32_to_cpu(es->s_log_block_size);
3717 sbi->s_clusters_per_group = 3714 sbi->s_clusters_per_group =
3718 le32_to_cpu(es->s_clusters_per_group); 3715 le32_to_cpu(es->s_clusters_per_group);
3719 if (sbi->s_clusters_per_group > blocksize * 8) { 3716 if (sbi->s_clusters_per_group > blocksize * 8) {
3720 ext4_msg(sb, KERN_ERR, 3717 ext4_msg(sb, KERN_ERR,
3721 "#clusters per group too big: %lu", 3718 "#clusters per group too big: %lu",
3722 sbi->s_clusters_per_group); 3719 sbi->s_clusters_per_group);
3723 goto failed_mount; 3720 goto failed_mount;
3724 } 3721 }
3725 if (sbi->s_blocks_per_group != 3722 if (sbi->s_blocks_per_group !=
3726 (sbi->s_clusters_per_group * (clustersize / blocksize))) { 3723 (sbi->s_clusters_per_group * (clustersize / blocksize))) {
3727 ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and " 3724 ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and "
3728 "clusters per group (%lu) inconsistent", 3725 "clusters per group (%lu) inconsistent",
3729 sbi->s_blocks_per_group, 3726 sbi->s_blocks_per_group,
3730 sbi->s_clusters_per_group); 3727 sbi->s_clusters_per_group);
3731 goto failed_mount; 3728 goto failed_mount;
3732 } 3729 }
3733 } else { 3730 } else {
3734 if (clustersize != blocksize) { 3731 if (clustersize != blocksize) {
3735 ext4_warning(sb, "fragment/cluster size (%d) != " 3732 ext4_warning(sb, "fragment/cluster size (%d) != "
3736 "block size (%d)", clustersize, 3733 "block size (%d)", clustersize,
3737 blocksize); 3734 blocksize);
3738 clustersize = blocksize; 3735 clustersize = blocksize;
3739 } 3736 }
3740 if (sbi->s_blocks_per_group > blocksize * 8) { 3737 if (sbi->s_blocks_per_group > blocksize * 8) {
3741 ext4_msg(sb, KERN_ERR, 3738 ext4_msg(sb, KERN_ERR,
3742 "#blocks per group too big: %lu", 3739 "#blocks per group too big: %lu",
3743 sbi->s_blocks_per_group); 3740 sbi->s_blocks_per_group);
3744 goto failed_mount; 3741 goto failed_mount;
3745 } 3742 }
3746 sbi->s_clusters_per_group = sbi->s_blocks_per_group; 3743 sbi->s_clusters_per_group = sbi->s_blocks_per_group;
3747 sbi->s_cluster_bits = 0; 3744 sbi->s_cluster_bits = 0;
3748 } 3745 }
3749 sbi->s_cluster_ratio = clustersize / blocksize; 3746 sbi->s_cluster_ratio = clustersize / blocksize;
3750 3747
3751 if (sbi->s_inodes_per_group > blocksize * 8) { 3748 if (sbi->s_inodes_per_group > blocksize * 8) {
3752 ext4_msg(sb, KERN_ERR, 3749 ext4_msg(sb, KERN_ERR,
3753 "#inodes per group too big: %lu", 3750 "#inodes per group too big: %lu",
3754 sbi->s_inodes_per_group); 3751 sbi->s_inodes_per_group);
3755 goto failed_mount; 3752 goto failed_mount;
3756 } 3753 }
3757 3754
3758 /* Do we have standard group size of clustersize * 8 blocks ? */ 3755 /* Do we have standard group size of clustersize * 8 blocks ? */
3759 if (sbi->s_blocks_per_group == clustersize << 3) 3756 if (sbi->s_blocks_per_group == clustersize << 3)
3760 set_opt2(sb, STD_GROUP_SIZE); 3757 set_opt2(sb, STD_GROUP_SIZE);
3761 3758
3762 /* 3759 /*
3763 * Test whether we have more sectors than will fit in sector_t, 3760 * Test whether we have more sectors than will fit in sector_t,
3764 * and whether the max offset is addressable by the page cache. 3761 * and whether the max offset is addressable by the page cache.
3765 */ 3762 */
3766 err = generic_check_addressable(sb->s_blocksize_bits, 3763 err = generic_check_addressable(sb->s_blocksize_bits,
3767 ext4_blocks_count(es)); 3764 ext4_blocks_count(es));
3768 if (err) { 3765 if (err) {
3769 ext4_msg(sb, KERN_ERR, "filesystem" 3766 ext4_msg(sb, KERN_ERR, "filesystem"
3770 " too large to mount safely on this system"); 3767 " too large to mount safely on this system");
3771 if (sizeof(sector_t) < 8) 3768 if (sizeof(sector_t) < 8)
3772 ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled"); 3769 ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled");
3773 goto failed_mount; 3770 goto failed_mount;
3774 } 3771 }
3775 3772
3776 if (EXT4_BLOCKS_PER_GROUP(sb) == 0) 3773 if (EXT4_BLOCKS_PER_GROUP(sb) == 0)
3777 goto cantfind_ext4; 3774 goto cantfind_ext4;
3778 3775
3779 /* check blocks count against device size */ 3776 /* check blocks count against device size */
3780 blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits; 3777 blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits;
3781 if (blocks_count && ext4_blocks_count(es) > blocks_count) { 3778 if (blocks_count && ext4_blocks_count(es) > blocks_count) {
3782 ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu " 3779 ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu "
3783 "exceeds size of device (%llu blocks)", 3780 "exceeds size of device (%llu blocks)",
3784 ext4_blocks_count(es), blocks_count); 3781 ext4_blocks_count(es), blocks_count);
3785 goto failed_mount; 3782 goto failed_mount;
3786 } 3783 }
3787 3784
3788 /* 3785 /*
3789 * It makes no sense for the first data block to be beyond the end 3786 * It makes no sense for the first data block to be beyond the end
3790 * of the filesystem. 3787 * of the filesystem.
3791 */ 3788 */
3792 if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) { 3789 if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
3793 ext4_msg(sb, KERN_WARNING, "bad geometry: first data " 3790 ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
3794 "block %u is beyond end of filesystem (%llu)", 3791 "block %u is beyond end of filesystem (%llu)",
3795 le32_to_cpu(es->s_first_data_block), 3792 le32_to_cpu(es->s_first_data_block),
3796 ext4_blocks_count(es)); 3793 ext4_blocks_count(es));
3797 goto failed_mount; 3794 goto failed_mount;
3798 } 3795 }
3799 blocks_count = (ext4_blocks_count(es) - 3796 blocks_count = (ext4_blocks_count(es) -
3800 le32_to_cpu(es->s_first_data_block) + 3797 le32_to_cpu(es->s_first_data_block) +
3801 EXT4_BLOCKS_PER_GROUP(sb) - 1); 3798 EXT4_BLOCKS_PER_GROUP(sb) - 1);
3802 do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb)); 3799 do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
3803 if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) { 3800 if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
3804 ext4_msg(sb, KERN_WARNING, "groups count too large: %u " 3801 ext4_msg(sb, KERN_WARNING, "groups count too large: %u "
3805 "(block count %llu, first data block %u, " 3802 "(block count %llu, first data block %u, "
3806 "blocks per group %lu)", sbi->s_groups_count, 3803 "blocks per group %lu)", sbi->s_groups_count,
3807 ext4_blocks_count(es), 3804 ext4_blocks_count(es),
3808 le32_to_cpu(es->s_first_data_block), 3805 le32_to_cpu(es->s_first_data_block),
3809 EXT4_BLOCKS_PER_GROUP(sb)); 3806 EXT4_BLOCKS_PER_GROUP(sb));
3810 goto failed_mount; 3807 goto failed_mount;
3811 } 3808 }
3812 sbi->s_groups_count = blocks_count; 3809 sbi->s_groups_count = blocks_count;
3813 sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count, 3810 sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count,
3814 (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); 3811 (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
3815 db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / 3812 db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
3816 EXT4_DESC_PER_BLOCK(sb); 3813 EXT4_DESC_PER_BLOCK(sb);
3817 sbi->s_group_desc = ext4_kvmalloc(db_count * 3814 sbi->s_group_desc = ext4_kvmalloc(db_count *
3818 sizeof(struct buffer_head *), 3815 sizeof(struct buffer_head *),
3819 GFP_KERNEL); 3816 GFP_KERNEL);
3820 if (sbi->s_group_desc == NULL) { 3817 if (sbi->s_group_desc == NULL) {
3821 ext4_msg(sb, KERN_ERR, "not enough memory"); 3818 ext4_msg(sb, KERN_ERR, "not enough memory");
3822 ret = -ENOMEM; 3819 ret = -ENOMEM;
3823 goto failed_mount; 3820 goto failed_mount;
3824 } 3821 }
3825 3822
3826 if (ext4_proc_root) 3823 if (ext4_proc_root)
3827 sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); 3824 sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
3828 3825
3829 if (sbi->s_proc) 3826 if (sbi->s_proc)
3830 proc_create_data("options", S_IRUGO, sbi->s_proc, 3827 proc_create_data("options", S_IRUGO, sbi->s_proc,
3831 &ext4_seq_options_fops, sb); 3828 &ext4_seq_options_fops, sb);
3832 3829
3833 bgl_lock_init(sbi->s_blockgroup_lock); 3830 bgl_lock_init(sbi->s_blockgroup_lock);
3834 3831
3835 for (i = 0; i < db_count; i++) { 3832 for (i = 0; i < db_count; i++) {
3836 block = descriptor_loc(sb, logical_sb_block, i); 3833 block = descriptor_loc(sb, logical_sb_block, i);
3837 sbi->s_group_desc[i] = sb_bread(sb, block); 3834 sbi->s_group_desc[i] = sb_bread(sb, block);
3838 if (!sbi->s_group_desc[i]) { 3835 if (!sbi->s_group_desc[i]) {
3839 ext4_msg(sb, KERN_ERR, 3836 ext4_msg(sb, KERN_ERR,
3840 "can't read group descriptor %d", i); 3837 "can't read group descriptor %d", i);
3841 db_count = i; 3838 db_count = i;
3842 goto failed_mount2; 3839 goto failed_mount2;
3843 } 3840 }
3844 } 3841 }
3845 if (!ext4_check_descriptors(sb, &first_not_zeroed)) { 3842 if (!ext4_check_descriptors(sb, &first_not_zeroed)) {
3846 ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); 3843 ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
3847 goto failed_mount2; 3844 goto failed_mount2;
3848 } 3845 }
3849 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) 3846 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
3850 if (!ext4_fill_flex_info(sb)) { 3847 if (!ext4_fill_flex_info(sb)) {
3851 ext4_msg(sb, KERN_ERR, 3848 ext4_msg(sb, KERN_ERR,
3852 "unable to initialize " 3849 "unable to initialize "
3853 "flex_bg meta info!"); 3850 "flex_bg meta info!");
3854 goto failed_mount2; 3851 goto failed_mount2;
3855 } 3852 }
3856 3853
3857 sbi->s_gdb_count = db_count; 3854 sbi->s_gdb_count = db_count;
3858 get_random_bytes(&sbi->s_next_generation, sizeof(u32)); 3855 get_random_bytes(&sbi->s_next_generation, sizeof(u32));
3859 spin_lock_init(&sbi->s_next_gen_lock); 3856 spin_lock_init(&sbi->s_next_gen_lock);
3860 3857
3861 init_timer(&sbi->s_err_report); 3858 init_timer(&sbi->s_err_report);
3862 sbi->s_err_report.function = print_daily_error_info; 3859 sbi->s_err_report.function = print_daily_error_info;
3863 sbi->s_err_report.data = (unsigned long) sb; 3860 sbi->s_err_report.data = (unsigned long) sb;
3864 3861
3865 /* Register extent status tree shrinker */ 3862 /* Register extent status tree shrinker */
3866 ext4_es_register_shrinker(sbi); 3863 ext4_es_register_shrinker(sbi);
3867 3864
3868 err = percpu_counter_init(&sbi->s_freeclusters_counter, 3865 err = percpu_counter_init(&sbi->s_freeclusters_counter,
3869 ext4_count_free_clusters(sb)); 3866 ext4_count_free_clusters(sb));
3870 if (!err) { 3867 if (!err) {
3871 err = percpu_counter_init(&sbi->s_freeinodes_counter, 3868 err = percpu_counter_init(&sbi->s_freeinodes_counter,
3872 ext4_count_free_inodes(sb)); 3869 ext4_count_free_inodes(sb));
3873 } 3870 }
3874 if (!err) { 3871 if (!err) {
3875 err = percpu_counter_init(&sbi->s_dirs_counter, 3872 err = percpu_counter_init(&sbi->s_dirs_counter,
3876 ext4_count_dirs(sb)); 3873 ext4_count_dirs(sb));
3877 } 3874 }
3878 if (!err) { 3875 if (!err) {
3879 err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0); 3876 err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0);
3880 } 3877 }
3881 if (!err) { 3878 if (!err) {
3882 err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0); 3879 err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0);
3883 } 3880 }
3884 if (err) { 3881 if (err) {
3885 ext4_msg(sb, KERN_ERR, "insufficient memory"); 3882 ext4_msg(sb, KERN_ERR, "insufficient memory");
3886 goto failed_mount3; 3883 goto failed_mount3;
3887 } 3884 }
3888 3885
3889 sbi->s_stripe = ext4_get_stripe_size(sbi); 3886 sbi->s_stripe = ext4_get_stripe_size(sbi);
3890 sbi->s_extent_max_zeroout_kb = 32; 3887 sbi->s_extent_max_zeroout_kb = 32;
3891 3888
3892 /* 3889 /*
3893 * set up enough so that it can read an inode 3890 * set up enough so that it can read an inode
3894 */ 3891 */
3895 if (!test_opt(sb, NOLOAD) && 3892 if (!test_opt(sb, NOLOAD) &&
3896 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) 3893 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
3897 sb->s_op = &ext4_sops; 3894 sb->s_op = &ext4_sops;
3898 else 3895 else
3899 sb->s_op = &ext4_nojournal_sops; 3896 sb->s_op = &ext4_nojournal_sops;
3900 sb->s_export_op = &ext4_export_ops; 3897 sb->s_export_op = &ext4_export_ops;
3901 sb->s_xattr = ext4_xattr_handlers; 3898 sb->s_xattr = ext4_xattr_handlers;
3902 #ifdef CONFIG_QUOTA 3899 #ifdef CONFIG_QUOTA
3903 sb->dq_op = &ext4_quota_operations; 3900 sb->dq_op = &ext4_quota_operations;
3904 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) 3901 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA))
3905 sb->s_qcop = &ext4_qctl_sysfile_operations; 3902 sb->s_qcop = &ext4_qctl_sysfile_operations;
3906 else 3903 else
3907 sb->s_qcop = &ext4_qctl_operations; 3904 sb->s_qcop = &ext4_qctl_operations;
3908 #endif 3905 #endif
3909 memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid)); 3906 memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
3910 3907
3911 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 3908 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
3912 mutex_init(&sbi->s_orphan_lock); 3909 mutex_init(&sbi->s_orphan_lock);
3913 3910
3914 sb->s_root = NULL; 3911 sb->s_root = NULL;
3915 3912
3916 needs_recovery = (es->s_last_orphan != 0 || 3913 needs_recovery = (es->s_last_orphan != 0 ||
3917 EXT4_HAS_INCOMPAT_FEATURE(sb, 3914 EXT4_HAS_INCOMPAT_FEATURE(sb,
3918 EXT4_FEATURE_INCOMPAT_RECOVER)); 3915 EXT4_FEATURE_INCOMPAT_RECOVER));
3919 3916
3920 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_MMP) && 3917 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_MMP) &&
3921 !(sb->s_flags & MS_RDONLY)) 3918 !(sb->s_flags & MS_RDONLY))
3922 if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block))) 3919 if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block)))
3923 goto failed_mount3; 3920 goto failed_mount3;
3924 3921
3925 /* 3922 /*
3926 * The first inode we look at is the journal inode. Don't try 3923 * The first inode we look at is the journal inode. Don't try
3927 * root first: it may be modified in the journal! 3924 * root first: it may be modified in the journal!
3928 */ 3925 */
3929 if (!test_opt(sb, NOLOAD) && 3926 if (!test_opt(sb, NOLOAD) &&
3930 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { 3927 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
3931 if (ext4_load_journal(sb, es, journal_devnum)) 3928 if (ext4_load_journal(sb, es, journal_devnum))
3932 goto failed_mount3; 3929 goto failed_mount3;
3933 } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && 3930 } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) &&
3934 EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { 3931 EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
3935 ext4_msg(sb, KERN_ERR, "required journal recovery " 3932 ext4_msg(sb, KERN_ERR, "required journal recovery "
3936 "suppressed and not mounted read-only"); 3933 "suppressed and not mounted read-only");
3937 goto failed_mount_wq; 3934 goto failed_mount_wq;
3938 } else { 3935 } else {
3939 clear_opt(sb, DATA_FLAGS); 3936 clear_opt(sb, DATA_FLAGS);
3940 sbi->s_journal = NULL; 3937 sbi->s_journal = NULL;
3941 needs_recovery = 0; 3938 needs_recovery = 0;
3942 goto no_journal; 3939 goto no_journal;
3943 } 3940 }
3944 3941
3945 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT) && 3942 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT) &&
3946 !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, 3943 !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
3947 JBD2_FEATURE_INCOMPAT_64BIT)) { 3944 JBD2_FEATURE_INCOMPAT_64BIT)) {
3948 ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature"); 3945 ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature");
3949 goto failed_mount_wq; 3946 goto failed_mount_wq;
3950 } 3947 }
3951 3948
3952 if (!set_journal_csum_feature_set(sb)) { 3949 if (!set_journal_csum_feature_set(sb)) {
3953 ext4_msg(sb, KERN_ERR, "Failed to set journal checksum " 3950 ext4_msg(sb, KERN_ERR, "Failed to set journal checksum "
3954 "feature set"); 3951 "feature set");
3955 goto failed_mount_wq; 3952 goto failed_mount_wq;
3956 } 3953 }
3957 3954
3958 /* We have now updated the journal if required, so we can 3955 /* We have now updated the journal if required, so we can
3959 * validate the data journaling mode. */ 3956 * validate the data journaling mode. */
3960 switch (test_opt(sb, DATA_FLAGS)) { 3957 switch (test_opt(sb, DATA_FLAGS)) {
3961 case 0: 3958 case 0:
3962 /* No mode set, assume a default based on the journal 3959 /* No mode set, assume a default based on the journal
3963 * capabilities: ORDERED_DATA if the journal can 3960 * capabilities: ORDERED_DATA if the journal can
3964 * cope, else JOURNAL_DATA 3961 * cope, else JOURNAL_DATA
3965 */ 3962 */
3966 if (jbd2_journal_check_available_features 3963 if (jbd2_journal_check_available_features
3967 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) 3964 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE))
3968 set_opt(sb, ORDERED_DATA); 3965 set_opt(sb, ORDERED_DATA);
3969 else 3966 else
3970 set_opt(sb, JOURNAL_DATA); 3967 set_opt(sb, JOURNAL_DATA);
3971 break; 3968 break;
3972 3969
3973 case EXT4_MOUNT_ORDERED_DATA: 3970 case EXT4_MOUNT_ORDERED_DATA:
3974 case EXT4_MOUNT_WRITEBACK_DATA: 3971 case EXT4_MOUNT_WRITEBACK_DATA:
3975 if (!jbd2_journal_check_available_features 3972 if (!jbd2_journal_check_available_features
3976 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { 3973 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
3977 ext4_msg(sb, KERN_ERR, "Journal does not support " 3974 ext4_msg(sb, KERN_ERR, "Journal does not support "
3978 "requested data journaling mode"); 3975 "requested data journaling mode");
3979 goto failed_mount_wq; 3976 goto failed_mount_wq;
3980 } 3977 }
3981 default: 3978 default:
3982 break; 3979 break;
3983 } 3980 }
3984 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); 3981 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
3985 3982
3986 sbi->s_journal->j_commit_callback = ext4_journal_commit_callback; 3983 sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
3987 3984
3988 /* 3985 /*
3989 * The journal may have updated the bg summary counts, so we 3986 * The journal may have updated the bg summary counts, so we
3990 * need to update the global counters. 3987 * need to update the global counters.
3991 */ 3988 */
3992 percpu_counter_set(&sbi->s_freeclusters_counter, 3989 percpu_counter_set(&sbi->s_freeclusters_counter,
3993 ext4_count_free_clusters(sb)); 3990 ext4_count_free_clusters(sb));
3994 percpu_counter_set(&sbi->s_freeinodes_counter, 3991 percpu_counter_set(&sbi->s_freeinodes_counter,
3995 ext4_count_free_inodes(sb)); 3992 ext4_count_free_inodes(sb));
3996 percpu_counter_set(&sbi->s_dirs_counter, 3993 percpu_counter_set(&sbi->s_dirs_counter,
3997 ext4_count_dirs(sb)); 3994 ext4_count_dirs(sb));
3998 percpu_counter_set(&sbi->s_dirtyclusters_counter, 0); 3995 percpu_counter_set(&sbi->s_dirtyclusters_counter, 0);
3999 3996
4000 no_journal: 3997 no_journal:
4001 /* 3998 /*
4002 * Get the # of file system overhead blocks from the 3999 * Get the # of file system overhead blocks from the
4003 * superblock if present. 4000 * superblock if present.
4004 */ 4001 */
4005 if (es->s_overhead_clusters) 4002 if (es->s_overhead_clusters)
4006 sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters); 4003 sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
4007 else { 4004 else {
4008 err = ext4_calculate_overhead(sb); 4005 err = ext4_calculate_overhead(sb);
4009 if (err) 4006 if (err)
4010 goto failed_mount_wq; 4007 goto failed_mount_wq;
4011 } 4008 }
4012 4009
4013 /* 4010 /*
4014 * The maximum number of concurrent works can be high and 4011 * The maximum number of concurrent works can be high and
4015 * concurrency isn't really necessary. Limit it to 1. 4012 * concurrency isn't really necessary. Limit it to 1.
4016 */ 4013 */
4017 EXT4_SB(sb)->rsv_conversion_wq = 4014 EXT4_SB(sb)->rsv_conversion_wq =
4018 alloc_workqueue("ext4-rsv-conversion", WQ_MEM_RECLAIM | WQ_UNBOUND, 1); 4015 alloc_workqueue("ext4-rsv-conversion", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
4019 if (!EXT4_SB(sb)->rsv_conversion_wq) { 4016 if (!EXT4_SB(sb)->rsv_conversion_wq) {
4020 printk(KERN_ERR "EXT4-fs: failed to create workqueue\n"); 4017 printk(KERN_ERR "EXT4-fs: failed to create workqueue\n");
4021 ret = -ENOMEM; 4018 ret = -ENOMEM;
4022 goto failed_mount4; 4019 goto failed_mount4;
4023 } 4020 }
4024 4021
4025 /* 4022 /*
4026 * The jbd2_journal_load will have done any necessary log recovery, 4023 * The jbd2_journal_load will have done any necessary log recovery,
4027 * so we can safely mount the rest of the filesystem now. 4024 * so we can safely mount the rest of the filesystem now.
4028 */ 4025 */
4029 4026
4030 root = ext4_iget(sb, EXT4_ROOT_INO); 4027 root = ext4_iget(sb, EXT4_ROOT_INO);
4031 if (IS_ERR(root)) { 4028 if (IS_ERR(root)) {
4032 ext4_msg(sb, KERN_ERR, "get root inode failed"); 4029 ext4_msg(sb, KERN_ERR, "get root inode failed");
4033 ret = PTR_ERR(root); 4030 ret = PTR_ERR(root);
4034 root = NULL; 4031 root = NULL;
4035 goto failed_mount4; 4032 goto failed_mount4;
4036 } 4033 }
4037 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { 4034 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
4038 ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck"); 4035 ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
4039 iput(root); 4036 iput(root);
4040 goto failed_mount4; 4037 goto failed_mount4;
4041 } 4038 }
4042 sb->s_root = d_make_root(root); 4039 sb->s_root = d_make_root(root);
4043 if (!sb->s_root) { 4040 if (!sb->s_root) {
4044 ext4_msg(sb, KERN_ERR, "get root dentry failed"); 4041 ext4_msg(sb, KERN_ERR, "get root dentry failed");
4045 ret = -ENOMEM; 4042 ret = -ENOMEM;
4046 goto failed_mount4; 4043 goto failed_mount4;
4047 } 4044 }
4048 4045
4049 if (ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY)) 4046 if (ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY))
4050 sb->s_flags |= MS_RDONLY; 4047 sb->s_flags |= MS_RDONLY;
4051 4048
4052 /* determine the minimum size of new large inodes, if present */ 4049 /* determine the minimum size of new large inodes, if present */
4053 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) { 4050 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
4054 sbi->s_want_extra_isize = sizeof(struct ext4_inode) - 4051 sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
4055 EXT4_GOOD_OLD_INODE_SIZE; 4052 EXT4_GOOD_OLD_INODE_SIZE;
4056 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 4053 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
4057 EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) { 4054 EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) {
4058 if (sbi->s_want_extra_isize < 4055 if (sbi->s_want_extra_isize <
4059 le16_to_cpu(es->s_want_extra_isize)) 4056 le16_to_cpu(es->s_want_extra_isize))
4060 sbi->s_want_extra_isize = 4057 sbi->s_want_extra_isize =
4061 le16_to_cpu(es->s_want_extra_isize); 4058 le16_to_cpu(es->s_want_extra_isize);
4062 if (sbi->s_want_extra_isize < 4059 if (sbi->s_want_extra_isize <
4063 le16_to_cpu(es->s_min_extra_isize)) 4060 le16_to_cpu(es->s_min_extra_isize))
4064 sbi->s_want_extra_isize = 4061 sbi->s_want_extra_isize =
4065 le16_to_cpu(es->s_min_extra_isize); 4062 le16_to_cpu(es->s_min_extra_isize);
4066 } 4063 }
4067 } 4064 }
4068 /* Check if enough inode space is available */ 4065 /* Check if enough inode space is available */
4069 if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize > 4066 if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize >
4070 sbi->s_inode_size) { 4067 sbi->s_inode_size) {
4071 sbi->s_want_extra_isize = sizeof(struct ext4_inode) - 4068 sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
4072 EXT4_GOOD_OLD_INODE_SIZE; 4069 EXT4_GOOD_OLD_INODE_SIZE;
4073 ext4_msg(sb, KERN_INFO, "required extra inode space not" 4070 ext4_msg(sb, KERN_INFO, "required extra inode space not"
4074 "available"); 4071 "available");
4075 } 4072 }
4076 4073
4077 err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sbi)); 4074 err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sbi));
4078 if (err) { 4075 if (err) {
4079 ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for " 4076 ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for "
4080 "reserved pool", ext4_calculate_resv_clusters(sbi)); 4077 "reserved pool", ext4_calculate_resv_clusters(sbi));
4081 goto failed_mount4a; 4078 goto failed_mount4a;
4082 } 4079 }
4083 4080
4084 err = ext4_setup_system_zone(sb); 4081 err = ext4_setup_system_zone(sb);
4085 if (err) { 4082 if (err) {
4086 ext4_msg(sb, KERN_ERR, "failed to initialize system " 4083 ext4_msg(sb, KERN_ERR, "failed to initialize system "
4087 "zone (%d)", err); 4084 "zone (%d)", err);
4088 goto failed_mount4a; 4085 goto failed_mount4a;
4089 } 4086 }
4090 4087
4091 ext4_ext_init(sb); 4088 ext4_ext_init(sb);
4092 err = ext4_mb_init(sb); 4089 err = ext4_mb_init(sb);
4093 if (err) { 4090 if (err) {
4094 ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)", 4091 ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
4095 err); 4092 err);
4096 goto failed_mount5; 4093 goto failed_mount5;
4097 } 4094 }
4098 4095
4099 err = ext4_register_li_request(sb, first_not_zeroed); 4096 err = ext4_register_li_request(sb, first_not_zeroed);
4100 if (err) 4097 if (err)
4101 goto failed_mount6; 4098 goto failed_mount6;
4102 4099
4103 sbi->s_kobj.kset = ext4_kset; 4100 sbi->s_kobj.kset = ext4_kset;
4104 init_completion(&sbi->s_kobj_unregister); 4101 init_completion(&sbi->s_kobj_unregister);
4105 err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL, 4102 err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL,
4106 "%s", sb->s_id); 4103 "%s", sb->s_id);
4107 if (err) 4104 if (err)
4108 goto failed_mount7; 4105 goto failed_mount7;
4109 4106
4110 #ifdef CONFIG_QUOTA 4107 #ifdef CONFIG_QUOTA
4111 /* Enable quota usage during mount. */ 4108 /* Enable quota usage during mount. */
4112 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) && 4109 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) &&
4113 !(sb->s_flags & MS_RDONLY)) { 4110 !(sb->s_flags & MS_RDONLY)) {
4114 err = ext4_enable_quotas(sb); 4111 err = ext4_enable_quotas(sb);
4115 if (err) 4112 if (err)
4116 goto failed_mount8; 4113 goto failed_mount8;
4117 } 4114 }
4118 #endif /* CONFIG_QUOTA */ 4115 #endif /* CONFIG_QUOTA */
4119 4116
4120 EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; 4117 EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
4121 ext4_orphan_cleanup(sb, es); 4118 ext4_orphan_cleanup(sb, es);
4122 EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; 4119 EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
4123 if (needs_recovery) { 4120 if (needs_recovery) {
4124 ext4_msg(sb, KERN_INFO, "recovery complete"); 4121 ext4_msg(sb, KERN_INFO, "recovery complete");
4125 ext4_mark_recovery_complete(sb, es); 4122 ext4_mark_recovery_complete(sb, es);
4126 } 4123 }
4127 if (EXT4_SB(sb)->s_journal) { 4124 if (EXT4_SB(sb)->s_journal) {
4128 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) 4125 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
4129 descr = " journalled data mode"; 4126 descr = " journalled data mode";
4130 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) 4127 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
4131 descr = " ordered data mode"; 4128 descr = " ordered data mode";
4132 else 4129 else
4133 descr = " writeback data mode"; 4130 descr = " writeback data mode";
4134 } else 4131 } else
4135 descr = "out journal"; 4132 descr = "out journal";
4136 4133
4137 if (test_opt(sb, DISCARD)) { 4134 if (test_opt(sb, DISCARD)) {
4138 struct request_queue *q = bdev_get_queue(sb->s_bdev); 4135 struct request_queue *q = bdev_get_queue(sb->s_bdev);
4139 if (!blk_queue_discard(q)) 4136 if (!blk_queue_discard(q))
4140 ext4_msg(sb, KERN_WARNING, 4137 ext4_msg(sb, KERN_WARNING,
4141 "mounting with \"discard\" option, but " 4138 "mounting with \"discard\" option, but "
4142 "the device does not support discard"); 4139 "the device does not support discard");
4143 } 4140 }
4144 4141
4145 ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. " 4142 ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
4146 "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts, 4143 "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts,
4147 *sbi->s_es->s_mount_opts ? "; " : "", orig_data); 4144 *sbi->s_es->s_mount_opts ? "; " : "", orig_data);
4148 4145
4149 if (es->s_error_count) 4146 if (es->s_error_count)
4150 mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */ 4147 mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
4151 4148
4152 /* Enable message ratelimiting. Default is 10 messages per 5 secs. */ 4149 /* Enable message ratelimiting. Default is 10 messages per 5 secs. */
4153 ratelimit_state_init(&sbi->s_err_ratelimit_state, 5 * HZ, 10); 4150 ratelimit_state_init(&sbi->s_err_ratelimit_state, 5 * HZ, 10);
4154 ratelimit_state_init(&sbi->s_warning_ratelimit_state, 5 * HZ, 10); 4151 ratelimit_state_init(&sbi->s_warning_ratelimit_state, 5 * HZ, 10);
4155 ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10); 4152 ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10);
4156 4153
4157 kfree(orig_data); 4154 kfree(orig_data);
4158 return 0; 4155 return 0;
4159 4156
4160 cantfind_ext4: 4157 cantfind_ext4:
4161 if (!silent) 4158 if (!silent)
4162 ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem"); 4159 ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
4163 goto failed_mount; 4160 goto failed_mount;
4164 4161
4165 #ifdef CONFIG_QUOTA 4162 #ifdef CONFIG_QUOTA
4166 failed_mount8: 4163 failed_mount8:
4167 kobject_del(&sbi->s_kobj); 4164 kobject_del(&sbi->s_kobj);
4168 #endif 4165 #endif
4169 failed_mount7: 4166 failed_mount7:
4170 ext4_unregister_li_request(sb); 4167 ext4_unregister_li_request(sb);
4171 failed_mount6: 4168 failed_mount6:
4172 ext4_mb_release(sb); 4169 ext4_mb_release(sb);
4173 failed_mount5: 4170 failed_mount5:
4174 ext4_ext_release(sb); 4171 ext4_ext_release(sb);
4175 ext4_release_system_zone(sb); 4172 ext4_release_system_zone(sb);
4176 failed_mount4a: 4173 failed_mount4a:
4177 dput(sb->s_root); 4174 dput(sb->s_root);
4178 sb->s_root = NULL; 4175 sb->s_root = NULL;
4179 failed_mount4: 4176 failed_mount4:
4180 ext4_msg(sb, KERN_ERR, "mount failed"); 4177 ext4_msg(sb, KERN_ERR, "mount failed");
4181 if (EXT4_SB(sb)->rsv_conversion_wq) 4178 if (EXT4_SB(sb)->rsv_conversion_wq)
4182 destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq); 4179 destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
4183 failed_mount_wq: 4180 failed_mount_wq:
4184 if (sbi->s_journal) { 4181 if (sbi->s_journal) {
4185 jbd2_journal_destroy(sbi->s_journal); 4182 jbd2_journal_destroy(sbi->s_journal);
4186 sbi->s_journal = NULL; 4183 sbi->s_journal = NULL;
4187 } 4184 }
4188 failed_mount3: 4185 failed_mount3:
4189 ext4_es_unregister_shrinker(sbi); 4186 ext4_es_unregister_shrinker(sbi);
4190 del_timer(&sbi->s_err_report); 4187 del_timer(&sbi->s_err_report);
4191 if (sbi->s_flex_groups) 4188 if (sbi->s_flex_groups)
4192 ext4_kvfree(sbi->s_flex_groups); 4189 ext4_kvfree(sbi->s_flex_groups);
4193 percpu_counter_destroy(&sbi->s_freeclusters_counter); 4190 percpu_counter_destroy(&sbi->s_freeclusters_counter);
4194 percpu_counter_destroy(&sbi->s_freeinodes_counter); 4191 percpu_counter_destroy(&sbi->s_freeinodes_counter);
4195 percpu_counter_destroy(&sbi->s_dirs_counter); 4192 percpu_counter_destroy(&sbi->s_dirs_counter);
4196 percpu_counter_destroy(&sbi->s_dirtyclusters_counter); 4193 percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
4197 percpu_counter_destroy(&sbi->s_extent_cache_cnt); 4194 percpu_counter_destroy(&sbi->s_extent_cache_cnt);
4198 if (sbi->s_mmp_tsk) 4195 if (sbi->s_mmp_tsk)
4199 kthread_stop(sbi->s_mmp_tsk); 4196 kthread_stop(sbi->s_mmp_tsk);
4200 failed_mount2: 4197 failed_mount2:
4201 for (i = 0; i < db_count; i++) 4198 for (i = 0; i < db_count; i++)
4202 brelse(sbi->s_group_desc[i]); 4199 brelse(sbi->s_group_desc[i]);
4203 ext4_kvfree(sbi->s_group_desc); 4200 ext4_kvfree(sbi->s_group_desc);
4204 failed_mount: 4201 failed_mount:
4205 if (sbi->s_chksum_driver) 4202 if (sbi->s_chksum_driver)
4206 crypto_free_shash(sbi->s_chksum_driver); 4203 crypto_free_shash(sbi->s_chksum_driver);
4207 if (sbi->s_proc) { 4204 if (sbi->s_proc) {
4208 remove_proc_entry("options", sbi->s_proc); 4205 remove_proc_entry("options", sbi->s_proc);
4209 remove_proc_entry(sb->s_id, ext4_proc_root); 4206 remove_proc_entry(sb->s_id, ext4_proc_root);
4210 } 4207 }
4211 #ifdef CONFIG_QUOTA 4208 #ifdef CONFIG_QUOTA
4212 for (i = 0; i < MAXQUOTAS; i++) 4209 for (i = 0; i < MAXQUOTAS; i++)
4213 kfree(sbi->s_qf_names[i]); 4210 kfree(sbi->s_qf_names[i]);
4214 #endif 4211 #endif
4215 ext4_blkdev_remove(sbi); 4212 ext4_blkdev_remove(sbi);
4216 brelse(bh); 4213 brelse(bh);
4217 out_fail: 4214 out_fail:
4218 sb->s_fs_info = NULL; 4215 sb->s_fs_info = NULL;
4219 kfree(sbi->s_blockgroup_lock); 4216 kfree(sbi->s_blockgroup_lock);
4220 kfree(sbi); 4217 kfree(sbi);
4221 out_free_orig: 4218 out_free_orig:
4222 kfree(orig_data); 4219 kfree(orig_data);
4223 return err ? err : ret; 4220 return err ? err : ret;
4224 } 4221 }
4225 4222
4226 /* 4223 /*
4227 * Setup any per-fs journal parameters now. We'll do this both on 4224 * Setup any per-fs journal parameters now. We'll do this both on
4228 * initial mount, once the journal has been initialised but before we've 4225 * initial mount, once the journal has been initialised but before we've
4229 * done any recovery; and again on any subsequent remount. 4226 * done any recovery; and again on any subsequent remount.
4230 */ 4227 */
4231 static void ext4_init_journal_params(struct super_block *sb, journal_t *journal) 4228 static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
4232 { 4229 {
4233 struct ext4_sb_info *sbi = EXT4_SB(sb); 4230 struct ext4_sb_info *sbi = EXT4_SB(sb);
4234 4231
4235 journal->j_commit_interval = sbi->s_commit_interval; 4232 journal->j_commit_interval = sbi->s_commit_interval;
4236 journal->j_min_batch_time = sbi->s_min_batch_time; 4233 journal->j_min_batch_time = sbi->s_min_batch_time;
4237 journal->j_max_batch_time = sbi->s_max_batch_time; 4234 journal->j_max_batch_time = sbi->s_max_batch_time;
4238 4235
4239 write_lock(&journal->j_state_lock); 4236 write_lock(&journal->j_state_lock);
4240 if (test_opt(sb, BARRIER)) 4237 if (test_opt(sb, BARRIER))
4241 journal->j_flags |= JBD2_BARRIER; 4238 journal->j_flags |= JBD2_BARRIER;
4242 else 4239 else
4243 journal->j_flags &= ~JBD2_BARRIER; 4240 journal->j_flags &= ~JBD2_BARRIER;
4244 if (test_opt(sb, DATA_ERR_ABORT)) 4241 if (test_opt(sb, DATA_ERR_ABORT))
4245 journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR; 4242 journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
4246 else 4243 else
4247 journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR; 4244 journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
4248 write_unlock(&journal->j_state_lock); 4245 write_unlock(&journal->j_state_lock);
4249 } 4246 }
4250 4247
4251 static journal_t *ext4_get_journal(struct super_block *sb, 4248 static journal_t *ext4_get_journal(struct super_block *sb,
4252 unsigned int journal_inum) 4249 unsigned int journal_inum)
4253 { 4250 {
4254 struct inode *journal_inode; 4251 struct inode *journal_inode;
4255 journal_t *journal; 4252 journal_t *journal;
4256 4253
4257 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); 4254 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
4258 4255
4259 /* First, test for the existence of a valid inode on disk. Bad 4256 /* First, test for the existence of a valid inode on disk. Bad
4260 * things happen if we iget() an unused inode, as the subsequent 4257 * things happen if we iget() an unused inode, as the subsequent
4261 * iput() will try to delete it. */ 4258 * iput() will try to delete it. */
4262 4259
4263 journal_inode = ext4_iget(sb, journal_inum); 4260 journal_inode = ext4_iget(sb, journal_inum);
4264 if (IS_ERR(journal_inode)) { 4261 if (IS_ERR(journal_inode)) {
4265 ext4_msg(sb, KERN_ERR, "no journal found"); 4262 ext4_msg(sb, KERN_ERR, "no journal found");
4266 return NULL; 4263 return NULL;
4267 } 4264 }
4268 if (!journal_inode->i_nlink) { 4265 if (!journal_inode->i_nlink) {
4269 make_bad_inode(journal_inode); 4266 make_bad_inode(journal_inode);
4270 iput(journal_inode); 4267 iput(journal_inode);
4271 ext4_msg(sb, KERN_ERR, "journal inode is deleted"); 4268 ext4_msg(sb, KERN_ERR, "journal inode is deleted");
4272 return NULL; 4269 return NULL;
4273 } 4270 }
4274 4271
4275 jbd_debug(2, "Journal inode found at %p: %lld bytes\n", 4272 jbd_debug(2, "Journal inode found at %p: %lld bytes\n",
4276 journal_inode, journal_inode->i_size); 4273 journal_inode, journal_inode->i_size);
4277 if (!S_ISREG(journal_inode->i_mode)) { 4274 if (!S_ISREG(journal_inode->i_mode)) {
4278 ext4_msg(sb, KERN_ERR, "invalid journal inode"); 4275 ext4_msg(sb, KERN_ERR, "invalid journal inode");
4279 iput(journal_inode); 4276 iput(journal_inode);
4280 return NULL; 4277 return NULL;
4281 } 4278 }
4282 4279
4283 journal = jbd2_journal_init_inode(journal_inode); 4280 journal = jbd2_journal_init_inode(journal_inode);
4284 if (!journal) { 4281 if (!journal) {
4285 ext4_msg(sb, KERN_ERR, "Could not load journal inode"); 4282 ext4_msg(sb, KERN_ERR, "Could not load journal inode");
4286 iput(journal_inode); 4283 iput(journal_inode);
4287 return NULL; 4284 return NULL;
4288 } 4285 }
4289 journal->j_private = sb; 4286 journal->j_private = sb;
4290 ext4_init_journal_params(sb, journal); 4287 ext4_init_journal_params(sb, journal);
4291 return journal; 4288 return journal;
4292 } 4289 }
4293 4290
4294 static journal_t *ext4_get_dev_journal(struct super_block *sb, 4291 static journal_t *ext4_get_dev_journal(struct super_block *sb,
4295 dev_t j_dev) 4292 dev_t j_dev)
4296 { 4293 {
4297 struct buffer_head *bh; 4294 struct buffer_head *bh;
4298 journal_t *journal; 4295 journal_t *journal;
4299 ext4_fsblk_t start; 4296 ext4_fsblk_t start;
4300 ext4_fsblk_t len; 4297 ext4_fsblk_t len;
4301 int hblock, blocksize; 4298 int hblock, blocksize;
4302 ext4_fsblk_t sb_block; 4299 ext4_fsblk_t sb_block;
4303 unsigned long offset; 4300 unsigned long offset;
4304 struct ext4_super_block *es; 4301 struct ext4_super_block *es;
4305 struct block_device *bdev; 4302 struct block_device *bdev;
4306 4303
4307 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); 4304 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
4308 4305
4309 bdev = ext4_blkdev_get(j_dev, sb); 4306 bdev = ext4_blkdev_get(j_dev, sb);
4310 if (bdev == NULL) 4307 if (bdev == NULL)
4311 return NULL; 4308 return NULL;
4312 4309
4313 blocksize = sb->s_blocksize; 4310 blocksize = sb->s_blocksize;
4314 hblock = bdev_logical_block_size(bdev); 4311 hblock = bdev_logical_block_size(bdev);
4315 if (blocksize < hblock) { 4312 if (blocksize < hblock) {
4316 ext4_msg(sb, KERN_ERR, 4313 ext4_msg(sb, KERN_ERR,
4317 "blocksize too small for journal device"); 4314 "blocksize too small for journal device");
4318 goto out_bdev; 4315 goto out_bdev;
4319 } 4316 }
4320 4317
4321 sb_block = EXT4_MIN_BLOCK_SIZE / blocksize; 4318 sb_block = EXT4_MIN_BLOCK_SIZE / blocksize;
4322 offset = EXT4_MIN_BLOCK_SIZE % blocksize; 4319 offset = EXT4_MIN_BLOCK_SIZE % blocksize;
4323 set_blocksize(bdev, blocksize); 4320 set_blocksize(bdev, blocksize);
4324 if (!(bh = __bread(bdev, sb_block, blocksize))) { 4321 if (!(bh = __bread(bdev, sb_block, blocksize))) {
4325 ext4_msg(sb, KERN_ERR, "couldn't read superblock of " 4322 ext4_msg(sb, KERN_ERR, "couldn't read superblock of "
4326 "external journal"); 4323 "external journal");
4327 goto out_bdev; 4324 goto out_bdev;
4328 } 4325 }
4329 4326
4330 es = (struct ext4_super_block *) (bh->b_data + offset); 4327 es = (struct ext4_super_block *) (bh->b_data + offset);
4331 if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) || 4328 if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) ||
4332 !(le32_to_cpu(es->s_feature_incompat) & 4329 !(le32_to_cpu(es->s_feature_incompat) &
4333 EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) { 4330 EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
4334 ext4_msg(sb, KERN_ERR, "external journal has " 4331 ext4_msg(sb, KERN_ERR, "external journal has "
4335 "bad superblock"); 4332 "bad superblock");
4336 brelse(bh); 4333 brelse(bh);
4337 goto out_bdev; 4334 goto out_bdev;
4338 } 4335 }
4339 4336
4340 if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { 4337 if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
4341 ext4_msg(sb, KERN_ERR, "journal UUID does not match"); 4338 ext4_msg(sb, KERN_ERR, "journal UUID does not match");
4342 brelse(bh); 4339 brelse(bh);
4343 goto out_bdev; 4340 goto out_bdev;
4344 } 4341 }
4345 4342
4346 len = ext4_blocks_count(es); 4343 len = ext4_blocks_count(es);
4347 start = sb_block + 1; 4344 start = sb_block + 1;
4348 brelse(bh); /* we're done with the superblock */ 4345 brelse(bh); /* we're done with the superblock */
4349 4346
4350 journal = jbd2_journal_init_dev(bdev, sb->s_bdev, 4347 journal = jbd2_journal_init_dev(bdev, sb->s_bdev,
4351 start, len, blocksize); 4348 start, len, blocksize);
4352 if (!journal) { 4349 if (!journal) {
4353 ext4_msg(sb, KERN_ERR, "failed to create device journal"); 4350 ext4_msg(sb, KERN_ERR, "failed to create device journal");
4354 goto out_bdev; 4351 goto out_bdev;
4355 } 4352 }
4356 journal->j_private = sb; 4353 journal->j_private = sb;
4357 ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &journal->j_sb_buffer); 4354 ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &journal->j_sb_buffer);
4358 wait_on_buffer(journal->j_sb_buffer); 4355 wait_on_buffer(journal->j_sb_buffer);
4359 if (!buffer_uptodate(journal->j_sb_buffer)) { 4356 if (!buffer_uptodate(journal->j_sb_buffer)) {
4360 ext4_msg(sb, KERN_ERR, "I/O error on journal device"); 4357 ext4_msg(sb, KERN_ERR, "I/O error on journal device");
4361 goto out_journal; 4358 goto out_journal;
4362 } 4359 }
4363 if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { 4360 if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
4364 ext4_msg(sb, KERN_ERR, "External journal has more than one " 4361 ext4_msg(sb, KERN_ERR, "External journal has more than one "
4365 "user (unsupported) - %d", 4362 "user (unsupported) - %d",
4366 be32_to_cpu(journal->j_superblock->s_nr_users)); 4363 be32_to_cpu(journal->j_superblock->s_nr_users));
4367 goto out_journal; 4364 goto out_journal;
4368 } 4365 }
4369 EXT4_SB(sb)->journal_bdev = bdev; 4366 EXT4_SB(sb)->journal_bdev = bdev;
4370 ext4_init_journal_params(sb, journal); 4367 ext4_init_journal_params(sb, journal);
4371 return journal; 4368 return journal;
4372 4369
4373 out_journal: 4370 out_journal:
4374 jbd2_journal_destroy(journal); 4371 jbd2_journal_destroy(journal);
4375 out_bdev: 4372 out_bdev:
4376 ext4_blkdev_put(bdev); 4373 ext4_blkdev_put(bdev);
4377 return NULL; 4374 return NULL;
4378 } 4375 }
4379 4376
/*
 * ext4_load_journal - locate, initialize, and attach the filesystem journal.
 * @sb:             the ext4 superblock being mounted
 * @es:             the on-disk superblock image (may be updated on success)
 * @journal_devnum: mount-option override for the external journal device
 *                  (0 if none was given)
 *
 * The journal lives either in an inode (s_journal_inum) or on an external
 * device (s_journal_dev); having both recorded is an error.  If recovery is
 * not needed the old journal contents are wiped; otherwise the journal is
 * replayed by jbd2_journal_load().  Returns 0 on success, negative errno
 * on failure.
 */
static int ext4_load_journal(struct super_block *sb,
			     struct ext4_super_block *es,
			     unsigned long journal_devnum)
{
	journal_t *journal;
	unsigned int journal_inum = le32_to_cpu(es->s_journal_inum);
	dev_t journal_dev;
	int err = 0;
	int really_read_only;

	BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));

	/* A journal_dev= mount option overrides the device recorded on disk. */
	if (journal_devnum &&
	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
		ext4_msg(sb, KERN_INFO, "external journal device major/minor "
			"numbers have changed");
		journal_dev = new_decode_dev(journal_devnum);
	} else
		journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));

	really_read_only = bdev_read_only(sb->s_bdev);

	/*
	 * Are we loading a blank journal or performing recovery after a
	 * crash?  For recovery, we need to check in advance whether we
	 * can get read-write access to the device.
	 */
	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
		if (sb->s_flags & MS_RDONLY) {
			ext4_msg(sb, KERN_INFO, "INFO: recovery "
					"required on readonly filesystem");
			/* Replay needs to write; a truly read-only bdev
			 * makes the mount impossible. */
			if (really_read_only) {
				ext4_msg(sb, KERN_ERR, "write access "
					"unavailable, cannot proceed");
				return -EROFS;
			}
			ext4_msg(sb, KERN_INFO, "write access will "
			       "be enabled during recovery");
		}
	}

	/* The superblock may name an inode journal OR a device journal,
	 * never both. */
	if (journal_inum && journal_dev) {
		ext4_msg(sb, KERN_ERR, "filesystem has both journal "
		       "and inode journals!");
		return -EINVAL;
	}

	if (journal_inum) {
		if (!(journal = ext4_get_journal(sb, journal_inum)))
			return -EINVAL;
	} else {
		if (!(journal = ext4_get_dev_journal(sb, journal_dev)))
			return -EINVAL;
	}

	if (!(journal->j_flags & JBD2_BARRIER))
		ext4_msg(sb, KERN_INFO, "barriers disabled");

	/* No recovery pending: discard any stale journal contents.  The
	 * wipe is a no-op write-wise when the device is read-only. */
	if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER))
		err = jbd2_journal_wipe(journal, !really_read_only);
	if (!err) {
		/* Save the error-reporting area of the in-memory superblock
		 * around the replay: jbd2_journal_load() may rewrite the
		 * superblock block from the journal, and we do not want to
		 * lose the recorded error information.  A failed kmalloc
		 * just skips the preservation. */
		char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL);
		if (save)
			memcpy(save, ((char *) es) +
			       EXT4_S_ERR_START, EXT4_S_ERR_LEN);
		err = jbd2_journal_load(journal);
		if (save)
			memcpy(((char *) es) + EXT4_S_ERR_START,
			       save, EXT4_S_ERR_LEN);
		kfree(save);
	}

	if (err) {
		ext4_msg(sb, KERN_ERR, "error loading journal");
		jbd2_journal_destroy(journal);
		return err;
	}

	EXT4_SB(sb)->s_journal = journal;
	/* Propagate any error the journal recorded in a previous life. */
	ext4_clear_journal_err(sb, es);

	/* Persist the devnum override now that we know we can write. */
	if (!really_read_only && journal_devnum &&
	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
		es->s_journal_dev = cpu_to_le32(journal_devnum);

		/* Make sure we flush the recovery flag to disk. */
		ext4_commit_super(sb, 1);
	}

	return 0;
}
4471 4468
/*
 * ext4_commit_super - refresh summary fields and write the superblock.
 * @sb:   the filesystem whose primary superblock should be written
 * @sync: if non-zero, wait for the write to complete and report I/O errors
 *
 * Updates write time, kbytes-written statistics, and the free block/inode
 * counters in the on-disk superblock image, recomputes its checksum, and
 * marks the buffer dirty.  Returns 0 on success or a negative errno from
 * the (synchronous) write path.
 */
static int ext4_commit_super(struct super_block *sb, int sync)
{
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
	struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
	int error = 0;

	/* Nothing to do if the device is gone or we never had a buffer. */
	if (!sbh || block_device_ejected(sb))
		return error;
	if (buffer_write_io_error(sbh)) {
		/*
		 * Oh, dear.  A previous attempt to write the
		 * superblock failed.  This could happen because the
		 * USB device was yanked out.  Or it could happen to
		 * be a transient write error and maybe the block will
		 * be remapped.  Nothing we can do but to retry the
		 * write and hope for the best.
		 */
		ext4_msg(sb, KERN_ERR, "previous I/O error to "
		       "superblock detected");
		clear_buffer_write_io_error(sbh);
		set_buffer_uptodate(sbh);
	}
	/*
	 * If the file system is mounted read-only, don't update the
	 * superblock write time.  This avoids updating the superblock
	 * write time when we are mounting the root file system
	 * read/only but we need to replay the journal; at that point,
	 * for people who are east of GMT and who make their clock
	 * tick in localtime for Windows bug-for-bug compatibility,
	 * the clock is set in the future, and this will cause e2fsck
	 * to complain and force a full file system check.
	 */
	if (!(sb->s_flags & MS_RDONLY))
		es->s_wtime = cpu_to_le32(get_seconds());
	/* Lifetime-write stats: partition sector counters (halved to get
	 * KiB) are only available when a partition object exists. */
	if (sb->s_bdev->bd_part)
		es->s_kbytes_written =
			cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
			    ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
			      EXT4_SB(sb)->s_sectors_written_start) >> 1));
	else
		es->s_kbytes_written =
			cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
	/* Fold the percpu counters into the on-disk summary fields. */
	ext4_free_blocks_count_set(es,
			EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive(
				&EXT4_SB(sb)->s_freeclusters_counter)));
	es->s_free_inodes_count =
		cpu_to_le32(percpu_counter_sum_positive(
				&EXT4_SB(sb)->s_freeinodes_counter));
	BUFFER_TRACE(sbh, "marking dirty");
	/* Checksum must be recomputed after all field updates above. */
	ext4_superblock_csum_set(sb);
	mark_buffer_dirty(sbh);
	if (sync) {
		error = sync_dirty_buffer(sbh);
		if (error)
			return error;

		/* The buffer may carry an I/O error even when
		 * sync_dirty_buffer() itself returned 0. */
		error = buffer_write_io_error(sbh);
		if (error) {
			ext4_msg(sb, KERN_ERR, "I/O error while writing "
			       "superblock");
			clear_buffer_write_io_error(sbh);
			set_buffer_uptodate(sbh);
		}
	}
	return error;
}
4538 4535
/*
 * Have we just finished recovery?  If so, and if we are mounting (or
 * remounting) the filesystem readonly, then we will end up with a
 * consistent fs on disk.  Record that fact.
 */
static void ext4_mark_recovery_complete(struct super_block *sb,
					struct ext4_super_block *es)
{
	journal_t *journal = EXT4_SB(sb)->s_journal;

	/* Without the has_journal feature there must be no journal object. */
	if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
		BUG_ON(journal != NULL);
		return;
	}
	/* Block new transactions, then push everything to disk; if the
	 * flush fails we must NOT clear the needs-recovery flag. */
	jbd2_journal_lock_updates(journal);
	if (jbd2_journal_flush(journal) < 0)
		goto out;

	/* Only a read-only mount with recovery pending can safely declare
	 * the on-disk filesystem consistent. */
	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) &&
	    sb->s_flags & MS_RDONLY) {
		EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
		ext4_commit_super(sb, 1);
	}

out:
	jbd2_journal_unlock_updates(journal);
}
4566 4563
/*
 * If we are mounting (or read-write remounting) a filesystem whose journal
 * has recorded an error from a previous lifetime, move that error to the
 * main filesystem now.
 */
static void ext4_clear_journal_err(struct super_block *sb,
				   struct ext4_super_block *es)
{
	journal_t *journal;
	int j_errno;
	const char *errstr;

	BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));

	journal = EXT4_SB(sb)->s_journal;

	/*
	 * Now check for any error status which may have been recorded in the
	 * journal by a prior ext4_error() or ext4_abort()
	 */

	j_errno = jbd2_journal_errno(journal);
	if (j_errno) {
		char nbuf[16];

		errstr = ext4_decode_error(sb, j_errno, nbuf);
		ext4_warning(sb, "Filesystem error recorded "
			     "from previous mount: %s", errstr);
		ext4_warning(sb, "Marking fs in need of filesystem check.");

		/* Transfer the error state onto the filesystem itself and
		 * persist it, so e2fsck will see it at next boot. */
		EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
		es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
		ext4_commit_super(sb, 1);

		/* The error now lives in the superblock; clear it from the
		 * journal and write the journal superblock back. */
		jbd2_journal_clear_err(journal);
		jbd2_journal_update_sb_errno(journal);
	}
}
4605 4602
4606 /* 4603 /*
4607 * Force the running and committing transactions to commit, 4604 * Force the running and committing transactions to commit,
4608 * and wait on the commit. 4605 * and wait on the commit.
4609 */ 4606 */
4610 int ext4_force_commit(struct super_block *sb) 4607 int ext4_force_commit(struct super_block *sb)
4611 { 4608 {
4612 journal_t *journal; 4609 journal_t *journal;
4613 4610
4614 if (sb->s_flags & MS_RDONLY) 4611 if (sb->s_flags & MS_RDONLY)
4615 return 0; 4612 return 0;
4616 4613
4617 journal = EXT4_SB(sb)->s_journal; 4614 journal = EXT4_SB(sb)->s_journal;
4618 return ext4_journal_force_commit(journal); 4615 return ext4_journal_force_commit(journal);
4619 } 4616 }
4620 4617
/*
 * ext4_sync_fs - sync_fs superblock operation for journalled mounts.
 * @sb:   the filesystem to sync
 * @wait: non-zero for a synchronous sync (wait for commit / flush)
 *
 * Flushes pending unwritten-extent conversions and dirty quota, kicks a
 * journal commit, and issues a cache-flush barrier when the transaction
 * commit will not send one on our behalf.  Returns 0 or a negative errno.
 */
static int ext4_sync_fs(struct super_block *sb, int wait)
{
	int ret = 0;
	tid_t target;
	bool needs_barrier = false;
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	trace_ext4_sync_fs(sb, wait);
	flush_workqueue(sbi->rsv_conversion_wq);
	/*
	 * Writeback quota in non-journalled quota case - journalled quota has
	 * no dirty dquots
	 */
	dquot_writeback_dquots(sb, -1);
	/*
	 * Data writeback is possible w/o journal transaction, so barrier must
	 * being sent at the end of the function. But we can skip it if
	 * transaction_commit will do it for us.
	 */
	target = jbd2_get_latest_transaction(sbi->s_journal);
	if (wait && sbi->s_journal->j_flags & JBD2_BARRIER &&
	    !jbd2_trans_will_send_data_barrier(sbi->s_journal, target))
		needs_barrier = true;

	if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
		if (wait)
			ret = jbd2_log_wait_commit(sbi->s_journal, target);
	}
	if (needs_barrier) {
		int err;
		err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);
		/* Report the first error encountered; a commit-wait error
		 * takes precedence over the flush error. */
		if (!ret)
			ret = err;
	}

	return ret;
}
4658 4655
4659 static int ext4_sync_fs_nojournal(struct super_block *sb, int wait) 4656 static int ext4_sync_fs_nojournal(struct super_block *sb, int wait)
4660 { 4657 {
4661 int ret = 0; 4658 int ret = 0;
4662 4659
4663 trace_ext4_sync_fs(sb, wait); 4660 trace_ext4_sync_fs(sb, wait);
4664 flush_workqueue(EXT4_SB(sb)->rsv_conversion_wq); 4661 flush_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
4665 dquot_writeback_dquots(sb, -1); 4662 dquot_writeback_dquots(sb, -1);
4666 if (wait && test_opt(sb, BARRIER)) 4663 if (wait && test_opt(sb, BARRIER))
4667 ret = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL); 4664 ret = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);
4668 4665
4669 return ret; 4666 return ret;
4670 } 4667 }
4671 4668
/*
 * LVM calls this function before a (read-only) snapshot is created.  This
 * gives us a chance to flush the journal completely and mark the fs clean.
 *
 * Note that only this function cannot bring a filesystem to be in a clean
 * state independently. It relies on upper layer to stop all data & metadata
 * modifications.
 */
static int ext4_freeze(struct super_block *sb)
{
	int error = 0;
	journal_t *journal;

	/* Read-only mounts are already effectively frozen. */
	if (sb->s_flags & MS_RDONLY)
		return 0;

	journal = EXT4_SB(sb)->s_journal;

	/* Now we set up the journal barrier. */
	jbd2_journal_lock_updates(journal);

	/*
	 * Don't clear the needs_recovery flag if we failed to flush
	 * the journal.
	 */
	error = jbd2_journal_flush(journal);
	if (error < 0)
		goto out;

	/* Journal blocked and flushed, clear needs_recovery flag. */
	EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
	error = ext4_commit_super(sb, 1);
out:
	/* we rely on upper layer to stop further updates */
	/* NOTE(review): unlocks via EXT4_SB(sb)->s_journal while the lock
	 * above used the local 'journal' — same object, but using one name
	 * consistently would read better. */
	jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
	return error;
}
4709 4706
4710 /* 4707 /*
4711 * Called by LVM after the snapshot is done. We need to reset the RECOVER 4708 * Called by LVM after the snapshot is done. We need to reset the RECOVER
4712 * flag here, even though the filesystem is not technically dirty yet. 4709 * flag here, even though the filesystem is not technically dirty yet.
4713 */ 4710 */
4714 static int ext4_unfreeze(struct super_block *sb) 4711 static int ext4_unfreeze(struct super_block *sb)
4715 { 4712 {
4716 if (sb->s_flags & MS_RDONLY) 4713 if (sb->s_flags & MS_RDONLY)
4717 return 0; 4714 return 0;
4718 4715
4719 /* Reset the needs_recovery flag before the fs is unlocked. */ 4716 /* Reset the needs_recovery flag before the fs is unlocked. */
4720 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 4717 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
4721 ext4_commit_super(sb, 1); 4718 ext4_commit_super(sb, 1);
4722 return 0; 4719 return 0;
4723 } 4720 }
4724 4721
/*
 * Structure to save mount options for ext4_remount's benefit: a snapshot
 * of the tunable fields of struct ext4_sb_info, taken before new options
 * are parsed so they can be restored if the remount fails.
 */
struct ext4_mount_options {
	unsigned long s_mount_opt;		/* primary mount option flags */
	unsigned long s_mount_opt2;		/* secondary mount option flags */
	kuid_t s_resuid;			/* uid allowed to use reserved blocks */
	kgid_t s_resgid;			/* gid allowed to use reserved blocks */
	unsigned long s_commit_interval;	/* journal commit interval */
	u32 s_min_batch_time, s_max_batch_time;	/* transaction batching bounds */
#ifdef CONFIG_QUOTA
	int s_jquota_fmt;			/* journalled quota format */
	char *s_qf_names[MAXQUOTAS];		/* quota file names (kstrdup'd copies) */
#endif
};
4740 4737
4741 static int ext4_remount(struct super_block *sb, int *flags, char *data) 4738 static int ext4_remount(struct super_block *sb, int *flags, char *data)
4742 { 4739 {
4743 struct ext4_super_block *es; 4740 struct ext4_super_block *es;
4744 struct ext4_sb_info *sbi = EXT4_SB(sb); 4741 struct ext4_sb_info *sbi = EXT4_SB(sb);
4745 unsigned long old_sb_flags; 4742 unsigned long old_sb_flags;
4746 struct ext4_mount_options old_opts; 4743 struct ext4_mount_options old_opts;
4747 int enable_quota = 0; 4744 int enable_quota = 0;
4748 ext4_group_t g; 4745 ext4_group_t g;
4749 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; 4746 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
4750 int err = 0; 4747 int err = 0;
4751 #ifdef CONFIG_QUOTA 4748 #ifdef CONFIG_QUOTA
4752 int i, j; 4749 int i, j;
4753 #endif 4750 #endif
4754 char *orig_data = kstrdup(data, GFP_KERNEL); 4751 char *orig_data = kstrdup(data, GFP_KERNEL);
4755 4752
4756 /* Store the original options */ 4753 /* Store the original options */
4757 old_sb_flags = sb->s_flags; 4754 old_sb_flags = sb->s_flags;
4758 old_opts.s_mount_opt = sbi->s_mount_opt; 4755 old_opts.s_mount_opt = sbi->s_mount_opt;
4759 old_opts.s_mount_opt2 = sbi->s_mount_opt2; 4756 old_opts.s_mount_opt2 = sbi->s_mount_opt2;
4760 old_opts.s_resuid = sbi->s_resuid; 4757 old_opts.s_resuid = sbi->s_resuid;
4761 old_opts.s_resgid = sbi->s_resgid; 4758 old_opts.s_resgid = sbi->s_resgid;
4762 old_opts.s_commit_interval = sbi->s_commit_interval; 4759 old_opts.s_commit_interval = sbi->s_commit_interval;
4763 old_opts.s_min_batch_time = sbi->s_min_batch_time; 4760 old_opts.s_min_batch_time = sbi->s_min_batch_time;
4764 old_opts.s_max_batch_time = sbi->s_max_batch_time; 4761 old_opts.s_max_batch_time = sbi->s_max_batch_time;
4765 #ifdef CONFIG_QUOTA 4762 #ifdef CONFIG_QUOTA
4766 old_opts.s_jquota_fmt = sbi->s_jquota_fmt; 4763 old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
4767 for (i = 0; i < MAXQUOTAS; i++) 4764 for (i = 0; i < MAXQUOTAS; i++)
4768 if (sbi->s_qf_names[i]) { 4765 if (sbi->s_qf_names[i]) {
4769 old_opts.s_qf_names[i] = kstrdup(sbi->s_qf_names[i], 4766 old_opts.s_qf_names[i] = kstrdup(sbi->s_qf_names[i],
4770 GFP_KERNEL); 4767 GFP_KERNEL);
4771 if (!old_opts.s_qf_names[i]) { 4768 if (!old_opts.s_qf_names[i]) {
4772 for (j = 0; j < i; j++) 4769 for (j = 0; j < i; j++)
4773 kfree(old_opts.s_qf_names[j]); 4770 kfree(old_opts.s_qf_names[j]);
4774 kfree(orig_data); 4771 kfree(orig_data);
4775 return -ENOMEM; 4772 return -ENOMEM;
4776 } 4773 }
4777 } else 4774 } else
4778 old_opts.s_qf_names[i] = NULL; 4775 old_opts.s_qf_names[i] = NULL;
4779 #endif 4776 #endif
4780 if (sbi->s_journal && sbi->s_journal->j_task->io_context) 4777 if (sbi->s_journal && sbi->s_journal->j_task->io_context)
4781 journal_ioprio = sbi->s_journal->j_task->io_context->ioprio; 4778 journal_ioprio = sbi->s_journal->j_task->io_context->ioprio;
4782 4779
4783 /* 4780 /*
4784 * Allow the "check" option to be passed as a remount option. 4781 * Allow the "check" option to be passed as a remount option.
4785 */ 4782 */
4786 if (!parse_options(data, sb, NULL, &journal_ioprio, 1)) { 4783 if (!parse_options(data, sb, NULL, &journal_ioprio, 1)) {
4787 err = -EINVAL; 4784 err = -EINVAL;
4788 goto restore_opts; 4785 goto restore_opts;
4789 } 4786 }
4790 4787
4791 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { 4788 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
4792 if (test_opt2(sb, EXPLICIT_DELALLOC)) { 4789 if (test_opt2(sb, EXPLICIT_DELALLOC)) {
4793 ext4_msg(sb, KERN_ERR, "can't mount with " 4790 ext4_msg(sb, KERN_ERR, "can't mount with "
4794 "both data=journal and delalloc"); 4791 "both data=journal and delalloc");
4795 err = -EINVAL; 4792 err = -EINVAL;
4796 goto restore_opts; 4793 goto restore_opts;
4797 } 4794 }
4798 if (test_opt(sb, DIOREAD_NOLOCK)) { 4795 if (test_opt(sb, DIOREAD_NOLOCK)) {
4799 ext4_msg(sb, KERN_ERR, "can't mount with " 4796 ext4_msg(sb, KERN_ERR, "can't mount with "
4800 "both data=journal and dioread_nolock"); 4797 "both data=journal and dioread_nolock");
4801 err = -EINVAL; 4798 err = -EINVAL;
4802 goto restore_opts; 4799 goto restore_opts;
4803 } 4800 }
4804 } 4801 }
4805 4802
4806 if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) 4803 if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
4807 ext4_abort(sb, "Abort forced by user"); 4804 ext4_abort(sb, "Abort forced by user");
4808 4805
4809 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 4806 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
4810 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); 4807 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
4811 4808
4812 es = sbi->s_es; 4809 es = sbi->s_es;
4813 4810
4814 if (sbi->s_journal) { 4811 if (sbi->s_journal) {
4815 ext4_init_journal_params(sb, sbi->s_journal); 4812 ext4_init_journal_params(sb, sbi->s_journal);
4816 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); 4813 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
4817 } 4814 }
4818 4815
4819 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) { 4816 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
4820 if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) { 4817 if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) {
4821 err = -EROFS; 4818 err = -EROFS;
4822 goto restore_opts; 4819 goto restore_opts;
4823 } 4820 }
4824 4821
4825 if (*flags & MS_RDONLY) { 4822 if (*flags & MS_RDONLY) {
4826 err = dquot_suspend(sb, -1); 4823 err = dquot_suspend(sb, -1);
4827 if (err < 0) 4824 if (err < 0)
4828 goto restore_opts; 4825 goto restore_opts;
4829 4826
4830 /* 4827 /*
4831 * First of all, the unconditional stuff we have to do 4828 * First of all, the unconditional stuff we have to do
4832 * to disable replay of the journal when we next remount 4829 * to disable replay of the journal when we next remount
4833 */ 4830 */
4834 sb->s_flags |= MS_RDONLY; 4831 sb->s_flags |= MS_RDONLY;
4835 4832
4836 /* 4833 /*
4837 * OK, test if we are remounting a valid rw partition 4834 * OK, test if we are remounting a valid rw partition
4838 * readonly, and if so set the rdonly flag and then 4835 * readonly, and if so set the rdonly flag and then
4839 * mark the partition as valid again. 4836 * mark the partition as valid again.
4840 */ 4837 */
4841 if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) && 4838 if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) &&
4842 (sbi->s_mount_state & EXT4_VALID_FS)) 4839 (sbi->s_mount_state & EXT4_VALID_FS))
4843 es->s_state = cpu_to_le16(sbi->s_mount_state); 4840 es->s_state = cpu_to_le16(sbi->s_mount_state);
4844 4841
4845 if (sbi->s_journal) 4842 if (sbi->s_journal)
4846 ext4_mark_recovery_complete(sb, es); 4843 ext4_mark_recovery_complete(sb, es);
4847 } else { 4844 } else {
4848 /* Make sure we can mount this feature set readwrite */ 4845 /* Make sure we can mount this feature set readwrite */
4849 if (!ext4_feature_set_ok(sb, 0)) { 4846 if (!ext4_feature_set_ok(sb, 0)) {
4850 err = -EROFS; 4847 err = -EROFS;
4851 goto restore_opts; 4848 goto restore_opts;
4852 } 4849 }
4853 /* 4850 /*
4854 * Make sure the group descriptor checksums 4851 * Make sure the group descriptor checksums
4855 * are sane. If they aren't, refuse to remount r/w. 4852 * are sane. If they aren't, refuse to remount r/w.
4856 */ 4853 */
4857 for (g = 0; g < sbi->s_groups_count; g++) { 4854 for (g = 0; g < sbi->s_groups_count; g++) {
4858 struct ext4_group_desc *gdp = 4855 struct ext4_group_desc *gdp =
4859 ext4_get_group_desc(sb, g, NULL); 4856 ext4_get_group_desc(sb, g, NULL);
4860 4857
4861 if (!ext4_group_desc_csum_verify(sb, g, gdp)) { 4858 if (!ext4_group_desc_csum_verify(sb, g, gdp)) {
4862 ext4_msg(sb, KERN_ERR, 4859 ext4_msg(sb, KERN_ERR,
4863 "ext4_remount: Checksum for group %u failed (%u!=%u)", 4860 "ext4_remount: Checksum for group %u failed (%u!=%u)",
4864 g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)), 4861 g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)),
4865 le16_to_cpu(gdp->bg_checksum)); 4862 le16_to_cpu(gdp->bg_checksum));
4866 err = -EINVAL; 4863 err = -EINVAL;
4867 goto restore_opts; 4864 goto restore_opts;
4868 } 4865 }
4869 } 4866 }
4870 4867
4871 /* 4868 /*
4872 * If we have an unprocessed orphan list hanging 4869 * If we have an unprocessed orphan list hanging
4873 * around from a previously readonly bdev mount, 4870 * around from a previously readonly bdev mount,
4874 * require a full umount/remount for now. 4871 * require a full umount/remount for now.
4875 */ 4872 */
4876 if (es->s_last_orphan) { 4873 if (es->s_last_orphan) {
4877 ext4_msg(sb, KERN_WARNING, "Couldn't " 4874 ext4_msg(sb, KERN_WARNING, "Couldn't "
4878 "remount RDWR because of unprocessed " 4875 "remount RDWR because of unprocessed "
4879 "orphan inode list. Please " 4876 "orphan inode list. Please "
4880 "umount/remount instead"); 4877 "umount/remount instead");
4881 err = -EINVAL; 4878 err = -EINVAL;
4882 goto restore_opts; 4879 goto restore_opts;
4883 } 4880 }
4884 4881
4885 /* 4882 /*
4886 * Mounting a RDONLY partition read-write, so reread 4883 * Mounting a RDONLY partition read-write, so reread
4887 * and store the current valid flag. (It may have 4884 * and store the current valid flag. (It may have
4888 * been changed by e2fsck since we originally mounted 4885 * been changed by e2fsck since we originally mounted
4889 * the partition.) 4886 * the partition.)
4890 */ 4887 */
4891 if (sbi->s_journal) 4888 if (sbi->s_journal)
4892 ext4_clear_journal_err(sb, es); 4889 ext4_clear_journal_err(sb, es);
4893 sbi->s_mount_state = le16_to_cpu(es->s_state); 4890 sbi->s_mount_state = le16_to_cpu(es->s_state);
4894 if (!ext4_setup_super(sb, es, 0)) 4891 if (!ext4_setup_super(sb, es, 0))
4895 sb->s_flags &= ~MS_RDONLY; 4892 sb->s_flags &= ~MS_RDONLY;
4896 if (EXT4_HAS_INCOMPAT_FEATURE(sb, 4893 if (EXT4_HAS_INCOMPAT_FEATURE(sb,
4897 EXT4_FEATURE_INCOMPAT_MMP)) 4894 EXT4_FEATURE_INCOMPAT_MMP))
4898 if (ext4_multi_mount_protect(sb, 4895 if (ext4_multi_mount_protect(sb,
4899 le64_to_cpu(es->s_mmp_block))) { 4896 le64_to_cpu(es->s_mmp_block))) {
4900 err = -EROFS; 4897 err = -EROFS;
4901 goto restore_opts; 4898 goto restore_opts;
4902 } 4899 }
4903 enable_quota = 1; 4900 enable_quota = 1;
4904 } 4901 }
4905 } 4902 }
4906 4903
4907 /* 4904 /*
4908 * Reinitialize lazy itable initialization thread based on 4905 * Reinitialize lazy itable initialization thread based on
4909 * current settings 4906 * current settings
4910 */ 4907 */
4911 if ((sb->s_flags & MS_RDONLY) || !test_opt(sb, INIT_INODE_TABLE)) 4908 if ((sb->s_flags & MS_RDONLY) || !test_opt(sb, INIT_INODE_TABLE))
4912 ext4_unregister_li_request(sb); 4909 ext4_unregister_li_request(sb);
4913 else { 4910 else {
4914 ext4_group_t first_not_zeroed; 4911 ext4_group_t first_not_zeroed;
4915 first_not_zeroed = ext4_has_uninit_itable(sb); 4912 first_not_zeroed = ext4_has_uninit_itable(sb);
4916 ext4_register_li_request(sb, first_not_zeroed); 4913 ext4_register_li_request(sb, first_not_zeroed);
4917 } 4914 }
4918 4915
4919 ext4_setup_system_zone(sb); 4916 ext4_setup_system_zone(sb);
4920 if (sbi->s_journal == NULL && !(old_sb_flags & MS_RDONLY)) 4917 if (sbi->s_journal == NULL && !(old_sb_flags & MS_RDONLY))
4921 ext4_commit_super(sb, 1); 4918 ext4_commit_super(sb, 1);
4922 4919
4923 #ifdef CONFIG_QUOTA 4920 #ifdef CONFIG_QUOTA
4924 /* Release old quota file names */ 4921 /* Release old quota file names */
4925 for (i = 0; i < MAXQUOTAS; i++) 4922 for (i = 0; i < MAXQUOTAS; i++)
4926 kfree(old_opts.s_qf_names[i]); 4923 kfree(old_opts.s_qf_names[i]);
4927 if (enable_quota) { 4924 if (enable_quota) {
4928 if (sb_any_quota_suspended(sb)) 4925 if (sb_any_quota_suspended(sb))
4929 dquot_resume(sb, -1); 4926 dquot_resume(sb, -1);
4930 else if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 4927 else if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
4931 EXT4_FEATURE_RO_COMPAT_QUOTA)) { 4928 EXT4_FEATURE_RO_COMPAT_QUOTA)) {
4932 err = ext4_enable_quotas(sb); 4929 err = ext4_enable_quotas(sb);
4933 if (err) 4930 if (err)
4934 goto restore_opts; 4931 goto restore_opts;
4935 } 4932 }
4936 } 4933 }
4937 #endif 4934 #endif
4938 4935
4939 ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data); 4936 ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data);
4940 kfree(orig_data); 4937 kfree(orig_data);
4941 return 0; 4938 return 0;
4942 4939
4943 restore_opts: 4940 restore_opts:
4944 sb->s_flags = old_sb_flags; 4941 sb->s_flags = old_sb_flags;
4945 sbi->s_mount_opt = old_opts.s_mount_opt; 4942 sbi->s_mount_opt = old_opts.s_mount_opt;
4946 sbi->s_mount_opt2 = old_opts.s_mount_opt2; 4943 sbi->s_mount_opt2 = old_opts.s_mount_opt2;
4947 sbi->s_resuid = old_opts.s_resuid; 4944 sbi->s_resuid = old_opts.s_resuid;
4948 sbi->s_resgid = old_opts.s_resgid; 4945 sbi->s_resgid = old_opts.s_resgid;
4949 sbi->s_commit_interval = old_opts.s_commit_interval; 4946 sbi->s_commit_interval = old_opts.s_commit_interval;
4950 sbi->s_min_batch_time = old_opts.s_min_batch_time; 4947 sbi->s_min_batch_time = old_opts.s_min_batch_time;
4951 sbi->s_max_batch_time = old_opts.s_max_batch_time; 4948 sbi->s_max_batch_time = old_opts.s_max_batch_time;
4952 #ifdef CONFIG_QUOTA 4949 #ifdef CONFIG_QUOTA
4953 sbi->s_jquota_fmt = old_opts.s_jquota_fmt; 4950 sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
4954 for (i = 0; i < MAXQUOTAS; i++) { 4951 for (i = 0; i < MAXQUOTAS; i++) {
4955 kfree(sbi->s_qf_names[i]); 4952 kfree(sbi->s_qf_names[i]);
4956 sbi->s_qf_names[i] = old_opts.s_qf_names[i]; 4953 sbi->s_qf_names[i] = old_opts.s_qf_names[i];
4957 } 4954 }
4958 #endif 4955 #endif
4959 kfree(orig_data); 4956 kfree(orig_data);
4960 return err; 4957 return err;
4961 } 4958 }
4962 4959
/*
 * Fill in *buf with filesystem statistics for statfs(2)/df.
 *
 * Unless the "minixdf" mount option is set, the precomputed metadata
 * overhead (sbi->s_overhead) is subtracted from the total block count so
 * that "df" reports space actually usable for data.
 */
static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	struct super_block *sb = dentry->d_sb;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_super_block *es = sbi->s_es;
	ext4_fsblk_t overhead = 0, resv_blocks;
	u64 fsid;
	s64 bfree;
	/* Clusters reserved by ext4 itself, converted to blocks. */
	resv_blocks = EXT4_C2B(sbi, atomic64_read(&sbi->s_resv_clusters));

	if (!test_opt(sb, MINIX_DF))
		overhead = sbi->s_overhead;

	buf->f_type = EXT4_SUPER_MAGIC;
	buf->f_bsize = sb->s_blocksize;
	buf->f_blocks = ext4_blocks_count(es) - EXT4_C2B(sbi, overhead);
	/* Delalloc-dirtied clusters are already spoken for: not free. */
	bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) -
		percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter);
	/* prevent underflow in case that few free space is available */
	buf->f_bfree = EXT4_C2B(sbi, max_t(s64, bfree, 0));
	/* Space available to unprivileged users: free minus reserved. */
	buf->f_bavail = buf->f_bfree -
			(ext4_r_blocks_count(es) + resv_blocks);
	if (buf->f_bfree < (ext4_r_blocks_count(es) + resv_blocks))
		buf->f_bavail = 0;
	buf->f_files = le32_to_cpu(es->s_inodes_count);
	buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
	buf->f_namelen = EXT4_NAME_LEN;
	/* Fold the 16-byte fs UUID into a 64-bit fsid via XOR of halves. */
	fsid = le64_to_cpup((void *)es->s_uuid) ^
	       le64_to_cpup((void *)es->s_uuid + sizeof(u64));
	buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
	buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;

	return 0;
}
4997 4994
/* Helper function for writing quotas on sync - we need to start the
 * transaction before the quota file is locked for write. Otherwise the
 * following deadlock is possible:
 * Process 1                          Process 2
 * ext4_create()                      quota_sync()
 *   jbd2_journal_start()               write_dquot()
 *   dquot_initialize()                 down(dqio_mutex)
 *     down(dqio_mutex)                   jbd2_journal_start()
 *
 */
5007 5004
5008 #ifdef CONFIG_QUOTA 5005 #ifdef CONFIG_QUOTA
5009 5006
5010 static inline struct inode *dquot_to_inode(struct dquot *dquot) 5007 static inline struct inode *dquot_to_inode(struct dquot *dquot)
5011 { 5008 {
5012 return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type]; 5009 return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type];
5013 } 5010 }
5014 5011
5015 static int ext4_write_dquot(struct dquot *dquot) 5012 static int ext4_write_dquot(struct dquot *dquot)
5016 { 5013 {
5017 int ret, err; 5014 int ret, err;
5018 handle_t *handle; 5015 handle_t *handle;
5019 struct inode *inode; 5016 struct inode *inode;
5020 5017
5021 inode = dquot_to_inode(dquot); 5018 inode = dquot_to_inode(dquot);
5022 handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 5019 handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
5023 EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); 5020 EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
5024 if (IS_ERR(handle)) 5021 if (IS_ERR(handle))
5025 return PTR_ERR(handle); 5022 return PTR_ERR(handle);
5026 ret = dquot_commit(dquot); 5023 ret = dquot_commit(dquot);
5027 err = ext4_journal_stop(handle); 5024 err = ext4_journal_stop(handle);
5028 if (!ret) 5025 if (!ret)
5029 ret = err; 5026 ret = err;
5030 return ret; 5027 return ret;
5031 } 5028 }
5032 5029
5033 static int ext4_acquire_dquot(struct dquot *dquot) 5030 static int ext4_acquire_dquot(struct dquot *dquot)
5034 { 5031 {
5035 int ret, err; 5032 int ret, err;
5036 handle_t *handle; 5033 handle_t *handle;
5037 5034
5038 handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA, 5035 handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
5039 EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb)); 5036 EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
5040 if (IS_ERR(handle)) 5037 if (IS_ERR(handle))
5041 return PTR_ERR(handle); 5038 return PTR_ERR(handle);
5042 ret = dquot_acquire(dquot); 5039 ret = dquot_acquire(dquot);
5043 err = ext4_journal_stop(handle); 5040 err = ext4_journal_stop(handle);
5044 if (!ret) 5041 if (!ret)
5045 ret = err; 5042 ret = err;
5046 return ret; 5043 return ret;
5047 } 5044 }
5048 5045
5049 static int ext4_release_dquot(struct dquot *dquot) 5046 static int ext4_release_dquot(struct dquot *dquot)
5050 { 5047 {
5051 int ret, err; 5048 int ret, err;
5052 handle_t *handle; 5049 handle_t *handle;
5053 5050
5054 handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA, 5051 handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
5055 EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb)); 5052 EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
5056 if (IS_ERR(handle)) { 5053 if (IS_ERR(handle)) {
5057 /* Release dquot anyway to avoid endless cycle in dqput() */ 5054 /* Release dquot anyway to avoid endless cycle in dqput() */
5058 dquot_release(dquot); 5055 dquot_release(dquot);
5059 return PTR_ERR(handle); 5056 return PTR_ERR(handle);
5060 } 5057 }
5061 ret = dquot_release(dquot); 5058 ret = dquot_release(dquot);
5062 err = ext4_journal_stop(handle); 5059 err = ext4_journal_stop(handle);
5063 if (!ret) 5060 if (!ret)
5064 ret = err; 5061 ret = err;
5065 return ret; 5062 return ret;
5066 } 5063 }
5067 5064
5068 static int ext4_mark_dquot_dirty(struct dquot *dquot) 5065 static int ext4_mark_dquot_dirty(struct dquot *dquot)
5069 { 5066 {
5070 struct super_block *sb = dquot->dq_sb; 5067 struct super_block *sb = dquot->dq_sb;
5071 struct ext4_sb_info *sbi = EXT4_SB(sb); 5068 struct ext4_sb_info *sbi = EXT4_SB(sb);
5072 5069
5073 /* Are we journaling quotas? */ 5070 /* Are we journaling quotas? */
5074 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) || 5071 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) ||
5075 sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { 5072 sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
5076 dquot_mark_dquot_dirty(dquot); 5073 dquot_mark_dquot_dirty(dquot);
5077 return ext4_write_dquot(dquot); 5074 return ext4_write_dquot(dquot);
5078 } else { 5075 } else {
5079 return dquot_mark_dquot_dirty(dquot); 5076 return dquot_mark_dquot_dirty(dquot);
5080 } 5077 }
5081 } 5078 }
5082 5079
5083 static int ext4_write_info(struct super_block *sb, int type) 5080 static int ext4_write_info(struct super_block *sb, int type)
5084 { 5081 {
5085 int ret, err; 5082 int ret, err;
5086 handle_t *handle; 5083 handle_t *handle;
5087 5084
5088 /* Data block + inode block */ 5085 /* Data block + inode block */
5089 handle = ext4_journal_start(sb->s_root->d_inode, EXT4_HT_QUOTA, 2); 5086 handle = ext4_journal_start(sb->s_root->d_inode, EXT4_HT_QUOTA, 2);
5090 if (IS_ERR(handle)) 5087 if (IS_ERR(handle))
5091 return PTR_ERR(handle); 5088 return PTR_ERR(handle);
5092 ret = dquot_commit_info(sb, type); 5089 ret = dquot_commit_info(sb, type);
5093 err = ext4_journal_stop(handle); 5090 err = ext4_journal_stop(handle);
5094 if (!ret) 5091 if (!ret)
5095 ret = err; 5092 ret = err;
5096 return ret; 5093 return ret;
5097 } 5094 }
5098 5095
5099 /* 5096 /*
5100 * Turn on quotas during mount time - we need to find 5097 * Turn on quotas during mount time - we need to find
5101 * the quota file and such... 5098 * the quota file and such...
5102 */ 5099 */
5103 static int ext4_quota_on_mount(struct super_block *sb, int type) 5100 static int ext4_quota_on_mount(struct super_block *sb, int type)
5104 { 5101 {
5105 return dquot_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type], 5102 return dquot_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type],
5106 EXT4_SB(sb)->s_jquota_fmt, type); 5103 EXT4_SB(sb)->s_jquota_fmt, type);
5107 } 5104 }
5108 5105
/*
 * Standard function to be called on quota_on: validates the quota file's
 * location, flushes the journal when needed, then delegates to the VFS.
 */
static int ext4_quota_on(struct super_block *sb, int type, int format_id,
			 struct path *path)
{
	int err;

	if (!test_opt(sb, QUOTA))
		return -EINVAL;

	/* Quotafile not on the same filesystem? */
	if (path->dentry->d_sb != sb)
		return -EXDEV;
	/* Journaling quota? */
	if (EXT4_SB(sb)->s_qf_names[type]) {
		/* Quotafile not in fs root? Warn but keep going. */
		if (path->dentry->d_parent != sb->s_root)
			ext4_msg(sb, KERN_WARNING,
				"Quota file not on filesystem root. "
				"Journaled quota will not work");
	}

	/*
	 * When we journal data on quota file, we have to flush journal to see
	 * all updates to the file when we bypass pagecache...
	 */
	if (EXT4_SB(sb)->s_journal &&
	    ext4_should_journal_data(path->dentry->d_inode)) {
		/*
		 * We don't need to lock updates but journal_flush() could
		 * otherwise be livelocked...
		 */
		jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
		err = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
		jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
		if (err)
			return err;
	}

	return dquot_quota_on(sb, type, format_id, path);
}
5151 5148
/*
 * Enable quota accounting for one quota type using the hidden quota
 * inode recorded in the superblock (QUOTA feature filesystems only).
 *
 * Returns -EPERM when no quota inode is set for @type, the ext4_iget()
 * error for a bad inode, or the result of dquot_enable().
 */
static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
			     unsigned int flags)
{
	int err;
	struct inode *qf_inode;
	/* Quota inode numbers, indexed by quota type (usr, grp). */
	unsigned long qf_inums[MAXQUOTAS] = {
		le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
		le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
	};

	BUG_ON(!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA));

	if (!qf_inums[type])
		return -EPERM;

	qf_inode = ext4_iget(sb, qf_inums[type]);
	if (IS_ERR(qf_inode)) {
		ext4_error(sb, "Bad quota inode # %lu", qf_inums[type]);
		return PTR_ERR(qf_inode);
	}

	/* Don't account quota for quota files to avoid recursion */
	qf_inode->i_flags |= S_NOQUOTA;
	err = dquot_enable(qf_inode, type, format_id, flags);
	/* dquot_enable() grabs its own reference if needed; drop ours. */
	iput(qf_inode);

	return err;
}
5180 5177
/* Enable usage tracking for all quota types (called at mount time for
 * filesystems with the QUOTA feature; limits are enabled separately
 * via quota_on). Stops and returns the error of the first type that
 * fails to enable. */
static int ext4_enable_quotas(struct super_block *sb)
{
	int type, err = 0;
	/* Quota inode numbers, indexed by quota type (usr, grp). */
	unsigned long qf_inums[MAXQUOTAS] = {
		le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
		le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
	};

	/* Quota files are hidden system inodes, not user-visible files. */
	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
	for (type = 0; type < MAXQUOTAS; type++) {
		if (qf_inums[type]) {
			err = ext4_quota_enable(sb, type, QFMT_VFS_V1,
						DQUOT_USAGE_ENABLED);
			if (err) {
				ext4_warning(sb,
					"Failed to enable quota tracking "
					"(type=%d, err=%d). Please run "
					"e2fsck to fix.", type, err);
				return err;
			}
		}
	}
	return 0;
}
5206 5203
5207 /* 5204 /*
5208 * quota_on function that is used when QUOTA feature is set. 5205 * quota_on function that is used when QUOTA feature is set.
5209 */ 5206 */
5210 static int ext4_quota_on_sysfile(struct super_block *sb, int type, 5207 static int ext4_quota_on_sysfile(struct super_block *sb, int type,
5211 int format_id) 5208 int format_id)
5212 { 5209 {
5213 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) 5210 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA))
5214 return -EINVAL; 5211 return -EINVAL;
5215 5212
5216 /* 5213 /*
5217 * USAGE was enabled at mount time. Only need to enable LIMITS now. 5214 * USAGE was enabled at mount time. Only need to enable LIMITS now.
5218 */ 5215 */
5219 return ext4_quota_enable(sb, type, format_id, DQUOT_LIMITS_ENABLED); 5216 return ext4_quota_enable(sb, type, format_id, DQUOT_LIMITS_ENABLED);
5220 } 5217 }
5221 5218
/*
 * Turn quotas off for one type: flush pending delalloc writes, freshen
 * the quota file's timestamps, then let the VFS do the actual disable.
 * Journal failures here are deliberately ignored; quota_off proceeds.
 */
static int ext4_quota_off(struct super_block *sb, int type)
{
	struct inode *inode = sb_dqopt(sb)->files[type];
	handle_t *handle;

	/* Force all delayed allocation blocks to be allocated.
	 * Caller already holds s_umount sem */
	if (test_opt(sb, DELALLOC))
		sync_filesystem(sb);

	/* No quota file inode: nothing to time-stamp. */
	if (!inode)
		goto out;

	/* Update modification times of quota files when userspace can
	 * start looking at them */
	handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
	if (IS_ERR(handle))
		goto out;
	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
	ext4_mark_inode_dirty(handle, inode);
	ext4_journal_stop(handle);

out:
	return dquot_quota_off(sb, type);
}
5247 5244
5248 /* 5245 /*
5249 * quota_off function that is used when QUOTA feature is set. 5246 * quota_off function that is used when QUOTA feature is set.
5250 */ 5247 */
5251 static int ext4_quota_off_sysfile(struct super_block *sb, int type) 5248 static int ext4_quota_off_sysfile(struct super_block *sb, int type)
5252 { 5249 {
5253 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) 5250 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA))
5254 return -EINVAL; 5251 return -EINVAL;
5255 5252
5256 /* Disable only the limits. */ 5253 /* Disable only the limits. */
5257 return dquot_disable(sb, type, DQUOT_LIMITS_ENABLED); 5254 return dquot_disable(sb, type, DQUOT_LIMITS_ENABLED);
5258 } 5255 }
5259 5256
/* Read data from quotafile - avoid pagecache and such because we cannot afford
 * acquiring the locks... As quota files are never truncated and quota code
 * itself serializes the operations (and no one else should touch the files)
 * we don't have to be afraid of races */
static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
			       size_t len, loff_t off)
{
	struct inode *inode = sb_dqopt(sb)->files[type];
	ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
	int err = 0;
	int offset = off & (sb->s_blocksize - 1);
	int tocopy;
	size_t toread;
	struct buffer_head *bh;
	loff_t i_size = i_size_read(inode);

	/* Clamp the read to the quota file's current size. */
	if (off > i_size)
		return 0;
	if (off+len > i_size)
		len = i_size-off;
	toread = len;
	/* Walk the file block by block, copying out of each buffer_head. */
	while (toread > 0) {
		tocopy = sb->s_blocksize - offset < toread ?
				sb->s_blocksize - offset : toread;
		bh = ext4_bread(NULL, inode, blk, 0, &err);
		if (err)
			return err;
		if (!bh)	/* A hole? */
			memset(data, 0, tocopy);
		else
			memcpy(data, bh->b_data+offset, tocopy);
		brelse(bh);
		/* Only the first block can start at a non-zero offset. */
		offset = 0;
		toread -= tocopy;
		data += tocopy;
		blk++;
	}
	return len;
}
5299 5296
/* Write to quotafile (we know the transaction is already started and has
 * enough credits) */
static ssize_t ext4_quota_write(struct super_block *sb, int type,
				const char *data, size_t len, loff_t off)
{
	struct inode *inode = sb_dqopt(sb)->files[type];
	ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
	int err = 0;
	int offset = off & (sb->s_blocksize - 1);
	struct buffer_head *bh;
	handle_t *handle = journal_current_handle();

	/* Journaled fs: refuse to write outside a running transaction. */
	if (EXT4_SB(sb)->s_journal && !handle) {
		ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
			" cancelled because transaction is not started",
			(unsigned long long)off, (unsigned long long)len);
		return -EIO;
	}
	/*
	 * Since we account only one data block in transaction credits,
	 * then it is impossible to cross a block boundary.
	 */
	if (sb->s_blocksize - offset < len) {
		ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
			" cancelled because not block aligned",
			(unsigned long long)off, (unsigned long long)len);
		return -EIO;
	}

	/* Map (allocating if needed) the single target block. */
	bh = ext4_bread(handle, inode, blk, 1, &err);
	if (!bh)
		goto out;
	err = ext4_journal_get_write_access(handle, bh);
	if (err) {
		brelse(bh);
		goto out;
	}
	lock_buffer(bh);
	memcpy(bh->b_data+offset, data, len);
	flush_dcache_page(bh->b_page);
	unlock_buffer(bh);
	err = ext4_handle_dirty_metadata(handle, NULL, bh);
	brelse(bh);
out:
	if (err)
		return err;
	/* Extend the quota file's size if the write grew it. */
	if (inode->i_size < off + len) {
		i_size_write(inode, off + len);
		EXT4_I(inode)->i_disksize = inode->i_size;
		ext4_mark_inode_dirty(handle, inode);
	}
	return len;
}
5353 5350
5354 #endif 5351 #endif
5355 5352
/* Mount entry point: delegate to the generic block-device mount helper
 * with ext4_fill_super doing the filesystem-specific setup. */
static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
		       const char *dev_name, void *data)
{
	return mount_bdev(fs_type, flags, dev_name, data, ext4_fill_super);
}
5361 5358
5362 #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) 5359 #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
5363 static inline void register_as_ext2(void) 5360 static inline void register_as_ext2(void)
5364 { 5361 {
5365 int err = register_filesystem(&ext2_fs_type); 5362 int err = register_filesystem(&ext2_fs_type);
5366 if (err) 5363 if (err)
5367 printk(KERN_WARNING 5364 printk(KERN_WARNING
5368 "EXT4-fs: Unable to register as ext2 (%d)\n", err); 5365 "EXT4-fs: Unable to register as ext2 (%d)\n", err);
5369 } 5366 }
5370 5367
/* Undo register_as_ext2(): drop the "ext2" filesystem registration. */
static inline void unregister_as_ext2(void)
{
	unregister_filesystem(&ext2_fs_type);
}
5375 5372
5376 static inline int ext2_feature_set_ok(struct super_block *sb) 5373 static inline int ext2_feature_set_ok(struct super_block *sb)
5377 { 5374 {
5378 if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP)) 5375 if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP))
5379 return 0; 5376 return 0;
5380 if (sb->s_flags & MS_RDONLY) 5377 if (sb->s_flags & MS_RDONLY)
5381 return 1; 5378 return 1;
5382 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP)) 5379 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP))
5383 return 0; 5380 return 0;
5384 return 1; 5381 return 1;
5385 } 5382 }
5386 #else 5383 #else
5387 static inline void register_as_ext2(void) { } 5384 static inline void register_as_ext2(void) { }
5388 static inline void unregister_as_ext2(void) { } 5385 static inline void unregister_as_ext2(void) { }
5389 static inline int ext2_feature_set_ok(struct super_block *sb) { return 0; } 5386 static inline int ext2_feature_set_ok(struct super_block *sb) { return 0; }
5390 #endif 5387 #endif
5391 5388
#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
/* Also claim the "ext3" filesystem type when no ext3 driver is built. */
static inline void register_as_ext3(void)
{
	int ret;

	ret = register_filesystem(&ext3_fs_type);
	if (ret)
		printk(KERN_WARNING
		       "EXT4-fs: Unable to register as ext3 (%d)\n", ret);
}

static inline void unregister_as_ext3(void)
{
	unregister_filesystem(&ext3_fs_type);
}

/*
 * Return 1 if the on-disk feature flags allow this filesystem to be
 * driven as ext3, 0 otherwise.  Unlike ext2, ext3 additionally requires
 * a journal (COMPAT_HAS_JOURNAL).  A read-only mount only needs the
 * incompat features within ext3's supported set; a read-write mount
 * also needs the ro_compat features to be supported.
 */
static inline int ext3_feature_set_ok(struct super_block *sb)
{
	if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP))
		return 0;
	if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
		return 0;
	return (sb->s_flags & MS_RDONLY) ||
	       !EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP);
}
#else
static inline void register_as_ext3(void) { }
static inline void unregister_as_ext3(void) { }
static inline int ext3_feature_set_ok(struct super_block *sb) { return 0; }
#endif
5423 5420
/*
 * VFS registration record for the "ext4" filesystem type.  mount()
 * requests for "ext4" are routed to ext4_mount(); teardown uses the
 * generic kill_block_super().  FS_REQUIRES_DEV: ext4 always mounts a
 * real block device.
 */
static struct file_system_type ext4_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "ext4",
	.mount		= ext4_mount,
	.kill_sb	= kill_block_super,
	.fs_flags	= FS_REQUIRES_DEV,
};
/* Let "modprobe fs-ext4" (and mount -t ext4 autoloading) find this module. */
MODULE_ALIAS_FS("ext4");
5432 5429
5433 static int __init ext4_init_feat_adverts(void) 5430 static int __init ext4_init_feat_adverts(void)
5434 { 5431 {
5435 struct ext4_features *ef; 5432 struct ext4_features *ef;
5436 int ret = -ENOMEM; 5433 int ret = -ENOMEM;
5437 5434
5438 ef = kzalloc(sizeof(struct ext4_features), GFP_KERNEL); 5435 ef = kzalloc(sizeof(struct ext4_features), GFP_KERNEL);
5439 if (!ef) 5436 if (!ef)
5440 goto out; 5437 goto out;
5441 5438
5442 ef->f_kobj.kset = ext4_kset; 5439 ef->f_kobj.kset = ext4_kset;
5443 init_completion(&ef->f_kobj_unregister); 5440 init_completion(&ef->f_kobj_unregister);
5444 ret = kobject_init_and_add(&ef->f_kobj, &ext4_feat_ktype, NULL, 5441 ret = kobject_init_and_add(&ef->f_kobj, &ext4_feat_ktype, NULL,
5445 "features"); 5442 "features");
5446 if (ret) { 5443 if (ret) {
5447 kfree(ef); 5444 kfree(ef);
5448 goto out; 5445 goto out;
5449 } 5446 }
5450 5447
5451 ext4_feat = ef; 5448 ext4_feat = ef;
5452 ret = 0; 5449 ret = 0;
5453 out: 5450 out:
5454 return ret; 5451 return ret;
5455 } 5452 }
5456 5453
/*
 * Tear down the /sys/fs/ext4/features kobject created by
 * ext4_init_feat_adverts().  The order matters: drop the reference,
 * wait until the kobject's release has actually run (the release
 * completes f_kobj_unregister), and only then free the containing
 * structure.
 */
static void ext4_exit_feat_adverts(void)
{
	kobject_put(&ext4_feat->f_kobj);
	wait_for_completion(&ext4_feat->f_kobj_unregister);
	kfree(ext4_feat);
}
5463 5460
/*
 * Shared across all ext4 file systems: hashed pools of wait queues
 * (for I/O-end waiters) and mutexes (for AIO serialization), indexed
 * by a hash of the inode so unrelated inodes rarely contend.
 */
wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
5467 5464
/*
 * Module init: bring up every global ext4 subsystem, then register the
 * filesystem type(s) with the VFS.  Subsystems are initialized in a
 * fixed order and the goto ladder below unwinds them in exact reverse
 * order on failure — each outN label tears down only what succeeded
 * before the failing step.
 *
 * Returns 0 on success or the first subsystem's negative errno.
 */
static int __init ext4_init_fs(void)
{
	int i, err;

	ext4_li_info = NULL;
	mutex_init(&ext4_li_mtx);

	/* Build-time check for flags consistency */
	ext4_check_flag_values();

	/* Initialize the shared per-hash-bucket AIO mutexes and
	 * I/O-end wait queues used by all ext4 mounts. */
	for (i = 0; i < EXT4_WQ_HASH_SZ; i++) {
		mutex_init(&ext4__aio_mutex[i]);
		init_waitqueue_head(&ext4__ioend_wq[i]);
	}

	err = ext4_init_es();
	if (err)
		return err;

	err = ext4_init_pageio();
	if (err)
		goto out7;

	err = ext4_init_system_zone();
	if (err)
		goto out6;
	ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
	if (!ext4_kset) {
		err = -ENOMEM;
		goto out5;
	}
	/* Best-effort: a NULL proc root is tolerated (checked at out4). */
	ext4_proc_root = proc_mkdir("fs/ext4", NULL);

	err = ext4_init_feat_adverts();
	if (err)
		goto out4;

	err = ext4_init_mballoc();
	if (err)
		goto out3;

	err = ext4_init_xattr();
	if (err)
		goto out2;
	err = init_inodecache();
	if (err)
		goto out1;
	/* Optionally claim ext2/ext3 (no-ops if those drivers exist),
	 * then register ext4 itself last. */
	register_as_ext3();
	register_as_ext2();
	err = register_filesystem(&ext4_fs_type);
	if (err)
		goto out;

	return 0;
out:
	unregister_as_ext2();
	unregister_as_ext3();
	destroy_inodecache();
out1:
	ext4_exit_xattr();
out2:
	ext4_exit_mballoc();
out3:
	ext4_exit_feat_adverts();
out4:
	if (ext4_proc_root)
		remove_proc_entry("fs/ext4", NULL);
	kset_unregister(ext4_kset);
out5:
	ext4_exit_system_zone();
out6:
	ext4_exit_pageio();
out7:
	ext4_exit_es();

	return err;
}
5545 5542
/*
 * Module exit: unregister from the VFS first (so no new mounts can
 * start), then tear down the global subsystems in exact reverse order
 * of ext4_init_fs().
 */
static void __exit ext4_exit_fs(void)
{
	/* Stop the lazy-init thread before anything it uses goes away. */
	ext4_destroy_lazyinit_thread();
	unregister_as_ext2();
	unregister_as_ext3();
	unregister_filesystem(&ext4_fs_type);
	destroy_inodecache();
	ext4_exit_xattr();
	ext4_exit_mballoc();
	ext4_exit_feat_adverts();
	remove_proc_entry("fs/ext4", NULL);
	kset_unregister(ext4_kset);
	ext4_exit_system_zone();
	ext4_exit_pageio();
	ext4_exit_es();
}
5562 5559
/* Module metadata and entry/exit point registration. */
MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
MODULE_DESCRIPTION("Fourth Extended Filesystem");
MODULE_LICENSE("GPL");
module_init(ext4_init_fs)
module_exit(ext4_exit_fs)