Blame view
fs/ext4/ialloc.c
33.5 KB
ac27a0ec1
|
1 |
/* |
617ba13b3
|
2 |
* linux/fs/ext4/ialloc.c |
ac27a0ec1
|
3 4 5 6 7 8 9 10 11 12 13 14 15 16 |
* * Copyright (C) 1992, 1993, 1994, 1995 * Remy Card (card@masi.ibp.fr) * Laboratoire MASI - Institut Blaise Pascal * Universite Pierre et Marie Curie (Paris VI) * * BSD ufs-inspired inode and directory allocation by * Stephen Tweedie (sct@redhat.com), 1993 * Big-endian to little-endian byte-swapping/bitmaps by * David S. Miller (davem@caip.rutgers.edu), 1995 */ #include <linux/time.h> #include <linux/fs.h> |
dab291af8
|
17 |
#include <linux/jbd2.h> |
ac27a0ec1
|
18 19 20 21 22 23 |
#include <linux/stat.h> #include <linux/string.h> #include <linux/quotaops.h> #include <linux/buffer_head.h> #include <linux/random.h> #include <linux/bitops.h> |
3a5b2ecdd
|
24 |
#include <linux/blkdev.h> |
ac27a0ec1
|
25 |
#include <asm/byteorder.h> |
9bffad1ed
|
26 |
|
3dcf54515
|
27 28 |
#include "ext4.h" #include "ext4_jbd2.h" |
ac27a0ec1
|
29 30 |
#include "xattr.h" #include "acl.h" |
9bffad1ed
|
31 |
#include <trace/events/ext4.h> |
ac27a0ec1
|
32 33 34 35 36 37 38 39 40 41 42 43 44 |
/* * ialloc.c contains the inodes allocation and deallocation routines */ /* * The free inodes are managed by bitmaps. A file system contains several * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap * block for inodes, N blocks for the inode table and data blocks. * * The file system contains group descriptors which are located after the * super block. Each descriptor contains the number of the bitmap block and * the free blocks count in the block. */ |
717d50e49
|
45 46 47 48 49 |
/*
 * Mark the bits from start_bit up to end_bit as used in @bitmap.
 *
 * To avoid calling the atomic setbit hundreds or thousands of times, it is
 * only used within a single byte, i.e. up to the next byte boundary (to
 * ensure we get endianness right).  memset is used for the rest of the
 * bitmap as there are no other users.
 *
 * Nothing is done when start_bit >= end_bit.  After the first byte boundary
 * only whole bytes are filled, and the bit-by-bit loop always runs up to
 * that boundary, so the range is marked exactly only when end_bit is a
 * multiple of 8.
 */
void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
{
	int i;

	if (start_bit >= end_bit)
		return;

	/* The format must end in a newline so the log line is terminated. */
	ext4_debug("mark end bits +%d through +%d used\n", start_bit, end_bit);

	/* Set single bits until i reaches a multiple of 8. */
	for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++)
		ext4_set_bit(i, bitmap);

	/* Fill the remaining whole bytes in one go. */
	if (i < end_bit)
		memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3);
}

/* Initializes an uninitialized inode bitmap */
1f109d5a1
|
66 67 68 69 |
/*
 * Build the in-memory inode bitmap for @block_group in the buffer @bh.
 *
 * The buffer must be locked by the caller; this is asserted on entry.
 *
 * Returns the number of inodes per group when the bitmap was set up
 * normally.  Returns 0 when the group descriptor checksum does not verify;
 * in that case the free cluster, free inode and unused inode-table counts
 * in @gdp are set to zero and the whole bitmap block is filled with ones.
 */
static unsigned ext4_init_inode_bitmap(struct super_block *sb,
				       struct buffer_head *bh,
				       ext4_group_t block_group,
				       struct ext4_group_desc *gdp)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	J_ASSERT_BH(bh, buffer_locked(bh));

	if (ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
		/*
		 * Clear the bytes covering this group's inodes, then mark
		 * everything from the last inode to the end of the block
		 * as used.
		 */
		memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8);
		ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb),
				     sb->s_blocksize * 8, bh->b_data);

		return EXT4_INODES_PER_GROUP(sb);
	}

	/*
	 * The checksum is bad: mark all blocks and inodes in use to prevent
	 * allocation, essentially implementing a per-group read-only flag.
	 */
	ext4_error(sb, "Checksum bad for group %u", block_group);
	ext4_free_group_clusters_set(sb, gdp, 0);
	ext4_free_inodes_set(sb, gdp, 0);
	ext4_itable_unused_set(sb, gdp, 0);
	memset(bh->b_data, 0xff, sb->s_blocksize);

	return 0;
}
ac27a0ec1
|
92 93 94 95 96 97 98 99 |
/* * Read the inode allocation bitmap for a given block_group, reading * into the specified slot in the superblock's bitmap cache. * * Return buffer_head of bitmap on success or NULL. */ static struct buffer_head * |
e29d1cde6
|
100 |
ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) |
ac27a0ec1
|
101 |
{ |
617ba13b3
|
102 |
struct ext4_group_desc *desc; |
ac27a0ec1
|
103 |
struct buffer_head *bh = NULL; |
e29d1cde6
|
104 |
ext4_fsblk_t bitmap_blk; |
ac27a0ec1
|
105 |
|
617ba13b3
|
106 |
desc = ext4_get_group_desc(sb, block_group, NULL); |
ac27a0ec1
|
107 |
if (!desc) |
e29d1cde6
|
108 |
return NULL; |
bfff68738
|
109 |
|
e29d1cde6
|
110 111 112 |
bitmap_blk = ext4_inode_bitmap(sb, desc); bh = sb_getblk(sb, bitmap_blk); if (unlikely(!bh)) { |
12062dddd
|
113 |
ext4_error(sb, "Cannot read inode bitmap - " |
a9df9a491
|
114 |
"block_group = %u, inode_bitmap = %llu", |
e29d1cde6
|
115 116 117 |
block_group, bitmap_blk); return NULL; } |
2ccb5fb9f
|
118 |
if (bitmap_uptodate(bh)) |
e29d1cde6
|
119 |
return bh; |
c806e68f5
|
120 |
lock_buffer(bh); |
2ccb5fb9f
|
121 122 123 124 |
if (bitmap_uptodate(bh)) { unlock_buffer(bh); return bh; } |
bfff68738
|
125 |
|
955ce5f5b
|
126 |
ext4_lock_group(sb, block_group); |
717d50e49
|
127 |
if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { |
e29d1cde6
|
128 |
ext4_init_inode_bitmap(sb, bh, block_group, desc); |
2ccb5fb9f
|
129 |
set_bitmap_uptodate(bh); |
e29d1cde6
|
130 |
set_buffer_uptodate(bh); |
955ce5f5b
|
131 |
ext4_unlock_group(sb, block_group); |
3300beda5
|
132 |
unlock_buffer(bh); |
e29d1cde6
|
133 |
return bh; |
717d50e49
|
134 |
} |
955ce5f5b
|
135 |
ext4_unlock_group(sb, block_group); |
bfff68738
|
136 |
|
2ccb5fb9f
|
137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 |
if (buffer_uptodate(bh)) { /* * if not uninit if bh is uptodate, * bitmap is also uptodate */ set_bitmap_uptodate(bh); unlock_buffer(bh); return bh; } /* * submit the buffer_head for read. We can * safely mark the bitmap as uptodate now. * We do it here so the bitmap uptodate bit * get set with buffer lock held. */ |
0562e0bad
|
152 |
trace_ext4_load_inode_bitmap(sb, block_group); |
2ccb5fb9f
|
153 |
set_bitmap_uptodate(bh); |
e29d1cde6
|
154 155 |
if (bh_submit_read(bh) < 0) { put_bh(bh); |
12062dddd
|
156 |
ext4_error(sb, "Cannot read inode bitmap - " |
a9df9a491
|
157 |
"block_group = %u, inode_bitmap = %llu", |
e29d1cde6
|
158 159 160 |
block_group, bitmap_blk); return NULL; } |
ac27a0ec1
|
161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 |
return bh; } /* * NOTE! When we get the inode, we're the only people * that have access to it, and as such there are no * race conditions we have to worry about. The inode * is not on the hash-lists, and it cannot be reached * through the filesystem because the directory entry * has been deleted earlier. * * HOWEVER: we must make sure that we get no aliases, * which means that we have to call "clear_inode()" * _before_ we mark the inode not in use in the inode * bitmaps. Otherwise a newly created file might use * the same inode number (not actually the same pointer * though), and then we'd have two inodes sharing the * same inode number and space on the harddisk. */ |
af5bc92dd
|
180 |
/*
 * Release the on-disk inode used by @inode within transaction @handle.
 *
 * Quota and extended attributes are dropped and the in-core inode is
 * cleared first.  Then the inode's bit is cleared in its group's inode
 * bitmap and the free-inode / used-directory counts are updated in the
 * group descriptor, the per-cpu counters and (when flex groups are in use)
 * the flex group.
 *
 * The function returns nothing: refusals are logged with printk() or
 * ext4_error(), and the final status is handed to ext4_std_error().
 */
void ext4_free_inode(handle_t *handle, struct inode *inode)
{
	struct super_block *sb = inode->i_sb;
	int is_directory;
	unsigned long ino;
	struct buffer_head *bitmap_bh = NULL;
	struct buffer_head *bh2;
	ext4_group_t block_group;
	unsigned long bit;
	struct ext4_group_desc *gdp;
	struct ext4_super_block *es;
	struct ext4_sb_info *sbi;
	int fatal = 0, err, count, cleared;

	/* Refuse to free an inode that is still referenced or linked. */
	if (atomic_read(&inode->i_count) > 1) {
		printk(KERN_ERR "ext4_free_inode: inode has count=%d\n",
		       atomic_read(&inode->i_count));
		return;
	}
	if (inode->i_nlink) {
		printk(KERN_ERR "ext4_free_inode: inode has nlink=%d\n",
		       inode->i_nlink);
		return;
	}
	if (!sb) {
		printk(KERN_ERR "ext4_free_inode: inode on "
		       "nonexistent device\n");
		return;
	}
	sbi = EXT4_SB(sb);

	ino = inode->i_ino;
	ext4_debug("freeing inode %lu\n", ino);
	trace_ext4_free_inode(inode);

	/*
	 * Note: we must free any quota before locking the superblock,
	 * as writing the quota to disk may need the lock as well.
	 */
	dquot_initialize(inode);
	ext4_xattr_delete_inode(handle, inode);
	dquot_free_inode(inode);
	dquot_drop(inode);

	/* Sample the mode before the in-core inode is cleared. */
	is_directory = S_ISDIR(inode->i_mode);

	/* Do this BEFORE marking the inode not in use or returning an error */
	ext4_clear_inode(inode);

	es = EXT4_SB(sb)->s_es;
	if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
		ext4_error(sb, "reserved or nonexistent inode %lu", ino);
		goto error_return;
	}

	/* Inode numbers start at 1; bit 0 of group 0 is inode 1. */
	block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
	bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
	bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
	if (!bitmap_bh)
		goto error_return;

	BUFFER_TRACE(bitmap_bh, "get_write_access");
	fatal = ext4_journal_get_write_access(handle, bitmap_bh);
	if (fatal)
		goto error_return;

	/* -ESRCH is reported if the group descriptor cannot be found. */
	fatal = -ESRCH;
	gdp = ext4_get_group_desc(sb, block_group, &bh2);
	if (gdp) {
		BUFFER_TRACE(bh2, "get_write_access");
		fatal = ext4_journal_get_write_access(handle, bh2);
	}

	/*
	 * The bit is cleared under the group lock even when an error is
	 * pending; the descriptor counts are only touched when there is no
	 * error and the bit really was set.
	 */
	ext4_lock_group(sb, block_group);
	cleared = ext4_test_and_clear_bit(bit, bitmap_bh->b_data);
	if (fatal || !cleared) {
		ext4_unlock_group(sb, block_group);
		goto out;
	}

	count = ext4_free_inodes_count(sb, gdp) + 1;
	ext4_free_inodes_set(sb, gdp, count);
	if (is_directory) {
		count = ext4_used_dirs_count(sb, gdp) - 1;
		ext4_used_dirs_set(sb, gdp, count);
		percpu_counter_dec(&sbi->s_dirs_counter);
	}
	gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
	ext4_unlock_group(sb, block_group);

	percpu_counter_inc(&sbi->s_freeinodes_counter);
	if (sbi->s_log_groups_per_flex) {
		ext4_group_t f = ext4_flex_group(sbi, block_group);

		atomic_inc(&sbi->s_flex_groups[f].free_inodes);
		if (is_directory)
			atomic_dec(&sbi->s_flex_groups[f].used_dirs);
	}
	BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata");
	fatal = ext4_handle_dirty_metadata(handle, NULL, bh2);

out:
	if (cleared) {
		BUFFER_TRACE(bitmap_bh, "call ext4_handle_dirty_metadata");
		err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
		/* Keep the first error seen. */
		if (!fatal)
			fatal = err;
		ext4_mark_super_dirty(sb);
	} else
		ext4_error(sb, "bit already cleared for inode %lu", ino);

error_return:
	brelse(bitmap_bh);
	ext4_std_error(sb, fatal);
}
a4912123b
|
294 295 |
/*
 * Counters for one block group or one flex group, as filled in by
 * get_orlov_stats().
 */
struct orlov_stats {
	__u32 free_inodes;	/* number of free inodes */
	__u32 free_clusters;	/* number of free clusters */
	__u32 used_dirs;	/* number of directories in use */
};

/*
 * Helper function for Orlov's allocator; returns critical information
 * for a particular block group or flex_bg.  If flex_size is 1, then g
 * is a block group number; otherwise it is flex_bg number.
 */
1f109d5a1
|
305 306 |
/*
 * Fill @stats with the free-inode, free-cluster and used-directory counts
 * for @g.
 *
 * When @flex_size is greater than 1, @g indexes the superblock's flex
 * group array and the counts are read from there.  Otherwise @g is a block
 * group number and the counts come from its group descriptor; if that
 * descriptor cannot be found all three counts are reported as zero.
 */
static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
			    int flex_size, struct orlov_stats *stats)
{
	struct ext4_group_desc *gd;

	if (flex_size > 1) {
		struct flex_groups *fg = &EXT4_SB(sb)->s_flex_groups[g];

		stats->free_inodes = atomic_read(&fg->free_inodes);
		stats->free_clusters = atomic_read(&fg->free_clusters);
		stats->used_dirs = atomic_read(&fg->used_dirs);
		return;
	}

	gd = ext4_get_group_desc(sb, g, NULL);
	if (!gd) {
		stats->free_inodes = 0;
		stats->free_clusters = 0;
		stats->used_dirs = 0;
		return;
	}

	stats->free_inodes = ext4_free_inodes_count(sb, gd);
	stats->free_clusters = ext4_free_group_clusters(sb, gd);
	stats->used_dirs = ext4_used_dirs_count(sb, gd);
}
ac27a0ec1
|
329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 |
/* * Orlov's allocator for directories. * * We always try to spread first-level directories. * * If there are blockgroups with both free inodes and free blocks counts * not worse than average we return one with smallest directory count. * Otherwise we simply return a random group. * * For the rest rules look so: * * It's OK to put directory into a group unless * it has too many directories already (max_dirs) or * it has too few free inodes left (min_inodes) or * it has too few free blocks left (min_blocks) or |
1cc8dcf56
|
344 |
* Parent's group is preferred, if it doesn't satisfy these |
ac27a0ec1
|
345 346 347 |
* conditions we search cyclically through the rest. If none * of the groups look good we just look for a group with more * free inodes than average (starting at parent's group). |
ac27a0ec1
|
348 |
*/ |
2aa9fc4c4
|
349 |
static int find_group_orlov(struct super_block *sb, struct inode *parent, |
dcca3fec9
|
350 |
ext4_group_t *group, umode_t mode, |
f157a4aa9
|
351 |
const struct qstr *qstr) |
ac27a0ec1
|
352 |
{ |
fd2d42912
|
353 |
ext4_group_t parent_group = EXT4_I(parent)->i_block_group; |
617ba13b3
|
354 |
struct ext4_sb_info *sbi = EXT4_SB(sb); |
8df9675f8
|
355 |
ext4_group_t real_ngroups = ext4_get_groups_count(sb); |
617ba13b3
|
356 |
int inodes_per_group = EXT4_INODES_PER_GROUP(sb); |
14c83c9fd
|
357 |
unsigned int freei, avefreei, grp_free; |
24aaa8ef4
|
358 |
ext4_fsblk_t freeb, avefreec; |
ac27a0ec1
|
359 |
unsigned int ndirs; |
a4912123b
|
360 |
int max_dirs, min_inodes; |
24aaa8ef4
|
361 |
ext4_grpblk_t min_clusters; |
8df9675f8
|
362 |
ext4_group_t i, grp, g, ngroups; |
617ba13b3
|
363 |
struct ext4_group_desc *desc; |
a4912123b
|
364 365 |
struct orlov_stats stats; int flex_size = ext4_flex_bg_size(sbi); |
f157a4aa9
|
366 |
struct dx_hash_info hinfo; |
a4912123b
|
367 |
|
8df9675f8
|
368 |
ngroups = real_ngroups; |
a4912123b
|
369 |
if (flex_size > 1) { |
8df9675f8
|
370 |
ngroups = (real_ngroups + flex_size - 1) >> |
a4912123b
|
371 372 373 |
sbi->s_log_groups_per_flex; parent_group >>= sbi->s_log_groups_per_flex; } |
ac27a0ec1
|
374 375 376 |
freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter); avefreei = freei / ngroups; |
570426518
|
377 378 |
freeb = EXT4_C2B(sbi, percpu_counter_read_positive(&sbi->s_freeclusters_counter)); |
24aaa8ef4
|
379 380 |
avefreec = freeb; do_div(avefreec, ngroups); |
ac27a0ec1
|
381 |
ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter); |
a4912123b
|
382 383 |
if (S_ISDIR(mode) && ((parent == sb->s_root->d_inode) || |
12e9b8920
|
384 |
(ext4_test_inode_flag(parent, EXT4_INODE_TOPDIR)))) { |
ac27a0ec1
|
385 |
int best_ndir = inodes_per_group; |
2aa9fc4c4
|
386 |
int ret = -1; |
ac27a0ec1
|
387 |
|
f157a4aa9
|
388 389 390 391 392 393 394 |
if (qstr) { hinfo.hash_version = DX_HASH_HALF_MD4; hinfo.seed = sbi->s_hash_seed; ext4fs_dirhash(qstr->name, qstr->len, &hinfo); grp = hinfo.hash; } else get_random_bytes(&grp, sizeof(grp)); |
2aa9fc4c4
|
395 |
parent_group = (unsigned)grp % ngroups; |
ac27a0ec1
|
396 |
for (i = 0; i < ngroups; i++) { |
a4912123b
|
397 398 399 |
g = (parent_group + i) % ngroups; get_orlov_stats(sb, g, flex_size, &stats); if (!stats.free_inodes) |
ac27a0ec1
|
400 |
continue; |
a4912123b
|
401 |
if (stats.used_dirs >= best_ndir) |
ac27a0ec1
|
402 |
continue; |
a4912123b
|
403 |
if (stats.free_inodes < avefreei) |
ac27a0ec1
|
404 |
continue; |
24aaa8ef4
|
405 |
if (stats.free_clusters < avefreec) |
ac27a0ec1
|
406 |
continue; |
a4912123b
|
407 |
grp = g; |
2aa9fc4c4
|
408 |
ret = 0; |
a4912123b
|
409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 |
best_ndir = stats.used_dirs; } if (ret) goto fallback; found_flex_bg: if (flex_size == 1) { *group = grp; return 0; } /* * We pack inodes at the beginning of the flexgroup's * inode tables. Block allocation decisions will do * something similar, although regular files will * start at 2nd block group of the flexgroup. See * ext4_ext_find_goal() and ext4_find_near(). */ grp *= flex_size; for (i = 0; i < flex_size; i++) { |
8df9675f8
|
428 |
if (grp+i >= real_ngroups) |
a4912123b
|
429 430 431 432 433 434 |
break; desc = ext4_get_group_desc(sb, grp+i, NULL); if (desc && ext4_free_inodes_count(sb, desc)) { *group = grp+i; return 0; } |
ac27a0ec1
|
435 |
} |
ac27a0ec1
|
436 437 |
goto fallback; } |
ac27a0ec1
|
438 |
max_dirs = ndirs / ngroups + inodes_per_group / 16; |
a4912123b
|
439 440 441 |
min_inodes = avefreei - inodes_per_group*flex_size / 4; if (min_inodes < 1) min_inodes = 1; |
24aaa8ef4
|
442 |
min_clusters = avefreec - EXT4_CLUSTERS_PER_GROUP(sb)*flex_size / 4; |
a4912123b
|
443 444 445 446 447 448 449 450 451 452 |
/* * Start looking in the flex group where we last allocated an * inode for this parent directory */ if (EXT4_I(parent)->i_last_alloc_group != ~0) { parent_group = EXT4_I(parent)->i_last_alloc_group; if (flex_size > 1) parent_group >>= sbi->s_log_groups_per_flex; } |
ac27a0ec1
|
453 454 |
for (i = 0; i < ngroups; i++) { |
a4912123b
|
455 456 457 |
grp = (parent_group + i) % ngroups; get_orlov_stats(sb, grp, flex_size, &stats); if (stats.used_dirs >= max_dirs) |
ac27a0ec1
|
458 |
continue; |
a4912123b
|
459 |
if (stats.free_inodes < min_inodes) |
ac27a0ec1
|
460 |
continue; |
24aaa8ef4
|
461 |
if (stats.free_clusters < min_clusters) |
ac27a0ec1
|
462 |
continue; |
a4912123b
|
463 |
goto found_flex_bg; |
ac27a0ec1
|
464 465 466 |
} fallback: |
8df9675f8
|
467 |
ngroups = real_ngroups; |
a4912123b
|
468 |
avefreei = freei / ngroups; |
b5451f7b2
|
469 |
fallback_retry: |
a4912123b
|
470 |
parent_group = EXT4_I(parent)->i_block_group; |
ac27a0ec1
|
471 |
for (i = 0; i < ngroups; i++) { |
a4912123b
|
472 473 |
grp = (parent_group + i) % ngroups; desc = ext4_get_group_desc(sb, grp, NULL); |
14c83c9fd
|
474 475 |
grp_free = ext4_free_inodes_count(sb, desc); if (desc && grp_free && grp_free >= avefreei) { |
a4912123b
|
476 |
*group = grp; |
2aa9fc4c4
|
477 |
return 0; |
a4912123b
|
478 |
} |
ac27a0ec1
|
479 480 481 482 483 484 485 486 |
} if (avefreei) { /* * The free-inodes counter is approximate, and for really small * filesystems the above test can fail to find any blockgroups */ avefreei = 0; |
b5451f7b2
|
487 |
goto fallback_retry; |
ac27a0ec1
|
488 489 490 491 |
} return -1; } |
2aa9fc4c4
|
492 |
static int find_group_other(struct super_block *sb, struct inode *parent, |
dcca3fec9
|
493 |
ext4_group_t *group, umode_t mode) |
ac27a0ec1
|
494 |
{ |
fd2d42912
|
495 |
ext4_group_t parent_group = EXT4_I(parent)->i_block_group; |
8df9675f8
|
496 |
ext4_group_t i, last, ngroups = ext4_get_groups_count(sb); |
617ba13b3
|
497 |
struct ext4_group_desc *desc; |
a4912123b
|
498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 |
int flex_size = ext4_flex_bg_size(EXT4_SB(sb)); /* * Try to place the inode is the same flex group as its * parent. If we can't find space, use the Orlov algorithm to * find another flex group, and store that information in the * parent directory's inode information so that use that flex * group for future allocations. */ if (flex_size > 1) { int retry = 0; try_again: parent_group &= ~(flex_size-1); last = parent_group + flex_size; if (last > ngroups) last = ngroups; for (i = parent_group; i < last; i++) { desc = ext4_get_group_desc(sb, i, NULL); if (desc && ext4_free_inodes_count(sb, desc)) { *group = i; return 0; } } if (!retry && EXT4_I(parent)->i_last_alloc_group != ~0) { retry = 1; parent_group = EXT4_I(parent)->i_last_alloc_group; goto try_again; } /* * If this didn't work, use the Orlov search algorithm * to find a new flex group; we pass in the mode to * avoid the topdir algorithms. */ *group = parent_group + flex_size; if (*group > ngroups) *group = 0; |
7dc576158
|
535 |
return find_group_orlov(sb, parent, group, mode, NULL); |
a4912123b
|
536 |
} |
ac27a0ec1
|
537 538 539 540 |
/* * Try to place the inode in its parent directory */ |
2aa9fc4c4
|
541 542 |
*group = parent_group; desc = ext4_get_group_desc(sb, *group, NULL); |
560671a0d
|
543 |
if (desc && ext4_free_inodes_count(sb, desc) && |
021b65bb1
|
544 |
ext4_free_group_clusters(sb, desc)) |
2aa9fc4c4
|
545 |
return 0; |
ac27a0ec1
|
546 547 548 549 550 551 552 553 554 555 |
/* * We're going to place this inode in a different blockgroup from its * parent. We want to cause files in a common directory to all land in * the same blockgroup. But we want files which are in a different * directory which shares a blockgroup with our parent to land in a * different blockgroup. * * So add our directory's i_ino into the starting point for the hash. */ |
2aa9fc4c4
|
556 |
*group = (*group + parent->i_ino) % ngroups; |
ac27a0ec1
|
557 558 559 560 561 562 |
/* * Use a quadratic hash to find a group with a free inode and some free * blocks. */ for (i = 1; i < ngroups; i <<= 1) { |
2aa9fc4c4
|
563 564 565 566 |
*group += i; if (*group >= ngroups) *group -= ngroups; desc = ext4_get_group_desc(sb, *group, NULL); |
560671a0d
|
567 |
if (desc && ext4_free_inodes_count(sb, desc) && |
021b65bb1
|
568 |
ext4_free_group_clusters(sb, desc)) |
2aa9fc4c4
|
569 |
return 0; |
ac27a0ec1
|
570 571 572 573 574 575 |
} /* * That failed: try linear search for a free inode, even if that group * has no free blocks. */ |
2aa9fc4c4
|
576 |
*group = parent_group; |
ac27a0ec1
|
577 |
for (i = 0; i < ngroups; i++) { |
2aa9fc4c4
|
578 579 580 |
if (++*group >= ngroups) *group = 0; desc = ext4_get_group_desc(sb, *group, NULL); |
560671a0d
|
581 |
if (desc && ext4_free_inodes_count(sb, desc)) |
2aa9fc4c4
|
582 |
return 0; |
ac27a0ec1
|
583 584 585 586 587 588 |
} return -1; } /* |
393418676
|
589 |
* claim the inode from the inode bitmap. If the group |
955ce5f5b
|
590 |
* is uninit we need to take the groups's ext4_group_lock |
393418676
|
591 592 |
* and clear the uninit flag. The inode bitmap update * and group desc uninit flag clear should be done |
955ce5f5b
|
593 |
* after holding ext4_group_lock so that ext4_read_inode_bitmap |
393418676
|
594 595 596 597 |
* doesn't race with the ext4_claim_inode */ static int ext4_claim_inode(struct super_block *sb, struct buffer_head *inode_bitmap_bh, |
dcca3fec9
|
598 |
unsigned long ino, ext4_group_t group, umode_t mode) |
393418676
|
599 600 601 |
{ int free = 0, retval = 0, count; struct ext4_sb_info *sbi = EXT4_SB(sb); |
bfff68738
|
602 |
struct ext4_group_info *grp = ext4_get_group_info(sb, group); |
393418676
|
603 |
struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL); |
bfff68738
|
604 605 606 607 608 609 610 611 |
/* * We have to be sure that new inode allocation does not race with * inode table initialization, because otherwise we may end up * allocating and writing new inode right before sb_issue_zeroout * takes place and overwriting our new inode with zeroes. So we * take alloc_sem to prevent it. */ down_read(&grp->alloc_sem); |
955ce5f5b
|
612 |
ext4_lock_group(sb, group); |
597d508c1
|
613 |
if (ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data)) { |
393418676
|
614 615 616 617 618 619 620 |
/* not a free inode */ retval = 1; goto err_ret; } ino++; if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || ino > EXT4_INODES_PER_GROUP(sb)) { |
955ce5f5b
|
621 |
ext4_unlock_group(sb, group); |
bfff68738
|
622 |
up_read(&grp->alloc_sem); |
12062dddd
|
623 |
ext4_error(sb, "reserved inode or inode > inodes count - " |
393418676
|
624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 |
"block_group = %u, inode=%lu", group, ino + group * EXT4_INODES_PER_GROUP(sb)); return 1; } /* If we didn't allocate from within the initialized part of the inode * table then we need to initialize up to this inode. */ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT); /* When marking the block group with * ~EXT4_BG_INODE_UNINIT we don't want to depend * on the value of bg_itable_unused even though * mke2fs could have initialized the same for us. * Instead we calculated the value below */ free = 0; } else { free = EXT4_INODES_PER_GROUP(sb) - ext4_itable_unused_count(sb, gdp); } /* * Check the relative inode number against the last used * relative inode number in this group. if it is greater * we need to update the bg_itable_unused count * */ if (ino > free) ext4_itable_unused_set(sb, gdp, (EXT4_INODES_PER_GROUP(sb) - ino)); } count = ext4_free_inodes_count(sb, gdp) - 1; ext4_free_inodes_set(sb, gdp, count); if (S_ISDIR(mode)) { count = ext4_used_dirs_count(sb, gdp) + 1; ext4_used_dirs_set(sb, gdp, count); |
7d39db14a
|
662 663 |
if (sbi->s_log_groups_per_flex) { ext4_group_t f = ext4_flex_group(sbi, group); |
c4caae251
|
664 |
atomic_inc(&sbi->s_flex_groups[f].used_dirs); |
7d39db14a
|
665 |
} |
393418676
|
666 667 668 |
} gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); err_ret: |
955ce5f5b
|
669 |
ext4_unlock_group(sb, group); |
bfff68738
|
670 |
up_read(&grp->alloc_sem); |
393418676
|
671 672 673 674 |
return retval; } /* |
ac27a0ec1
|
675 676 677 678 679 680 681 682 683 |
* There are two policies for allocating an inode. If the new inode is * a directory, then a forward search is made for a block group with both * free space and a low directory-to-inode ratio; if that fails, then of * the groups with above-average free space, that group with the fewest * directories already is chosen. * * For other inodes, search forward from the parent directory's block * group to find a free inode. */ |
dcca3fec9
|
684 |
struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, umode_t mode, |
5cb81dabc
|
685 |
const struct qstr *qstr, __u32 goal, uid_t *owner) |
ac27a0ec1
|
686 687 |
{ struct super_block *sb; |
3300beda5
|
688 689 |
struct buffer_head *inode_bitmap_bh = NULL; struct buffer_head *group_desc_bh; |
8df9675f8
|
690 |
ext4_group_t ngroups, group = 0; |
ac27a0ec1
|
691 |
unsigned long ino = 0; |
af5bc92dd
|
692 693 |
struct inode *inode; struct ext4_group_desc *gdp = NULL; |
617ba13b3
|
694 695 |
struct ext4_inode_info *ei; struct ext4_sb_info *sbi; |
393418676
|
696 |
int ret2, err = 0; |
ac27a0ec1
|
697 |
struct inode *ret; |
2aa9fc4c4
|
698 |
ext4_group_t i; |
772cb7c83
|
699 |
ext4_group_t flex_group; |
ac27a0ec1
|
700 701 702 703 704 705 |
/* Cannot create files in a deleted directory */ if (!dir || !dir->i_nlink) return ERR_PTR(-EPERM); sb = dir->i_sb; |
8df9675f8
|
706 |
ngroups = ext4_get_groups_count(sb); |
9bffad1ed
|
707 |
trace_ext4_request_inode(dir, mode); |
ac27a0ec1
|
708 709 710 |
inode = new_inode(sb); if (!inode) return ERR_PTR(-ENOMEM); |
617ba13b3
|
711 |
ei = EXT4_I(inode); |
617ba13b3
|
712 |
sbi = EXT4_SB(sb); |
772cb7c83
|
713 |
|
11013911d
|
714 715 |
if (!goal) goal = sbi->s_inode_goal; |
e6462869e
|
716 |
if (goal && goal <= le32_to_cpu(sbi->s_es->s_inodes_count)) { |
11013911d
|
717 718 719 720 721 |
group = (goal - 1) / EXT4_INODES_PER_GROUP(sb); ino = (goal - 1) % EXT4_INODES_PER_GROUP(sb); ret2 = 0; goto got_group; } |
4113c4caa
|
722 723 724 |
if (S_ISDIR(mode)) ret2 = find_group_orlov(sb, dir, &group, mode, qstr); else |
a4912123b
|
725 |
ret2 = find_group_other(sb, dir, &group, mode); |
ac27a0ec1
|
726 |
|
772cb7c83
|
727 |
got_group: |
a4912123b
|
728 |
EXT4_I(dir)->i_last_alloc_group = group; |
ac27a0ec1
|
729 |
err = -ENOSPC; |
2aa9fc4c4
|
730 |
if (ret2 == -1) |
ac27a0ec1
|
731 |
goto out; |
11013911d
|
732 |
for (i = 0; i < ngroups; i++, ino = 0) { |
ac27a0ec1
|
733 |
err = -EIO; |
3300beda5
|
734 |
gdp = ext4_get_group_desc(sb, group, &group_desc_bh); |
ac27a0ec1
|
735 736 |
if (!gdp) goto fail; |
3300beda5
|
737 738 739 |
brelse(inode_bitmap_bh); inode_bitmap_bh = ext4_read_inode_bitmap(sb, group); if (!inode_bitmap_bh) |
ac27a0ec1
|
740 |
goto fail; |
ac27a0ec1
|
741 |
repeat_in_this_group: |
617ba13b3
|
742 |
ino = ext4_find_next_zero_bit((unsigned long *) |
3300beda5
|
743 744 |
inode_bitmap_bh->b_data, EXT4_INODES_PER_GROUP(sb), ino); |
617ba13b3
|
745 |
if (ino < EXT4_INODES_PER_GROUP(sb)) { |
ac27a0ec1
|
746 |
|
3300beda5
|
747 748 749 |
BUFFER_TRACE(inode_bitmap_bh, "get_write_access"); err = ext4_journal_get_write_access(handle, inode_bitmap_bh); |
ac27a0ec1
|
750 751 |
if (err) goto fail; |
393418676
|
752 753 754 755 756 757 758 |
BUFFER_TRACE(group_desc_bh, "get_write_access"); err = ext4_journal_get_write_access(handle, group_desc_bh); if (err) goto fail; if (!ext4_claim_inode(sb, inode_bitmap_bh, ino, group, mode)) { |
ac27a0ec1
|
759 |
/* we won it */ |
3300beda5
|
760 |
BUFFER_TRACE(inode_bitmap_bh, |
0390131ba
|
761 762 |
"call ext4_handle_dirty_metadata"); err = ext4_handle_dirty_metadata(handle, |
73b50c1c9
|
763 |
NULL, |
3300beda5
|
764 |
inode_bitmap_bh); |
ac27a0ec1
|
765 766 |
if (err) goto fail; |
393418676
|
767 768 |
/* zero bit is inode number 1*/ ino++; |
ac27a0ec1
|
769 770 771 |
goto got; } /* we lost it */ |
3300beda5
|
772 |
ext4_handle_release_buffer(handle, inode_bitmap_bh); |
393418676
|
773 |
ext4_handle_release_buffer(handle, group_desc_bh); |
ac27a0ec1
|
774 |
|
617ba13b3
|
775 |
if (++ino < EXT4_INODES_PER_GROUP(sb)) |
ac27a0ec1
|
776 777 778 779 780 781 782 783 784 785 |
goto repeat_in_this_group; } /* * This case is possible in concurrent environment. It is very * rare. We cannot repeat the find_group_xxx() call because * that will simply return the same blockgroup, because the * group descriptor metadata has not yet been updated. * So we just go onto the next blockgroup. */ |
8df9675f8
|
786 |
if (++group == ngroups) |
ac27a0ec1
|
787 788 789 790 791 792 |
group = 0; } err = -ENOSPC; goto out; got: |
717d50e49
|
793 794 795 |
/* We may have to initialize the block bitmap if it isn't already */ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) && gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { |
3300beda5
|
796 |
struct buffer_head *block_bitmap_bh; |
717d50e49
|
797 |
|
3300beda5
|
798 799 800 |
block_bitmap_bh = ext4_read_block_bitmap(sb, group); BUFFER_TRACE(block_bitmap_bh, "get block bitmap access"); err = ext4_journal_get_write_access(handle, block_bitmap_bh); |
717d50e49
|
801 |
if (err) { |
3300beda5
|
802 |
brelse(block_bitmap_bh); |
717d50e49
|
803 804 |
goto fail; } |
fd034a84e
|
805 806 807 |
BUFFER_TRACE(block_bitmap_bh, "dirty block bitmap"); err = ext4_handle_dirty_metadata(handle, NULL, block_bitmap_bh); brelse(block_bitmap_bh); |
717d50e49
|
808 |
/* recheck and clear flag under lock if we still need to */ |
fd034a84e
|
809 |
ext4_lock_group(sb, group); |
717d50e49
|
810 |
if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { |
3300beda5
|
811 |
gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); |
021b65bb1
|
812 |
ext4_free_group_clusters_set(sb, gdp, |
cff1dfd76
|
813 |
ext4_free_clusters_after_init(sb, group, gdp)); |
23712a9c2
|
814 815 |
gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); |
717d50e49
|
816 |
} |
955ce5f5b
|
817 |
ext4_unlock_group(sb, group); |
717d50e49
|
818 |
|
717d50e49
|
819 820 821 |
if (err) goto fail; } |
3300beda5
|
822 823 |
BUFFER_TRACE(group_desc_bh, "call ext4_handle_dirty_metadata"); err = ext4_handle_dirty_metadata(handle, NULL, group_desc_bh); |
393418676
|
824 825 |
if (err) goto fail; |
ac27a0ec1
|
826 827 828 829 |
percpu_counter_dec(&sbi->s_freeinodes_counter); if (S_ISDIR(mode)) percpu_counter_inc(&sbi->s_dirs_counter); |
a0375156c
|
830 |
ext4_mark_super_dirty(sb); |
ac27a0ec1
|
831 |
|
772cb7c83
|
832 833 |
if (sbi->s_log_groups_per_flex) { flex_group = ext4_flex_group(sbi, group); |
9f24e4208
|
834 |
atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes); |
772cb7c83
|
835 |
} |
5cb81dabc
|
836 837 838 839 840 |
if (owner) { inode->i_mode = mode; inode->i_uid = owner[0]; inode->i_gid = owner[1]; } else if (test_opt(sb, GRPID)) { |
b10b85209
|
841 842 |
inode->i_mode = mode; inode->i_uid = current_fsuid(); |
ac27a0ec1
|
843 |
inode->i_gid = dir->i_gid; |
ac27a0ec1
|
844 |
} else |
b10b85209
|
845 |
inode_init_owner(inode, dir, mode); |
ac27a0ec1
|
846 |
|
717d50e49
|
847 |
inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb); |
ac27a0ec1
|
848 849 |
/* This is the optimal IO size (for stat), not the fs block size */ inode->i_blocks = 0; |
ef7f38359
|
850 851 |
inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime = ext4_current_time(inode); |
ac27a0ec1
|
852 853 854 855 |
memset(ei->i_data, 0, sizeof(ei->i_data)); ei->i_dir_start_lookup = 0; ei->i_disksize = 0; |
4af835089
|
856 |
/* Don't inherit extent flag from directory, amongst others. */ |
2dc6b0d48
|
857 858 |
ei->i_flags = ext4_mask_flags(mode, EXT4_I(dir)->i_flags & EXT4_FL_INHERITED); |
ac27a0ec1
|
859 |
ei->i_file_acl = 0; |
ac27a0ec1
|
860 |
ei->i_dtime = 0; |
ac27a0ec1
|
861 |
ei->i_block_group = group; |
a4912123b
|
862 |
ei->i_last_alloc_group = ~0; |
ac27a0ec1
|
863 |
|
617ba13b3
|
864 |
ext4_set_inode_flags(inode); |
ac27a0ec1
|
865 |
if (IS_DIRSYNC(inode)) |
0390131ba
|
866 |
ext4_handle_sync(handle); |
6b38e842b
|
867 |
if (insert_inode_locked(inode) < 0) { |
acd6ad835
|
868 869 870 871 872 873 |
/* * Likely a bitmap corruption causing inode to be allocated * twice. */ err = -EIO; goto fail; |
6b38e842b
|
874 |
} |
ac27a0ec1
|
875 876 877 |
spin_lock(&sbi->s_next_gen_lock); inode->i_generation = sbi->s_next_generation++; spin_unlock(&sbi->s_next_gen_lock); |
353eb83c1
|
878 |
ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */ |
19f5fb7ad
|
879 |
ext4_set_inode_state(inode, EXT4_STATE_NEW); |
ef7f38359
|
880 881 |
ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize; |
ac27a0ec1
|
882 883 |
ret = inode; |
871a29315
|
884 |
dquot_initialize(inode); |
63936ddaa
|
885 886 |
err = dquot_alloc_inode(inode); if (err) |
ac27a0ec1
|
887 |
goto fail_drop; |
ac27a0ec1
|
888 |
|
617ba13b3
|
889 |
err = ext4_init_acl(handle, inode, dir); |
ac27a0ec1
|
890 891 |
if (err) goto fail_free_drop; |
2a7dba391
|
892 |
err = ext4_init_security(handle, inode, dir, qstr); |
ac27a0ec1
|
893 894 |
if (err) goto fail_free_drop; |
83982b6f4
|
895 |
if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) { |
e4079a11f
|
896 |
/* set extent flag only for directory, file and normal symlink*/ |
e65187e6d
|
897 |
if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) { |
12e9b8920
|
898 |
ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS); |
42bf0383d
|
899 |
ext4_ext_tree_init(handle, inode); |
42bf0383d
|
900 |
} |
a86c61812
|
901 |
} |
ac27a0ec1
|
902 |
|
688f869ce
|
903 904 905 906 |
if (ext4_handle_valid(handle)) { ei->i_sync_tid = handle->h_transaction->t_tid; ei->i_datasync_tid = handle->h_transaction->t_tid; } |
8753e88f1
|
907 908 909 910 911 |
err = ext4_mark_inode_dirty(handle, inode); if (err) { ext4_std_error(sb, err); goto fail_free_drop; } |
617ba13b3
|
912 913 |
ext4_debug("allocating inode %lu ", inode->i_ino); |
9bffad1ed
|
914 |
trace_ext4_allocate_inode(inode, dir, mode); |
ac27a0ec1
|
915 916 |
goto really_out; fail: |
617ba13b3
|
917 |
ext4_std_error(sb, err); |
ac27a0ec1
|
918 919 920 921 |
out: iput(inode); ret = ERR_PTR(err); really_out: |
3300beda5
|
922 |
brelse(inode_bitmap_bh); |
ac27a0ec1
|
923 924 925 |
return ret; fail_free_drop: |
63936ddaa
|
926 |
dquot_free_inode(inode); |
ac27a0ec1
|
927 928 |
fail_drop: |
9f7547580
|
929 |
dquot_drop(inode); |
ac27a0ec1
|
930 |
inode->i_flags |= S_NOQUOTA; |
6d6b77f16
|
931 |
clear_nlink(inode); |
6b38e842b
|
932 |
unlock_new_inode(inode); |
ac27a0ec1
|
933 |
iput(inode); |
3300beda5
|
934 |
brelse(inode_bitmap_bh); |
ac27a0ec1
|
935 936 937 938 |
return ERR_PTR(err); } /* Verify that we are loading a valid orphan from disk */ |
617ba13b3
|
939 |
struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino) |
ac27a0ec1
|
940 |
{ |
617ba13b3
|
941 |
unsigned long max_ino = le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count); |
fd2d42912
|
942 |
ext4_group_t block_group; |
ac27a0ec1
|
943 |
int bit; |
1d1fe1ee0
|
944 |
struct buffer_head *bitmap_bh; |
ac27a0ec1
|
945 |
struct inode *inode = NULL; |
1d1fe1ee0
|
946 |
long err = -EIO; |
ac27a0ec1
|
947 948 949 |
/* Error cases - e2fsck has already cleaned up for us */ if (ino > max_ino) { |
12062dddd
|
950 |
ext4_warning(sb, "bad orphan ino %lu! e2fsck was run?", ino); |
1d1fe1ee0
|
951 |
goto error; |
ac27a0ec1
|
952 |
} |
617ba13b3
|
953 954 |
block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); |
e29d1cde6
|
955 |
bitmap_bh = ext4_read_inode_bitmap(sb, block_group); |
ac27a0ec1
|
956 |
if (!bitmap_bh) { |
12062dddd
|
957 |
ext4_warning(sb, "inode bitmap error for orphan %lu", ino); |
1d1fe1ee0
|
958 |
goto error; |
ac27a0ec1
|
959 960 961 962 963 964 |
} /* Having the inode bit set should be a 100% indicator that this * is a valid orphan (no e2fsck run on fs). Orphans also include * inodes that were being truncated, so we can't check i_nlink==0. */ |
1d1fe1ee0
|
965 966 967 968 969 970 |
if (!ext4_test_bit(bit, bitmap_bh->b_data)) goto bad_orphan; inode = ext4_iget(sb, ino); if (IS_ERR(inode)) goto iget_failed; |
91ef4caf8
|
971 972 973 974 975 976 977 |
/* * If the orphans has i_nlinks > 0 then it should be able to be * truncated, otherwise it won't be removed from the orphan list * during processing and an infinite loop will result. */ if (inode->i_nlink && !ext4_can_truncate(inode)) goto bad_orphan; |
1d1fe1ee0
|
978 979 980 981 982 983 984 985 986 |
if (NEXT_ORPHAN(inode) > max_ino) goto bad_orphan; brelse(bitmap_bh); return inode; iget_failed: err = PTR_ERR(inode); inode = NULL; bad_orphan: |
12062dddd
|
987 |
ext4_warning(sb, "bad orphan inode %lu! e2fsck was run?", ino); |
1d1fe1ee0
|
988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 |
printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d ", bit, (unsigned long long)bitmap_bh->b_blocknr, ext4_test_bit(bit, bitmap_bh->b_data)); printk(KERN_NOTICE "inode=%p ", inode); if (inode) { printk(KERN_NOTICE "is_bad_inode(inode)=%d ", is_bad_inode(inode)); printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u ", NEXT_ORPHAN(inode)); printk(KERN_NOTICE "max_ino=%lu ", max_ino); |
91ef4caf8
|
1003 1004 |
printk(KERN_NOTICE "i_nlink=%u ", inode->i_nlink); |
ac27a0ec1
|
1005 |
/* Avoid freeing blocks if we got a bad deleted inode */ |
1d1fe1ee0
|
1006 |
if (inode->i_nlink == 0) |
ac27a0ec1
|
1007 1008 |
inode->i_blocks = 0; iput(inode); |
ac27a0ec1
|
1009 |
} |
ac27a0ec1
|
1010 |
brelse(bitmap_bh); |
1d1fe1ee0
|
1011 1012 |
error: return ERR_PTR(err); |
ac27a0ec1
|
1013 |
} |
af5bc92dd
|
1014 |
unsigned long ext4_count_free_inodes(struct super_block *sb) |
ac27a0ec1
|
1015 1016 |
{ unsigned long desc_count; |
617ba13b3
|
1017 |
struct ext4_group_desc *gdp; |
8df9675f8
|
1018 |
ext4_group_t i, ngroups = ext4_get_groups_count(sb); |
617ba13b3
|
1019 1020 |
#ifdef EXT4FS_DEBUG struct ext4_super_block *es; |
ac27a0ec1
|
1021 1022 |
unsigned long bitmap_count, x; struct buffer_head *bitmap_bh = NULL; |
617ba13b3
|
1023 |
es = EXT4_SB(sb)->s_es; |
ac27a0ec1
|
1024 1025 1026 |
desc_count = 0; bitmap_count = 0; gdp = NULL; |
8df9675f8
|
1027 |
for (i = 0; i < ngroups; i++) { |
af5bc92dd
|
1028 |
gdp = ext4_get_group_desc(sb, i, NULL); |
ac27a0ec1
|
1029 1030 |
if (!gdp) continue; |
560671a0d
|
1031 |
desc_count += ext4_free_inodes_count(sb, gdp); |
ac27a0ec1
|
1032 |
brelse(bitmap_bh); |
e29d1cde6
|
1033 |
bitmap_bh = ext4_read_inode_bitmap(sb, i); |
ac27a0ec1
|
1034 1035 |
if (!bitmap_bh) continue; |
617ba13b3
|
1036 |
x = ext4_count_free(bitmap_bh, EXT4_INODES_PER_GROUP(sb) / 8); |
c549a95d4
|
1037 1038 |
printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu ", |
785b4b3a5
|
1039 |
(unsigned long) i, ext4_free_inodes_count(sb, gdp), x); |
ac27a0ec1
|
1040 1041 1042 |
bitmap_count += x; } brelse(bitmap_bh); |
4776004f5
|
1043 1044 1045 1046 |
printk(KERN_DEBUG "ext4_count_free_inodes: " "stored = %u, computed = %lu, %lu ", le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count); |
ac27a0ec1
|
1047 1048 1049 |
return desc_count; #else desc_count = 0; |
8df9675f8
|
1050 |
for (i = 0; i < ngroups; i++) { |
af5bc92dd
|
1051 |
gdp = ext4_get_group_desc(sb, i, NULL); |
ac27a0ec1
|
1052 1053 |
if (!gdp) continue; |
560671a0d
|
1054 |
desc_count += ext4_free_inodes_count(sb, gdp); |
ac27a0ec1
|
1055 1056 1057 1058 1059 1060 1061 |
cond_resched(); } return desc_count; #endif } /* Called at mount-time, super-block is locked */ |
af5bc92dd
|
1062 |
unsigned long ext4_count_dirs(struct super_block * sb) |
ac27a0ec1
|
1063 1064 |
{ unsigned long count = 0; |
8df9675f8
|
1065 |
ext4_group_t i, ngroups = ext4_get_groups_count(sb); |
ac27a0ec1
|
1066 |
|
8df9675f8
|
1067 |
for (i = 0; i < ngroups; i++) { |
af5bc92dd
|
1068 |
struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); |
ac27a0ec1
|
1069 1070 |
if (!gdp) continue; |
560671a0d
|
1071 |
count += ext4_used_dirs_count(sb, gdp); |
ac27a0ec1
|
1072 1073 1074 |
} return count; } |
bfff68738
|
1075 1076 1077 1078 1079 1080 1081 1082 1083 |
/* * Zeroes not yet zeroed inode table - just write zeroes through the whole * inode table. Must be called without any spinlock held. The only place * where it is called from on active part of filesystem is ext4lazyinit * thread, so we do not need any special locks, however we have to prevent * inode allocation from the current group, so we take alloc_sem lock, to * block ext4_claim_inode until we are finished. */ |
e0cbee3e1
|
1084 |
/*
 * ext4_init_inode_table - zero the not-yet-used part of a group's inode table
 * @sb:      superblock of the filesystem
 * @group:   block group whose inode table is to be zeroed
 * @barrier: if non-zero, flush the block device after the zeroout
 *
 * Returns 0 when the table was zeroed and the group marked
 * EXT4_BG_INODE_ZEROED, and also when there was nothing to do (no group
 * descriptor, or the flag was already set).  Returns 1 if the filesystem
 * is read-only or the computed number of used inode-table blocks is out of
 * range.  Otherwise returns the error reported by the journal or zeroout
 * call that failed.
 */
int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
				 int barrier)
{
	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_group_desc *gdp = NULL;
	struct buffer_head *group_desc_bh;
	handle_t *handle;
	ext4_fsblk_t blk;
	int num, ret = 0, used_blks = 0;

	/* This should not happen, but just to be sure check this */
	if (sb->s_flags & MS_RDONLY) {
		ret = 1;
		goto out;
	}

	gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
	if (!gdp)
		goto out;

	/*
	 * We do not need to lock this, because we are the only one
	 * handling this flag.
	 */
	if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))
		goto out;

	handle = ext4_journal_start_sb(sb, 1);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		goto out;
	}

	down_write(&grp->alloc_sem);
	/*
	 * If inode bitmap was already initialized there may be some
	 * used inodes so we need to skip blocks with used inodes in
	 * inode table.
	 */
	if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)))
		used_blks = DIV_ROUND_UP((EXT4_INODES_PER_GROUP(sb) -
			    ext4_itable_unused_count(sb, gdp)),
			    sbi->s_inodes_per_block);

	if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) {
		/* Keep the fields separated so the values stay readable. */
		ext4_error(sb, "Something is wrong with group %u: "
			   "used itable blocks: %d; "
			   "itable unused count: %u",
			   group, used_blks,
			   ext4_itable_unused_count(sb, gdp));
		ret = 1;
		goto err_out;
	}

	/* First block past the used part of the table, and how many follow. */
	blk = ext4_inode_table(sb, gdp) + used_blks;
	num = sbi->s_itb_per_group - used_blks;

	BUFFER_TRACE(group_desc_bh, "get_write_access");
	ret = ext4_journal_get_write_access(handle,
					    group_desc_bh);
	if (ret)
		goto err_out;

	/*
	 * Skip zeroout if the inode table is full. But we set the ZEROED
	 * flag anyway, because obviously, when it is full it does not need
	 * further zeroing.
	 */
	if (unlikely(num == 0))
		goto skip_zeroout;

	ext4_debug("going to zero out inode table in group %d\n",
		   group);
	ret = sb_issue_zeroout(sb, blk, num, GFP_NOFS);
	if (ret < 0)
		goto err_out;
	if (barrier)
		blkdev_issue_flush(sb->s_bdev, GFP_NOFS, NULL);

skip_zeroout:
	/* The flag and checksum are updated together under the group lock. */
	ext4_lock_group(sb, group);
	gdp->bg_flags |= cpu_to_le16(EXT4_BG_INODE_ZEROED);
	gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
	ext4_unlock_group(sb, group);

	BUFFER_TRACE(group_desc_bh,
		     "call ext4_handle_dirty_metadata");
	ret = ext4_handle_dirty_metadata(handle, NULL,
					 group_desc_bh);

err_out:
	up_write(&grp->alloc_sem);
	ext4_journal_stop(handle);
out:
	return ret;
}