Blame view

fs/ext3/ialloc.c 20.7 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
  /*
   *  linux/fs/ext3/ialloc.c
   *
   * Copyright (C) 1992, 1993, 1994, 1995
   * Remy Card (card@masi.ibp.fr)
   * Laboratoire MASI - Institut Blaise Pascal
   * Universite Pierre et Marie Curie (Paris VI)
   *
   *  BSD ufs-inspired inode and directory allocation by
   *  Stephen Tweedie (sct@redhat.com), 1993
   *  Big-endian to little-endian byte-swapping/bitmaps by
   *        David S. Miller (davem@caip.rutgers.edu), 1995
   */
  
  #include <linux/time.h>
  #include <linux/fs.h>
  #include <linux/jbd.h>
  #include <linux/ext3_fs.h>
  #include <linux/ext3_jbd.h>
  #include <linux/stat.h>
  #include <linux/string.h>
  #include <linux/quotaops.h>
  #include <linux/buffer_head.h>
  #include <linux/random.h>
  #include <linux/bitops.h>
  
  #include <asm/byteorder.h>
  
  #include "xattr.h"
  #include "acl.h"
  
  /*
   * ialloc.c contains the inodes allocation and deallocation routines
   */
  
  /*
   * The free inodes are managed by bitmaps.  A file system contains several
   * blocks groups.  Each group contains 1 bitmap block for blocks, 1 bitmap
   * block for inodes, N blocks for the inode table and data blocks.
   *
   * The file system contains group descriptors which are located after the
   * super block.  Each descriptor contains the number of the bitmap block and
   * the free blocks count in the block.
   */
  
  
  /*
   * Look up the group descriptor of block_group and read the inode
   * allocation bitmap block that the descriptor points at.
   *
   * Returns the buffer_head of the bitmap on success.  Returns NULL when
   * the descriptor cannot be obtained or when the bitmap block cannot be
   * read; the read failure is reported through ext3_error().
   */
  static struct buffer_head *
  read_inode_bitmap(struct super_block * sb, unsigned long block_group)
  {
  	struct ext3_group_desc *gd = ext3_get_group_desc(sb, block_group, NULL);
  	struct buffer_head *bitmap;
  
  	if (!gd)
  		return NULL;
  
  	bitmap = sb_bread(sb, le32_to_cpu(gd->bg_inode_bitmap));
  	if (bitmap)
  		return bitmap;
  
  	ext3_error(sb, "read_inode_bitmap",
  		    "Cannot read inode bitmap - "
  		    "block_group = %lu, inode_bitmap = %u",
  		    block_group, le32_to_cpu(gd->bg_inode_bitmap));
  	return NULL;
  }
  
  /*
   * NOTE! When we get the inode, we're the only people
   * that have access to it, and as such there are no
   * race conditions we have to worry about. The inode
   * is not on the hash-lists, and it cannot be reached
   * through the filesystem because the directory entry
   * has been deleted earlier.
   *
   * HOWEVER: we must make sure that we get no aliases,
   * which means that we have to call "clear_inode()"
   * _before_ we mark the inode not in use in the inode
   * bitmaps. Otherwise a newly created file might use
   * the same inode number (not actually the same pointer
   * though), and then we'd have two inodes sharing the
   * same inode number and space on the harddisk.
   */
  /*
   * ext3_free_inode - mark an inode as free in the on-disk allocation data
   * @handle: journal handle under which the metadata buffers are modified
   * @inode:  inode being released
   *
   * Clears the inode's bit in its group's inode bitmap and, if the bit was
   * set, bumps the group's free-inode count (and drops its directory count
   * for directories) together with the matching per-cpu counters.  The
   * function refuses to act (with a printk) on an inode that still has
   * extra references, a non-zero link count or no super block.  Any error
   * collected in 'fatal' is handed to ext3_std_error() before returning.
   */
  void ext3_free_inode (handle_t *handle, struct inode * inode)
  {
  	struct super_block * sb = inode->i_sb;
  	int is_directory;
  	unsigned long ino;
  	struct buffer_head *bitmap_bh = NULL;
  	struct buffer_head *bh2 = NULL;
  	unsigned long block_group;
  	unsigned long bit;
  	struct ext3_group_desc * gdp;
  	struct ext3_super_block * es;
  	struct ext3_sb_info *sbi;
  	int fatal = 0, err;
  
  	if (atomic_read(&inode->i_count) > 1) {
  		printk ("ext3_free_inode: inode has count=%d\n",
  					atomic_read(&inode->i_count));
  		return;
  	}
  	if (inode->i_nlink) {
  		printk ("ext3_free_inode: inode has nlink=%d\n",
  			inode->i_nlink);
  		return;
  	}
  	if (!sb) {
  		printk("ext3_free_inode: inode on nonexistent device\n");
  		return;
  	}
  	sbi = EXT3_SB(sb);
  
  	ino = inode->i_ino;
  	ext3_debug ("freeing inode %lu\n", ino);
  
  	/* Sampled up front; used below to decide on the directory counters. */
  	is_directory = S_ISDIR(inode->i_mode);
  
  	es = EXT3_SB(sb)->s_es;
  	if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
  		ext3_error (sb, "ext3_free_inode",
  			    "reserved or nonexistent inode %lu", ino);
  		goto error_return;
  	}
  	/* Inode numbers are 1-based; groups and bitmap bits are 0-based. */
  	block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb);
  	bit = (ino - 1) % EXT3_INODES_PER_GROUP(sb);
  	bitmap_bh = read_inode_bitmap(sb, block_group);
  	if (!bitmap_bh)
  		goto error_return;
  
  	BUFFER_TRACE(bitmap_bh, "get_write_access");
  	fatal = ext3_journal_get_write_access(handle, bitmap_bh);
  	if (fatal)
  		goto error_return;
  
  	/* Ok, now we can actually update the inode bitmaps.. */
  	if (!ext3_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
  					bit, bitmap_bh->b_data))
  		ext3_error (sb, "ext3_free_inode",
  			      "bit already cleared for inode %lu", ino);
  	else {
  		gdp = ext3_get_group_desc (sb, block_group, &bh2);
  		if (!gdp) {
  			/*
  			 * bh2 is only meaningful when the lookup succeeded;
  			 * never hand it to the journal otherwise.  Fail the
  			 * same way ext3_new_inode() does for a missing
  			 * descriptor.
  			 */
  			fatal = -EIO;
  			goto error_return;
  		}
  
  		BUFFER_TRACE(bh2, "get_write_access");
  		fatal = ext3_journal_get_write_access(handle, bh2);
  		if (fatal)
  			goto error_return;
  
  		/* Descriptor counts are updated under the group lock. */
  		spin_lock(sb_bgl_lock(sbi, block_group));
  		le16_add_cpu(&gdp->bg_free_inodes_count, 1);
  		if (is_directory)
  			le16_add_cpu(&gdp->bg_used_dirs_count, -1);
  		spin_unlock(sb_bgl_lock(sbi, block_group));
  		percpu_counter_inc(&sbi->s_freeinodes_counter);
  		if (is_directory)
  			percpu_counter_dec(&sbi->s_dirs_counter);
  
  		BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata");
  		err = ext3_journal_dirty_metadata(handle, bh2);
  		if (!fatal)
  			fatal = err;
  	}
  	BUFFER_TRACE(bitmap_bh, "call ext3_journal_dirty_metadata");
  	err = ext3_journal_dirty_metadata(handle, bitmap_bh);
  	if (!fatal)
  		fatal = err;
  
  error_return:
  	brelse(bitmap_bh);
  	ext3_std_error(sb, fatal);
  }
  
  /*
   * There are two policies for allocating an inode.  If the new inode is
   * a directory, then a forward search is made for a block group with both
   * free space and a low directory-to-inode ratio; if that fails, then of
   * the groups with above-average free space, that group with the fewest
   * directories already is chosen.
   *
   * For other inodes, search forward from the parent directory's block
   * group to find a free inode.
   */
  static int find_group_dir(struct super_block *sb, struct inode *parent)
  {
  	int ngroups = EXT3_SB(sb)->s_groups_count;
eee194e76   Eric Sandeen   [PATCH] ext3: ino...
193
  	unsigned int freei, avefreei;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
194
  	struct ext3_group_desc *desc, *best_desc = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
195
196
197
198
199
200
  	int group, best_group = -1;
  
  	freei = percpu_counter_read_positive(&EXT3_SB(sb)->s_freeinodes_counter);
  	avefreei = freei / ngroups;
  
  	for (group = 0; group < ngroups; group++) {
ef2fb6798   Eric Sandeen   remove unused bh ...
201
  		desc = ext3_get_group_desc (sb, group, NULL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
202
203
204
205
  		if (!desc || !desc->bg_free_inodes_count)
  			continue;
  		if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
  			continue;
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
206
  		if (!best_desc ||
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
207
208
209
210
211
212
213
214
  		    (le16_to_cpu(desc->bg_free_blocks_count) >
  		     le16_to_cpu(best_desc->bg_free_blocks_count))) {
  			best_group = group;
  			best_desc = desc;
  		}
  	}
  	return best_group;
  }
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
215
216
217
  /*
   * Orlov's allocator for directories.
   *
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
218
219
   * We always try to spread first-level directories.
   *
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
220
221
222
223
224
225
226
227
228
229
230
   * If there are blockgroups with both free inodes and free blocks counts
   * not worse than average we return one with smallest directory count.
   * Otherwise we simply return a random group.
   *
   * For the rest rules look so:
   *
   * It's OK to put directory into a group unless
   * it has too many directories already (max_dirs) or
   * it has too few free inodes left (min_inodes) or
   * it has too few free blocks left (min_blocks) or
   * it's already running too large debt (max_debt).
1cc8dcf56   Benoit Boissinot   ext*: spelling fi...
231
   * Parent's group is preferred, if it doesn't satisfy these
ae6ddcc5f   Mingming Cao   [PATCH] ext3 and ...
232
233
234
235
236
237
238
   * conditions we search cyclically through the rest. If none
   * of the groups look good we just look for a group with more
   * free inodes than average (starting at parent's group).
   *
   * Debt is incremented each time we allocate a directory and decremented
   * when we allocate an inode, within 0--255.
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
239
240
241
242
243
244
245
246
247
248
249
  
  #define INODE_COST 64
  #define BLOCK_COST 256
  
  static int find_group_orlov(struct super_block *sb, struct inode *parent)
  {
  	int parent_group = EXT3_I(parent)->i_block_group;
  	struct ext3_sb_info *sbi = EXT3_SB(sb);
  	struct ext3_super_block *es = sbi->s_es;
  	int ngroups = sbi->s_groups_count;
  	int inodes_per_group = EXT3_INODES_PER_GROUP(sb);
eee194e76   Eric Sandeen   [PATCH] ext3: ino...
250
  	unsigned int freei, avefreei;
1c2bf374a   Mingming Cao   [PATCH] ext3_fsbl...
251
252
  	ext3_fsblk_t freeb, avefreeb;
  	ext3_fsblk_t blocks_per_dir;
eee194e76   Eric Sandeen   [PATCH] ext3: ino...
253
  	unsigned int ndirs;
1c2bf374a   Mingming Cao   [PATCH] ext3_fsbl...
254
255
  	int max_debt, max_dirs, min_inodes;
  	ext3_grpblk_t min_blocks;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
256
257
  	int group = -1, i;
  	struct ext3_group_desc *desc;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
  
  	freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter);
  	avefreei = freei / ngroups;
  	freeb = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
  	avefreeb = freeb / ngroups;
  	ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);
  
  	if ((parent == sb->s_root->d_inode) ||
  	    (EXT3_I(parent)->i_flags & EXT3_TOPDIR_FL)) {
  		int best_ndir = inodes_per_group;
  		int best_group = -1;
  
  		get_random_bytes(&group, sizeof(group));
  		parent_group = (unsigned)group % ngroups;
  		for (i = 0; i < ngroups; i++) {
  			group = (parent_group + i) % ngroups;
ef2fb6798   Eric Sandeen   remove unused bh ...
274
  			desc = ext3_get_group_desc (sb, group, NULL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
  			if (!desc || !desc->bg_free_inodes_count)
  				continue;
  			if (le16_to_cpu(desc->bg_used_dirs_count) >= best_ndir)
  				continue;
  			if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
  				continue;
  			if (le16_to_cpu(desc->bg_free_blocks_count) < avefreeb)
  				continue;
  			best_group = group;
  			best_ndir = le16_to_cpu(desc->bg_used_dirs_count);
  		}
  		if (best_group >= 0)
  			return best_group;
  		goto fallback;
  	}
  
  	blocks_per_dir = (le32_to_cpu(es->s_blocks_count) - freeb) / ndirs;
  
  	max_dirs = ndirs / ngroups + inodes_per_group / 16;
  	min_inodes = avefreei - inodes_per_group / 4;
  	min_blocks = avefreeb - EXT3_BLOCKS_PER_GROUP(sb) / 4;
1c2bf374a   Mingming Cao   [PATCH] ext3_fsbl...
296
  	max_debt = EXT3_BLOCKS_PER_GROUP(sb) / max(blocks_per_dir, (ext3_fsblk_t)BLOCK_COST);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
297
298
299
300
301
302
303
304
305
  	if (max_debt * INODE_COST > inodes_per_group)
  		max_debt = inodes_per_group / INODE_COST;
  	if (max_debt > 255)
  		max_debt = 255;
  	if (max_debt == 0)
  		max_debt = 1;
  
  	for (i = 0; i < ngroups; i++) {
  		group = (parent_group + i) % ngroups;
ef2fb6798   Eric Sandeen   remove unused bh ...
306
  		desc = ext3_get_group_desc (sb, group, NULL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
307
308
309
310
311
312
313
314
315
316
317
318
319
320
  		if (!desc || !desc->bg_free_inodes_count)
  			continue;
  		if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs)
  			continue;
  		if (le16_to_cpu(desc->bg_free_inodes_count) < min_inodes)
  			continue;
  		if (le16_to_cpu(desc->bg_free_blocks_count) < min_blocks)
  			continue;
  		return group;
  	}
  
  fallback:
  	for (i = 0; i < ngroups; i++) {
  		group = (parent_group + i) % ngroups;
ef2fb6798   Eric Sandeen   remove unused bh ...
321
  		desc = ext3_get_group_desc (sb, group, NULL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
  		if (!desc || !desc->bg_free_inodes_count)
  			continue;
  		if (le16_to_cpu(desc->bg_free_inodes_count) >= avefreei)
  			return group;
  	}
  
  	if (avefreei) {
  		/*
  		 * The free-inodes counter is approximate, and for really small
  		 * filesystems the above test can fail to find any blockgroups
  		 */
  		avefreei = 0;
  		goto fallback;
  	}
  
  	return -1;
  }
  
  static int find_group_other(struct super_block *sb, struct inode *parent)
  {
  	int parent_group = EXT3_I(parent)->i_block_group;
  	int ngroups = EXT3_SB(sb)->s_groups_count;
  	struct ext3_group_desc *desc;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
345
346
347
348
349
350
  	int group, i;
  
  	/*
  	 * Try to place the inode in its parent directory
  	 */
  	group = parent_group;
ef2fb6798   Eric Sandeen   remove unused bh ...
351
  	desc = ext3_get_group_desc (sb, group, NULL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
  	if (desc && le16_to_cpu(desc->bg_free_inodes_count) &&
  			le16_to_cpu(desc->bg_free_blocks_count))
  		return group;
  
  	/*
  	 * We're going to place this inode in a different blockgroup from its
  	 * parent.  We want to cause files in a common directory to all land in
  	 * the same blockgroup.  But we want files which are in a different
  	 * directory which shares a blockgroup with our parent to land in a
  	 * different blockgroup.
  	 *
  	 * So add our directory's i_ino into the starting point for the hash.
  	 */
  	group = (group + parent->i_ino) % ngroups;
  
  	/*
  	 * Use a quadratic hash to find a group with a free inode and some free
  	 * blocks.
  	 */
  	for (i = 1; i < ngroups; i <<= 1) {
  		group += i;
  		if (group >= ngroups)
  			group -= ngroups;
ef2fb6798   Eric Sandeen   remove unused bh ...
375
  		desc = ext3_get_group_desc (sb, group, NULL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
376
377
378
379
380
381
382
383
384
385
386
387
388
  		if (desc && le16_to_cpu(desc->bg_free_inodes_count) &&
  				le16_to_cpu(desc->bg_free_blocks_count))
  			return group;
  	}
  
  	/*
  	 * That failed: try linear search for a free inode, even if that group
  	 * has no free blocks.
  	 */
  	group = parent_group;
  	for (i = 0; i < ngroups; i++) {
  		if (++group >= ngroups)
  			group = 0;
ef2fb6798   Eric Sandeen   remove unused bh ...
389
  		desc = ext3_get_group_desc (sb, group, NULL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
  		if (desc && le16_to_cpu(desc->bg_free_inodes_count))
  			return group;
  	}
  
  	return -1;
  }
  
  /*
   * There are two policies for allocating an inode.  If the new inode is
   * a directory, then a forward search is made for a block group with both
   * free space and a low directory-to-inode ratio; if that fails, then of
   * the groups with above-average free space, that group with the fewest
   * directories already is chosen.
   *
   * For other inodes, search forward from the parent directory's block
   * group to find a free inode.
   */
  /*
   * ext3_new_inode - allocate and initialise a new in-core/on-disk inode
   * @handle: journal handle under which bitmap/descriptor buffers are changed
   * @dir:    parent directory; must be non-NULL and have a non-zero link count
   * @mode:   file mode of the new inode (S_ISDIR selects directory policy)
   *
   * Picks a starting group with find_group_dir(), find_group_orlov() or
   * find_group_other(), then walks the groups from there until a zero bit
   * in an inode bitmap can be claimed atomically.
   *
   * Returns the new inode on success, or an ERR_PTR():
   *   -EPERM   dir is NULL or already unlinked
   *   -ENOMEM  new_inode() failed
   *   -ENOSPC  no group had a free inode
   *   -EIO     descriptor/bitmap could not be read, or the inode number
   *            computed from the bitmap is out of range
   *   -EINVAL  insert_inode_locked() failed
   * or whatever error the journal, quota, ACL or security helpers returned.
   */
  struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode)
  {
  	struct super_block *sb;
  	struct buffer_head *bitmap_bh = NULL;
  	struct buffer_head *bh2;
  	int group;
  	unsigned long ino = 0;
  	struct inode * inode;
  	struct ext3_group_desc * gdp = NULL;
  	struct ext3_super_block * es;
  	struct ext3_inode_info *ei;
  	struct ext3_sb_info *sbi;
  	int err = 0;
  	struct inode *ret;
  	int i;
  
  	/* Cannot create files in a deleted directory */
  	if (!dir || !dir->i_nlink)
  		return ERR_PTR(-EPERM);
  
  	sb = dir->i_sb;
  	inode = new_inode(sb);
  	if (!inode)
  		return ERR_PTR(-ENOMEM);
  	ei = EXT3_I(inode);
  
  	sbi = EXT3_SB(sb);
  	es = sbi->s_es;
  	if (S_ISDIR(mode)) {
  		if (test_opt (sb, OLDALLOC))
  			group = find_group_dir(sb, dir);
  		else
  			group = find_group_orlov(sb, dir);
  	} else
  		group = find_group_other(sb, dir);
  
  	err = -ENOSPC;
  	if (group == -1)
  		goto out;
  
  	for (i = 0; i < sbi->s_groups_count; i++) {
  		err = -EIO;
  
  		gdp = ext3_get_group_desc(sb, group, &bh2);
  		if (!gdp)
  			goto fail;
  
  		/* Drop the bitmap of the previous group before the next read. */
  		brelse(bitmap_bh);
  		bitmap_bh = read_inode_bitmap(sb, group);
  		if (!bitmap_bh)
  			goto fail;
  
  		ino = 0;
  
  repeat_in_this_group:
  		ino = ext3_find_next_zero_bit((unsigned long *)
  				bitmap_bh->b_data, EXT3_INODES_PER_GROUP(sb), ino);
  		if (ino < EXT3_INODES_PER_GROUP(sb)) {
  
  			BUFFER_TRACE(bitmap_bh, "get_write_access");
  			err = ext3_journal_get_write_access(handle, bitmap_bh);
  			if (err)
  				goto fail;
  
  			if (!ext3_set_bit_atomic(sb_bgl_lock(sbi, group),
  						ino, bitmap_bh->b_data)) {
  				/* we won it */
  				BUFFER_TRACE(bitmap_bh,
  					"call ext3_journal_dirty_metadata");
  				err = ext3_journal_dirty_metadata(handle,
  								bitmap_bh);
  				if (err)
  					goto fail;
  				goto got;
  			}
  			/* we lost it */
  			journal_release_buffer(handle, bitmap_bh);
  
  			if (++ino < EXT3_INODES_PER_GROUP(sb))
  				goto repeat_in_this_group;
  		}
  
  		/*
  		 * This case is possible in concurrent environment.  It is very
  		 * rare.  We cannot repeat the find_group_xxx() call because
  		 * that will simply return the same blockgroup, because the
  		 * group descriptor metadata has not yet been updated.
  		 * So we just go onto the next blockgroup.
  		 */
  		if (++group == sbi->s_groups_count)
  			group = 0;
  	}
  	err = -ENOSPC;
  	goto out;
  
  got:
  	/* Convert the 0-based bit within the group to a 1-based inode number. */
  	ino += group * EXT3_INODES_PER_GROUP(sb) + 1;
  	if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
  		ext3_error (sb, "ext3_new_inode",
  			    "reserved inode or inode > inodes count - "
  			    "block_group = %d, inode=%lu", group, ino);
  		err = -EIO;
  		goto fail;
  	}
  
  	BUFFER_TRACE(bh2, "get_write_access");
  	err = ext3_journal_get_write_access(handle, bh2);
  	if (err) goto fail;
  	/* Descriptor counts are updated under the group lock. */
  	spin_lock(sb_bgl_lock(sbi, group));
  	le16_add_cpu(&gdp->bg_free_inodes_count, -1);
  	if (S_ISDIR(mode)) {
  		le16_add_cpu(&gdp->bg_used_dirs_count, 1);
  	}
  	spin_unlock(sb_bgl_lock(sbi, group));
  	BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata");
  	err = ext3_journal_dirty_metadata(handle, bh2);
  	if (err) goto fail;
  
  	percpu_counter_dec(&sbi->s_freeinodes_counter);
  	if (S_ISDIR(mode))
  		percpu_counter_inc(&sbi->s_dirs_counter);
  
  
  	/* With the GRPID mount option the group always comes from the parent. */
  	if (test_opt(sb, GRPID)) {
  		inode->i_mode = mode;
  		inode->i_uid = current_fsuid();
  		inode->i_gid = dir->i_gid;
  	} else
  		inode_init_owner(inode, dir, mode);
  
  	inode->i_ino = ino;
  	inode->i_blocks = 0;
  	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
  
  	memset(ei->i_data, 0, sizeof(ei->i_data));
  	ei->i_dir_start_lookup = 0;
  	ei->i_disksize = 0;
  
  	/* Inherit only the parent's inheritable flags, masked for this mode. */
  	ei->i_flags =
  		ext3_mask_flags(mode, EXT3_I(dir)->i_flags & EXT3_FL_INHERITED);
  #ifdef EXT3_FRAGMENTS
  	ei->i_faddr = 0;
  	ei->i_frag_no = 0;
  	ei->i_frag_size = 0;
  #endif
  	ei->i_file_acl = 0;
  	ei->i_dir_acl = 0;
  	ei->i_dtime = 0;
  	ei->i_block_alloc_info = NULL;
  	ei->i_block_group = group;
  
  	ext3_set_inode_flags(inode);
  	if (IS_DIRSYNC(inode))
  		handle->h_sync = 1;
  	if (insert_inode_locked(inode) < 0) {
  		err = -EINVAL;
  		goto fail_drop;
  	}
  	spin_lock(&sbi->s_next_gen_lock);
  	inode->i_generation = sbi->s_next_generation++;
  	spin_unlock(&sbi->s_next_gen_lock);
  
  	ei->i_state_flags = 0;
  	ext3_set_inode_state(inode, EXT3_STATE_NEW);
  
  	ei->i_extra_isize =
  		(EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) ?
  		sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0;
  
  	ret = inode;
  	dquot_initialize(inode);
  	err = dquot_alloc_inode(inode);
  	if (err)
  		goto fail_drop;
  
  	err = ext3_init_acl(handle, inode, dir);
  	if (err)
  		goto fail_free_drop;
  
  	err = ext3_init_security(handle,inode, dir);
  	if (err)
  		goto fail_free_drop;
  
  	err = ext3_mark_inode_dirty(handle, inode);
  	if (err) {
  		ext3_std_error(sb, err);
  		goto fail_free_drop;
  	}
  
  	ext3_debug("allocating inode %lu\n", inode->i_ino);
  	goto really_out;
  fail:
  	ext3_std_error(sb, err);
  out:
  	iput(inode);
  	ret = ERR_PTR(err);
  really_out:
  	brelse(bitmap_bh);
  	return ret;
  
  	/* Failure after the quota charge: undo the charge, then drop. */
  fail_free_drop:
  	dquot_free_inode(inode);
  
  fail_drop:
  	dquot_drop(inode);
  	inode->i_flags |= S_NOQUOTA;
  	inode->i_nlink = 0;
  	unlock_new_inode(inode);
  	iput(inode);
  	brelse(bitmap_bh);
  	return ERR_PTR(err);
  }
  
  /* Verify that we are loading a valid orphan from disk */
  struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
  {
  	unsigned long max_ino = le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count);
  	unsigned long block_group;
  	int bit;
473043dce   David Howells   iget: stop EXT3 f...
622
  	struct buffer_head *bitmap_bh;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
623
  	struct inode *inode = NULL;
473043dce   David Howells   iget: stop EXT3 f...
624
  	long err = -EIO;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
625
626
627
  
  	/* Error cases - e2fsck has already cleaned up for us */
  	if (ino > max_ino) {
e05b6b524   Harvey Harrison   ext3: replace rem...
628
  		ext3_warning(sb, __func__,
9f40668d7   Glauber de Oliveira Costa   [PATCH] ext3: rem...
629
  			     "bad orphan ino %lu!  e2fsck was run?", ino);
473043dce   David Howells   iget: stop EXT3 f...
630
  		goto error;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
631
632
633
634
635
636
  	}
  
  	block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb);
  	bit = (ino - 1) % EXT3_INODES_PER_GROUP(sb);
  	bitmap_bh = read_inode_bitmap(sb, block_group);
  	if (!bitmap_bh) {
e05b6b524   Harvey Harrison   ext3: replace rem...
637
  		ext3_warning(sb, __func__,
9f40668d7   Glauber de Oliveira Costa   [PATCH] ext3: rem...
638
  			     "inode bitmap error for orphan %lu", ino);
473043dce   David Howells   iget: stop EXT3 f...
639
  		goto error;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
640
641
642
643
644
645
  	}
  
  	/* Having the inode bit set should be a 100% indicator that this
  	 * is a valid orphan (no e2fsck run on fs).  Orphans also include
  	 * inodes that were being truncated, so we can't check i_nlink==0.
  	 */
473043dce   David Howells   iget: stop EXT3 f...
646
647
648
649
650
651
  	if (!ext3_test_bit(bit, bitmap_bh->b_data))
  		goto bad_orphan;
  
  	inode = ext3_iget(sb, ino);
  	if (IS_ERR(inode))
  		goto iget_failed;
ae76dd9a6   Duane Griffin   ext3: handle corr...
652
653
654
655
656
657
658
  	/*
  	 * If the orphans has i_nlinks > 0 then it should be able to be
  	 * truncated, otherwise it won't be removed from the orphan list
  	 * during processing and an infinite loop will result.
  	 */
  	if (inode->i_nlink && !ext3_can_truncate(inode))
  		goto bad_orphan;
473043dce   David Howells   iget: stop EXT3 f...
659
660
661
662
663
664
665
666
667
  	if (NEXT_ORPHAN(inode) > max_ino)
  		goto bad_orphan;
  	brelse(bitmap_bh);
  	return inode;
  
  iget_failed:
  	err = PTR_ERR(inode);
  	inode = NULL;
  bad_orphan:
e05b6b524   Harvey Harrison   ext3: replace rem...
668
  	ext3_warning(sb, __func__,
473043dce   David Howells   iget: stop EXT3 f...
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
  		     "bad orphan inode %lu!  e2fsck was run?", ino);
  	printk(KERN_NOTICE "ext3_test_bit(bit=%d, block=%llu) = %d
  ",
  	       bit, (unsigned long long)bitmap_bh->b_blocknr,
  	       ext3_test_bit(bit, bitmap_bh->b_data));
  	printk(KERN_NOTICE "inode=%p
  ", inode);
  	if (inode) {
  		printk(KERN_NOTICE "is_bad_inode(inode)=%d
  ",
  		       is_bad_inode(inode));
  		printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u
  ",
  		       NEXT_ORPHAN(inode));
  		printk(KERN_NOTICE "max_ino=%lu
  ", max_ino);
ae76dd9a6   Duane Griffin   ext3: handle corr...
685
686
  		printk(KERN_NOTICE "i_nlink=%u
  ", inode->i_nlink);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
687
  		/* Avoid freeing blocks if we got a bad deleted inode */
473043dce   David Howells   iget: stop EXT3 f...
688
  		if (inode->i_nlink == 0)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
689
690
  			inode->i_blocks = 0;
  		iput(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
691
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
692
  	brelse(bitmap_bh);
473043dce   David Howells   iget: stop EXT3 f...
693
694
  error:
  	return ERR_PTR(err);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
695
696
697
698
699
700
701
702
703
704
705
  }
  
  /*
   * ext3_count_free_inodes - sum the free-inode counts of all group descriptors
   * @sb: super block of the filesystem to inspect
   *
   * Returns the total of bg_free_inodes_count over every group whose
   * descriptor can be obtained; groups without a descriptor are skipped.
   *
   * When built with EXT3FS_DEBUG the inode bitmaps are read as well, the
   * free bits are counted, and the stored and counted figures are printed
   * per group and in total.  The return value is still the descriptor sum.
   */
  unsigned long ext3_count_free_inodes (struct super_block * sb)
  {
  	unsigned long desc_count;
  	struct ext3_group_desc *gdp;
  	int i;
  #ifdef EXT3FS_DEBUG
  	struct ext3_super_block *es;
  	unsigned long bitmap_count, x;
  	struct buffer_head *bitmap_bh = NULL;
  
  	es = EXT3_SB(sb)->s_es;
  	desc_count = 0;
  	bitmap_count = 0;
  	gdp = NULL;
  	for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
  		gdp = ext3_get_group_desc (sb, i, NULL);
  		if (!gdp)
  			continue;
  		desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
  		/* Release the previous group's bitmap before reading the next. */
  		brelse(bitmap_bh);
  		bitmap_bh = read_inode_bitmap(sb, i);
  		if (!bitmap_bh)
  			continue;
  
  		x = ext3_count_free(bitmap_bh, EXT3_INODES_PER_GROUP(sb) / 8);
  		printk("group %d: stored = %d, counted = %lu\n",
  			i, le16_to_cpu(gdp->bg_free_inodes_count), x);
  		bitmap_count += x;
  	}
  	brelse(bitmap_bh);
  	printk("ext3_count_free_inodes: stored = %u, computed = %lu, %lu\n",
  		le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count);
  	return desc_count;
  #else
  	desc_count = 0;
  	for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
  		gdp = ext3_get_group_desc (sb, i, NULL);
  		if (!gdp)
  			continue;
  		desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
  		cond_resched();
  	}
  	return desc_count;
  #endif
  }
  
  /*
   * Called at mount-time, super-block is locked.
   *
   * Adds up bg_used_dirs_count over all group descriptors and returns the
   * total; groups whose descriptor cannot be obtained contribute nothing.
   */
  unsigned long ext3_count_dirs (struct super_block * sb)
  {
  	unsigned long total = 0;
  	int grp = 0;
  
  	while (grp < EXT3_SB(sb)->s_groups_count) {
  		struct ext3_group_desc *gd = ext3_get_group_desc (sb, grp, NULL);
  
  		if (gd)
  			total += le16_to_cpu(gd->bg_used_dirs_count);
  		grp++;
  	}
  	return total;
  }