Blame view

fs/ext4/mballoc.c 127 KB
c9de560de   Alex Tomas   ext4: Add multi b...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
  /*
   * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
   * Written by Alex Tomas <alex@clusterfs.com>
   *
   * This program is free software; you can redistribute it and/or modify
   * it under the terms of the GNU General Public License version 2 as
   * published by the Free Software Foundation.
   *
   * This program is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   * GNU General Public License for more details.
   *
   * You should have received a copy of the GNU General Public License
   * along with this program; if not, write to the Free Software
   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   */
  
  
  /*
   * mballoc.c contains the multiblocks allocation routines
   */
8f6e39a7a   Mingming Cao   ext4: Move mballo...
23
  #include "mballoc.h"
6ba495e92   Theodore Ts'o   ext4: Add configu...
24
  #include <linux/debugfs.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
25
  #include <linux/slab.h>
9bffad1ed   Theodore Ts'o   ext4: convert ins...
26
  #include <trace/events/ext4.h>
c9de560de   Alex Tomas   ext4: Add multi b...
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
  /*
   * MUSTDO:
   *   - test ext4_ext_search_left() and ext4_ext_search_right()
   *   - search for metadata in few groups
   *
   * TODO v4:
   *   - normalization should take into account whether file is still open
   *   - discard preallocations if no free space left (policy?)
   *   - don't normalize tails
   *   - quota
   *   - reservation for superuser
   *
   * TODO v3:
   *   - bitmap read-ahead (proposed by Oleg Drokin aka green)
   *   - track min/max extents in each group for better group selection
   *   - mb_mark_used() may allocate chunk right after splitting buddy
   *   - tree of groups sorted by number of free blocks
   *   - error handling
   */
  
  /*
   * An allocation request asks for one or more blocks located near the
   * goal (block) value specified.
   *
b713a5ec5   Theodore Ts'o   ext4: remove /pro...
51
52
53
54
55
56
57
58
59
   * During initialization phase of the allocator we decide to use the
   * group preallocation or inode preallocation depending on the size of
   * the file. The size of the file could be the resulting file size we
   * would have after allocation, or the current file size, whichever
   * is larger. If the size is less than sbi->s_mb_stream_request we
   * select to use the group preallocation. The default value of
   * s_mb_stream_request is 16 blocks. This can also be tuned via
   * /sys/fs/ext4/<partition>/mb_stream_req. The value is represented in
   * terms of number of blocks.
c9de560de   Alex Tomas   ext4: Add multi b...
60
61
   *
   * The main motivation for having small file use group preallocation is to
b713a5ec5   Theodore Ts'o   ext4: remove /pro...
62
   * ensure that we have small files closer together on the disk.
c9de560de   Alex Tomas   ext4: Add multi b...
63
   *
b713a5ec5   Theodore Ts'o   ext4: remove /pro...
64
65
66
67
   * In the first stage the allocator looks at the inode prealloc list,
   * ext4_inode_info->i_prealloc_list, which contains list of prealloc
   * spaces for this particular inode. The inode prealloc space is
   * represented as:
c9de560de   Alex Tomas   ext4: Add multi b...
68
69
70
   *
   * pa_lstart -> the logical start block for this prealloc space
   * pa_pstart -> the physical start block for this prealloc space
1537a3638   Daniel Mack   tree-wide: fix 'l...
71
   * pa_len    -> length for this prealloc space
c9de560de   Alex Tomas   ext4: Add multi b...
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
   * pa_free   ->  free space available in this prealloc space
   *
   * The inode preallocation space is used looking at the _logical_ start
   * block. If only the logical file block falls within the range of prealloc
   * space we will consume the particular prealloc space. This makes sure
   * that we have contiguous physical blocks representing the file blocks.
   *
   * The important thing to be noted in case of inode prealloc space is that
   * we don't modify the values associated to inode prealloc space except
   * pa_free.
   *
   * If we are not able to find blocks in the inode prealloc space and if we
   * have the group allocation flag set then we look at the locality group
   * prealloc space. These are per-CPU prealloc lists represented as
   *
   * ext4_sb_info.s_locality_groups[smp_processor_id()]
   *
   * The reason for having a per cpu locality group is to reduce the contention
   * between CPUs. It is possible to get scheduled at this point.
   *
   * The locality group prealloc space is used looking at whether we have
   * enough free space (pa_free) within the prealloc space.
   *
   * If we can't allocate blocks via inode prealloc or/and locality group
   * prealloc then we look at the buddy cache. The buddy cache is represented
   * by ext4_sb_info.s_buddy_cache (struct inode) whose file offset gets
   * mapped to the buddy and bitmap information regarding different
   * groups. The buddy information is attached to buddy cache inode so that
   * we can access them through the page cache. The information regarding
   * each group is loaded via ext4_mb_load_buddy.  The information involves
   * block bitmap and buddy information. The information is stored in the
   * inode as:
   *
   *  {                        page                        }
c3a326a65   Aneesh Kumar K.V   ext4: cleanup mba...
106
   *  [ group 0 bitmap][ group 0 buddy] [group 1][ group 1]...
c9de560de   Alex Tomas   ext4: Add multi b...
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
   *
   *
   * one block each for bitmap and buddy information.  So for each group we
   * take up 2 blocks. A page can contain blocks_per_page (PAGE_CACHE_SIZE /
   * blocksize) blocks.  So it can have information regarding groups_per_page
   * which is blocks_per_page/2
   *
   * The buddy cache inode is not stored on disk. The inode is thrown
   * away when the filesystem is unmounted.
   *
   * We look for count number of blocks in the buddy cache. If we were able
   * to locate that many free blocks we return with additional information
   * regarding rest of the contiguous physical block available
   *
   * Before allocating blocks via buddy cache we normalize the request
   * blocks. This ensures we ask for more blocks than we needed. The extra
   * blocks that we get after allocation is added to the respective prealloc
   * list. In case of inode preallocation we follow a list of heuristics
   * based on file size. This can be found in ext4_mb_normalize_request. If
   * we are doing a group prealloc we try to normalize the request to
b713a5ec5   Theodore Ts'o   ext4: remove /pro...
127
   * sbi->s_mb_group_prealloc. Default value of s_mb_group_prealloc is
c9de560de   Alex Tomas   ext4: Add multi b...
128
   * 512 blocks. This can be tuned via
b713a5ec5   Theodore Ts'o   ext4: remove /pro...
129
   * /sys/fs/ext4/<partition>/mb_group_prealloc. The value is represented in
c9de560de   Alex Tomas   ext4: Add multi b...
130
131
132
133
   * terms of number of blocks. If we have mounted the file system with -o
   * stripe=<value> option the group prealloc request is normalized to the
   * stripe value (sbi->s_stripe)
   *
b713a5ec5   Theodore Ts'o   ext4: remove /pro...
134
   * The regular allocator (using the buddy cache) supports a few tunables.
c9de560de   Alex Tomas   ext4: Add multi b...
135
   *
b713a5ec5   Theodore Ts'o   ext4: remove /pro...
136
137
138
   * /sys/fs/ext4/<partition>/mb_min_to_scan
   * /sys/fs/ext4/<partition>/mb_max_to_scan
   * /sys/fs/ext4/<partition>/mb_order2_req
c9de560de   Alex Tomas   ext4: Add multi b...
139
   *
b713a5ec5   Theodore Ts'o   ext4: remove /pro...
140
   * The regular allocator uses buddy scan only if the request len is power of
c9de560de   Alex Tomas   ext4: Add multi b...
141
142
   * 2 blocks and the order of allocation is >= sbi->s_mb_order2_reqs. The
   * value of s_mb_order2_reqs can be tuned via
b713a5ec5   Theodore Ts'o   ext4: remove /pro...
143
   * /sys/fs/ext4/<partition>/mb_order2_req.  If the request len is equal to
af901ca18   André Goddard Rosa   tree-wide: fix as...
144
   * stripe size (sbi->s_stripe), we try to search for contiguous block in
b713a5ec5   Theodore Ts'o   ext4: remove /pro...
145
146
147
   * stripe size. This should result in better allocation on RAID setups. If
   * not, we search in the specific group using bitmap for best extents. The
   * tunable min_to_scan and max_to_scan control the behaviour here.
c9de560de   Alex Tomas   ext4: Add multi b...
148
   * min_to_scan indicates how long the mballoc __must__ look for a best
b713a5ec5   Theodore Ts'o   ext4: remove /pro...
149
   * extent and max_to_scan indicates how long the mballoc __can__ look for a
c9de560de   Alex Tomas   ext4: Add multi b...
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
   * best extent in the found extents. Searching for the blocks starts with
   * the group specified as the goal value in allocation context via
   * ac_g_ex. Each group is first checked based on the criteria whether it
   * can be used for allocation. ext4_mb_good_group explains how the groups are
   * checked.
   *
   * Both prealloc spaces get populated as above. So for the first
   * request we will hit the buddy cache which will result in this prealloc
   * space getting filled. The prealloc space is then later used for the
   * subsequent request.
   */
  
  /*
   * mballoc operates on the following data:
   *  - on-disk bitmap
   *  - in-core buddy (actually includes buddy and bitmap)
   *  - preallocation descriptors (PAs)
   *
   * there are two types of preallocations:
   *  - inode
   *    assigned to specific inode and can be used for this inode only.
   *    it describes part of inode's space preallocated to specific
   *    physical blocks. any block from that preallocation can be used
   *    independently. the descriptor just tracks number of blocks left
   *    unused. so, before taking some block from descriptor, one must
   *    make sure corresponding logical block isn't allocated yet. this
   *    also means that freeing any block within descriptor's range
   *    must discard all preallocated blocks.
   *  - locality group
   *    assigned to specific locality group which does not translate to
   *    permanent set of inodes: inode can join and leave group. space
   *    from this type of preallocation can be used for any inode. thus
   *    it's consumed from the beginning to the end.
   *
   * relation between them can be expressed as:
   *    in-core buddy = on-disk bitmap + preallocation descriptors
   *
   * this means blocks mballoc considers used are:
   *  - allocated blocks (persistent)
   *  - preallocated blocks (non-persistent)
   *
   * consistency in mballoc world means that at any time a block is either
   * free or used in ALL structures. notice: "any time" should not be read
   * literally -- time is discrete and delimited by locks.
   *
   *  to keep it simple, we don't use block numbers, instead we count number of
   *  blocks: how many blocks marked used/free in on-disk bitmap, buddy and PA.
   *
   * all operations can be expressed as:
   *  - init buddy:			buddy = on-disk + PAs
   *  - new PA:				buddy += N; PA = N
   *  - use inode PA:			on-disk += N; PA -= N
   *  - discard inode PA			buddy -= on-disk - PA; PA = 0
   *  - use locality group PA		on-disk += N; PA -= N
   *  - discard locality group PA		buddy -= PA; PA = 0
   *  note: 'buddy -= on-disk - PA' is used to show that on-disk bitmap
   *        is used in real operation because we can't know actual used
   *        bits from PA, only from on-disk bitmap
   *
   * if we follow this strict logic, then all operations above should be atomic.
   * given some of them can block, we'd have to use something like semaphores
   * killing performance on high-end SMP hardware. let's try to relax it using
   * the following knowledge:
   *  1) if buddy is referenced, it's already initialized
   *  2) while block is used in buddy and the buddy is referenced,
   *     nobody can re-allocate that block
   *  3) we work on bitmaps and '+' actually means 'set bits'. if on-disk has
   *     bit set and PA claims same block, it's OK. IOW, one can set bit in
   *     on-disk bitmap if buddy has same bit set or/and PA covers corresponding
   *     block
   *
   * so, now we're building a concurrency table:
   *  - init buddy vs.
   *    - new PA
   *      blocks for PA are allocated in the buddy, buddy must be referenced
   *      until PA is linked to allocation group to avoid concurrent buddy init
   *    - use inode PA
   *      we need to make sure that either on-disk bitmap or PA has uptodate data
   *      given (3) we care that PA-=N operation doesn't interfere with init
   *    - discard inode PA
   *      the simplest way would be to have buddy initialized by the discard
   *    - use locality group PA
   *      again PA-=N must be serialized with init
   *    - discard locality group PA
   *      the simplest way would be to have buddy initialized by the discard
   *  - new PA vs.
   *    - use inode PA
   *      i_data_sem serializes them
   *    - discard inode PA
   *      discard process must wait until PA isn't used by another process
   *    - use locality group PA
   *      some mutex should serialize them
   *    - discard locality group PA
   *      discard process must wait until PA isn't used by another process
   *  - use inode PA
   *    - use inode PA
   *      i_data_sem or another mutex should serialize them
   *    - discard inode PA
   *      discard process must wait until PA isn't used by another process
   *    - use locality group PA
   *      nothing wrong here -- they're different PAs covering different blocks
   *    - discard locality group PA
   *      discard process must wait until PA isn't used by another process
   *
   * now we're ready to draw a few conclusions:
   *  - PA is referenced and while it is no discard is possible
   *  - PA is referenced until block isn't marked in on-disk bitmap
   *  - PA changes only after on-disk bitmap
   *  - discard must not compete with init. either init is done before
   *    any discard or they're serialized somehow
   *  - buddy init as sum of on-disk bitmap and PAs is done atomically
   *
   * a special case when we've used PA to emptiness. no need to modify buddy
   * in this case, but we should care about concurrent init
   *
   */
  
   /*
   * Logic in few words:
   *
   *  - allocation:
   *    load group
   *    find blocks
   *    mark bits in on-disk bitmap
   *    release group
   *
   *  - use preallocation:
   *    find proper PA (per-inode or group)
   *    load group
   *    mark bits in on-disk bitmap
   *    release group
   *    release PA
   *
   *  - free:
   *    load group
   *    mark bits in on-disk bitmap
   *    release group
   *
   *  - discard preallocations in group:
   *    mark PAs deleted
   *    move them onto local list
   *    load on-disk bitmap
   *    load group
   *    remove PA from object (inode or locality group)
   *    mark free blocks in-core
   *
   *  - discard inode's preallocations:
   */
  
  /*
   * Locking rules
   *
   * Locks:
   *  - bitlock on a group	(group)
   *  - object (inode/locality)	(object)
   *  - per-pa lock		(pa)
   *
   * Paths:
   *  - new pa
   *    object
   *    group
   *
   *  - find and use pa:
   *    pa
   *
   *  - release consumed pa:
   *    pa
   *    group
   *    object
   *
   *  - generate in-core bitmap:
   *    group
   *        pa
   *
   *  - discard all for given object (inode, locality group):
   *    object
   *        pa
   *    group
   *
   *  - discard all for given group:
   *    group
   *        pa
   *    group
   *        object
   *
   */
c3a326a65   Aneesh Kumar K.V   ext4: cleanup mba...
336
337
338
339
340
  /*
   * Slab caches (struct kmem_cache) private to this file.
   * NOTE(review): judging by the names these back prealloc spaces,
   * allocation contexts and free extents -- confirm where they are created.
   */
  static struct kmem_cache *ext4_pspace_cachep;
  static struct kmem_cache *ext4_ac_cachep;
  static struct kmem_cache *ext4_free_ext_cachep;
  static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
  					ext4_group_t group);
7a2fcbf7f   Aneesh Kumar K.V   ext4: don't use b...
341
342
  static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
  						ext4_group_t group);
c3a326a65   Aneesh Kumar K.V   ext4: cleanup mba...
343
  static void release_blocks_on_commit(journal_t *journal, transaction_t *txn);
ffad0a44b   Aneesh Kumar K.V   ext4: ext4_find_n...
344
345
  static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
  {
c9de560de   Alex Tomas   ext4: Add multi b...
346
  #if BITS_PER_LONG == 64
ffad0a44b   Aneesh Kumar K.V   ext4: ext4_find_n...
347
348
  	*bit += ((unsigned long) addr & 7UL) << 3;
  	addr = (void *) ((unsigned long) addr & ~7UL);
c9de560de   Alex Tomas   ext4: Add multi b...
349
  #elif BITS_PER_LONG == 32
ffad0a44b   Aneesh Kumar K.V   ext4: ext4_find_n...
350
351
  	*bit += ((unsigned long) addr & 3UL) << 3;
  	addr = (void *) ((unsigned long) addr & ~3UL);
c9de560de   Alex Tomas   ext4: Add multi b...
352
353
354
  #else
  #error "how many bits you are?!"
  #endif
ffad0a44b   Aneesh Kumar K.V   ext4: ext4_find_n...
355
356
  	return addr;
  }
c9de560de   Alex Tomas   ext4: Add multi b...
357
358
359
360
361
362
363
  
  /*
   * Test bit @bit of the bitmap at @addr and return ext4_test_bit()'s result.
   *
   * ext4_test_bit on architectures like powerpc needs an unsigned long
   * aligned address, so the address/bit pair is realigned first.
   */
  static inline int mb_test_bit(int bit, void *addr)
  {
  	void *aligned = mb_correct_addr_and_bit(&bit, addr);

  	return ext4_test_bit(bit, aligned);
  }
  
  /* Set bit @bit of the bitmap at @addr, realigning the address/bit pair first. */
  static inline void mb_set_bit(int bit, void *addr)
  {
  	void *aligned = mb_correct_addr_and_bit(&bit, addr);

  	ext4_set_bit(bit, aligned);
  }
c9de560de   Alex Tomas   ext4: Add multi b...
373
374
  /* Clear bit @bit of the bitmap at @addr, realigning the address/bit pair first. */
  static inline void mb_clear_bit(int bit, void *addr)
  {
  	void *aligned = mb_correct_addr_and_bit(&bit, addr);

  	ext4_clear_bit(bit, aligned);
  }
ffad0a44b   Aneesh Kumar K.V   ext4: ext4_find_n...
378
379
  /*
   * Find the next zero bit in the bitmap at @addr, searching from bit @start
   * up to (but not including) bit @max.
   *
   * The address is realigned first, so the search runs in a coordinate
   * system shifted by the alignment correction; the result is shifted back
   * and clamped so that the return value never exceeds @max.
   */
  static inline int mb_find_next_zero_bit(void *addr, int max, int start)
  {
  	int shift = 0;
  	int found;
  	void *aligned = mb_correct_addr_and_bit(&shift, addr);

  	found = ext4_find_next_zero_bit(aligned, max + shift,
  					start + shift) - shift;

  	return found > max ? max : found;
  }
  
  /*
   * Find the next set bit in the bitmap at @addr, searching from bit @start
   * up to (but not including) bit @max.
   *
   * The address is realigned first, so the search runs in a coordinate
   * system shifted by the alignment correction; the result is shifted back
   * and clamped so that the return value never exceeds @max.
   */
  static inline int mb_find_next_bit(void *addr, int max, int start)
  {
  	int shift = 0;
  	int found;
  	void *aligned = mb_correct_addr_and_bit(&shift, addr);

  	found = ext4_find_next_bit(aligned, max + shift,
  				   start + shift) - shift;

  	return found > max ? max : found;
  }
c9de560de   Alex Tomas   ext4: Add multi b...
401
402
403
  static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
  {
  	char *bb;
c9de560de   Alex Tomas   ext4: Add multi b...
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
  	BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b));
  	BUG_ON(max == NULL);
  
  	if (order > e4b->bd_blkbits + 1) {
  		*max = 0;
  		return NULL;
  	}
  
  	/* at order 0 we see each particular block */
  	*max = 1 << (e4b->bd_blkbits + 3);
  	if (order == 0)
  		return EXT4_MB_BITMAP(e4b);
  
  	bb = EXT4_MB_BUDDY(e4b) + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order];
  	*max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order];
  
  	return bb;
  }
  
  #ifdef DOUBLE_CHECK
  static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
  			   int first, int count)
  {
  	int i;
  	struct super_block *sb = e4b->bd_sb;
  
  	if (unlikely(e4b->bd_info->bb_bitmap == NULL))
  		return;
bc8e67409   Vincent Minet   ext4: Fix spinloc...
432
  	assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
c9de560de   Alex Tomas   ext4: Add multi b...
433
434
435
  	for (i = 0; i < count; i++) {
  		if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) {
  			ext4_fsblk_t blocknr;
5661bd686   Akinobu Mita   ext4: cleanup to ...
436
437
  
  			blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
c9de560de   Alex Tomas   ext4: Add multi b...
438
  			blocknr += first + i;
5d1b1b3f4   Aneesh Kumar K.V   ext4: fix BUG whe...
439
  			ext4_grp_locked_error(sb, e4b->bd_group,
e29136f80   Theodore Ts'o   ext4: Enhance ext...
440
441
442
443
444
  					      inode ? inode->i_ino : 0,
  					      blocknr,
  					      "freeing block already freed "
  					      "(bit %u)",
  					      first + i);
c9de560de   Alex Tomas   ext4: Add multi b...
445
446
447
448
449
450
451
452
453
454
455
  		}
  		mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
  	}
  }
  
  /*
   * DOUBLE_CHECK helper: mirror the allocation of @count blocks starting at
   * group-relative bit @first in the shadow bitmap bd_info->bb_bitmap.
   *
   * BUGs if any of the bits is already set. Does nothing when no shadow
   * bitmap is attached. Asserts that the group lock is held.
   */
  static void mb_mark_used_double(struct ext4_buddy *e4b, int first, int count)
  {
  	int done = 0;

  	if (unlikely(!e4b->bd_info->bb_bitmap))
  		return;

  	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));

  	while (done < count) {
  		int bit = first + done;

  		BUG_ON(mb_test_bit(bit, e4b->bd_info->bb_bitmap));
  		mb_set_bit(bit, e4b->bd_info->bb_bitmap);
  		done++;
  	}
  }
  
  static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
  {
  	if (memcmp(e4b->bd_info->bb_bitmap, bitmap, e4b->bd_sb->s_blocksize)) {
  		unsigned char *b1, *b2;
  		int i;
  		b1 = (unsigned char *) e4b->bd_info->bb_bitmap;
  		b2 = (unsigned char *) bitmap;
  		for (i = 0; i < e4b->bd_sb->s_blocksize; i++) {
  			if (b1[i] != b2[i]) {
a9df9a491   Theodore Ts'o   ext4: Make ext4_g...
472
  				printk(KERN_ERR "corruption in group %u "
4776004f5   Theodore Ts'o   ext4: Add printk ...
473
474
475
476
  				       "at byte %u(%u): %x in copy != %x "
  				       "on disk/prealloc
  ",
  				       e4b->bd_group, i, i * 8, b1[i], b2[i]);
c9de560de   Alex Tomas   ext4: Add multi b...
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
  				BUG();
  			}
  		}
  	}
  }
  
  #else
  /* No-op stand-in used when DOUBLE_CHECK is not defined. */
  static inline void mb_free_blocks_double(struct inode *inode,
  				struct ext4_buddy *e4b, int first, int count)
  {
  }
  /* No-op stand-in used when DOUBLE_CHECK is not defined. */
  static inline void mb_mark_used_double(struct ext4_buddy *e4b,
  						int first, int count)
  {
  }
  /* No-op stand-in used when DOUBLE_CHECK is not defined. */
  static inline void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
  {
  }
  #endif
  
  #ifdef AGGRESSIVE_CHECK
  
  /*
   * Assertion used by the AGGRESSIVE_CHECK buddy checker.
   *
   * When @assert evaluates to false, logs the stringified expression
   * together with the identifiers `function`, `file` and `line`, which must
   * be in scope at the expansion site, and then calls BUG().
   *
   * The format string must stay on a single source line and end in "\n":
   * a raw line break inside the literal terminates the macro definition
   * early and leaves an unterminated string.
   */
  #define MB_CHECK_ASSERT(assert)						\
  do {									\
  	if (!(assert)) {						\
  		printk(KERN_EMERG					\
  			"Assertion failure in %s() at %s:%d: \"%s\"\n",	\
  			function, file, line, # assert);		\
  		BUG();							\
  	}								\
  } while (0)
  
  /*
   * Consistency checker for the buddy data of the group loaded in @e4b
   * (built only with AGGRESSIVE_CHECK). @file, @function and @line identify
   * the caller and are picked up by MB_CHECK_ASSERT for its message.
   *
   * Only every 100th call performs the checks; the others return at once.
   * The checks are:
   *  1. for each order from bd_blkbits + 1 down to 2, every bit is
   *     consistent with the two bits below it at order - 1, a clear bit
   *     covers only clear bits in the block bitmap, and the number of clear
   *     bits equals bb_counters[order];
   *  2. in the block bitmap, no clear bit lies before bb_first_free, every
   *     set bit is also set at each order below bd_blkbits + 1, and the
   *     number of runs of clear bits equals bb_fragments;
   *  3. every block of every preallocation on the group's bb_prealloc_list
   *     belongs to this group and is set in the block bitmap.
   *
   * Always returns 0; a failed check ends in BUG() via MB_CHECK_ASSERT.
   *
   * The expressions passed to MB_CHECK_ASSERT are stringified into the
   * failure message, so the locals they name are part of the output.
   */
  static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
  				const char *function, int line)
  {
  	static int mb_check_counter;
  	struct super_block *sb = e4b->bd_sb;
  	struct ext4_group_info *grp;
  	struct list_head *cur;
  	void *buddy, *buddy2;
  	int max, max2;
  	int i, j, k;
  	int order;
  	int count;
  	int fragments = 0;
  	int fstart = -1;

  	/* run the full walk on every 100th call only */
  	if (mb_check_counter++ % 100 != 0)
  		return 0;

  	for (order = e4b->bd_blkbits + 1; order > 1; order--) {
  		buddy = mb_find_buddy(e4b, order, &max);
  		MB_CHECK_ASSERT(buddy);
  		buddy2 = mb_find_buddy(e4b, order - 1, &max2);
  		MB_CHECK_ASSERT(buddy2);
  		MB_CHECK_ASSERT(buddy != buddy2);
  		MB_CHECK_ASSERT(max * 2 == max2);

  		count = 0;
  		for (i = 0; i < max; i++) {
  			if (!mb_test_bit(i, buddy)) {
  				/*
  				 * bit clear at this order: both bits below
  				 * it in buddy2 must be set ...
  				 */
  				MB_CHECK_ASSERT(mb_test_bit(i << 1, buddy2));
  				MB_CHECK_ASSERT(mb_test_bit((i << 1) + 1, buddy2));

  				/* ... and every covered block bit is clear */
  				for (j = 0; j < (1 << order); j++) {
  					k = (i * (1 << order)) + j;
  					MB_CHECK_ASSERT(
  						!mb_test_bit(k, EXT4_MB_BITMAP(e4b)));
  				}
  				count++;
  				continue;
  			}

  			/* bit set: at least one bit below it must be set too */
  			if (!mb_test_bit(i << 1, buddy2)) {
  				MB_CHECK_ASSERT(
  					mb_test_bit((i<<1)+1, buddy2));
  			} else if (!mb_test_bit((i << 1) + 1, buddy2)) {
  				MB_CHECK_ASSERT(
  					mb_test_bit(i << 1, buddy2));
  			}
  		}
  		MB_CHECK_ASSERT(e4b->bd_info->bb_counters[order] == count);
  	}

  	buddy = mb_find_buddy(e4b, 0, &max);
  	for (i = 0; i < max; i++) {
  		if (mb_test_bit(i, buddy)) {
  			/* a set bit ends the current run of clear bits */
  			fstart = -1;
  			/* check used bits only */
  			for (j = 0; j < e4b->bd_blkbits + 1; j++) {
  				buddy2 = mb_find_buddy(e4b, j, &max2);
  				k = i >> j;
  				MB_CHECK_ASSERT(k < max2);
  				MB_CHECK_ASSERT(mb_test_bit(k, buddy2));
  			}
  			continue;
  		}

  		MB_CHECK_ASSERT(i >= e4b->bd_info->bb_first_free);
  		if (fstart == -1) {
  			/* first clear bit of a new run */
  			fragments++;
  			fstart = i;
  		}
  	}
  	MB_CHECK_ASSERT(!EXT4_MB_GRP_NEED_INIT(e4b->bd_info));
  	MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments);

  	grp = ext4_get_group_info(sb, e4b->bd_group);
  	buddy = mb_find_buddy(e4b, 0, &max);
  	list_for_each(cur, &grp->bb_prealloc_list) {
  		struct ext4_prealloc_space *pa =
  			list_entry(cur, struct ext4_prealloc_space,
  				   pa_group_list);
  		ext4_group_t groupnr;

  		ext4_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &k);
  		MB_CHECK_ASSERT(groupnr == e4b->bd_group);
  		for (i = 0; i < pa->pa_len; i++)
  			MB_CHECK_ASSERT(mb_test_bit(k + i, buddy));
  	}

  	return 0;
  }
  #undef MB_CHECK_ASSERT
  #define mb_check_buddy(e4b) __mb_check_buddy(e4b,	\
46e665e9d   Harvey Harrison   ext4: replace rem...
612
  					__FILE__, __func__, __LINE__)
c9de560de   Alex Tomas   ext4: Add multi b...
613
614
615
616
617
618
  #else
  #define mb_check_buddy(e4b)
  #endif
  
  /*
   * Record the free run of @len blocks starting at group-relative block
   * @first in the buddy data at @buddy and in @grp->bb_counters.
   *
   * The run is cut into power-of-two chunks from left to right. Each chunk
   * is as large as both the alignment of its start and the remaining length
   * allow. For a chunk of order N, bb_counters[N] is incremented and, when
   * N > 0, the chunk's bit is cleared in the order-N buddy bitmap located
   * at @buddy + s_mb_offsets[N]. Order-0 chunks only bump the counter.
   *
   * BUGs if @len exceeds the number of blocks per group.
   */
  static void ext4_mb_mark_free_simple(struct super_block *sb,
  				void *buddy, ext4_grpblk_t first, ext4_grpblk_t len,
  					struct ext4_group_info *grp)
  {
  	struct ext4_sb_info *sbi = EXT4_SB(sb);
  	ext4_grpblk_t min;
  	ext4_grpblk_t max;
  	ext4_grpblk_t chunk;
  	/*
  	 * Must hold 2 << s_blocksize_bits without truncation. With 64K
  	 * blocks (s_blocksize_bits == 16) that is 0x20000, which an unsigned
  	 * short would truncate to 0; ffs(first | border) - 1 would then be
  	 * -1 for first == 0 and the shift below would be undefined.
  	 */
  	unsigned int border;

  	BUG_ON(len > EXT4_BLOCKS_PER_GROUP(sb));

  	/* caps the chunk order at s_blocksize_bits + 1 */
  	border = 2 << sb->s_blocksize_bits;

  	while (len > 0) {
  		/* find how many blocks can be covered since this position */
  		max = ffs(first | border) - 1;

  		/* find how many blocks of power 2 we need to mark */
  		min = fls(len) - 1;

  		if (max < min)
  			min = max;
  		chunk = 1 << min;

  		/* mark multiblock chunks only */
  		grp->bb_counters[min]++;
  		if (min > 0)
  			mb_clear_bit(first >> min,
  				     buddy + sbi->s_mb_offsets[min]);

  		len -= chunk;
  		first += chunk;
  	}
  }
8a57d9d61   Curt Wohlgemuth   ext4: check for a...
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
  /*
   * Cache in grp->bb_largest_free_order the highest order, from
   * s_blocksize_bits + 1 down to 0, whose bb_counters[] entry is non-zero.
   * The field is left at -1 when every counter in that range is zero.
   */
  static void
  mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp)
  {
  	int order = sb->s_blocksize_bits + 1;

  	grp->bb_largest_free_order = -1; /* uninit */

  	while (order >= 0) {
  		if (grp->bb_counters[order] > 0) {
  			grp->bb_largest_free_order = order;
  			return;
  		}
  		order--;
  	}
  }
089ceecc1   Eric Sandeen   ext4: mark severa...
672
673
  /*
   * Build the buddy data for @group from @bitmap, which is the aggregation
   * of the on-disk bitmap and the preallocations.
   *
   * Walks every run of clear bits in @bitmap. Runs longer than one block
   * are handed to ext4_mb_mark_free_simple(); single blocks only bump
   * bb_counters[0]. Along the way it sets bb_first_free and bb_fragments.
   * If the number of free blocks found differs from bb_free, the mismatch
   * is reported and bb_free is overwritten with the counted value.
   * Finally it refreshes the cached largest free order, clears the
   * group's NEED_INIT bit and accounts the elapsed cycles in the
   * superblock statistics under s_bal_lock.
   */
  static noinline_for_stack
  void ext4_mb_generate_buddy(struct super_block *sb,
  				void *buddy, void *bitmap, ext4_group_t group)
  {
  	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
  	ext4_grpblk_t max = EXT4_BLOCKS_PER_GROUP(sb);
  	ext4_grpblk_t pos;
  	ext4_grpblk_t run_start;
  	ext4_grpblk_t run_len;
  	unsigned nr_free = 0;
  	unsigned nr_frags = 0;
  	unsigned long long period = get_cycles();
  	struct ext4_sb_info *sbi;

  	pos = mb_find_next_zero_bit(bitmap, max, 0);
  	grp->bb_first_free = pos;

  	while (pos < max) {
  		/* [run_start, pos) is the next run of free blocks */
  		run_start = pos;
  		pos = mb_find_next_bit(bitmap, max, run_start);
  		run_len = pos - run_start;

  		nr_frags++;
  		nr_free += run_len;
  		if (run_len > 1)
  			ext4_mb_mark_free_simple(sb, buddy, run_start,
  						 run_len, grp);
  		else
  			grp->bb_counters[0]++;

  		if (pos >= max)
  			break;
  		pos = mb_find_next_zero_bit(bitmap, max, pos);
  	}
  	grp->bb_fragments = nr_frags;

  	if (nr_free != grp->bb_free) {
  		ext4_grp_locked_error(sb, group, 0, 0,
  				      "%u blocks in bitmap, %u in gd",
  				      nr_free, grp->bb_free);
  		/*
  		 * If we intend to continue, we consider the group descriptor
  		 * corrupt and update bb_free using the bitmap value
  		 */
  		grp->bb_free = nr_free;
  	}
  	mb_set_largest_free_order(sb, grp);

  	clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &grp->bb_state);

  	period = get_cycles() - period;
  	sbi = EXT4_SB(sb);
  	spin_lock(&sbi->s_bal_lock);
  	sbi->s_mb_buddies_generated++;
  	sbi->s_mb_generation_time += period;
  	spin_unlock(&sbi->s_bal_lock);
  }
  
  /* The buddy information is attached to the buddy cache inode
   * for convenience. The information regarding each group
   * is loaded via ext4_mb_load_buddy. The information involves
   * block bitmap and buddy information. The information is
   * stored in the inode as
   *
   * {                        page                        }
c3a326a65   Aneesh Kumar K.V   ext4: cleanup mba...
732
   * [ group 0 bitmap][ group 0 buddy] [group 1][ group 1]...
c9de560de   Alex Tomas   ext4: Add multi b...
733
734
735
736
737
738
739
   *
   *
   * one block each for bitmap and buddy information.
   * So for each group we take up 2 blocks. A page can
   * contain blocks_per_page (PAGE_CACHE_SIZE / blocksize)  blocks.
   * So it can have information regarding groups_per_page which
   * is blocks_per_page/2
8a57d9d61   Curt Wohlgemuth   ext4: check for a...
740
741
742
   *
   * Locking note:  This routine takes the block group lock of all groups
   * for this page; do not hold this lock when calling this routine!
c9de560de   Alex Tomas   ext4: Add multi b...
743
744
745
746
   */
  
  static int ext4_mb_init_cache(struct page *page, char *incore)
  {
8df9675f8   Theodore Ts'o   ext4: Avoid races...
747
  	ext4_group_t ngroups;
c9de560de   Alex Tomas   ext4: Add multi b...
748
749
750
751
752
753
754
755
756
757
758
759
760
  	int blocksize;
  	int blocks_per_page;
  	int groups_per_page;
  	int err = 0;
  	int i;
  	ext4_group_t first_group;
  	int first_block;
  	struct super_block *sb;
  	struct buffer_head *bhs;
  	struct buffer_head **bh;
  	struct inode *inode;
  	char *data;
  	char *bitmap;
6ba495e92   Theodore Ts'o   ext4: Add configu...
761
762
  	mb_debug(1, "init page %lu
  ", page->index);
c9de560de   Alex Tomas   ext4: Add multi b...
763
764
765
  
  	inode = page->mapping->host;
  	sb = inode->i_sb;
8df9675f8   Theodore Ts'o   ext4: Avoid races...
766
  	ngroups = ext4_get_groups_count(sb);
c9de560de   Alex Tomas   ext4: Add multi b...
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
  	blocksize = 1 << inode->i_blkbits;
  	blocks_per_page = PAGE_CACHE_SIZE / blocksize;
  
  	groups_per_page = blocks_per_page >> 1;
  	if (groups_per_page == 0)
  		groups_per_page = 1;
  
  	/* allocate buffer_heads to read bitmaps */
  	if (groups_per_page > 1) {
  		err = -ENOMEM;
  		i = sizeof(struct buffer_head *) * groups_per_page;
  		bh = kzalloc(i, GFP_NOFS);
  		if (bh == NULL)
  			goto out;
  	} else
  		bh = &bhs;
  
  	first_group = page->index * blocks_per_page / 2;
  
  	/* read all groups the page covers into the cache */
  	for (i = 0; i < groups_per_page; i++) {
  		struct ext4_group_desc *desc;
8df9675f8   Theodore Ts'o   ext4: Avoid races...
789
  		if (first_group + i >= ngroups)
c9de560de   Alex Tomas   ext4: Add multi b...
790
791
792
793
794
795
796
797
798
799
800
  			break;
  
  		err = -EIO;
  		desc = ext4_get_group_desc(sb, first_group + i, NULL);
  		if (desc == NULL)
  			goto out;
  
  		err = -ENOMEM;
  		bh[i] = sb_getblk(sb, ext4_block_bitmap(sb, desc));
  		if (bh[i] == NULL)
  			goto out;
2ccb5fb9f   Aneesh Kumar K.V   ext4: Use new buf...
801
  		if (bitmap_uptodate(bh[i]))
c9de560de   Alex Tomas   ext4: Add multi b...
802
  			continue;
c806e68f5   Frederic Bohe   ext4: fix initial...
803
  		lock_buffer(bh[i]);
2ccb5fb9f   Aneesh Kumar K.V   ext4: Use new buf...
804
805
806
807
  		if (bitmap_uptodate(bh[i])) {
  			unlock_buffer(bh[i]);
  			continue;
  		}
955ce5f5b   Aneesh Kumar K.V   ext4: Convert ext...
808
  		ext4_lock_group(sb, first_group + i);
c9de560de   Alex Tomas   ext4: Add multi b...
809
810
811
  		if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
  			ext4_init_block_bitmap(sb, bh[i],
  						first_group + i, desc);
2ccb5fb9f   Aneesh Kumar K.V   ext4: Use new buf...
812
  			set_bitmap_uptodate(bh[i]);
c9de560de   Alex Tomas   ext4: Add multi b...
813
  			set_buffer_uptodate(bh[i]);
955ce5f5b   Aneesh Kumar K.V   ext4: Convert ext...
814
  			ext4_unlock_group(sb, first_group + i);
3300beda5   Aneesh Kumar K.V   ext4: code cleanup
815
  			unlock_buffer(bh[i]);
c9de560de   Alex Tomas   ext4: Add multi b...
816
817
  			continue;
  		}
955ce5f5b   Aneesh Kumar K.V   ext4: Convert ext...
818
  		ext4_unlock_group(sb, first_group + i);
2ccb5fb9f   Aneesh Kumar K.V   ext4: Use new buf...
819
820
821
822
823
824
825
826
827
  		if (buffer_uptodate(bh[i])) {
  			/*
  			 * if not uninit if bh is uptodate,
  			 * bitmap is also uptodate
  			 */
  			set_bitmap_uptodate(bh[i]);
  			unlock_buffer(bh[i]);
  			continue;
  		}
c9de560de   Alex Tomas   ext4: Add multi b...
828
  		get_bh(bh[i]);
2ccb5fb9f   Aneesh Kumar K.V   ext4: Use new buf...
829
830
831
832
833
834
835
  		/*
  		 * submit the buffer_head for read. We can
  		 * safely mark the bitmap as uptodate now.
  		 * We do it here so the bitmap uptodate bit
  		 * get set with buffer lock held.
  		 */
  		set_bitmap_uptodate(bh[i]);
c9de560de   Alex Tomas   ext4: Add multi b...
836
837
  		bh[i]->b_end_io = end_buffer_read_sync;
  		submit_bh(READ, bh[i]);
6ba495e92   Theodore Ts'o   ext4: Add configu...
838
839
  		mb_debug(1, "read bitmap for group %u
  ", first_group + i);
c9de560de   Alex Tomas   ext4: Add multi b...
840
841
842
843
844
845
846
847
848
849
  	}
  
  	/* wait for I/O completion */
  	for (i = 0; i < groups_per_page && bh[i]; i++)
  		wait_on_buffer(bh[i]);
  
  	err = -EIO;
  	for (i = 0; i < groups_per_page && bh[i]; i++)
  		if (!buffer_uptodate(bh[i]))
  			goto out;
31b481dc7   Mingming Cao   ext4: Fix ext4_mb...
850
  	err = 0;
c9de560de   Alex Tomas   ext4: Add multi b...
851
  	first_block = page->index * blocks_per_page;
29eaf0249   Aneesh Kumar K.V   ext4: Init the co...
852
853
  	/* init the page  */
  	memset(page_address(page), 0xff, PAGE_CACHE_SIZE);
c9de560de   Alex Tomas   ext4: Add multi b...
854
855
856
857
858
  	for (i = 0; i < blocks_per_page; i++) {
  		int group;
  		struct ext4_group_info *grinfo;
  
  		group = (first_block + i) >> 1;
8df9675f8   Theodore Ts'o   ext4: Avoid races...
859
  		if (group >= ngroups)
c9de560de   Alex Tomas   ext4: Add multi b...
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
  			break;
  
  		/*
  		 * data carry information regarding this
  		 * particular group in the format specified
  		 * above
  		 *
  		 */
  		data = page_address(page) + (i * blocksize);
  		bitmap = bh[group - first_group]->b_data;
  
  		/*
  		 * We place the buddy block and bitmap block
  		 * close together
  		 */
  		if ((first_block + i) & 1) {
  			/* this is block of buddy */
  			BUG_ON(incore == NULL);
6ba495e92   Theodore Ts'o   ext4: Add configu...
878
879
  			mb_debug(1, "put buddy for group %u in page %lu/%x
  ",
c9de560de   Alex Tomas   ext4: Add multi b...
880
  				group, page->index, i * blocksize);
f307333e1   Theodore Ts'o   ext4: Add new tra...
881
  			trace_ext4_mb_buddy_bitmap_load(sb, group);
c9de560de   Alex Tomas   ext4: Add multi b...
882
883
884
  			grinfo = ext4_get_group_info(sb, group);
  			grinfo->bb_fragments = 0;
  			memset(grinfo->bb_counters, 0,
1927805e6   Eric Sandeen   ext4: use variabl...
885
886
  			       sizeof(*grinfo->bb_counters) *
  				(sb->s_blocksize_bits+2));
c9de560de   Alex Tomas   ext4: Add multi b...
887
888
889
  			/*
  			 * incore got set to the group block bitmap below
  			 */
7a2fcbf7f   Aneesh Kumar K.V   ext4: don't use b...
890
  			ext4_lock_group(sb, group);
c9de560de   Alex Tomas   ext4: Add multi b...
891
  			ext4_mb_generate_buddy(sb, data, incore, group);
7a2fcbf7f   Aneesh Kumar K.V   ext4: don't use b...
892
  			ext4_unlock_group(sb, group);
c9de560de   Alex Tomas   ext4: Add multi b...
893
894
895
896
  			incore = NULL;
  		} else {
  			/* this is block of bitmap */
  			BUG_ON(incore != NULL);
6ba495e92   Theodore Ts'o   ext4: Add configu...
897
898
  			mb_debug(1, "put bitmap for group %u in page %lu/%x
  ",
c9de560de   Alex Tomas   ext4: Add multi b...
899
  				group, page->index, i * blocksize);
f307333e1   Theodore Ts'o   ext4: Add new tra...
900
  			trace_ext4_mb_bitmap_load(sb, group);
c9de560de   Alex Tomas   ext4: Add multi b...
901
902
903
904
905
906
907
  
  			/* see comments in ext4_mb_put_pa() */
  			ext4_lock_group(sb, group);
  			memcpy(data, bitmap, blocksize);
  
  			/* mark all preallocated blks used in in-core bitmap */
  			ext4_mb_generate_from_pa(sb, data, group);
7a2fcbf7f   Aneesh Kumar K.V   ext4: don't use b...
908
  			ext4_mb_generate_from_freelist(sb, data, group);
c9de560de   Alex Tomas   ext4: Add multi b...
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
  			ext4_unlock_group(sb, group);
  
  			/* set incore so that the buddy information can be
  			 * generated using this
  			 */
  			incore = data;
  		}
  	}
  	SetPageUptodate(page);
  
  out:
  	if (bh) {
  		for (i = 0; i < groups_per_page && bh[i]; i++)
  			brelse(bh[i]);
  		if (bh != &bhs)
  			kfree(bh);
  	}
  	return err;
  }
8a57d9d61   Curt Wohlgemuth   ext4: check for a...
928
929
930
931
932
  /*
   * Locking note:  This routine calls ext4_mb_init_cache(), which takes the
   * block group lock of all groups for this page; do not hold the BG lock when
   * calling this routine!
   */
b6a758ec3   Aneesh Kumar K.V   ext4: move ext4_m...
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
  static noinline_for_stack
  int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
  {
  
  	int ret = 0;
  	void *bitmap;
  	int blocks_per_page;
  	int block, pnum, poff;
  	int num_grp_locked = 0;
  	struct ext4_group_info *this_grp;
  	struct ext4_sb_info *sbi = EXT4_SB(sb);
  	struct inode *inode = sbi->s_buddy_cache;
  	struct page *page = NULL, *bitmap_page = NULL;
  
  	mb_debug(1, "init group %u
  ", group);
  	blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
  	this_grp = ext4_get_group_info(sb, group);
  	/*
08c3a8133   Aneesh Kumar K.V   ext4: Clarify the...
952
953
954
955
956
  	 * This ensures that we don't reinit the buddy cache
  	 * page which map to the group from which we are already
  	 * allocating. If we are looking at the buddy cache we would
  	 * have taken a reference using ext4_mb_load_buddy and that
  	 * would have taken the alloc_sem lock.
b6a758ec3   Aneesh Kumar K.V   ext4: move ext4_m...
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
  	 */
  	num_grp_locked =  ext4_mb_get_buddy_cache_lock(sb, group);
  	if (!EXT4_MB_GRP_NEED_INIT(this_grp)) {
  		/*
  		 * somebody initialized the group
  		 * return without doing anything
  		 */
  		ret = 0;
  		goto err;
  	}
  	/*
  	 * the buddy cache inode stores the block bitmap
  	 * and buddy information in consecutive blocks.
  	 * So for each group we need two blocks.
  	 */
  	block = group * 2;
  	pnum = block / blocks_per_page;
  	poff = block % blocks_per_page;
  	page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
  	if (page) {
  		BUG_ON(page->mapping != inode->i_mapping);
  		ret = ext4_mb_init_cache(page, NULL);
  		if (ret) {
  			unlock_page(page);
  			goto err;
  		}
  		unlock_page(page);
  	}
  	if (page == NULL || !PageUptodate(page)) {
  		ret = -EIO;
  		goto err;
  	}
  	mark_page_accessed(page);
  	bitmap_page = page;
  	bitmap = page_address(page) + (poff * sb->s_blocksize);
  
  	/* init buddy cache */
  	block++;
  	pnum = block / blocks_per_page;
  	poff = block % blocks_per_page;
  	page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
  	if (page == bitmap_page) {
  		/*
  		 * If both the bitmap and buddy are in
  		 * the same page we don't need to force
  		 * init the buddy
  		 */
  		unlock_page(page);
  	} else if (page) {
  		BUG_ON(page->mapping != inode->i_mapping);
  		ret = ext4_mb_init_cache(page, bitmap);
  		if (ret) {
  			unlock_page(page);
  			goto err;
  		}
  		unlock_page(page);
  	}
  	if (page == NULL || !PageUptodate(page)) {
  		ret = -EIO;
  		goto err;
  	}
  	mark_page_accessed(page);
  err:
  	ext4_mb_put_buddy_cache_lock(sb, group, num_grp_locked);
  	if (bitmap_page)
  		page_cache_release(bitmap_page);
  	if (page)
  		page_cache_release(page);
  	return ret;
  }
8a57d9d61   Curt Wohlgemuth   ext4: check for a...
1027
1028
1029
1030
1031
  /*
   * Locking note:  This routine calls ext4_mb_init_cache(), which takes the
   * block group lock of all groups for this page; do not hold the BG lock when
   * calling this routine!
   */
4ddfef7b4   Eric Sandeen   ext4: reduce mbal...
1032
1033
1034
  static noinline_for_stack int
  ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
  					struct ext4_buddy *e4b)
c9de560de   Alex Tomas   ext4: Add multi b...
1035
  {
c9de560de   Alex Tomas   ext4: Add multi b...
1036
1037
1038
1039
1040
  	int blocks_per_page;
  	int block;
  	int pnum;
  	int poff;
  	struct page *page;
fdf6c7a76   Shen Feng   ext4: add error p...
1041
  	int ret;
920313a72   Aneesh Kumar K.V   ext4: Use EXT4_GR...
1042
1043
1044
  	struct ext4_group_info *grp;
  	struct ext4_sb_info *sbi = EXT4_SB(sb);
  	struct inode *inode = sbi->s_buddy_cache;
c9de560de   Alex Tomas   ext4: Add multi b...
1045

6ba495e92   Theodore Ts'o   ext4: Add configu...
1046
1047
  	mb_debug(1, "load group %u
  ", group);
c9de560de   Alex Tomas   ext4: Add multi b...
1048
1049
  
  	blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
920313a72   Aneesh Kumar K.V   ext4: Use EXT4_GR...
1050
  	grp = ext4_get_group_info(sb, group);
c9de560de   Alex Tomas   ext4: Add multi b...
1051
1052
1053
1054
1055
1056
1057
  
  	e4b->bd_blkbits = sb->s_blocksize_bits;
  	e4b->bd_info = ext4_get_group_info(sb, group);
  	e4b->bd_sb = sb;
  	e4b->bd_group = group;
  	e4b->bd_buddy_page = NULL;
  	e4b->bd_bitmap_page = NULL;
920313a72   Aneesh Kumar K.V   ext4: Use EXT4_GR...
1058
1059
1060
1061
1062
1063
1064
1065
  	e4b->alloc_semp = &grp->alloc_sem;
  
  	/* Take the read lock on the group alloc
  	 * sem. This would make sure a parallel
  	 * ext4_mb_init_group happening on other
  	 * groups mapped by the page is blocked
  	 * till we are done with allocation
  	 */
f41c07505   Aneesh Kumar K.V   ext4: check for n...
1066
  repeat_load_buddy:
920313a72   Aneesh Kumar K.V   ext4: Use EXT4_GR...
1067
  	down_read(e4b->alloc_semp);
c9de560de   Alex Tomas   ext4: Add multi b...
1068

f41c07505   Aneesh Kumar K.V   ext4: check for n...
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
  	if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
  		/* we need to check for group need init flag
  		 * with alloc_semp held so that we can be sure
  		 * that new blocks didn't get added to the group
  		 * when we are loading the buddy cache
  		 */
  		up_read(e4b->alloc_semp);
  		/*
  		 * we need full data about the group
  		 * to make a good selection
  		 */
  		ret = ext4_mb_init_group(sb, group);
  		if (ret)
  			return ret;
  		goto repeat_load_buddy;
  	}
c9de560de   Alex Tomas   ext4: Add multi b...
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
  	/*
  	 * the buddy cache inode stores the block bitmap
  	 * and buddy information in consecutive blocks.
  	 * So for each group we need two blocks.
  	 */
  	block = group * 2;
  	pnum = block / blocks_per_page;
  	poff = block % blocks_per_page;
  
  	/* we could use find_or_create_page(), but it locks page
  	 * what we'd like to avoid in fast path ... */
  	page = find_get_page(inode->i_mapping, pnum);
  	if (page == NULL || !PageUptodate(page)) {
  		if (page)
920313a72   Aneesh Kumar K.V   ext4: Use EXT4_GR...
1099
1100
1101
1102
1103
1104
1105
1106
  			/*
  			 * drop the page reference and try
  			 * to get the page with lock. If we
  			 * are not uptodate that implies
  			 * somebody just created the page but
  			 * is yet to initialize the same. So
  			 * wait for it to initialize.
  			 */
c9de560de   Alex Tomas   ext4: Add multi b...
1107
1108
1109
1110
1111
  			page_cache_release(page);
  		page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
  		if (page) {
  			BUG_ON(page->mapping != inode->i_mapping);
  			if (!PageUptodate(page)) {
fdf6c7a76   Shen Feng   ext4: add error p...
1112
1113
1114
1115
1116
  				ret = ext4_mb_init_cache(page, NULL);
  				if (ret) {
  					unlock_page(page);
  					goto err;
  				}
c9de560de   Alex Tomas   ext4: Add multi b...
1117
1118
1119
1120
1121
1122
  				mb_cmp_bitmaps(e4b, page_address(page) +
  					       (poff * sb->s_blocksize));
  			}
  			unlock_page(page);
  		}
  	}
fdf6c7a76   Shen Feng   ext4: add error p...
1123
1124
  	if (page == NULL || !PageUptodate(page)) {
  		ret = -EIO;
c9de560de   Alex Tomas   ext4: Add multi b...
1125
  		goto err;
fdf6c7a76   Shen Feng   ext4: add error p...
1126
  	}
c9de560de   Alex Tomas   ext4: Add multi b...
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
  	e4b->bd_bitmap_page = page;
  	e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
  	mark_page_accessed(page);
  
  	block++;
  	pnum = block / blocks_per_page;
  	poff = block % blocks_per_page;
  
  	page = find_get_page(inode->i_mapping, pnum);
  	if (page == NULL || !PageUptodate(page)) {
  		if (page)
  			page_cache_release(page);
  		page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
  		if (page) {
  			BUG_ON(page->mapping != inode->i_mapping);
fdf6c7a76   Shen Feng   ext4: add error p...
1142
1143
1144
1145
1146
1147
1148
  			if (!PageUptodate(page)) {
  				ret = ext4_mb_init_cache(page, e4b->bd_bitmap);
  				if (ret) {
  					unlock_page(page);
  					goto err;
  				}
  			}
c9de560de   Alex Tomas   ext4: Add multi b...
1149
1150
1151
  			unlock_page(page);
  		}
  	}
fdf6c7a76   Shen Feng   ext4: add error p...
1152
1153
  	if (page == NULL || !PageUptodate(page)) {
  		ret = -EIO;
c9de560de   Alex Tomas   ext4: Add multi b...
1154
  		goto err;
fdf6c7a76   Shen Feng   ext4: add error p...
1155
  	}
c9de560de   Alex Tomas   ext4: Add multi b...
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
  	e4b->bd_buddy_page = page;
  	e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize);
  	mark_page_accessed(page);
  
  	BUG_ON(e4b->bd_bitmap_page == NULL);
  	BUG_ON(e4b->bd_buddy_page == NULL);
  
  	return 0;
  
  err:
  	if (e4b->bd_bitmap_page)
  		page_cache_release(e4b->bd_bitmap_page);
  	if (e4b->bd_buddy_page)
  		page_cache_release(e4b->bd_buddy_page);
  	e4b->bd_buddy = NULL;
  	e4b->bd_bitmap = NULL;
920313a72   Aneesh Kumar K.V   ext4: Use EXT4_GR...
1172
1173
1174
  
  	/* Done with the buddy cache */
  	up_read(e4b->alloc_semp);
fdf6c7a76   Shen Feng   ext4: add error p...
1175
  	return ret;
c9de560de   Alex Tomas   ext4: Add multi b...
1176
  }
e39e07fdf   Jing Zhang   ext4: rename ext4...
1177
  static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
c9de560de   Alex Tomas   ext4: Add multi b...
1178
1179
1180
1181
1182
  {
  	if (e4b->bd_bitmap_page)
  		page_cache_release(e4b->bd_bitmap_page);
  	if (e4b->bd_buddy_page)
  		page_cache_release(e4b->bd_buddy_page);
920313a72   Aneesh Kumar K.V   ext4: Use EXT4_GR...
1183
  	/* Done with the buddy cache */
8556e8f3b   Aneesh Kumar K.V   ext4: Don't allow...
1184
1185
  	if (e4b->alloc_semp)
  		up_read(e4b->alloc_semp);
c9de560de   Alex Tomas   ext4: Add multi b...
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
  }
  
  
  /*
   * Find the order of the buddy that contains @block.
   *
   * Walks the per-order bitmaps laid out one after another in the buddy
   * block, starting with order 1, and returns the first order at which the
   * bit covering @block is clear.  Returns 0 when the bit is set at every
   * order from 1 to bd_blkbits + 1.
   */
  static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
  {
  	const int top_order = e4b->bd_blkbits + 1;
  	int order;
  	void *bb;

  	BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b));
  	BUG_ON(block >= (1 << (e4b->bd_blkbits + 3)));

  	bb = EXT4_MB_BUDDY(e4b);
  	for (order = 1; order <= top_order; order++) {
  		block = block >> 1;
  		if (!mb_test_bit(block, bb)) {
  			/* this block is part of buddy of order 'order' */
  			return order;
  		}
  		/*
  		 * Step to the bitmap of the next order.  There is no next
  		 * order after top_order, and computing the step there would
  		 * shift by a negative count, which is undefined in C.
  		 */
  		if (order < top_order)
  			bb += 1 << (e4b->bd_blkbits - order);
  	}
  	return 0;
  }
955ce5f5b   Aneesh Kumar K.V   ext4: Convert ext...
1209
  static void mb_clear_bits(void *bm, int cur, int len)
c9de560de   Alex Tomas   ext4: Add multi b...
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
  {
  	__u32 *addr;
  
  	len = cur + len;
  	while (cur < len) {
  		if ((cur & 31) == 0 && (len - cur) >= 32) {
  			/* fast path: clear whole word at once */
  			addr = bm + (cur >> 3);
  			*addr = 0;
  			cur += 32;
  			continue;
  		}
955ce5f5b   Aneesh Kumar K.V   ext4: Convert ext...
1222
  		mb_clear_bit(cur, bm);
c9de560de   Alex Tomas   ext4: Add multi b...
1223
1224
1225
  		cur++;
  	}
  }
955ce5f5b   Aneesh Kumar K.V   ext4: Convert ext...
1226
  static void mb_set_bits(void *bm, int cur, int len)
c9de560de   Alex Tomas   ext4: Add multi b...
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
  {
  	__u32 *addr;
  
  	len = cur + len;
  	while (cur < len) {
  		if ((cur & 31) == 0 && (len - cur) >= 32) {
  			/* fast path: set whole word at once */
  			addr = bm + (cur >> 3);
  			*addr = 0xffffffff;
  			cur += 32;
  			continue;
  		}
955ce5f5b   Aneesh Kumar K.V   ext4: Convert ext...
1239
  		mb_set_bit(cur, bm);
c9de560de   Alex Tomas   ext4: Add multi b...
1240
1241
1242
  		cur++;
  	}
  }
7e5a8cdd8   Shen Feng   ext4: fix error p...
1243
  static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
c9de560de   Alex Tomas   ext4: Add multi b...
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
  			  int first, int count)
  {
  	int block = 0;
  	int max = 0;
  	int order;
  	void *buddy;
  	void *buddy2;
  	struct super_block *sb = e4b->bd_sb;
  
  	BUG_ON(first + count > (sb->s_blocksize << 3));
bc8e67409   Vincent Minet   ext4: Fix spinloc...
1254
  	assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
c9de560de   Alex Tomas   ext4: Add multi b...
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
  	mb_check_buddy(e4b);
  	mb_free_blocks_double(inode, e4b, first, count);
  
  	e4b->bd_info->bb_free += count;
  	if (first < e4b->bd_info->bb_first_free)
  		e4b->bd_info->bb_first_free = first;
  
  	/* let's maintain fragments counter */
  	if (first != 0)
  		block = !mb_test_bit(first - 1, EXT4_MB_BITMAP(e4b));
  	if (first + count < EXT4_SB(sb)->s_mb_maxs[0])
  		max = !mb_test_bit(first + count, EXT4_MB_BITMAP(e4b));
  	if (block && max)
  		e4b->bd_info->bb_fragments--;
  	else if (!block && !max)
  		e4b->bd_info->bb_fragments++;
  
  	/* let's maintain buddy itself */
  	while (count-- > 0) {
  		block = first++;
  		order = 0;
  
  		if (!mb_test_bit(block, EXT4_MB_BITMAP(e4b))) {
  			ext4_fsblk_t blocknr;
5661bd686   Akinobu Mita   ext4: cleanup to ...
1279
1280
  
  			blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
c9de560de   Alex Tomas   ext4: Add multi b...
1281
  			blocknr += block;
5d1b1b3f4   Aneesh Kumar K.V   ext4: fix BUG whe...
1282
  			ext4_grp_locked_error(sb, e4b->bd_group,
e29136f80   Theodore Ts'o   ext4: Enhance ext...
1283
1284
1285
1286
  					      inode ? inode->i_ino : 0,
  					      blocknr,
  					      "freeing already freed block "
  					      "(bit %u)", block);
c9de560de   Alex Tomas   ext4: Add multi b...
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
  		}
  		mb_clear_bit(block, EXT4_MB_BITMAP(e4b));
  		e4b->bd_info->bb_counters[order]++;
  
  		/* start of the buddy */
  		buddy = mb_find_buddy(e4b, order, &max);
  
  		do {
  			block &= ~1UL;
  			if (mb_test_bit(block, buddy) ||
  					mb_test_bit(block + 1, buddy))
  				break;
  
  			/* both the buddies are free, try to coalesce them */
  			buddy2 = mb_find_buddy(e4b, order + 1, &max);
  
  			if (!buddy2)
  				break;
  
  			if (order > 0) {
  				/* for special purposes, we don't set
  				 * free bits in bitmap */
  				mb_set_bit(block, buddy);
  				mb_set_bit(block + 1, buddy);
  			}
  			e4b->bd_info->bb_counters[order]--;
  			e4b->bd_info->bb_counters[order]--;
  
  			block = block >> 1;
  			order++;
  			e4b->bd_info->bb_counters[order]++;
  
  			mb_clear_bit(block, buddy2);
  			buddy = buddy2;
  		} while (1);
  	}
8a57d9d61   Curt Wohlgemuth   ext4: check for a...
1323
  	mb_set_largest_free_order(sb, e4b->bd_info);
c9de560de   Alex Tomas   ext4: Add multi b...
1324
  	mb_check_buddy(e4b);
c9de560de   Alex Tomas   ext4: Add multi b...
1325
1326
1327
1328
1329
1330
1331
1332
1333
  }
  
  /*
   * Describe the free extent that covers @block (given in units of @order).
   *
   * If @block is in use at @order, @ex is zeroed and 0 is returned.
   * Otherwise @ex is filled with the group, start and length of the free
   * chunk beginning at the requested position, and the chunk is extended
   * with the free buddies that follow it until it is at least @needed
   * blocks long or cannot grow any further.  Returns the resulting length.
   * The group lock must be held (asserted below).
   */
  static int mb_find_extent(struct ext4_buddy *e4b, int order, int block,
  				int needed, struct ext4_free_extent *ex)
  {
  	const int wanted_start = block;
  	int skip;
  	int next;
  	int max;
  	void *buddy;

  	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
  	BUG_ON(ex == NULL);

  	buddy = mb_find_buddy(e4b, order, &max);
  	BUG_ON(buddy == NULL);
  	BUG_ON(block >= max);
  	if (mb_test_bit(block, buddy)) {
  		/* nothing free here */
  		ex->fe_len = 0;
  		ex->fe_start = 0;
  		ex->fe_group = 0;
  		return 0;
  	}

  	/* FIXME drop order completely ? */
  	if (likely(order == 0)) {
  		/* find actual order */
  		order = mb_find_order_for_block(e4b, block);
  		block >>= order;
  	}

  	ex->fe_len = 1 << order;
  	ex->fe_start = block << order;
  	ex->fe_group = e4b->bd_group;

  	/* trim the part of the chunk that lies before the given start */
  	skip = wanted_start - ex->fe_start;
  	ex->fe_len -= skip;
  	ex->fe_start += skip;

  	for (;;) {
  		if (needed <= ex->fe_len)
  			break;

  		buddy = mb_find_buddy(e4b, order, &max);
  		if (!buddy)
  			break;

  		if (block + 1 >= max)
  			break;

  		/* first block right behind the current chunk */
  		next = (block + 1) * (1 << order);
  		if (mb_test_bit(next, EXT4_MB_BITMAP(e4b)))
  			break;

  		order = mb_find_order_for_block(e4b, next);
  		block = next >> order;
  		ex->fe_len += 1 << order;
  	}

  	BUG_ON(ex->fe_start + ex->fe_len > (1 << (e4b->bd_blkbits + 3)));
  	return ex->fe_len;
  }
  
  static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
  {
  	int ord;
  	int mlen = 0;
  	int max = 0;
  	int cur;
  	int start = ex->fe_start;
  	int len = ex->fe_len;
  	unsigned ret = 0;
  	int len0 = len;
  	void *buddy;
  
  	BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3));
  	BUG_ON(e4b->bd_group != ex->fe_group);
bc8e67409   Vincent Minet   ext4: Fix spinloc...
1398
  	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
c9de560de   Alex Tomas   ext4: Add multi b...
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
  	mb_check_buddy(e4b);
  	mb_mark_used_double(e4b, start, len);
  
  	e4b->bd_info->bb_free -= len;
  	if (e4b->bd_info->bb_first_free == start)
  		e4b->bd_info->bb_first_free += len;
  
  	/* let's maintain fragments counter */
  	if (start != 0)
  		mlen = !mb_test_bit(start - 1, EXT4_MB_BITMAP(e4b));
  	if (start + len < EXT4_SB(e4b->bd_sb)->s_mb_maxs[0])
  		max = !mb_test_bit(start + len, EXT4_MB_BITMAP(e4b));
  	if (mlen && max)
  		e4b->bd_info->bb_fragments++;
  	else if (!mlen && !max)
  		e4b->bd_info->bb_fragments--;
  
  	/* let's maintain buddy itself */
  	while (len) {
  		ord = mb_find_order_for_block(e4b, start);
  
  		if (((start >> ord) << ord) == start && len >= (1 << ord)) {
  			/* the whole chunk may be allocated at once! */
  			mlen = 1 << ord;
  			buddy = mb_find_buddy(e4b, ord, &max);
  			BUG_ON((start >> ord) >= max);
  			mb_set_bit(start >> ord, buddy);
  			e4b->bd_info->bb_counters[ord]--;
  			start += mlen;
  			len -= mlen;
  			BUG_ON(len < 0);
  			continue;
  		}
  
  		/* store for history */
  		if (ret == 0)
  			ret = len | (ord << 16);
  
  		/* we have to split large buddy */
  		BUG_ON(ord <= 0);
  		buddy = mb_find_buddy(e4b, ord, &max);
  		mb_set_bit(start >> ord, buddy);
  		e4b->bd_info->bb_counters[ord]--;
  
  		ord--;
  		cur = (start >> ord) & ~1U;
  		buddy = mb_find_buddy(e4b, ord, &max);
  		mb_clear_bit(cur, buddy);
  		mb_clear_bit(cur + 1, buddy);
  		e4b->bd_info->bb_counters[ord]++;
  		e4b->bd_info->bb_counters[ord]++;
  	}
8a57d9d61   Curt Wohlgemuth   ext4: check for a...
1451
  	mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info);
c9de560de   Alex Tomas   ext4: Add multi b...
1452

955ce5f5b   Aneesh Kumar K.V   ext4: Convert ext...
1453
  	mb_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0);
c9de560de   Alex Tomas   ext4: Add multi b...
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
  	mb_check_buddy(e4b);
  
  	return ret;
  }
  
  /*
   * Must be called under group lock!
   */
  static void ext4_mb_use_best_found(struct ext4_allocation_context *ac,
  					struct ext4_buddy *e4b)
  {
  	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
  	int ret;
  
  	BUG_ON(ac->ac_b_ex.fe_group != e4b->bd_group);
  	BUG_ON(ac->ac_status == AC_STATUS_FOUND);
  
  	ac->ac_b_ex.fe_len = min(ac->ac_b_ex.fe_len, ac->ac_g_ex.fe_len);
  	ac->ac_b_ex.fe_logical = ac->ac_g_ex.fe_logical;
  	ret = mb_mark_used(e4b, &ac->ac_b_ex);
  
  	/* preallocation can change ac_b_ex, thus we store actually
  	 * allocated blocks for history */
  	ac->ac_f_ex = ac->ac_b_ex;
  
  	ac->ac_status = AC_STATUS_FOUND;
  	ac->ac_tail = ret & 0xffff;
  	ac->ac_buddy = ret >> 16;
c3a326a65   Aneesh Kumar K.V   ext4: cleanup mba...
1482
1483
1484
1485
1486
1487
1488
  	/*
  	 * take the page reference. We want the page to be pinned
  	 * so that we don't get a ext4_mb_init_cache_call for this
  	 * group until we update the bitmap. That would mean we
  	 * double allocate blocks. The reference is dropped
  	 * in ext4_mb_release_context
  	 */
c9de560de   Alex Tomas   ext4: Add multi b...
1489
1490
1491
1492
  	ac->ac_bitmap_page = e4b->bd_bitmap_page;
  	get_page(ac->ac_bitmap_page);
  	ac->ac_buddy_page = e4b->bd_buddy_page;
  	get_page(ac->ac_buddy_page);
8556e8f3b   Aneesh Kumar K.V   ext4: Don't allow...
1493
1494
1495
  	/* on allocation we use ac to track the held semaphore */
  	ac->alloc_semp =  e4b->alloc_semp;
  	e4b->alloc_semp = NULL;
c9de560de   Alex Tomas   ext4: Add multi b...
1496
  	/* store last allocated for subsequent stream allocation */
4ba74d00a   Theodore Ts'o   ext4: Fix bugs in...
1497
  	if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
c9de560de   Alex Tomas   ext4: Add multi b...
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
  		spin_lock(&sbi->s_md_lock);
  		sbi->s_mb_last_group = ac->ac_f_ex.fe_group;
  		sbi->s_mb_last_start = ac->ac_f_ex.fe_start;
  		spin_unlock(&sbi->s_md_lock);
  	}
  }
  
  /*
   * regular allocator, for general purposes allocation
   */
  
  static void ext4_mb_check_limits(struct ext4_allocation_context *ac,
  					struct ext4_buddy *e4b,
  					int finish_group)
  {
  	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
  	struct ext4_free_extent *bex = &ac->ac_b_ex;
  	struct ext4_free_extent *gex = &ac->ac_g_ex;
  	struct ext4_free_extent ex;
  	int max;
032115fce   Aneesh Kumar K.V   ext4: Don't overw...
1518
1519
  	if (ac->ac_status == AC_STATUS_FOUND)
  		return;
c9de560de   Alex Tomas   ext4: Add multi b...
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
  	/*
  	 * We don't want to scan for a whole year
  	 */
  	if (ac->ac_found > sbi->s_mb_max_to_scan &&
  			!(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
  		ac->ac_status = AC_STATUS_BREAK;
  		return;
  	}
  
  	/*
  	 * Haven't found good chunk so far, let's continue
  	 */
  	if (bex->fe_len < gex->fe_len)
  		return;
  
  	if ((finish_group || ac->ac_found > sbi->s_mb_min_to_scan)
  			&& bex->fe_group == e4b->bd_group) {
  		/* recheck chunk's availability - we don't know
  		 * when it was found (within this lock-unlock
  		 * period or not) */
  		max = mb_find_extent(e4b, 0, bex->fe_start, gex->fe_len, &ex);
  		if (max >= gex->fe_len) {
  			ext4_mb_use_best_found(ac, e4b);
  			return;
  		}
  	}
  }
  
  /*
   * Evaluate a freshly found extent @ex.
   *
   * If it is good enough (EXT4_MB_HINT_FIRST is set, or its length
   * equals the goal length) it is marked used right away and the
   * context is flagged so that scanning stops. Otherwise it is compared
   * with the best extent remembered so far and replaces it when it is
   * better. The remembered extent is used later if mballoc can't find
   * a good enough one.
   *
   * FIXME: real allocation policy is to be designed yet!
   */
  static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
  					struct ext4_free_extent *ex,
  					struct ext4_buddy *e4b)
  {
  	struct ext4_free_extent *best = &ac->ac_b_ex;
  	struct ext4_free_extent *goal = &ac->ac_g_ex;
  	int better;
  
  	BUG_ON(ex->fe_len <= 0);
  	BUG_ON(ex->fe_len > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
  	BUG_ON(ex->fe_start >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
  	BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);
  
  	ac->ac_found++;
  
  	/*
  	 * Take it immediately: either the caller wants whatever comes
  	 * first, or the chunk matches the goal length exactly.
  	 */
  	if (unlikely(ac->ac_flags & EXT4_MB_HINT_FIRST) ||
  	    ex->fe_len == goal->fe_len) {
  		*best = *ex;
  		ext4_mb_use_best_found(ac, e4b);
  		return;
  	}
  
  	/* The very first extent is simply remembered */
  	if (best->fe_len == 0) {
  		*best = *ex;
  		return;
  	}
  
  	if (best->fe_len < goal->fe_len) {
  		/*
  		 * the request isn't satisfied yet: any extent larger
  		 * than the previous best one is better
  		 */
  		better = ex->fe_len > best->fe_len;
  	} else {
  		/*
  		 * the request is satisfied: prefer an extent that
  		 * still satisfies it but is smaller than the previous one
  		 */
  		better = ex->fe_len > goal->fe_len &&
  			 ex->fe_len < best->fe_len;
  	}
  	if (better)
  		*best = *ex;
  
  	ext4_mb_check_limits(ac, e4b, 0);
  }
089ceecc1   Eric Sandeen   ext4: mark severa...
1616
1617
  static noinline_for_stack
  int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
c9de560de   Alex Tomas   ext4: Add multi b...
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
  					struct ext4_buddy *e4b)
  {
  	struct ext4_free_extent ex = ac->ac_b_ex;
  	ext4_group_t group = ex.fe_group;
  	int max;
  	int err;
  
  	BUG_ON(ex.fe_len <= 0);
  	err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
  	if (err)
  		return err;
  
  	ext4_lock_group(ac->ac_sb, group);
  	max = mb_find_extent(e4b, 0, ex.fe_start, ex.fe_len, &ex);
  
  	if (max > 0) {
  		ac->ac_b_ex = ex;
  		ext4_mb_use_best_found(ac, e4b);
  	}
  
  	ext4_unlock_group(ac->ac_sb, group);
e39e07fdf   Jing Zhang   ext4: rename ext4...
1639
  	ext4_mb_unload_buddy(e4b);
c9de560de   Alex Tomas   ext4: Add multi b...
1640
1641
1642
  
  	return 0;
  }
089ceecc1   Eric Sandeen   ext4: mark severa...
1643
1644
  static noinline_for_stack
  int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
c9de560de   Alex Tomas   ext4: Add multi b...
1645
1646
1647
1648
1649
1650
  				struct ext4_buddy *e4b)
  {
  	ext4_group_t group = ac->ac_g_ex.fe_group;
  	int max;
  	int err;
  	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
c9de560de   Alex Tomas   ext4: Add multi b...
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
  	struct ext4_free_extent ex;
  
  	if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL))
  		return 0;
  
  	err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
  	if (err)
  		return err;
  
  	ext4_lock_group(ac->ac_sb, group);
  	max = mb_find_extent(e4b, 0, ac->ac_g_ex.fe_start,
  			     ac->ac_g_ex.fe_len, &ex);
  
  	if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
  		ext4_fsblk_t start;
5661bd686   Akinobu Mita   ext4: cleanup to ...
1666
1667
  		start = ext4_group_first_block_no(ac->ac_sb, e4b->bd_group) +
  			ex.fe_start;
c9de560de   Alex Tomas   ext4: Add multi b...
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
  		/* use do_div to get remainder (would be 64-bit modulo) */
  		if (do_div(start, sbi->s_stripe) == 0) {
  			ac->ac_found++;
  			ac->ac_b_ex = ex;
  			ext4_mb_use_best_found(ac, e4b);
  		}
  	} else if (max >= ac->ac_g_ex.fe_len) {
  		BUG_ON(ex.fe_len <= 0);
  		BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
  		BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
  		ac->ac_found++;
  		ac->ac_b_ex = ex;
  		ext4_mb_use_best_found(ac, e4b);
  	} else if (max > 0 && (ac->ac_flags & EXT4_MB_HINT_MERGE)) {
  		/* Sometimes, caller may want to merge even small
  		 * number of blocks to an existing extent */
  		BUG_ON(ex.fe_len <= 0);
  		BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
  		BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
  		ac->ac_found++;
  		ac->ac_b_ex = ex;
  		ext4_mb_use_best_found(ac, e4b);
  	}
  	ext4_unlock_group(ac->ac_sb, group);
e39e07fdf   Jing Zhang   ext4: rename ext4...
1692
  	ext4_mb_unload_buddy(e4b);
c9de560de   Alex Tomas   ext4: Add multi b...
1693
1694
1695
1696
1697
1698
1699
1700
  
  	return 0;
  }
  
  /*
   * The routine scans buddy structures (not bitmap!) from given order
   * to max order and tries to find big enough chunk to satisfy the req
   */
089ceecc1   Eric Sandeen   ext4: mark severa...
1701
1702
  static noinline_for_stack
  void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
c9de560de   Alex Tomas   ext4: Add multi b...
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
  					struct ext4_buddy *e4b)
  {
  	struct super_block *sb = ac->ac_sb;
  	struct ext4_group_info *grp = e4b->bd_info;
  	void *buddy;
  	int i;
  	int k;
  	int max;
  
  	BUG_ON(ac->ac_2order <= 0);
  	for (i = ac->ac_2order; i <= sb->s_blocksize_bits + 1; i++) {
  		if (grp->bb_counters[i] == 0)
  			continue;
  
  		buddy = mb_find_buddy(e4b, i, &max);
  		BUG_ON(buddy == NULL);
ffad0a44b   Aneesh Kumar K.V   ext4: ext4_find_n...
1719
  		k = mb_find_next_zero_bit(buddy, max, 0);
c9de560de   Alex Tomas   ext4: Add multi b...
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
  		BUG_ON(k >= max);
  
  		ac->ac_found++;
  
  		ac->ac_b_ex.fe_len = 1 << i;
  		ac->ac_b_ex.fe_start = k << i;
  		ac->ac_b_ex.fe_group = e4b->bd_group;
  
  		ext4_mb_use_best_found(ac, e4b);
  
  		BUG_ON(ac->ac_b_ex.fe_len != ac->ac_g_ex.fe_len);
  
  		if (EXT4_SB(sb)->s_mb_stats)
  			atomic_inc(&EXT4_SB(sb)->s_bal_2orders);
  
  		break;
  	}
  }
  
  /*
   * The routine scans the group and measures all found extents.
   * In order to optimize scanning, caller must pass number of
   * free blocks in the group, so the routine can know upper limit.
   */
089ceecc1   Eric Sandeen   ext4: mark severa...
1744
1745
  static noinline_for_stack
  void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
c9de560de   Alex Tomas   ext4: Add multi b...
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
  					struct ext4_buddy *e4b)
  {
  	struct super_block *sb = ac->ac_sb;
  	void *bitmap = EXT4_MB_BITMAP(e4b);
  	struct ext4_free_extent ex;
  	int i;
  	int free;
  
  	free = e4b->bd_info->bb_free;
  	BUG_ON(free <= 0);
  
  	i = e4b->bd_info->bb_first_free;
  
  	while (free && ac->ac_status == AC_STATUS_CONTINUE) {
ffad0a44b   Aneesh Kumar K.V   ext4: ext4_find_n...
1760
  		i = mb_find_next_zero_bit(bitmap,
c9de560de   Alex Tomas   ext4: Add multi b...
1761
1762
  						EXT4_BLOCKS_PER_GROUP(sb), i);
  		if (i >= EXT4_BLOCKS_PER_GROUP(sb)) {
26346ff68   Aneesh Kumar K.V   ext4: Don't panic...
1763
  			/*
e56eb6590   Aneesh Kumar K.V   ext4: Don't claim...
1764
  			 * IF we have corrupt bitmap, we won't find any
26346ff68   Aneesh Kumar K.V   ext4: Don't panic...
1765
1766
1767
  			 * free blocks even though group info says we
  			 * we have free blocks
  			 */
e29136f80   Theodore Ts'o   ext4: Enhance ext...
1768
1769
  			ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
  					"%d free blocks as per "
fde4d95ad   Theodore Ts'o   ext4: remove extr...
1770
  					"group info. But bitmap says 0",
26346ff68   Aneesh Kumar K.V   ext4: Don't panic...
1771
  					free);
c9de560de   Alex Tomas   ext4: Add multi b...
1772
1773
1774
1775
1776
  			break;
  		}
  
  		mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex);
  		BUG_ON(ex.fe_len <= 0);
26346ff68   Aneesh Kumar K.V   ext4: Don't panic...
1777
  		if (free < ex.fe_len) {
e29136f80   Theodore Ts'o   ext4: Enhance ext...
1778
1779
  			ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
  					"%d free blocks as per "
fde4d95ad   Theodore Ts'o   ext4: remove extr...
1780
  					"group info. But got %d blocks",
26346ff68   Aneesh Kumar K.V   ext4: Don't panic...
1781
  					free, ex.fe_len);
e56eb6590   Aneesh Kumar K.V   ext4: Don't claim...
1782
1783
1784
1785
1786
1787
  			/*
  			 * The number of free blocks differs. This mostly
  			 * indicate that the bitmap is corrupt. So exit
  			 * without claiming the space.
  			 */
  			break;
26346ff68   Aneesh Kumar K.V   ext4: Don't panic...
1788
  		}
c9de560de   Alex Tomas   ext4: Add multi b...
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
  
  		ext4_mb_measure_extent(ac, &ex, e4b);
  
  		i += ex.fe_len;
  		free -= ex.fe_len;
  	}
  
  	ext4_mb_check_limits(ac, e4b, 1);
  }
  
  /*
   * This is a special case for storages like raid5
506bf2d82   Eric Sandeen   ext4: allocate st...
1801
   * we try to find stripe-aligned chunks for stripe-size-multiple requests
c9de560de   Alex Tomas   ext4: Add multi b...
1802
   */
089ceecc1   Eric Sandeen   ext4: mark severa...
1803
1804
  static noinline_for_stack
  void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
c9de560de   Alex Tomas   ext4: Add multi b...
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
  				 struct ext4_buddy *e4b)
  {
  	struct super_block *sb = ac->ac_sb;
  	struct ext4_sb_info *sbi = EXT4_SB(sb);
  	void *bitmap = EXT4_MB_BITMAP(e4b);
  	struct ext4_free_extent ex;
  	ext4_fsblk_t first_group_block;
  	ext4_fsblk_t a;
  	ext4_grpblk_t i;
  	int max;
  
  	BUG_ON(sbi->s_stripe == 0);
  
  	/* find first stripe-aligned block in group */
5661bd686   Akinobu Mita   ext4: cleanup to ...
1819
  	first_group_block = ext4_group_first_block_no(sb, e4b->bd_group);
c9de560de   Alex Tomas   ext4: Add multi b...
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
  	a = first_group_block + sbi->s_stripe - 1;
  	do_div(a, sbi->s_stripe);
  	i = (a * sbi->s_stripe) - first_group_block;
  
  	while (i < EXT4_BLOCKS_PER_GROUP(sb)) {
  		if (!mb_test_bit(i, bitmap)) {
  			max = mb_find_extent(e4b, 0, i, sbi->s_stripe, &ex);
  			if (max >= sbi->s_stripe) {
  				ac->ac_found++;
  				ac->ac_b_ex = ex;
  				ext4_mb_use_best_found(ac, e4b);
  				break;
  			}
  		}
  		i += sbi->s_stripe;
  	}
  }
8a57d9d61   Curt Wohlgemuth   ext4: check for a...
1837
  /* This is now called BEFORE we load the buddy bitmap. */
c9de560de   Alex Tomas   ext4: Add multi b...
1838
1839
1840
1841
  static int ext4_mb_good_group(struct ext4_allocation_context *ac,
  				ext4_group_t group, int cr)
  {
  	unsigned free, fragments;
a4912123b   Theodore Ts'o   ext4: New inode/b...
1842
  	int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
c9de560de   Alex Tomas   ext4: Add multi b...
1843
1844
1845
  	struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
  
  	BUG_ON(cr < 0 || cr >= 4);
8a57d9d61   Curt Wohlgemuth   ext4: check for a...
1846
1847
1848
1849
1850
1851
1852
  
  	/* We only do this if the grp has never been initialized */
  	if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
  		int ret = ext4_mb_init_group(ac->ac_sb, group);
  		if (ret)
  			return 0;
  	}
c9de560de   Alex Tomas   ext4: Add multi b...
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
  
  	free = grp->bb_free;
  	fragments = grp->bb_fragments;
  	if (free == 0)
  		return 0;
  	if (fragments == 0)
  		return 0;
  
  	switch (cr) {
  	case 0:
  		BUG_ON(ac->ac_2order == 0);
c9de560de   Alex Tomas   ext4: Add multi b...
1864

8a57d9d61   Curt Wohlgemuth   ext4: check for a...
1865
1866
  		if (grp->bb_largest_free_order < ac->ac_2order)
  			return 0;
a4912123b   Theodore Ts'o   ext4: New inode/b...
1867
1868
1869
1870
1871
  		/* Avoid using the first bg of a flexgroup for data files */
  		if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
  		    (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) &&
  		    ((group % flex_size) == 0))
  			return 0;
8a57d9d61   Curt Wohlgemuth   ext4: check for a...
1872
  		return 1;
c9de560de   Alex Tomas   ext4: Add multi b...
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
  	case 1:
  		if ((free / fragments) >= ac->ac_g_ex.fe_len)
  			return 1;
  		break;
  	case 2:
  		if (free >= ac->ac_g_ex.fe_len)
  			return 1;
  		break;
  	case 3:
  		return 1;
  	default:
  		BUG();
  	}
  
  	return 0;
  }
920313a72   Aneesh Kumar K.V   ext4: Use EXT4_GR...
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
  /*
   * Write-lock the group_info alloc_sem of all the groups that belong to
   * the same buddy cache page as @group. This makes sure no other
   * parallel operation on the buddy cache happens while the buddy cache
   * lock is held.
   *
   * Returns the number of groups locked; pass it to
   * ext4_mb_put_buddy_cache_lock() to release them.
   */
  int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group)
  {
  	ext4_group_t ngroups = ext4_get_groups_count(sb);
  	ext4_group_t first_group;
  	struct ext4_group_info *grp;
  	int blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
  	int groups_per_page;
  	int block, pnum;
  	int nr;
  
  	/*
  	 * the buddy cache inode stores the block bitmap
  	 * and buddy information in consecutive blocks.
  	 * So for each group we need two blocks.
  	 */
  	block = group * 2;
  	pnum = block / blocks_per_page;
  	first_group = pnum * blocks_per_page / 2;
  
  	/* a page always covers at least one group */
  	groups_per_page = blocks_per_page >> 1;
  	if (groups_per_page == 0)
  		groups_per_page = 1;
  
  	/*
  	 * Take every covered group's allocation semaphore for
  	 * writing, stopping at the last existing group. This makes
  	 * sure there is no block allocation going on in any of those
  	 * groups.
  	 */
  	for (nr = 0;
  	     nr < groups_per_page && (first_group + nr) < ngroups;
  	     nr++) {
  		grp = ext4_get_group_info(sb, first_group + nr);
  		/* nr is also passed as the lock nesting subclass */
  		down_write_nested(&grp->alloc_sem, nr);
  	}
  	return nr;
  }
  
  /*
   * Release the alloc_sem write locks of the first @locked_group groups
   * that share a buddy cache page with @group. @locked_group is the
   * count returned by ext4_mb_get_buddy_cache_lock().
   */
  void ext4_mb_put_buddy_cache_lock(struct super_block *sb,
  					ext4_group_t group, int locked_group)
  {
  	/*
  	 * the buddy cache inode stores the block bitmap
  	 * and buddy information in consecutive blocks.
  	 * So for each group we need two blocks.
  	 */
  	int blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
  	int block = group * 2;
  	int pnum = block / blocks_per_page;
  	ext4_group_t first_group = pnum * blocks_per_page / 2;
  	struct ext4_group_info *grp;
  	int nr;
  
  	/* drop the write semaphore of every group that was locked */
  	for (nr = 0; nr < locked_group; nr++) {
  		grp = ext4_get_group_info(sb, first_group + nr);
  		up_write(&grp->alloc_sem);
  	}
  }
4ddfef7b4   Eric Sandeen   ext4: reduce mbal...
1965
1966
  static noinline_for_stack int
  ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
c9de560de   Alex Tomas   ext4: Add multi b...
1967
  {
8df9675f8   Theodore Ts'o   ext4: Avoid races...
1968
  	ext4_group_t ngroups, group, i;
c9de560de   Alex Tomas   ext4: Add multi b...
1969
1970
  	int cr;
  	int err = 0;
c9de560de   Alex Tomas   ext4: Add multi b...
1971
1972
1973
  	struct ext4_sb_info *sbi;
  	struct super_block *sb;
  	struct ext4_buddy e4b;
c9de560de   Alex Tomas   ext4: Add multi b...
1974
1975
1976
  
  	sb = ac->ac_sb;
  	sbi = EXT4_SB(sb);
8df9675f8   Theodore Ts'o   ext4: Avoid races...
1977
  	ngroups = ext4_get_groups_count(sb);
fb0a387dc   Eric Sandeen   ext4: limit block...
1978
  	/* non-extent files are limited to low blocks/groups */
12e9b8920   Dmitry Monakhov   ext4: Use bitops ...
1979
  	if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)))
fb0a387dc   Eric Sandeen   ext4: limit block...
1980
  		ngroups = sbi->s_blockfile_groups;
c9de560de   Alex Tomas   ext4: Add multi b...
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
  	BUG_ON(ac->ac_status == AC_STATUS_FOUND);
  
  	/* first, try the goal */
  	err = ext4_mb_find_by_goal(ac, &e4b);
  	if (err || ac->ac_status == AC_STATUS_FOUND)
  		goto out;
  
  	if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
  		goto out;
  
  	/*
  	 * ac->ac2_order is set only if the fe_len is a power of 2
  	 * if ac2_order is set we also set criteria to 0 so that we
  	 * try exact allocation using buddy.
  	 */
  	i = fls(ac->ac_g_ex.fe_len);
  	ac->ac_2order = 0;
  	/*
  	 * We search using buddy data only if the order of the request
  	 * is greater than equal to the sbi_s_mb_order2_reqs
b713a5ec5   Theodore Ts'o   ext4: remove /pro...
2001
  	 * You can tune it via /sys/fs/ext4/<partition>/mb_order2_req
c9de560de   Alex Tomas   ext4: Add multi b...
2002
2003
2004
2005
2006
2007
2008
2009
  	 */
  	if (i >= sbi->s_mb_order2_reqs) {
  		/*
  		 * This should tell if fe_len is exactly power of 2
  		 */
  		if ((ac->ac_g_ex.fe_len & (~(1 << (i - 1)))) == 0)
  			ac->ac_2order = i - 1;
  	}
4ba74d00a   Theodore Ts'o   ext4: Fix bugs in...
2010
2011
  	/* if stream allocation is enabled, use global goal */
  	if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
c9de560de   Alex Tomas   ext4: Add multi b...
2012
2013
2014
2015
2016
2017
  		/* TBD: may be hot point */
  		spin_lock(&sbi->s_md_lock);
  		ac->ac_g_ex.fe_group = sbi->s_mb_last_group;
  		ac->ac_g_ex.fe_start = sbi->s_mb_last_start;
  		spin_unlock(&sbi->s_md_lock);
  	}
4ba74d00a   Theodore Ts'o   ext4: Fix bugs in...
2018

c9de560de   Alex Tomas   ext4: Add multi b...
2019
2020
2021
2022
2023
2024
2025
2026
2027
  	/* Let's just scan groups to find more-less suitable blocks */
  	cr = ac->ac_2order ? 0 : 1;
  	/*
  	 * cr == 0 try to get exact allocation,
  	 * cr == 3  try to get anything
  	 */
  repeat:
  	for (; cr < 4 && ac->ac_status == AC_STATUS_CONTINUE; cr++) {
  		ac->ac_criteria = cr;
ed8f9c751   Aneesh Kumar K.V   ext4: start searc...
2028
2029
2030
2031
2032
  		/*
  		 * searching for the right group start
  		 * from the goal value specified
  		 */
  		group = ac->ac_g_ex.fe_group;
8df9675f8   Theodore Ts'o   ext4: Avoid races...
2033
  		for (i = 0; i < ngroups; group++, i++) {
8df9675f8   Theodore Ts'o   ext4: Avoid races...
2034
  			if (group == ngroups)
c9de560de   Alex Tomas   ext4: Add multi b...
2035
  				group = 0;
8a57d9d61   Curt Wohlgemuth   ext4: check for a...
2036
2037
  			/* This now checks without needing the buddy page */
  			if (!ext4_mb_good_group(ac, group, cr))
c9de560de   Alex Tomas   ext4: Add multi b...
2038
  				continue;
c9de560de   Alex Tomas   ext4: Add multi b...
2039
2040
2041
2042
2043
  			err = ext4_mb_load_buddy(sb, group, &e4b);
  			if (err)
  				goto out;
  
  			ext4_lock_group(sb, group);
8a57d9d61   Curt Wohlgemuth   ext4: check for a...
2044
2045
2046
2047
2048
  
  			/*
  			 * We need to check again after locking the
  			 * block group
  			 */
c9de560de   Alex Tomas   ext4: Add multi b...
2049
  			if (!ext4_mb_good_group(ac, group, cr)) {
c9de560de   Alex Tomas   ext4: Add multi b...
2050
  				ext4_unlock_group(sb, group);
e39e07fdf   Jing Zhang   ext4: rename ext4...
2051
  				ext4_mb_unload_buddy(&e4b);
c9de560de   Alex Tomas   ext4: Add multi b...
2052
2053
2054
2055
  				continue;
  			}
  
  			ac->ac_groups_scanned++;
75507efb1   Theodore Ts'o   ext4: Don't avoid...
2056
  			if (cr == 0)
c9de560de   Alex Tomas   ext4: Add multi b...
2057
  				ext4_mb_simple_scan_group(ac, &e4b);
506bf2d82   Eric Sandeen   ext4: allocate st...
2058
2059
  			else if (cr == 1 && sbi->s_stripe &&
  					!(ac->ac_g_ex.fe_len % sbi->s_stripe))
c9de560de   Alex Tomas   ext4: Add multi b...
2060
2061
2062
2063
2064
  				ext4_mb_scan_aligned(ac, &e4b);
  			else
  				ext4_mb_complex_scan_group(ac, &e4b);
  
  			ext4_unlock_group(sb, group);
e39e07fdf   Jing Zhang   ext4: rename ext4...
2065
  			ext4_mb_unload_buddy(&e4b);
c9de560de   Alex Tomas   ext4: Add multi b...
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
  
  			if (ac->ac_status != AC_STATUS_CONTINUE)
  				break;
  		}
  	}
  
  	if (ac->ac_b_ex.fe_len > 0 && ac->ac_status != AC_STATUS_FOUND &&
  	    !(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
  		/*
  		 * We've been searching too long. Let's try to allocate
  		 * the best chunk we've found so far
  		 */
  
  		ext4_mb_try_best_found(ac, &e4b);
  		if (ac->ac_status != AC_STATUS_FOUND) {
  			/*
  			 * Someone more lucky has already allocated it.
  			 * The only thing we can do is just take first
  			 * found block(s)
  			printk(KERN_DEBUG "EXT4-fs: someone won our chunk
  ");
  			 */
  			ac->ac_b_ex.fe_group = 0;
  			ac->ac_b_ex.fe_start = 0;
  			ac->ac_b_ex.fe_len = 0;
  			ac->ac_status = AC_STATUS_CONTINUE;
  			ac->ac_flags |= EXT4_MB_HINT_FIRST;
  			cr = 3;
  			atomic_inc(&sbi->s_mb_lost_chunks);
  			goto repeat;
  		}
  	}
  out:
  	return err;
  }
c9de560de   Alex Tomas   ext4: Add multi b...
2101
2102
2103
  /*
   * seq_file ->start: turn the position *pos into an iterator token.
   *
   * The token is the group number plus one (the show callback undoes the
   * offset), so a valid group never produces a NULL token. NULL is
   * returned for positions outside the existing groups.
   */
  static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
  {
  	struct super_block *sb = seq->private;
  	ext4_group_t group;
  
  	if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
  		return NULL;
  
  	group = *pos + 1;
  	return (void *) ((unsigned long) group);
  }
  
  /*
   * seq_file ->next: advance *pos by one and return the token of the new
   * position (group number plus one), or NULL once the position leaves
   * the range of existing groups. The previous token @v is not used.
   */
  static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
  {
  	struct super_block *sb = seq->private;
  	ext4_group_t group;
  
  	++*pos;
  	if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
  		return NULL;
  
  	group = *pos + 1;
  	return (void *) ((unsigned long) group);
  }
  
  static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
  {
  	struct super_block *sb = seq->private;
a9df9a491   Theodore Ts'o   ext4: Make ext4_g...
2126
  	ext4_group_t group = (ext4_group_t) ((unsigned long) v);
c9de560de   Alex Tomas   ext4: Add multi b...
2127
2128
2129
2130
2131
  	int i;
  	int err;
  	struct ext4_buddy e4b;
  	struct sg {
  		struct ext4_group_info info;
a36b44988   Eric Sandeen   ext4: use ext4_gr...
2132
  		ext4_grpblk_t counters[16];
c9de560de   Alex Tomas   ext4: Add multi b...
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
  	} sg;
  
  	group--;
  	if (group == 0)
  		seq_printf(seq, "#%-5s: %-5s %-5s %-5s "
  				"[ %-5s %-5s %-5s %-5s %-5s %-5s %-5s "
  				  "%-5s %-5s %-5s %-5s %-5s %-5s %-5s ]
  ",
  			   "group", "free", "frags", "first",
  			   "2^0", "2^1", "2^2", "2^3", "2^4", "2^5", "2^6",
  			   "2^7", "2^8", "2^9", "2^10", "2^11", "2^12", "2^13");
  
  	i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
  		sizeof(struct ext4_group_info);
  	err = ext4_mb_load_buddy(sb, group, &e4b);
  	if (err) {
a9df9a491   Theodore Ts'o   ext4: Make ext4_g...
2149
2150
  		seq_printf(seq, "#%-5u: I/O error
  ", group);
c9de560de   Alex Tomas   ext4: Add multi b...
2151
2152
2153
2154
2155
  		return 0;
  	}
  	ext4_lock_group(sb, group);
  	memcpy(&sg, ext4_get_group_info(sb, group), i);
  	ext4_unlock_group(sb, group);
e39e07fdf   Jing Zhang   ext4: rename ext4...
2156
  	ext4_mb_unload_buddy(&e4b);
c9de560de   Alex Tomas   ext4: Add multi b...
2157

a9df9a491   Theodore Ts'o   ext4: Make ext4_g...
2158
  	seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
c9de560de   Alex Tomas   ext4: Add multi b...
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
  			sg.info.bb_fragments, sg.info.bb_first_free);
  	for (i = 0; i <= 13; i++)
  		seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ?
  				sg.info.bb_counters[i] : 0);
  	seq_printf(seq, " ]
  ");
  
  	return 0;
  }
  
  /* seq_file ->stop: intentionally empty, this body releases nothing. */
  static void ext4_mb_seq_groups_stop(struct seq_file *seq, void *v)
  {
  }
7f1346a9d   Tobias Klauser   ext4: Declare seq...
2172
  static const struct seq_operations ext4_mb_seq_groups_ops = {
c9de560de   Alex Tomas   ext4: Add multi b...
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
  	.start  = ext4_mb_seq_groups_start,
  	.next   = ext4_mb_seq_groups_next,
  	.stop   = ext4_mb_seq_groups_stop,
  	.show   = ext4_mb_seq_groups_show,
  };
  
  static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file)
  {
  	struct super_block *sb = PDE(inode)->data;
  	int rc;
  
  	rc = seq_open(file, &ext4_mb_seq_groups_ops);
  	if (rc == 0) {
a271fe852   Joe Perches   ext4: Remove unne...
2186
  		struct seq_file *m = file->private_data;
c9de560de   Alex Tomas   ext4: Add multi b...
2187
2188
2189
2190
2191
  		m->private = sb;
  	}
  	return rc;
  
  }
7f1346a9d   Tobias Klauser   ext4: Declare seq...
2192
  static const struct file_operations ext4_mb_seq_groups_fops = {
c9de560de   Alex Tomas   ext4: Add multi b...
2193
2194
2195
2196
2197
2198
  	.owner		= THIS_MODULE,
  	.open		= ext4_mb_seq_groups_open,
  	.read		= seq_read,
  	.llseek		= seq_lseek,
  	.release	= seq_release,
  };
5f21b0e64   Frederic Bohe   ext4: fix online ...
2199
2200
  
  /* Create and initialize ext4_group_info data for the given group. */
920313a72   Aneesh Kumar K.V   ext4: Use EXT4_GR...
2201
  int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
5f21b0e64   Frederic Bohe   ext4: fix online ...
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
  			  struct ext4_group_desc *desc)
  {
  	int i, len;
  	int metalen = 0;
  	struct ext4_sb_info *sbi = EXT4_SB(sb);
  	struct ext4_group_info **meta_group_info;
  
  	/*
  	 * First check if this group is the first of a reserved block.
  	 * If it's true, we have to allocate a new table of pointers
  	 * to ext4_group_info structures
  	 */
  	if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
  		metalen = sizeof(*meta_group_info) <<
  			EXT4_DESC_PER_BLOCK_BITS(sb);
  		meta_group_info = kmalloc(metalen, GFP_KERNEL);
  		if (meta_group_info == NULL) {
  			printk(KERN_ERR "EXT4-fs: can't allocate mem for a "
  			       "buddy group
  ");
  			goto exit_meta_group_info;
  		}
  		sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] =
  			meta_group_info;
  	}
  
  	/*
  	 * calculate needed size. if change bb_counters size,
  	 * don't forget about ext4_mb_generate_buddy()
  	 */
  	len = offsetof(typeof(**meta_group_info),
  		       bb_counters[sb->s_blocksize_bits + 2]);
  
  	meta_group_info =
  		sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)];
  	i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
  
  	meta_group_info[i] = kzalloc(len, GFP_KERNEL);
  	if (meta_group_info[i] == NULL) {
  		printk(KERN_ERR "EXT4-fs: can't allocate buddy mem
  ");
  		goto exit_group_info;
  	}
  	set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
  		&(meta_group_info[i]->bb_state));
  
  	/*
  	 * initialize bb_free to be able to skip
  	 * empty groups without initialization
  	 */
  	if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
  		meta_group_info[i]->bb_free =
  			ext4_free_blocks_after_init(sb, group, desc);
  	} else {
  		meta_group_info[i]->bb_free =
560671a0d   Aneesh Kumar K.V   ext4: Use high 16...
2257
  			ext4_free_blks_count(sb, desc);
5f21b0e64   Frederic Bohe   ext4: fix online ...
2258
2259
2260
  	}
  
  	INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
920313a72   Aneesh Kumar K.V   ext4: Use EXT4_GR...
2261
  	init_rwsem(&meta_group_info[i]->alloc_sem);
64e290ec6   Venkatesh Pallipadi   ext4: fix up rb_r...
2262
  	meta_group_info[i]->bb_free_root = RB_ROOT;
8a57d9d61   Curt Wohlgemuth   ext4: check for a...
2263
  	meta_group_info[i]->bb_largest_free_order = -1;  /* uninit */
5f21b0e64   Frederic Bohe   ext4: fix online ...
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
  
  #ifdef DOUBLE_CHECK
  	{
  		struct buffer_head *bh;
  		meta_group_info[i]->bb_bitmap =
  			kmalloc(sb->s_blocksize, GFP_KERNEL);
  		BUG_ON(meta_group_info[i]->bb_bitmap == NULL);
  		bh = ext4_read_block_bitmap(sb, group);
  		BUG_ON(bh == NULL);
  		memcpy(meta_group_info[i]->bb_bitmap, bh->b_data,
  			sb->s_blocksize);
  		put_bh(bh);
  	}
  #endif
  
  	return 0;
  
  exit_group_info:
  	/* If a meta_group_info table has been allocated, release it now */
  	if (group % EXT4_DESC_PER_BLOCK(sb) == 0)
  		kfree(sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]);
  exit_meta_group_info:
  	return -ENOMEM;
  } /* ext4_mb_add_groupinfo */
c9de560de   Alex Tomas   ext4: Add multi b...
2288
2289
  static int ext4_mb_init_backend(struct super_block *sb)
  {
8df9675f8   Theodore Ts'o   ext4: Avoid races...
2290
  	ext4_group_t ngroups = ext4_get_groups_count(sb);
c9de560de   Alex Tomas   ext4: Add multi b...
2291
  	ext4_group_t i;
c9de560de   Alex Tomas   ext4: Add multi b...
2292
  	struct ext4_sb_info *sbi = EXT4_SB(sb);
5f21b0e64   Frederic Bohe   ext4: fix online ...
2293
2294
2295
2296
  	struct ext4_super_block *es = sbi->s_es;
  	int num_meta_group_infos;
  	int num_meta_group_infos_max;
  	int array_size;
5f21b0e64   Frederic Bohe   ext4: fix online ...
2297
2298
2299
  	struct ext4_group_desc *desc;
  
  	/* This is the number of blocks used by GDT */
8df9675f8   Theodore Ts'o   ext4: Avoid races...
2300
  	num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) -
5f21b0e64   Frederic Bohe   ext4: fix online ...
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
  				1) >> EXT4_DESC_PER_BLOCK_BITS(sb);
  
  	/*
  	 * This is the total number of blocks used by GDT including
  	 * the number of reserved blocks for GDT.
  	 * The s_group_info array is allocated with this value
  	 * to allow a clean online resize without a complex
  	 * manipulation of pointer.
  	 * The drawback is the unused memory when no resize
  	 * occurs but it's very low in terms of pages
  	 * (see comments below)
  	 * Need to handle this properly when META_BG resizing is allowed
  	 */
  	num_meta_group_infos_max = num_meta_group_infos +
  				le16_to_cpu(es->s_reserved_gdt_blocks);
c9de560de   Alex Tomas   ext4: Add multi b...
2316

5f21b0e64   Frederic Bohe   ext4: fix online ...
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
  	/*
  	 * array_size is the size of s_group_info array. We round it
  	 * to the next power of two because this approximation is done
  	 * internally by kmalloc so we can have some more memory
  	 * for free here (e.g. may be used for META_BG resize).
  	 */
  	array_size = 1;
  	while (array_size < sizeof(*sbi->s_group_info) *
  	       num_meta_group_infos_max)
  		array_size = array_size << 1;
c9de560de   Alex Tomas   ext4: Add multi b...
2327
2328
2329
  	/* An 8TB filesystem with 64-bit pointers requires a 4096 byte
  	 * kmalloc. A 128kb malloc should suffice for a 256TB filesystem.
  	 * So a two level scheme suffices for now. */
5f21b0e64   Frederic Bohe   ext4: fix online ...
2330
  	sbi->s_group_info = kmalloc(array_size, GFP_KERNEL);
c9de560de   Alex Tomas   ext4: Add multi b...
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
  	if (sbi->s_group_info == NULL) {
  		printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group
  ");
  		return -ENOMEM;
  	}
  	sbi->s_buddy_cache = new_inode(sb);
  	if (sbi->s_buddy_cache == NULL) {
  		printk(KERN_ERR "EXT4-fs: can't get new inode
  ");
  		goto err_freesgi;
  	}
  	EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
8df9675f8   Theodore Ts'o   ext4: Avoid races...
2343
  	for (i = 0; i < ngroups; i++) {
c9de560de   Alex Tomas   ext4: Add multi b...
2344
2345
2346
  		desc = ext4_get_group_desc(sb, i, NULL);
  		if (desc == NULL) {
  			printk(KERN_ERR
a9df9a491   Theodore Ts'o   ext4: Make ext4_g...
2347
2348
  				"EXT4-fs: can't read descriptor %u
  ", i);
c9de560de   Alex Tomas   ext4: Add multi b...
2349
2350
  			goto err_freebuddy;
  		}
5f21b0e64   Frederic Bohe   ext4: fix online ...
2351
2352
  		if (ext4_mb_add_groupinfo(sb, i, desc) != 0)
  			goto err_freebuddy;
c9de560de   Alex Tomas   ext4: Add multi b...
2353
2354
2355
2356
2357
  	}
  
  	return 0;
  
  err_freebuddy:
f1fa3342e   Roel Kluin   ext4: fix hot spi...
2358
  	while (i-- > 0)
c9de560de   Alex Tomas   ext4: Add multi b...
2359
  		kfree(ext4_get_group_info(sb, i));
c9de560de   Alex Tomas   ext4: Add multi b...
2360
  	i = num_meta_group_infos;
f1fa3342e   Roel Kluin   ext4: fix hot spi...
2361
  	while (i-- > 0)
c9de560de   Alex Tomas   ext4: Add multi b...
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
  		kfree(sbi->s_group_info[i]);
  	iput(sbi->s_buddy_cache);
  err_freesgi:
  	kfree(sbi->s_group_info);
  	return -ENOMEM;
  }
  
  int ext4_mb_init(struct super_block *sb, int needs_recovery)
  {
  	struct ext4_sb_info *sbi = EXT4_SB(sb);
6be2ded1d   Aneesh Kumar K.V   ext4: Don't allow...
2372
  	unsigned i, j;
c9de560de   Alex Tomas   ext4: Add multi b...
2373
2374
  	unsigned offset;
  	unsigned max;
74767c5a2   Shen Feng   ext4: miscellaneo...
2375
  	int ret;
c9de560de   Alex Tomas   ext4: Add multi b...
2376

1927805e6   Eric Sandeen   ext4: use variabl...
2377
  	i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets);
c9de560de   Alex Tomas   ext4: Add multi b...
2378
2379
2380
  
  	sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL);
  	if (sbi->s_mb_offsets == NULL) {
c9de560de   Alex Tomas   ext4: Add multi b...
2381
2382
  		return -ENOMEM;
  	}
ff7ef329b   Yasunori Goto   ext4: Widen type ...
2383

1927805e6   Eric Sandeen   ext4: use variabl...
2384
  	i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs);
c9de560de   Alex Tomas   ext4: Add multi b...
2385
2386
  	sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
  	if (sbi->s_mb_maxs == NULL) {
a7b19448d   Dan Carpenter   ext4: fix typo wh...
2387
  		kfree(sbi->s_mb_offsets);
c9de560de   Alex Tomas   ext4: Add multi b...
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
  		return -ENOMEM;
  	}
  
  	/* order 0 is regular bitmap */
  	sbi->s_mb_maxs[0] = sb->s_blocksize << 3;
  	sbi->s_mb_offsets[0] = 0;
  
  	i = 1;
  	offset = 0;
  	max = sb->s_blocksize << 2;
  	do {
  		sbi->s_mb_offsets[i] = offset;
  		sbi->s_mb_maxs[i] = max;
  		offset += 1 << (sb->s_blocksize_bits - i);
  		max = max >> 1;
  		i++;
  	} while (i <= sb->s_blocksize_bits + 1);
  
  	/* init file for buddy data */
74767c5a2   Shen Feng   ext4: miscellaneo...
2407
2408
  	ret = ext4_mb_init_backend(sb);
  	if (ret != 0) {
c9de560de   Alex Tomas   ext4: Add multi b...
2409
2410
  		kfree(sbi->s_mb_offsets);
  		kfree(sbi->s_mb_maxs);
74767c5a2   Shen Feng   ext4: miscellaneo...
2411
  		return ret;
c9de560de   Alex Tomas   ext4: Add multi b...
2412
2413
2414
  	}
  
  	spin_lock_init(&sbi->s_md_lock);
c9de560de   Alex Tomas   ext4: Add multi b...
2415
2416
2417
2418
2419
2420
2421
  	spin_lock_init(&sbi->s_bal_lock);
  
  	sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
  	sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN;
  	sbi->s_mb_stats = MB_DEFAULT_STATS;
  	sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
  	sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
c9de560de   Alex Tomas   ext4: Add multi b...
2422
  	sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC;
730c213c7   Eric Sandeen   ext4: use percpu ...
2423
  	sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
c9de560de   Alex Tomas   ext4: Add multi b...
2424
  	if (sbi->s_locality_groups == NULL) {
c9de560de   Alex Tomas   ext4: Add multi b...
2425
2426
2427
2428
  		kfree(sbi->s_mb_offsets);
  		kfree(sbi->s_mb_maxs);
  		return -ENOMEM;
  	}
730c213c7   Eric Sandeen   ext4: use percpu ...
2429
  	for_each_possible_cpu(i) {
c9de560de   Alex Tomas   ext4: Add multi b...
2430
  		struct ext4_locality_group *lg;
730c213c7   Eric Sandeen   ext4: use percpu ...
2431
  		lg = per_cpu_ptr(sbi->s_locality_groups, i);
c9de560de   Alex Tomas   ext4: Add multi b...
2432
  		mutex_init(&lg->lg_mutex);
6be2ded1d   Aneesh Kumar K.V   ext4: Don't allow...
2433
2434
  		for (j = 0; j < PREALLOC_TB_SIZE; j++)
  			INIT_LIST_HEAD(&lg->lg_prealloc_list[j]);
c9de560de   Alex Tomas   ext4: Add multi b...
2435
2436
  		spin_lock_init(&lg->lg_prealloc_lock);
  	}
296c355cd   Theodore Ts'o   ext4: Use tracepo...
2437
2438
2439
  	if (sbi->s_proc)
  		proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
  				 &ext4_mb_seq_groups_fops, sb);
c9de560de   Alex Tomas   ext4: Add multi b...
2440

0390131ba   Frank Mayhar   ext4: Allow ext4 ...
2441
2442
  	if (sbi->s_journal)
  		sbi->s_journal->j_commit_callback = release_blocks_on_commit;
c9de560de   Alex Tomas   ext4: Add multi b...
2443
2444
  	return 0;
  }
955ce5f5b   Aneesh Kumar K.V   ext4: Convert ext...
2445
  /* need to called with the ext4 group lock held */
c9de560de   Alex Tomas   ext4: Add multi b...
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
  static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
  {
  	struct ext4_prealloc_space *pa;
  	struct list_head *cur, *tmp;
  	int count = 0;
  
  	list_for_each_safe(cur, tmp, &grp->bb_prealloc_list) {
  		pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
  		list_del(&pa->pa_group_list);
  		count++;
688f05a01   Aneesh Kumar K.V   ext4: Free ext4_p...
2456
  		kmem_cache_free(ext4_pspace_cachep, pa);
c9de560de   Alex Tomas   ext4: Add multi b...
2457
2458
  	}
  	if (count)
6ba495e92   Theodore Ts'o   ext4: Add configu...
2459
2460
  		mb_debug(1, "mballoc: %u PAs left
  ", count);
c9de560de   Alex Tomas   ext4: Add multi b...
2461
2462
2463
2464
2465
  
  }
  
  int ext4_mb_release(struct super_block *sb)
  {
8df9675f8   Theodore Ts'o   ext4: Avoid races...
2466
  	ext4_group_t ngroups = ext4_get_groups_count(sb);
c9de560de   Alex Tomas   ext4: Add multi b...
2467
2468
2469
2470
  	ext4_group_t i;
  	int num_meta_group_infos;
  	struct ext4_group_info *grinfo;
  	struct ext4_sb_info *sbi = EXT4_SB(sb);
c9de560de   Alex Tomas   ext4: Add multi b...
2471
  	if (sbi->s_group_info) {
8df9675f8   Theodore Ts'o   ext4: Avoid races...
2472
  		for (i = 0; i < ngroups; i++) {
c9de560de   Alex Tomas   ext4: Add multi b...
2473
2474
2475
2476
2477
2478
2479
2480
2481
  			grinfo = ext4_get_group_info(sb, i);
  #ifdef DOUBLE_CHECK
  			kfree(grinfo->bb_bitmap);
  #endif
  			ext4_lock_group(sb, i);
  			ext4_mb_cleanup_pa(grinfo);
  			ext4_unlock_group(sb, i);
  			kfree(grinfo);
  		}
8df9675f8   Theodore Ts'o   ext4: Avoid races...
2482
  		num_meta_group_infos = (ngroups +
c9de560de   Alex Tomas   ext4: Add multi b...
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
  				EXT4_DESC_PER_BLOCK(sb) - 1) >>
  			EXT4_DESC_PER_BLOCK_BITS(sb);
  		for (i = 0; i < num_meta_group_infos; i++)
  			kfree(sbi->s_group_info[i]);
  		kfree(sbi->s_group_info);
  	}
  	kfree(sbi->s_mb_offsets);
  	kfree(sbi->s_mb_maxs);
  	if (sbi->s_buddy_cache)
  		iput(sbi->s_buddy_cache);
  	if (sbi->s_mb_stats) {
  		printk(KERN_INFO
  		       "EXT4-fs: mballoc: %u blocks %u reqs (%u success)
  ",
  				atomic_read(&sbi->s_bal_allocated),
  				atomic_read(&sbi->s_bal_reqs),
  				atomic_read(&sbi->s_bal_success));
  		printk(KERN_INFO
  		      "EXT4-fs: mballoc: %u extents scanned, %u goal hits, "
  				"%u 2^N hits, %u breaks, %u lost
  ",
  				atomic_read(&sbi->s_bal_ex_scanned),
  				atomic_read(&sbi->s_bal_goals),
  				atomic_read(&sbi->s_bal_2orders),
  				atomic_read(&sbi->s_bal_breaks),
  				atomic_read(&sbi->s_mb_lost_chunks));
  		printk(KERN_INFO
  		       "EXT4-fs: mballoc: %lu generated and it took %Lu
  ",
  				sbi->s_mb_buddies_generated++,
  				sbi->s_mb_generation_time);
  		printk(KERN_INFO
  		       "EXT4-fs: mballoc: %u preallocated, %u discarded
  ",
  				atomic_read(&sbi->s_mb_preallocated),
  				atomic_read(&sbi->s_mb_discarded));
  	}
730c213c7   Eric Sandeen   ext4: use percpu ...
2520
  	free_percpu(sbi->s_locality_groups);
296c355cd   Theodore Ts'o   ext4: Use tracepo...
2521
2522
  	if (sbi->s_proc)
  		remove_proc_entry("mb_groups", sbi->s_proc);
c9de560de   Alex Tomas   ext4: Add multi b...
2523
2524
2525
  
  	return 0;
  }
5c521830c   Jiaying Zhang   ext4: Support dis...
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
  /*
   * Issue a discard for @count blocks starting at block @block of block
   * group @block_group.
   *
   * The group-relative block is converted to a filesystem block number,
   * the request is traced and then handed to sb_issue_discard().  If the
   * device reports that discard is not supported, a warning is logged
   * and the DISCARD mount option is cleared so no further requests are
   * issued.  Any other result is ignored.
   */
  static inline void ext4_issue_discard(struct super_block *sb,
  		ext4_group_t block_group, ext4_grpblk_t block, int count)
  {
  	int ret;
  	ext4_fsblk_t discard_block;

  	discard_block = block + ext4_group_first_block_no(sb, block_group);
  	trace_ext4_discard_blocks(sb,
  			(unsigned long long) discard_block, count);
  	ret = sb_issue_discard(sb, discard_block, count);
  	/* kernel error codes are negative: test -EOPNOTSUPP, not EOPNOTSUPP */
  	if (ret == -EOPNOTSUPP) {
  		ext4_warning(sb, "discard not supported, disabling");
  		clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD);
  	}
  }
3e624fc72   Theodore Ts'o   ext4: Replace hac...
2541
2542
2543
2544
2545
  /*
   * Journal commit callback, invoked by the jbd2 layer once a commit has
   * finished.  Every extent queued on the transaction's private list was
   * released within that commit, so it can now be handed back to the
   * buddy allocator and its tracking structure freed.
   */
  static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
  {
  	struct super_block *sb = journal->j_private;
  	struct ext4_free_data *entry, *next;
  	struct ext4_group_info *grp;
  	struct ext4_buddy e4b;
  	int nr_blocks = 0, nr_entries = 0;
  	int err;

  	list_for_each_entry_safe(entry, next, &txn->t_private_list, list) {
  		mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
  			 entry->count, entry->group, entry);

  		if (test_opt(sb, DISCARD))
  			ext4_issue_discard(sb, entry->group,
  					entry->start_blk, entry->count);

  		/* we expect to find existing buddy because it's pinned */
  		err = ext4_mb_load_buddy(sb, entry->group, &e4b);
  		BUG_ON(err != 0);

  		grp = e4b.bd_info;

  		/* there are blocks to put in buddy to make them really free */
  		nr_blocks += entry->count;
  		nr_entries++;

  		ext4_lock_group(sb, entry->group);

  		/* Take it out of per group rb tree */
  		rb_erase(&entry->node, &(grp->bb_free_root));
  		mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count);

  		if (!grp->bb_free_root.rb_node) {
  			/* No more items in the per group rb tree
  			 * balance refcounts from ext4_mb_free_metadata()
  			 */
  			page_cache_release(e4b.bd_buddy_page);
  			page_cache_release(e4b.bd_bitmap_page);
  		}

  		ext4_unlock_group(sb, entry->group);
  		kmem_cache_free(ext4_free_ext_cachep, entry);
  		ext4_mb_unload_buddy(&e4b);
  	}

  	mb_debug(1, "freed %u blocks in %u structures\n",
  		 nr_blocks, nr_entries);
  }
6ba495e92   Theodore Ts'o   ext4: Add configu...
2591
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
  #ifdef CONFIG_EXT4_DEBUG
  /* Value exposed through the "mballoc-debug" debugfs file created below. */
  u8 mb_enable_debug __read_mostly;

  static struct dentry *debugfs_dir;
  static struct dentry *debugfs_debug;

  /*
   * Create the debugfs directory "ext4" and, if that worked, the u8 file
   * "mballoc-debug" inside it, backed by mb_enable_debug.
   */
  static void __init ext4_create_debugfs_entry(void)
  {
  	debugfs_dir = debugfs_create_dir("ext4", NULL);
  	if (!debugfs_dir)
  		return;

  	debugfs_debug = debugfs_create_u8("mballoc-debug",
  					  S_IRUGO | S_IWUSR,
  					  debugfs_dir,
  					  &mb_enable_debug);
  }

  /* Remove the debugfs file first, then the directory that held it. */
  static void ext4_remove_debugfs_entry(void)
  {
  	debugfs_remove(debugfs_debug);
  	debugfs_remove(debugfs_dir);
  }

  #else

  /* Without CONFIG_EXT4_DEBUG there is nothing to create or remove. */
  static void __init ext4_create_debugfs_entry(void)
  {
  }

  static void ext4_remove_debugfs_entry(void)
  {
  }

  #endif
c9de560de   Alex Tomas   ext4: Add multi b...
2624
2625
2626
2627
2628
2629
2630
2631
  /*
   * Create the three slab caches used by mballoc (preallocation spaces,
   * allocation contexts and free-extent records) and then the debugfs
   * entry.
   *
   * Returns 0 on success.  If a cache cannot be created, the caches
   * created before it are destroyed again and -ENOMEM is returned.
   */
  int __init init_ext4_mballoc(void)
  {
  	ext4_pspace_cachep = kmem_cache_create("ext4_prealloc_space",
  				sizeof(struct ext4_prealloc_space),
  				0, SLAB_RECLAIM_ACCOUNT, NULL);
  	if (!ext4_pspace_cachep)
  		return -ENOMEM;

  	ext4_ac_cachep = kmem_cache_create("ext4_alloc_context",
  				sizeof(struct ext4_allocation_context),
  				0, SLAB_RECLAIM_ACCOUNT, NULL);
  	if (!ext4_ac_cachep) {
  		kmem_cache_destroy(ext4_pspace_cachep);
  		return -ENOMEM;
  	}

  	ext4_free_ext_cachep = kmem_cache_create("ext4_free_block_extents",
  				sizeof(struct ext4_free_data),
  				0, SLAB_RECLAIM_ACCOUNT, NULL);
  	if (!ext4_free_ext_cachep) {
  		kmem_cache_destroy(ext4_pspace_cachep);
  		kmem_cache_destroy(ext4_ac_cachep);
  		return -ENOMEM;
  	}

  	ext4_create_debugfs_entry();
  	return 0;
  }
  
  void exit_ext4_mballoc(void)
  {
60e6679e2   Theodore Ts'o   ext4: Drop whites...
2656
  	/*
3e03f9ca6   Jesper Dangaard Brouer   ext4: Use rcu_bar...
2657
2658
2659
2660
  	 * Wait for completion of call_rcu()'s on ext4_pspace_cachep
  	 * before destroying the slab cache.
  	 */
  	rcu_barrier();
c9de560de   Alex Tomas   ext4: Add multi b...
2661
  	kmem_cache_destroy(ext4_pspace_cachep);
256bdb497   Eric Sandeen   ext4: allocate st...
2662
  	kmem_cache_destroy(ext4_ac_cachep);
c894058d6   Aneesh Kumar K.V   ext4: Use an rbtr...
2663
  	kmem_cache_destroy(ext4_free_ext_cachep);
6ba495e92   Theodore Ts'o   ext4: Add configu...
2664
  	ext4_remove_debugfs_entry();
c9de560de   Alex Tomas   ext4: Add multi b...
2665
2666
2667
2668
  }
  
  
  /*
73b2c7165   Uwe Kleine-König   fix comment typo ...
2669
   * Check quota and mark chosen space (ac->ac_b_ex) non-free in bitmaps
c9de560de   Alex Tomas   ext4: Add multi b...
2670
2671
   * Returns 0 if success or error code
   */
4ddfef7b4   Eric Sandeen   ext4: reduce mbal...
2672
2673
  static noinline_for_stack int
  ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
498e5f241   Theodore Ts'o   ext4: Change unsi...
2674
  				handle_t *handle, unsigned int reserv_blks)
c9de560de   Alex Tomas   ext4: Add multi b...
2675
2676
  {
  	struct buffer_head *bitmap_bh = NULL;
c9de560de   Alex Tomas   ext4: Add multi b...
2677
2678
2679
2680
2681
  	struct ext4_group_desc *gdp;
  	struct buffer_head *gdp_bh;
  	struct ext4_sb_info *sbi;
  	struct super_block *sb;
  	ext4_fsblk_t block;
519deca04   Aneesh Kumar K.V   ext4: Retry block...
2682
  	int err, len;
c9de560de   Alex Tomas   ext4: Add multi b...
2683
2684
2685
2686
2687
2688
  
  	BUG_ON(ac->ac_status != AC_STATUS_FOUND);
  	BUG_ON(ac->ac_b_ex.fe_len <= 0);
  
  	sb = ac->ac_sb;
  	sbi = EXT4_SB(sb);
c9de560de   Alex Tomas   ext4: Add multi b...
2689
2690
  
  	err = -EIO;
574ca174c   Theodore Ts'o   ext4: Rename read...
2691
  	bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group);
c9de560de   Alex Tomas   ext4: Add multi b...
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
  	if (!bitmap_bh)
  		goto out_err;
  
  	err = ext4_journal_get_write_access(handle, bitmap_bh);
  	if (err)
  		goto out_err;
  
  	err = -EIO;
  	gdp = ext4_get_group_desc(sb, ac->ac_b_ex.fe_group, &gdp_bh);
  	if (!gdp)
  		goto out_err;
a9df9a491   Theodore Ts'o   ext4: Make ext4_g...
2703
2704
  	ext4_debug("using block group %u(%d)
  ", ac->ac_b_ex.fe_group,
9fd9784c9   Thadeu Lima de Souza Cascardo   ext4: Fix buildin...
2705
  			ext4_free_blks_count(sb, gdp));
03cddb80e   Aneesh Kumar K.V   ext4: Fix use of ...
2706

c9de560de   Alex Tomas   ext4: Add multi b...
2707
2708
2709
  	err = ext4_journal_get_write_access(handle, gdp_bh);
  	if (err)
  		goto out_err;
bda00de7e   Akinobu Mita   ext4: cleanup to ...
2710
  	block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
c9de560de   Alex Tomas   ext4: Add multi b...
2711

519deca04   Aneesh Kumar K.V   ext4: Retry block...
2712
  	len = ac->ac_b_ex.fe_len;
6fd058f77   Theodore Ts'o   ext4: Add a compr...
2713
  	if (!ext4_data_block_valid(sbi, block, len)) {
12062dddd   Eric Sandeen   ext4: move __func...
2714
  		ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
6fd058f77   Theodore Ts'o   ext4: Add a compr...
2715
2716
  			   "fs metadata
  ", block, block+len);
519deca04   Aneesh Kumar K.V   ext4: Retry block...
2717
2718
2719
2720
  		/* File system mounted not to panic on error
  		 * Fix the bitmap and repeat the block allocation
  		 * We leak some of the blocks here.
  		 */
955ce5f5b   Aneesh Kumar K.V   ext4: Convert ext...
2721
2722
2723
2724
  		ext4_lock_group(sb, ac->ac_b_ex.fe_group);
  		mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
  			    ac->ac_b_ex.fe_len);
  		ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
0390131ba   Frank Mayhar   ext4: Allow ext4 ...
2725
  		err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
519deca04   Aneesh Kumar K.V   ext4: Retry block...
2726
2727
2728
  		if (!err)
  			err = -EAGAIN;
  		goto out_err;
c9de560de   Alex Tomas   ext4: Add multi b...
2729
  	}
955ce5f5b   Aneesh Kumar K.V   ext4: Convert ext...
2730
2731
  
  	ext4_lock_group(sb, ac->ac_b_ex.fe_group);
c9de560de   Alex Tomas   ext4: Add multi b...
2732
2733
2734
2735
2736
2737
2738
2739
2740
  #ifdef AGGRESSIVE_CHECK
  	{
  		int i;
  		for (i = 0; i < ac->ac_b_ex.fe_len; i++) {
  			BUG_ON(mb_test_bit(ac->ac_b_ex.fe_start + i,
  						bitmap_bh->b_data));
  		}
  	}
  #endif
955ce5f5b   Aneesh Kumar K.V   ext4: Convert ext...
2741
  	mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,ac->ac_b_ex.fe_len);
c9de560de   Alex Tomas   ext4: Add multi b...
2742
2743
  	if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
  		gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
560671a0d   Aneesh Kumar K.V   ext4: Use high 16...
2744
2745
2746
  		ext4_free_blks_set(sb, gdp,
  					ext4_free_blocks_after_init(sb,
  					ac->ac_b_ex.fe_group, gdp));
c9de560de   Alex Tomas   ext4: Add multi b...
2747
  	}
560671a0d   Aneesh Kumar K.V   ext4: Use high 16...
2748
2749
  	len = ext4_free_blks_count(sb, gdp) - ac->ac_b_ex.fe_len;
  	ext4_free_blks_set(sb, gdp, len);
c9de560de   Alex Tomas   ext4: Add multi b...
2750
  	gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
955ce5f5b   Aneesh Kumar K.V   ext4: Convert ext...
2751
2752
  
  	ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
6bc6e63fc   Aneesh Kumar K.V   ext4: Add percpu ...
2753
  	percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
d2a176379   Mingming Cao   ext4: delayed all...
2754
  	/*
6bc6e63fc   Aneesh Kumar K.V   ext4: Add percpu ...
2755
  	 * Now reduce the dirty block count also. Should not go negative
d2a176379   Mingming Cao   ext4: delayed all...
2756
  	 */
6bc6e63fc   Aneesh Kumar K.V   ext4: Add percpu ...
2757
2758
2759
  	if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
  		/* release all the reserved blocks if non delalloc */
  		percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks);
c9de560de   Alex Tomas   ext4: Add multi b...
2760

772cb7c83   Jose R. Santos   ext4: New inode a...
2761
2762
2763
  	if (sbi->s_log_groups_per_flex) {
  		ext4_group_t flex_group = ext4_flex_group(sbi,
  							  ac->ac_b_ex.fe_group);
9f24e4208   Theodore Ts'o   ext4: Use atomic_...
2764
2765
  		atomic_sub(ac->ac_b_ex.fe_len,
  			   &sbi->s_flex_groups[flex_group].free_blocks);
772cb7c83   Jose R. Santos   ext4: New inode a...
2766
  	}
0390131ba   Frank Mayhar   ext4: Allow ext4 ...
2767
  	err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
c9de560de   Alex Tomas   ext4: Add multi b...
2768
2769
  	if (err)
  		goto out_err;
0390131ba   Frank Mayhar   ext4: Allow ext4 ...
2770
  	err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh);
c9de560de   Alex Tomas   ext4: Add multi b...
2771
2772
  
  out_err:
a0375156c   Theodore Ts'o   ext4: Clean up s_...
2773
  	ext4_mark_super_dirty(sb);
42a10add8   Aneesh Kumar K.V   ext4: Fix null bh...
2774
  	brelse(bitmap_bh);
c9de560de   Alex Tomas   ext4: Add multi b...
2775
2776
2777
2778
2779
2780
2781
  	return err;
  }
  
  /*
   * here we normalize request for locality group
   * Group request are normalized to s_strip size if we set the same via mount
   * option. If not we set it to s_mb_group_prealloc which can be configured via
b713a5ec5   Theodore Ts'o   ext4: remove /pro...
2782
   * /sys/fs/ext4/<partition>/mb_group_prealloc
c9de560de   Alex Tomas   ext4: Add multi b...
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
   *
   * XXX: should we try to preallocate more than the group has now?
   */
  static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
  {
  	struct super_block *sb = ac->ac_sb;
  	struct ext4_locality_group *lg = ac->ac_lg;
  
  	BUG_ON(lg == NULL);
  	if (EXT4_SB(sb)->s_stripe)
  		ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe;
  	else
  		ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
6ba495e92   Theodore Ts'o   ext4: Add configu...
2796
2797
  	mb_debug(1, "#%u: goal %u blocks for locality group
  ",
c9de560de   Alex Tomas   ext4: Add multi b...
2798
2799
2800
2801
2802
2803
2804
  		current->pid, ac->ac_g_ex.fe_len);
  }
  
  /*
   * Normalization means making request better in terms of
   * size and alignment
   */
4ddfef7b4   Eric Sandeen   ext4: reduce mbal...
2805
2806
  static noinline_for_stack void
  ext4_mb_normalize_request(struct ext4_allocation_context *ac,
c9de560de   Alex Tomas   ext4: Add multi b...
2807
2808
2809
2810
  				struct ext4_allocation_request *ar)
  {
  	int bsbits, max;
  	ext4_lblk_t end;
c9de560de   Alex Tomas   ext4: Add multi b...
2811
  	loff_t size, orig_size, start_off;
5a0790c2c   Andi Kleen   ext4: remove init...
2812
  	ext4_lblk_t start;
c9de560de   Alex Tomas   ext4: Add multi b...
2813
  	struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
9a0762c5a   Aneesh Kumar K.V   ext4: Convert li...
2814
  	struct ext4_prealloc_space *pa;
c9de560de   Alex Tomas   ext4: Add multi b...
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
  
  	/* do normalize only data requests, metadata requests
  	   do not need preallocation */
  	if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
  		return;
  
  	/* sometime caller may want exact blocks */
  	if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
  		return;
  
  	/* caller may indicate that preallocation isn't
  	 * required (it's a tail, for example) */
  	if (ac->ac_flags & EXT4_MB_HINT_NOPREALLOC)
  		return;
  
  	if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC) {
  		ext4_mb_normalize_group_request(ac);
  		return ;
  	}
  
  	bsbits = ac->ac_sb->s_blocksize_bits;
  
  	/* first, let's learn actual file size
  	 * given current request is allocated */
  	size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len;
  	size = size << bsbits;
  	if (size < i_size_read(ac->ac_inode))
  		size = i_size_read(ac->ac_inode);
5a0790c2c   Andi Kleen   ext4: remove init...
2843
  	orig_size = size;
c9de560de   Alex Tomas   ext4: Add multi b...
2844

1930479c4   Valerie Clement   ext4: mballoc fix...
2845
2846
  	/* max size of free chunks */
  	max = 2 << bsbits;
c9de560de   Alex Tomas   ext4: Add multi b...
2847

1930479c4   Valerie Clement   ext4: mballoc fix...
2848
2849
  #define NRL_CHECK_SIZE(req, size, max, chunk_size)	\
  		(req <= (size) || max <= (chunk_size))
c9de560de   Alex Tomas   ext4: Add multi b...
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
  
  	/* first, try to predict filesize */
  	/* XXX: should this table be tunable? */
  	start_off = 0;
  	if (size <= 16 * 1024) {
  		size = 16 * 1024;
  	} else if (size <= 32 * 1024) {
  		size = 32 * 1024;
  	} else if (size <= 64 * 1024) {
  		size = 64 * 1024;
  	} else if (size <= 128 * 1024) {
  		size = 128 * 1024;
  	} else if (size <= 256 * 1024) {
  		size = 256 * 1024;
  	} else if (size <= 512 * 1024) {
  		size = 512 * 1024;
  	} else if (size <= 1024 * 1024) {
  		size = 1024 * 1024;
1930479c4   Valerie Clement   ext4: mballoc fix...
2868
  	} else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) {
c9de560de   Alex Tomas   ext4: Add multi b...
2869
  		start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
1930479c4   Valerie Clement   ext4: mballoc fix...
2870
2871
2872
  						(21 - bsbits)) << 21;
  		size = 2 * 1024 * 1024;
  	} else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, 4 * 1024)) {
c9de560de   Alex Tomas   ext4: Add multi b...
2873
2874
2875
2876
  		start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
  							(22 - bsbits)) << 22;
  		size = 4 * 1024 * 1024;
  	} else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len,
1930479c4   Valerie Clement   ext4: mballoc fix...
2877
  					(8<<20)>>bsbits, max, 8 * 1024)) {
c9de560de   Alex Tomas   ext4: Add multi b...
2878
2879
2880
2881
2882
2883
2884
  		start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
  							(23 - bsbits)) << 23;
  		size = 8 * 1024 * 1024;
  	} else {
  		start_off = (loff_t)ac->ac_o_ex.fe_logical << bsbits;
  		size	  = ac->ac_o_ex.fe_len << bsbits;
  	}
5a0790c2c   Andi Kleen   ext4: remove init...
2885
2886
  	size = size >> bsbits;
  	start = start_off >> bsbits;
c9de560de   Alex Tomas   ext4: Add multi b...
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
2898
2899
  
  	/* don't cover already allocated blocks in selected range */
  	if (ar->pleft && start <= ar->lleft) {
  		size -= ar->lleft + 1 - start;
  		start = ar->lleft + 1;
  	}
  	if (ar->pright && start + size - 1 >= ar->lright)
  		size -= start + size - ar->lright;
  
  	end = start + size;
  
  	/* check we don't cross already preallocated blocks */
  	rcu_read_lock();
9a0762c5a   Aneesh Kumar K.V   ext4: Convert li...
2900
  	list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
498e5f241   Theodore Ts'o   ext4: Change unsi...
2901
  		ext4_lblk_t pa_end;
c9de560de   Alex Tomas   ext4: Add multi b...
2902

c9de560de   Alex Tomas   ext4: Add multi b...
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
  		if (pa->pa_deleted)
  			continue;
  		spin_lock(&pa->pa_lock);
  		if (pa->pa_deleted) {
  			spin_unlock(&pa->pa_lock);
  			continue;
  		}
  
  		pa_end = pa->pa_lstart + pa->pa_len;
  
  		/* PA must not overlap original request */
  		BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end ||
  			ac->ac_o_ex.fe_logical < pa->pa_lstart));
38877f4e8   Eric Sandeen   simplify some log...
2916
2917
  		/* skip PAs this normalized request doesn't overlap with */
  		if (pa->pa_lstart >= end || pa_end <= start) {
c9de560de   Alex Tomas   ext4: Add multi b...
2918
2919
2920
2921
  			spin_unlock(&pa->pa_lock);
  			continue;
  		}
  		BUG_ON(pa->pa_lstart <= start && pa_end >= end);
38877f4e8   Eric Sandeen   simplify some log...
2922
  		/* adjust start or end to be adjacent to this pa */
c9de560de   Alex Tomas   ext4: Add multi b...
2923
2924
2925
  		if (pa_end <= ac->ac_o_ex.fe_logical) {
  			BUG_ON(pa_end < start);
  			start = pa_end;
38877f4e8   Eric Sandeen   simplify some log...
2926
  		} else if (pa->pa_lstart > ac->ac_o_ex.fe_logical) {
c9de560de   Alex Tomas   ext4: Add multi b...
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
  			BUG_ON(pa->pa_lstart > end);
  			end = pa->pa_lstart;
  		}
  		spin_unlock(&pa->pa_lock);
  	}
  	rcu_read_unlock();
  	size = end - start;
  
  	/* XXX: extra loop to check we really don't overlap preallocations */
  	rcu_read_lock();
9a0762c5a   Aneesh Kumar K.V   ext4: Convert li...
2937
  	list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
498e5f241   Theodore Ts'o   ext4: Change unsi...
2938
  		ext4_lblk_t pa_end;
c9de560de   Alex Tomas   ext4: Add multi b...
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
2949
2950
2951
2952
2953
2954
2955
2956
  		spin_lock(&pa->pa_lock);
  		if (pa->pa_deleted == 0) {
  			pa_end = pa->pa_lstart + pa->pa_len;
  			BUG_ON(!(start >= pa_end || end <= pa->pa_lstart));
  		}
  		spin_unlock(&pa->pa_lock);
  	}
  	rcu_read_unlock();
  
  	if (start + size <= ac->ac_o_ex.fe_logical &&
  			start > ac->ac_o_ex.fe_logical) {
  		printk(KERN_ERR "start %lu, size %lu, fe_logical %lu
  ",
  			(unsigned long) start, (unsigned long) size,
  			(unsigned long) ac->ac_o_ex.fe_logical);
  	}
  	BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
  			start > ac->ac_o_ex.fe_logical);
8d03c7a0c   Eric Sandeen   ext4: fix bogus B...
2957
  	BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
c9de560de   Alex Tomas   ext4: Add multi b...
2958
2959
2960
2961
2962
2963
2964
2965
2966
2967
2968
2969
2970
2971
2972
2973
2974
2975
2976
2977
2978
2979
2980
  
  	/* now prepare goal request */
  
  	/* XXX: is it better to align blocks WRT to logical
  	 * placement or satisfy big request as is */
  	ac->ac_g_ex.fe_logical = start;
  	ac->ac_g_ex.fe_len = size;
  
  	/* define goal start in order to merge */
  	if (ar->pright && (ar->lright == (start + size))) {
  		/* merge to the right */
  		ext4_get_group_no_and_offset(ac->ac_sb, ar->pright - size,
  						&ac->ac_f_ex.fe_group,
  						&ac->ac_f_ex.fe_start);
  		ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
  	}
  	if (ar->pleft && (ar->lleft + 1 == start)) {
  		/* merge to the left */
  		ext4_get_group_no_and_offset(ac->ac_sb, ar->pleft + 1,
  						&ac->ac_f_ex.fe_group,
  						&ac->ac_f_ex.fe_start);
  		ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
  	}
6ba495e92   Theodore Ts'o   ext4: Add configu...
2981
2982
  	mb_debug(1, "goal: %u(was %u) blocks at %u
  ", (unsigned) size,
c9de560de   Alex Tomas   ext4: Add multi b...
2983
2984
2985
2986
2987
2988
2989
2990
2991
2992
  		(unsigned) orig_size, (unsigned) start);
  }
  
  static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
  {
  	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
  
  	if (sbi->s_mb_stats && ac->ac_g_ex.fe_len > 1) {
  		atomic_inc(&sbi->s_bal_reqs);
  		atomic_add(ac->ac_b_ex.fe_len, &sbi->s_bal_allocated);
291dae472   Curt Wohlgemuth   ext4: Fix for ext...
2993
  		if (ac->ac_b_ex.fe_len >= ac->ac_o_ex.fe_len)
c9de560de   Alex Tomas   ext4: Add multi b...
2994
2995
2996
2997
2998
2999
3000
3001
  			atomic_inc(&sbi->s_bal_success);
  		atomic_add(ac->ac_found, &sbi->s_bal_ex_scanned);
  		if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start &&
  				ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group)
  			atomic_inc(&sbi->s_bal_goals);
  		if (ac->ac_found > sbi->s_mb_max_to_scan)
  			atomic_inc(&sbi->s_bal_breaks);
  	}
296c355cd   Theodore Ts'o   ext4: Use tracepo...
3002
3003
3004
3005
  	if (ac->ac_op == EXT4_MB_HISTORY_ALLOC)
  		trace_ext4_mballoc_alloc(ac);
  	else
  		trace_ext4_mballoc_prealloc(ac);
c9de560de   Alex Tomas   ext4: Add multi b...
3006
3007
3008
  }
  
  /*
   * Called on failure; free up any blocks from the inode PA for this
   * context.  We don't need this for MB_GROUP_PA because we only change
   * pa_free in ext4_mb_release_context(), but on failure, we've already
   * zeroed out ac->ac_b_ex.fe_len, so group_pa->pa_free is not changed.
   */
  static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
  {
  	struct ext4_prealloc_space *prealloc = ac->ac_pa;
  	int returned;
  
  	/* only blocks taken from an inode PA are handed back here */
  	if (!prealloc || prealloc->pa_type != MB_INODE_PA)
  		return;
  
  	/* credit the best-found length back to the PA's free count */
  	returned = ac->ac_b_ex.fe_len;
  	prealloc->pa_free += returned;
  }
  
  /*
   * use blocks preallocated to inode
   *
   * Carves the blocks for the original request out of inode preallocation
   * @pa: ac->ac_b_ex is set to the physical range backing
   * ac->ac_o_ex.fe_logical, clipped at the end of @pa; the context is
   * marked AC_STATUS_FOUND and bound to @pa; pa->pa_free is reduced by the
   * number of blocks taken.
   *
   * This function takes no locks itself; serialising access to @pa is the
   * caller's job.
   */
  static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
  				struct ext4_prealloc_space *pa)
  {
  	ext4_fsblk_t start;
  	ext4_fsblk_t end;
  	int len;
  
  	/* found preallocated blocks, use them */
  	start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart);
  	/* never run past the end of the preallocation */
  	end = min(pa->pa_pstart + pa->pa_len, start + ac->ac_o_ex.fe_len);
  	len = end - start;
  	ext4_get_group_no_and_offset(ac->ac_sb, start, &ac->ac_b_ex.fe_group,
  					&ac->ac_b_ex.fe_start);
  	ac->ac_b_ex.fe_len = len;
  	ac->ac_status = AC_STATUS_FOUND;
  	ac->ac_pa = pa;
  
  	BUG_ON(start < pa->pa_pstart);
  	BUG_ON(start + len > pa->pa_pstart + pa->pa_len);
  	BUG_ON(pa->pa_free < len);
  	pa->pa_free -= len;
  
  	mb_debug(1, "use %llu/%u from inode pa %p\n", start, len, pa);
  }
  
  /*
   * use blocks preallocated to locality group
   */
  static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
  				struct ext4_prealloc_space *pa)
  {
03cddb80e   Aneesh Kumar K.V   ext4: Fix use of ...
3060
  	unsigned int len = ac->ac_o_ex.fe_len;
6be2ded1d   Aneesh Kumar K.V   ext4: Don't allow...
3061

c9de560de   Alex Tomas   ext4: Add multi b...
3062
3063
3064
3065
3066
3067
3068
3069
  	ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart,
  					&ac->ac_b_ex.fe_group,
  					&ac->ac_b_ex.fe_start);
  	ac->ac_b_ex.fe_len = len;
  	ac->ac_status = AC_STATUS_FOUND;
  	ac->ac_pa = pa;
  
  	/* we don't correct pa_pstart or pa_plen here to avoid
26346ff68   Aneesh Kumar K.V   ext4: Don't panic...
3070
  	 * possible race when the group is being loaded concurrently
c9de560de   Alex Tomas   ext4: Add multi b...
3071
  	 * instead we correct pa later, after blocks are marked
26346ff68   Aneesh Kumar K.V   ext4: Don't panic...
3072
3073
  	 * in on-disk bitmap -- see ext4_mb_release_context()
  	 * Other CPUs are prevented from allocating from this pa by lg_mutex
c9de560de   Alex Tomas   ext4: Add multi b...
3074
  	 */
6ba495e92   Theodore Ts'o   ext4: Add configu...
3075
3076
  	mb_debug(1, "use %u/%u from group pa %p
  ", pa->pa_lstart-len, len, pa);
c9de560de   Alex Tomas   ext4: Add multi b...
3077
3078
3079
  }
  
  /*
   * Return the prealloc space that has the minimal distance
   * from the goal block. @cpa is the prealloc space that
   * currently has the smallest known distance from the
   * goal block.
   */
  /*
   * Reference counting: the PA that is returned holds one pa_count
   * reference taken on behalf of the caller.  When @pa displaces @cpa the
   * reference previously held on @cpa is dropped; when @cpa is kept no
   * counter is touched.  On a tie in distance @pa wins.
   */
  static struct ext4_prealloc_space *
  ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
  			struct ext4_prealloc_space *pa,
  			struct ext4_prealloc_space *cpa)
  {
  	ext4_fsblk_t cur_distance, new_distance;
  
  	if (cpa == NULL) {
  		atomic_inc(&pa->pa_count);
  		return pa;
  	}
  
  	/*
  	 * Physical block numbers are wider than what abs() is guaranteed
  	 * to handle (its operand may be narrowed to long/int), so compute
  	 * the absolute difference by ordering the operands explicitly.
  	 */
  	if (goal_block > cpa->pa_pstart)
  		cur_distance = goal_block - cpa->pa_pstart;
  	else
  		cur_distance = cpa->pa_pstart - goal_block;
  
  	if (goal_block > pa->pa_pstart)
  		new_distance = goal_block - pa->pa_pstart;
  	else
  		new_distance = pa->pa_pstart - goal_block;
  
  	if (cur_distance < new_distance)
  		return cpa;
  
  	/* drop the previous reference */
  	atomic_dec(&cpa->pa_count);
  	atomic_inc(&pa->pa_count);
  	return pa;
  }
  
  /*
c9de560de   Alex Tomas   ext4: Add multi b...
3109
3110
   * search goal blocks in preallocated space
   */
4ddfef7b4   Eric Sandeen   ext4: reduce mbal...
3111
3112
  static noinline_for_stack int
  ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
c9de560de   Alex Tomas   ext4: Add multi b...
3113
  {
6be2ded1d   Aneesh Kumar K.V   ext4: Don't allow...
3114
  	int order, i;
c9de560de   Alex Tomas   ext4: Add multi b...
3115
3116
  	struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
  	struct ext4_locality_group *lg;
5e745b041   Aneesh Kumar K.V   ext4: Fix small f...
3117
3118
  	struct ext4_prealloc_space *pa, *cpa = NULL;
  	ext4_fsblk_t goal_block;
c9de560de   Alex Tomas   ext4: Add multi b...
3119
3120
3121
3122
3123
3124
3125
  
  	/* only data can be preallocated */
  	if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
  		return 0;
  
  	/* first, try per-file preallocation */
  	rcu_read_lock();
9a0762c5a   Aneesh Kumar K.V   ext4: Convert li...
3126
  	list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
c9de560de   Alex Tomas   ext4: Add multi b...
3127
3128
3129
3130
3131
3132
  
  		/* all fields in this condition don't change,
  		 * so we can skip locking for them */
  		if (ac->ac_o_ex.fe_logical < pa->pa_lstart ||
  			ac->ac_o_ex.fe_logical >= pa->pa_lstart + pa->pa_len)
  			continue;
fb0a387dc   Eric Sandeen   ext4: limit block...
3133
  		/* non-extent files can't have physical blocks past 2^32 */
12e9b8920   Dmitry Monakhov   ext4: Use bitops ...
3134
  		if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) &&
fb0a387dc   Eric Sandeen   ext4: limit block...
3135
3136
  			pa->pa_pstart + pa->pa_len > EXT4_MAX_BLOCK_FILE_PHYS)
  			continue;
c9de560de   Alex Tomas   ext4: Add multi b...
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151
3152
3153
3154
3155
3156
3157
3158
  		/* found preallocated blocks, use them */
  		spin_lock(&pa->pa_lock);
  		if (pa->pa_deleted == 0 && pa->pa_free) {
  			atomic_inc(&pa->pa_count);
  			ext4_mb_use_inode_pa(ac, pa);
  			spin_unlock(&pa->pa_lock);
  			ac->ac_criteria = 10;
  			rcu_read_unlock();
  			return 1;
  		}
  		spin_unlock(&pa->pa_lock);
  	}
  	rcu_read_unlock();
  
  	/* can we use group allocation? */
  	if (!(ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC))
  		return 0;
  
  	/* inode may have no locality group for some reason */
  	lg = ac->ac_lg;
  	if (lg == NULL)
  		return 0;
6be2ded1d   Aneesh Kumar K.V   ext4: Don't allow...
3159
3160
3161
3162
  	order  = fls(ac->ac_o_ex.fe_len) - 1;
  	if (order > PREALLOC_TB_SIZE - 1)
  		/* The max size of hash table is PREALLOC_TB_SIZE */
  		order = PREALLOC_TB_SIZE - 1;
bda00de7e   Akinobu Mita   ext4: cleanup to ...
3163
  	goal_block = ext4_grp_offs_to_block(ac->ac_sb, &ac->ac_g_ex);
5e745b041   Aneesh Kumar K.V   ext4: Fix small f...
3164
3165
3166
3167
  	/*
  	 * search for the prealloc space that is having
  	 * minimal distance from the goal block.
  	 */
6be2ded1d   Aneesh Kumar K.V   ext4: Don't allow...
3168
3169
3170
3171
3172
3173
3174
  	for (i = order; i < PREALLOC_TB_SIZE; i++) {
  		rcu_read_lock();
  		list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i],
  					pa_inode_list) {
  			spin_lock(&pa->pa_lock);
  			if (pa->pa_deleted == 0 &&
  					pa->pa_free >= ac->ac_o_ex.fe_len) {
5e745b041   Aneesh Kumar K.V   ext4: Fix small f...
3175
3176
3177
  
  				cpa = ext4_mb_check_group_pa(goal_block,
  								pa, cpa);
6be2ded1d   Aneesh Kumar K.V   ext4: Don't allow...
3178
  			}
c9de560de   Alex Tomas   ext4: Add multi b...
3179
  			spin_unlock(&pa->pa_lock);
c9de560de   Alex Tomas   ext4: Add multi b...
3180
  		}
6be2ded1d   Aneesh Kumar K.V   ext4: Don't allow...
3181
  		rcu_read_unlock();
c9de560de   Alex Tomas   ext4: Add multi b...
3182
  	}
5e745b041   Aneesh Kumar K.V   ext4: Fix small f...
3183
3184
3185
3186
3187
  	if (cpa) {
  		ext4_mb_use_group_pa(ac, cpa);
  		ac->ac_criteria = 20;
  		return 1;
  	}
c9de560de   Alex Tomas   ext4: Add multi b...
3188
3189
3190
3191
  	return 0;
  }
  
  /*
   * the function goes through all blocks freed in the group
   * but not yet committed and marks them used in in-core bitmap.
   * buddy must be generated from this bitmap
   * Need to be called with the ext4 group lock held
   *
   * @sb:     superblock the group belongs to
   * @bitmap: in-core bitmap to mark the extents in
   * @group:  group whose bb_free_root tree is walked
   */
  static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
  						ext4_group_t group)
  {
  	struct rb_node *n;
  	struct ext4_group_info *grp;
  	struct ext4_free_data *entry;
  
  	grp = ext4_get_group_info(sb, group);
  
  	/* walk every extent recorded in the group's free tree */
  	for (n = rb_first(&(grp->bb_free_root)); n; n = rb_next(n)) {
  		entry = rb_entry(n, struct ext4_free_data, node);
  		mb_set_bits(bitmap, entry->start_blk, entry->count);
  	}
  }
  
  /*
c9de560de   Alex Tomas   ext4: Add multi b...
3216
3217
   * the function goes through all preallocation in this group and marks them
   * used in in-core bitmap. buddy must be generated from this bitmap
955ce5f5b   Aneesh Kumar K.V   ext4: Convert ext...
3218
   * Need to be called with ext4 group lock held
c9de560de   Alex Tomas   ext4: Add multi b...
3219
   */
089ceecc1   Eric Sandeen   ext4: mark severa...
3220
3221
  static noinline_for_stack
  void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
c9de560de   Alex Tomas   ext4: Add multi b...
3222
3223
3224
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
  					ext4_group_t group)
  {
  	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
  	struct ext4_prealloc_space *pa;
  	struct list_head *cur;
  	ext4_group_t groupnr;
  	ext4_grpblk_t start;
  	int preallocated = 0;
  	int count = 0;
  	int len;
  
  	/* all form of preallocation discards first load group,
  	 * so the only competing code is preallocation use.
  	 * we don't need any locking here
  	 * notice we do NOT ignore preallocations with pa_deleted
  	 * otherwise we could leave used blocks available for
  	 * allocation in buddy when concurrent ext4_mb_put_pa()
  	 * is dropping preallocation
  	 */
  	list_for_each(cur, &grp->bb_prealloc_list) {
  		pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
  		spin_lock(&pa->pa_lock);
  		ext4_get_group_no_and_offset(sb, pa->pa_pstart,
  					     &groupnr, &start);
  		len = pa->pa_len;
  		spin_unlock(&pa->pa_lock);
  		if (unlikely(len == 0))
  			continue;
  		BUG_ON(groupnr != group);
955ce5f5b   Aneesh Kumar K.V   ext4: Convert ext...
3251
  		mb_set_bits(bitmap, start, len);
c9de560de   Alex Tomas   ext4: Add multi b...
3252
3253
3254
  		preallocated += len;
  		count++;
  	}
6ba495e92   Theodore Ts'o   ext4: Add configu...
3255
3256
  	mb_debug(1, "prellocated %u for group %u
  ", preallocated, group);
c9de560de   Alex Tomas   ext4: Add multi b...
3257
3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
3272
  }
  
  /*
   * RCU callback: recover the preallocation descriptor that embeds @head
   * and return it to ext4_pspace_cachep.
   */
  static void ext4_mb_pa_callback(struct rcu_head *head)
  {
  	struct ext4_prealloc_space *victim =
  		container_of(head, struct ext4_prealloc_space, u.pa_rcu);
  
  	kmem_cache_free(ext4_pspace_cachep, victim);
  }
  
  /*
   * drops a reference to preallocated space descriptor
   * if this was the last reference and the space is consumed
   */
  static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
  			struct super_block *sb, struct ext4_prealloc_space *pa)
  {
a9df9a491   Theodore Ts'o   ext4: Make ext4_g...
3273
  	ext4_group_t grp;
d33a1976f   Eric Sandeen   ext4: fix bb_prea...
3274
  	ext4_fsblk_t grp_blk;
c9de560de   Alex Tomas   ext4: Add multi b...
3275
3276
3277
3278
3279
3280
3281
3282
3283
3284
3285
3286
3287
  
  	if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0)
  		return;
  
  	/* in this short window concurrent discard can set pa_deleted */
  	spin_lock(&pa->pa_lock);
  	if (pa->pa_deleted == 1) {
  		spin_unlock(&pa->pa_lock);
  		return;
  	}
  
  	pa->pa_deleted = 1;
  	spin_unlock(&pa->pa_lock);
d33a1976f   Eric Sandeen   ext4: fix bb_prea...
3288
  	grp_blk = pa->pa_pstart;
60e6679e2   Theodore Ts'o   ext4: Drop whites...
3289
  	/*
cc0fb9ad7   Aneesh Kumar K.V   ext4: Rename pa_l...
3290
3291
3292
3293
  	 * If doing group-based preallocation, pa_pstart may be in the
  	 * next group when pa is used up
  	 */
  	if (pa->pa_type == MB_GROUP_PA)
d33a1976f   Eric Sandeen   ext4: fix bb_prea...
3294
3295
3296
  		grp_blk--;
  
  	ext4_get_group_no_and_offset(sb, grp_blk, &grp, NULL);
c9de560de   Alex Tomas   ext4: Add multi b...
3297
3298
3299
3300
3301
3302
3303
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318
3319
3320
3321
3322
3323
3324
3325
  
  	/*
  	 * possible race:
  	 *
  	 *  P1 (buddy init)			P2 (regular allocation)
  	 *					find block B in PA
  	 *  copy on-disk bitmap to buddy
  	 *  					mark B in on-disk bitmap
  	 *					drop PA from group
  	 *  mark all PAs in buddy
  	 *
  	 * thus, P1 initializes buddy with B available. to prevent this
  	 * we make "copy" and "mark all PAs" atomic and serialize "drop PA"
  	 * against that pair
  	 */
  	ext4_lock_group(sb, grp);
  	list_del(&pa->pa_group_list);
  	ext4_unlock_group(sb, grp);
  
  	spin_lock(pa->pa_obj_lock);
  	list_del_rcu(&pa->pa_inode_list);
  	spin_unlock(pa->pa_obj_lock);
  
  	call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
  }
  
  /*
   * creates new preallocated space for given inode
   */
4ddfef7b4   Eric Sandeen   ext4: reduce mbal...
3326
3327
  static noinline_for_stack int
  ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
c9de560de   Alex Tomas   ext4: Add multi b...
3328
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340
3341
3342
3343
3344
3345
3346
3347
3348
3349
3350
3351
3352
3353
3354
3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
  {
  	struct super_block *sb = ac->ac_sb;
  	struct ext4_prealloc_space *pa;
  	struct ext4_group_info *grp;
  	struct ext4_inode_info *ei;
  
  	/* preallocate only when found space is larger then requested */
  	BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
  	BUG_ON(ac->ac_status != AC_STATUS_FOUND);
  	BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
  
  	pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS);
  	if (pa == NULL)
  		return -ENOMEM;
  
  	if (ac->ac_b_ex.fe_len < ac->ac_g_ex.fe_len) {
  		int winl;
  		int wins;
  		int win;
  		int offs;
  
  		/* we can't allocate as much as normalizer wants.
  		 * so, found space must get proper lstart
  		 * to cover original request */
  		BUG_ON(ac->ac_g_ex.fe_logical > ac->ac_o_ex.fe_logical);
  		BUG_ON(ac->ac_g_ex.fe_len < ac->ac_o_ex.fe_len);
  
  		/* we're limited by original request in that
  		 * logical block must be covered any way
  		 * winl is window we can move our chunk within */
  		winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical;
  
  		/* also, we should cover whole original request */
  		wins = ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len;
  
  		/* the smallest one defines real window */
  		win = min(winl, wins);
  
  		offs = ac->ac_o_ex.fe_logical % ac->ac_b_ex.fe_len;
  		if (offs && offs < win)
  			win = offs;
  
  		ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical - win;
  		BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical);
  		BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len);
  	}
  
  	/* preallocation can change ac_b_ex, thus we store actually
  	 * allocated blocks for history */
  	ac->ac_f_ex = ac->ac_b_ex;
  
  	pa->pa_lstart = ac->ac_b_ex.fe_logical;
  	pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
  	pa->pa_len = ac->ac_b_ex.fe_len;
  	pa->pa_free = pa->pa_len;
  	atomic_set(&pa->pa_count, 1);
  	spin_lock_init(&pa->pa_lock);
d794bf8e0   Aneesh Kumar K.V   ext4: Initialize ...
3385
3386
  	INIT_LIST_HEAD(&pa->pa_inode_list);
  	INIT_LIST_HEAD(&pa->pa_group_list);
c9de560de   Alex Tomas   ext4: Add multi b...
3387
  	pa->pa_deleted = 0;
cc0fb9ad7   Aneesh Kumar K.V   ext4: Rename pa_l...
3388
  	pa->pa_type = MB_INODE_PA;
c9de560de   Alex Tomas   ext4: Add multi b...
3389

6ba495e92   Theodore Ts'o   ext4: Add configu...
3390
3391
  	mb_debug(1, "new inode pa %p: %llu/%u for %u
  ", pa,
c9de560de   Alex Tomas   ext4: Add multi b...
3392
  			pa->pa_pstart, pa->pa_len, pa->pa_lstart);
9bffad1ed   Theodore Ts'o   ext4: convert ins...
3393
  	trace_ext4_mb_new_inode_pa(ac, pa);
c9de560de   Alex Tomas   ext4: Add multi b...
3394
3395
3396
3397
3398
3399
3400
3401
3402
3403
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
3417
  
  	ext4_mb_use_inode_pa(ac, pa);
  	atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
  
  	ei = EXT4_I(ac->ac_inode);
  	grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
  
  	pa->pa_obj_lock = &ei->i_prealloc_lock;
  	pa->pa_inode = ac->ac_inode;
  
  	ext4_lock_group(sb, ac->ac_b_ex.fe_group);
  	list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
  	ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
  
  	spin_lock(pa->pa_obj_lock);
  	list_add_rcu(&pa->pa_inode_list, &ei->i_prealloc_list);
  	spin_unlock(pa->pa_obj_lock);
  
  	return 0;
  }
  
  /*
   * creates new preallocated space for locality group inodes belongs to
   */
4ddfef7b4   Eric Sandeen   ext4: reduce mbal...
3418
3419
  static noinline_for_stack int
  ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
c9de560de   Alex Tomas   ext4: Add multi b...
3420
3421
3422
3423
3424
3425
3426
3427
3428
3429
3430
3431
3432
3433
3434
3435
3436
3437
3438
3439
3440
3441
3442
3443
3444
3445
  {
  	struct super_block *sb = ac->ac_sb;
  	struct ext4_locality_group *lg;
  	struct ext4_prealloc_space *pa;
  	struct ext4_group_info *grp;
  
  	/* preallocate only when found space is larger then requested */
  	BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
  	BUG_ON(ac->ac_status != AC_STATUS_FOUND);
  	BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
  
  	BUG_ON(ext4_pspace_cachep == NULL);
  	pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS);
  	if (pa == NULL)
  		return -ENOMEM;
  
  	/* preallocation can change ac_b_ex, thus we store actually
  	 * allocated blocks for history */
  	ac->ac_f_ex = ac->ac_b_ex;
  
  	pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
  	pa->pa_lstart = pa->pa_pstart;
  	pa->pa_len = ac->ac_b_ex.fe_len;
  	pa->pa_free = pa->pa_len;
  	atomic_set(&pa->pa_count, 1);
  	spin_lock_init(&pa->pa_lock);
6be2ded1d   Aneesh Kumar K.V   ext4: Don't allow...
3446
  	INIT_LIST_HEAD(&pa->pa_inode_list);
d794bf8e0   Aneesh Kumar K.V   ext4: Initialize ...
3447
  	INIT_LIST_HEAD(&pa->pa_group_list);
c9de560de   Alex Tomas   ext4: Add multi b...
3448
  	pa->pa_deleted = 0;
cc0fb9ad7   Aneesh Kumar K.V   ext4: Rename pa_l...
3449
  	pa->pa_type = MB_GROUP_PA;
c9de560de   Alex Tomas   ext4: Add multi b...
3450

6ba495e92   Theodore Ts'o   ext4: Add configu...
3451
3452
  	mb_debug(1, "new group pa %p: %llu/%u for %u
  ", pa,
9bffad1ed   Theodore Ts'o   ext4: convert ins...
3453
3454
  			pa->pa_pstart, pa->pa_len, pa->pa_lstart);
  	trace_ext4_mb_new_group_pa(ac, pa);
c9de560de   Alex Tomas   ext4: Add multi b...
3455
3456
3457
3458
3459
3460
3461
3462
3463
3464
3465
3466
3467
3468
  
  	ext4_mb_use_group_pa(ac, pa);
  	atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
  
  	grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
  	lg = ac->ac_lg;
  	BUG_ON(lg == NULL);
  
  	pa->pa_obj_lock = &lg->lg_prealloc_lock;
  	pa->pa_inode = NULL;
  
  	ext4_lock_group(sb, ac->ac_b_ex.fe_group);
  	list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
  	ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
6be2ded1d   Aneesh Kumar K.V   ext4: Don't allow...
3469
3470
3471
3472
  	/*
  	 * We will later add the new pa to the right bucket
  	 * after updating the pa_free in ext4_mb_release_context
  	 */
c9de560de   Alex Tomas   ext4: Add multi b...
3473
3474
3475
3476
3477
3478
3479
3480
3481
3482
3483
3484
3485
3486
3487
3488
3489
3490
3491
3492
3493
3494
  	return 0;
  }
  
  /*
   * Create a preallocation for @ac: a locality-group PA when
   * EXT4_MB_HINT_GROUP_ALLOC is set in ac->ac_flags, an inode PA otherwise.
   * Returns whatever the selected constructor returns.
   */
  static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
  {
  	if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
  		return ext4_mb_new_group_pa(ac);
  
  	return ext4_mb_new_inode_pa(ac);
  }
  
  /*
   * finds all unused blocks in on-disk bitmap, frees them in
   * in-core bitmap and buddy.
   * @pa must be unlinked from inode and group lists, so that
   * nobody else can find/use it.
   * the caller MUST hold group/inode locks.
   * TODO: optimize the case when there are no in-core structures yet
   */
4ddfef7b4   Eric Sandeen   ext4: reduce mbal...
3495
3496
  static noinline_for_stack int
  ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
c83617db7   Aneesh Kumar K.V   ext4: Don't do GF...
3497
3498
  			struct ext4_prealloc_space *pa,
  			struct ext4_allocation_context *ac)
c9de560de   Alex Tomas   ext4: Add multi b...
3499
  {
c9de560de   Alex Tomas   ext4: Add multi b...
3500
3501
  	struct super_block *sb = e4b->bd_sb;
  	struct ext4_sb_info *sbi = EXT4_SB(sb);
498e5f241   Theodore Ts'o   ext4: Change unsi...
3502
3503
  	unsigned int end;
  	unsigned int next;
c9de560de   Alex Tomas   ext4: Add multi b...
3504
3505
  	ext4_group_t group;
  	ext4_grpblk_t bit;
ba80b1019   Theodore Ts'o   ext4: Add markers...
3506
  	unsigned long long grp_blk_start;
c9de560de   Alex Tomas   ext4: Add multi b...
3507
3508
3509
3510
3511
  	int err = 0;
  	int free = 0;
  
  	BUG_ON(pa->pa_deleted == 0);
  	ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
ba80b1019   Theodore Ts'o   ext4: Add markers...
3512
  	grp_blk_start = pa->pa_pstart - bit;
c9de560de   Alex Tomas   ext4: Add multi b...
3513
3514
  	BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
  	end = bit + pa->pa_len;
256bdb497   Eric Sandeen   ext4: allocate st...
3515
3516
3517
  	if (ac) {
  		ac->ac_sb = sb;
  		ac->ac_inode = pa->pa_inode;
256bdb497   Eric Sandeen   ext4: allocate st...
3518
  	}
c9de560de   Alex Tomas   ext4: Add multi b...
3519
3520
  
  	while (bit < end) {
ffad0a44b   Aneesh Kumar K.V   ext4: ext4_find_n...
3521
  		bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit);
c9de560de   Alex Tomas   ext4: Add multi b...
3522
3523
  		if (bit >= end)
  			break;
ffad0a44b   Aneesh Kumar K.V   ext4: ext4_find_n...
3524
  		next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
6ba495e92   Theodore Ts'o   ext4: Add configu...
3525
3526
  		mb_debug(1, "    free preallocated %u/%u in group %u
  ",
5a0790c2c   Andi Kleen   ext4: remove init...
3527
3528
  			 (unsigned) ext4_group_first_block_no(sb, group) + bit,
  			 (unsigned) next - bit, (unsigned) group);
c9de560de   Alex Tomas   ext4: Add multi b...
3529
  		free += next - bit;
256bdb497   Eric Sandeen   ext4: allocate st...
3530
3531
3532
3533
3534
  		if (ac) {
  			ac->ac_b_ex.fe_group = group;
  			ac->ac_b_ex.fe_start = bit;
  			ac->ac_b_ex.fe_len = next - bit;
  			ac->ac_b_ex.fe_logical = 0;
296c355cd   Theodore Ts'o   ext4: Use tracepo...
3535
  			trace_ext4_mballoc_discard(ac);
256bdb497   Eric Sandeen   ext4: allocate st...
3536
  		}
c9de560de   Alex Tomas   ext4: Add multi b...
3537

e5880d76a   Theodore Ts'o   ext4: fix potenti...
3538
  		trace_ext4_mb_release_inode_pa(sb, ac, pa, grp_blk_start + bit,
9bffad1ed   Theodore Ts'o   ext4: convert ins...
3539
  					       next - bit);
c9de560de   Alex Tomas   ext4: Add multi b...
3540
3541
3542
3543
  		mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
  		bit = next + 1;
  	}
  	if (free != pa->pa_free) {
26346ff68   Aneesh Kumar K.V   ext4: Don't panic...
3544
3545
  		printk(KERN_CRIT "pa %p: logic %lu, phys. %lu, len %lu
  ",
c9de560de   Alex Tomas   ext4: Add multi b...
3546
3547
3548
  			pa, (unsigned long) pa->pa_lstart,
  			(unsigned long) pa->pa_pstart,
  			(unsigned long) pa->pa_len);
e29136f80   Theodore Ts'o   ext4: Enhance ext...
3549
  		ext4_grp_locked_error(sb, group, 0, 0, "free %u, pa_free %u",
5d1b1b3f4   Aneesh Kumar K.V   ext4: fix BUG whe...
3550
  					free, pa->pa_free);
e56eb6590   Aneesh Kumar K.V   ext4: Don't claim...
3551
3552
3553
3554
  		/*
  		 * pa is already deleted so we use the value obtained
  		 * from the bitmap and continue.
  		 */
c9de560de   Alex Tomas   ext4: Add multi b...
3555
  	}
c9de560de   Alex Tomas   ext4: Add multi b...
3556
3557
3558
3559
  	atomic_add(free, &sbi->s_mb_discarded);
  
  	return err;
  }
4ddfef7b4   Eric Sandeen   ext4: reduce mbal...
3560
3561
  static noinline_for_stack int
  ext4_mb_release_group_pa(struct ext4_buddy *e4b,
c83617db7   Aneesh Kumar K.V   ext4: Don't do GF...
3562
3563
  				struct ext4_prealloc_space *pa,
  				struct ext4_allocation_context *ac)
c9de560de   Alex Tomas   ext4: Add multi b...
3564
  {
c9de560de   Alex Tomas   ext4: Add multi b...
3565
3566
3567
  	struct super_block *sb = e4b->bd_sb;
  	ext4_group_t group;
  	ext4_grpblk_t bit;
e5880d76a   Theodore Ts'o   ext4: fix potenti...
3568
  	trace_ext4_mb_release_group_pa(sb, ac, pa);
c9de560de   Alex Tomas   ext4: Add multi b...
3569
3570
3571
3572
3573
  	BUG_ON(pa->pa_deleted == 0);
  	ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
  	BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
  	mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
  	atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
256bdb497   Eric Sandeen   ext4: allocate st...
3574
3575
3576
3577
3578
3579
3580
  	if (ac) {
  		ac->ac_sb = sb;
  		ac->ac_inode = NULL;
  		ac->ac_b_ex.fe_group = group;
  		ac->ac_b_ex.fe_start = bit;
  		ac->ac_b_ex.fe_len = pa->pa_len;
  		ac->ac_b_ex.fe_logical = 0;
296c355cd   Theodore Ts'o   ext4: Use tracepo...
3581
  		trace_ext4_mballoc_discard(ac);
256bdb497   Eric Sandeen   ext4: allocate st...
3582
  	}
c9de560de   Alex Tomas   ext4: Add multi b...
3583
3584
3585
3586
3587
3588
3589
3590
3591
3592
3593
3594
3595
  
  	return 0;
  }
  
  /*
   * releases all preallocations in given group
   *
   * first, we need to decide discard policy:
   * - when do we discard
   *   1) ENOSPC
   * - how many do we discard
   *   1) how many requested
   */
4ddfef7b4   Eric Sandeen   ext4: reduce mbal...
3596
3597
  static noinline_for_stack int
  ext4_mb_discard_group_preallocations(struct super_block *sb,
c9de560de   Alex Tomas   ext4: Add multi b...
3598
3599
3600
3601
3602
  					ext4_group_t group, int needed)
  {
  	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
  	struct buffer_head *bitmap_bh = NULL;
  	struct ext4_prealloc_space *pa, *tmp;
c83617db7   Aneesh Kumar K.V   ext4: Don't do GF...
3603
  	struct ext4_allocation_context *ac;
c9de560de   Alex Tomas   ext4: Add multi b...
3604
3605
3606
3607
3608
  	struct list_head list;
  	struct ext4_buddy e4b;
  	int err;
  	int busy = 0;
  	int free = 0;
6ba495e92   Theodore Ts'o   ext4: Add configu...
3609
3610
  	mb_debug(1, "discard preallocation for group %u
  ", group);
c9de560de   Alex Tomas   ext4: Add multi b...
3611
3612
3613
  
  	if (list_empty(&grp->bb_prealloc_list))
  		return 0;
574ca174c   Theodore Ts'o   ext4: Rename read...
3614
  	bitmap_bh = ext4_read_block_bitmap(sb, group);
c9de560de   Alex Tomas   ext4: Add multi b...
3615
  	if (bitmap_bh == NULL) {
12062dddd   Eric Sandeen   ext4: move __func...
3616
  		ext4_error(sb, "Error reading block bitmap for %u", group);
ce89f46cb   Aneesh Kumar K.V   ext4: Improve err...
3617
  		return 0;
c9de560de   Alex Tomas   ext4: Add multi b...
3618
3619
3620
  	}
  
  	err = ext4_mb_load_buddy(sb, group, &e4b);
ce89f46cb   Aneesh Kumar K.V   ext4: Improve err...
3621
  	if (err) {
12062dddd   Eric Sandeen   ext4: move __func...
3622
  		ext4_error(sb, "Error loading buddy information for %u", group);
ce89f46cb   Aneesh Kumar K.V   ext4: Improve err...
3623
3624
3625
  		put_bh(bitmap_bh);
  		return 0;
  	}
c9de560de   Alex Tomas   ext4: Add multi b...
3626
3627
3628
  
  	if (needed == 0)
  		needed = EXT4_BLOCKS_PER_GROUP(sb) + 1;
c9de560de   Alex Tomas   ext4: Add multi b...
3629
  	INIT_LIST_HEAD(&list);
c83617db7   Aneesh Kumar K.V   ext4: Don't do GF...
3630
  	ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
9bffad1ed   Theodore Ts'o   ext4: convert ins...
3631
3632
  	if (ac)
  		ac->ac_sb = sb;
c9de560de   Alex Tomas   ext4: Add multi b...
3633
3634
3635
3636
3637
3638
3639
3640
3641
3642
3643
3644
3645
3646
3647
3648
3649
3650
3651
3652
3653
3654
3655
3656
3657
3658
3659
3660
3661
3662
3663
3664
3665
3666
3667
3668
3669
3670
3671
3672
3673
3674
3675
3676
3677
3678
3679
3680
3681
3682
3683
3684
  repeat:
  	ext4_lock_group(sb, group);
  	list_for_each_entry_safe(pa, tmp,
  				&grp->bb_prealloc_list, pa_group_list) {
  		spin_lock(&pa->pa_lock);
  		if (atomic_read(&pa->pa_count)) {
  			spin_unlock(&pa->pa_lock);
  			busy = 1;
  			continue;
  		}
  		if (pa->pa_deleted) {
  			spin_unlock(&pa->pa_lock);
  			continue;
  		}
  
  		/* seems this one can be freed ... */
  		pa->pa_deleted = 1;
  
  		/* we can trust pa_free ... */
  		free += pa->pa_free;
  
  		spin_unlock(&pa->pa_lock);
  
  		list_del(&pa->pa_group_list);
  		list_add(&pa->u.pa_tmp_list, &list);
  	}
  
  	/* if we still need more blocks and some PAs were used, try again */
  	if (free < needed && busy) {
  		busy = 0;
  		ext4_unlock_group(sb, group);
  		/*
  		 * Yield the CPU here so that we don't get soft lockup
  		 * in non preempt case.
  		 */
  		yield();
  		goto repeat;
  	}
  
  	/* found anything to free? */
  	if (list_empty(&list)) {
  		BUG_ON(free != 0);
  		goto out;
  	}
  
  	/* now free all selected PAs */
  	list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
  
  		/* remove from object (inode or locality group) */
  		spin_lock(pa->pa_obj_lock);
  		list_del_rcu(&pa->pa_inode_list);
  		spin_unlock(pa->pa_obj_lock);
cc0fb9ad7   Aneesh Kumar K.V   ext4: Rename pa_l...
3685
  		if (pa->pa_type == MB_GROUP_PA)
c83617db7   Aneesh Kumar K.V   ext4: Don't do GF...
3686
  			ext4_mb_release_group_pa(&e4b, pa, ac);
c9de560de   Alex Tomas   ext4: Add multi b...
3687
  		else
c83617db7   Aneesh Kumar K.V   ext4: Don't do GF...
3688
  			ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
c9de560de   Alex Tomas   ext4: Add multi b...
3689
3690
3691
3692
3693
3694
3695
  
  		list_del(&pa->u.pa_tmp_list);
  		call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
  	}
  
  out:
  	ext4_unlock_group(sb, group);
c83617db7   Aneesh Kumar K.V   ext4: Don't do GF...
3696
3697
  	if (ac)
  		kmem_cache_free(ext4_ac_cachep, ac);
e39e07fdf   Jing Zhang   ext4: rename ext4...
3698
  	ext4_mb_unload_buddy(&e4b);
c9de560de   Alex Tomas   ext4: Add multi b...
3699
3700
3701
3702
3703
3704
3705
3706
3707
3708
3709
3710
3711
  	put_bh(bitmap_bh);
  	return free;
  }
  
  /*
   * releases all non-used preallocated blocks for given inode
   *
   * It's important to discard preallocations under i_data_sem
   * We don't want another block to be served from the prealloc
   * space when we are discarding the inode prealloc space.
   *
   * FIXME!! Make sure it is valid at all the call sites
   */
c2ea3fde6   Theodore Ts'o   ext4: Remove old ...
3712
  void ext4_discard_preallocations(struct inode *inode)
c9de560de   Alex Tomas   ext4: Add multi b...
3713
3714
3715
3716
3717
  {
  	struct ext4_inode_info *ei = EXT4_I(inode);
  	struct super_block *sb = inode->i_sb;
  	struct buffer_head *bitmap_bh = NULL;
  	struct ext4_prealloc_space *pa, *tmp;
c83617db7   Aneesh Kumar K.V   ext4: Don't do GF...
3718
  	struct ext4_allocation_context *ac;
c9de560de   Alex Tomas   ext4: Add multi b...
3719
3720
3721
3722
  	ext4_group_t group = 0;
  	struct list_head list;
  	struct ext4_buddy e4b;
  	int err;
c2ea3fde6   Theodore Ts'o   ext4: Remove old ...
3723
  	if (!S_ISREG(inode->i_mode)) {
c9de560de   Alex Tomas   ext4: Add multi b...
3724
3725
3726
  		/*BUG_ON(!list_empty(&ei->i_prealloc_list));*/
  		return;
  	}
6ba495e92   Theodore Ts'o   ext4: Add configu...
3727
3728
  	mb_debug(1, "discard preallocation for inode %lu
  ", inode->i_ino);
9bffad1ed   Theodore Ts'o   ext4: convert ins...
3729
  	trace_ext4_discard_preallocations(inode);
c9de560de   Alex Tomas   ext4: Add multi b...
3730
3731
  
  	INIT_LIST_HEAD(&list);
c83617db7   Aneesh Kumar K.V   ext4: Don't do GF...
3732
  	ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
9bffad1ed   Theodore Ts'o   ext4: convert ins...
3733
3734
3735
3736
  	if (ac) {
  		ac->ac_sb = sb;
  		ac->ac_inode = inode;
  	}
c9de560de   Alex Tomas   ext4: Add multi b...
3737
3738
3739
3740
3741
3742
3743
3744
3745
3746
3747
3748
3749
3750
3751
3752
3753
3754
3755
3756
3757
3758
3759
3760
3761
3762
3763
3764
3765
3766
3767
3768
3769
3770
3771
3772
3773
3774
3775
3776
3777
3778
3779
3780
3781
3782
3783
3784
3785
3786
  repeat:
  	/* first, collect all pa's in the inode */
  	spin_lock(&ei->i_prealloc_lock);
  	while (!list_empty(&ei->i_prealloc_list)) {
  		pa = list_entry(ei->i_prealloc_list.next,
  				struct ext4_prealloc_space, pa_inode_list);
  		BUG_ON(pa->pa_obj_lock != &ei->i_prealloc_lock);
  		spin_lock(&pa->pa_lock);
  		if (atomic_read(&pa->pa_count)) {
  			/* this shouldn't happen often - nobody should
  			 * use preallocation while we're discarding it */
  			spin_unlock(&pa->pa_lock);
  			spin_unlock(&ei->i_prealloc_lock);
  			printk(KERN_ERR "uh-oh! used pa while discarding
  ");
  			WARN_ON(1);
  			schedule_timeout_uninterruptible(HZ);
  			goto repeat;
  
  		}
  		if (pa->pa_deleted == 0) {
  			pa->pa_deleted = 1;
  			spin_unlock(&pa->pa_lock);
  			list_del_rcu(&pa->pa_inode_list);
  			list_add(&pa->u.pa_tmp_list, &list);
  			continue;
  		}
  
  		/* someone is deleting pa right now */
  		spin_unlock(&pa->pa_lock);
  		spin_unlock(&ei->i_prealloc_lock);
  
  		/* we have to wait here because pa_deleted
  		 * doesn't mean pa is already unlinked from
  		 * the list. as we might be called from
  		 * ->clear_inode() the inode will get freed
  		 * and concurrent thread which is unlinking
  		 * pa from inode's list may access already
  		 * freed memory, bad-bad-bad */
  
  		/* XXX: if this happens too often, we can
  		 * add a flag to force wait only in case
  		 * of ->clear_inode(), but not in case of
  		 * regular truncate */
  		schedule_timeout_uninterruptible(HZ);
  		goto repeat;
  	}
  	spin_unlock(&ei->i_prealloc_lock);
  
  	list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
cc0fb9ad7   Aneesh Kumar K.V   ext4: Rename pa_l...
3787
  		BUG_ON(pa->pa_type != MB_INODE_PA);
c9de560de   Alex Tomas   ext4: Add multi b...
3788
3789
3790
  		ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
  
  		err = ext4_mb_load_buddy(sb, group, &e4b);
ce89f46cb   Aneesh Kumar K.V   ext4: Improve err...
3791
  		if (err) {
12062dddd   Eric Sandeen   ext4: move __func...
3792
3793
  			ext4_error(sb, "Error loading buddy information for %u",
  					group);
ce89f46cb   Aneesh Kumar K.V   ext4: Improve err...
3794
3795
  			continue;
  		}
c9de560de   Alex Tomas   ext4: Add multi b...
3796

574ca174c   Theodore Ts'o   ext4: Rename read...
3797
  		bitmap_bh = ext4_read_block_bitmap(sb, group);
c9de560de   Alex Tomas   ext4: Add multi b...
3798
  		if (bitmap_bh == NULL) {
12062dddd   Eric Sandeen   ext4: move __func...
3799
3800
  			ext4_error(sb, "Error reading block bitmap for %u",
  					group);
e39e07fdf   Jing Zhang   ext4: rename ext4...
3801
  			ext4_mb_unload_buddy(&e4b);
ce89f46cb   Aneesh Kumar K.V   ext4: Improve err...
3802
  			continue;
c9de560de   Alex Tomas   ext4: Add multi b...
3803
3804
3805
3806
  		}
  
  		ext4_lock_group(sb, group);
  		list_del(&pa->pa_group_list);
c83617db7   Aneesh Kumar K.V   ext4: Don't do GF...
3807
  		ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
c9de560de   Alex Tomas   ext4: Add multi b...
3808
  		ext4_unlock_group(sb, group);
e39e07fdf   Jing Zhang   ext4: rename ext4...
3809
  		ext4_mb_unload_buddy(&e4b);
c9de560de   Alex Tomas   ext4: Add multi b...
3810
3811
3812
3813
3814
  		put_bh(bitmap_bh);
  
  		list_del(&pa->u.pa_tmp_list);
  		call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
  	}
c83617db7   Aneesh Kumar K.V   ext4: Don't do GF...
3815
3816
  	if (ac)
  		kmem_cache_free(ext4_ac_cachep, ac);
c9de560de   Alex Tomas   ext4: Add multi b...
3817
3818
3819
3820
3821
3822
3823
3824
3825
3826
3827
3828
3829
3830
3831
  }
  
  /*
   * finds all preallocated spaces and returns blocks being freed to them
   * if preallocated space becomes full (no block is used from the space)
   * then the function frees space in buddy
   * XXX: at the moment, truncate (which is the only way to free blocks)
   * discards all preallocations
   */
  static void ext4_mb_return_to_preallocation(struct inode *inode,
  					struct ext4_buddy *e4b,
  					sector_t block, int count)
  {
  	struct ext4_inode_info *ei = EXT4_I(inode);
  
  	/*
  	 * Nothing is actually returned here; the function only asserts
  	 * that the inode has no preallocations left.  @e4b, @block and
  	 * @count are unused.
  	 */
  	BUG_ON(!list_empty(&ei->i_prealloc_list));
  }
6ba495e92   Theodore Ts'o   ext4: Add configu...
3832
  #ifdef CONFIG_EXT4_DEBUG
  /*
   * Debug helper: print the allocation context @ac, then for every group
   * its preallocations and, when the group has free blocks, its
   * free/fragment counters.  Prints nothing when the filesystem is flagged
   * EXT4_MF_FS_ABORTED.
   */
  static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
  {
  	struct super_block *sb = ac->ac_sb;
  	ext4_group_t ngroups, grp_no;
  
  	if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)
  		return;
  
  	printk(KERN_ERR "EXT4-fs: Can't allocate:"
  			" Allocation context details:\n");
  	printk(KERN_ERR "EXT4-fs: status %d flags %d\n",
  			ac->ac_status, ac->ac_flags);
  	/* original / goal / best extents, each as group/start/len@logical */
  	printk(KERN_ERR "EXT4-fs: orig %lu/%lu/%lu@%lu, goal %lu/%lu/%lu@%lu, "
  			"best %lu/%lu/%lu@%lu cr %d\n",
  			(unsigned long)ac->ac_o_ex.fe_group,
  			(unsigned long)ac->ac_o_ex.fe_start,
  			(unsigned long)ac->ac_o_ex.fe_len,
  			(unsigned long)ac->ac_o_ex.fe_logical,
  			(unsigned long)ac->ac_g_ex.fe_group,
  			(unsigned long)ac->ac_g_ex.fe_start,
  			(unsigned long)ac->ac_g_ex.fe_len,
  			(unsigned long)ac->ac_g_ex.fe_logical,
  			(unsigned long)ac->ac_b_ex.fe_group,
  			(unsigned long)ac->ac_b_ex.fe_start,
  			(unsigned long)ac->ac_b_ex.fe_len,
  			(unsigned long)ac->ac_b_ex.fe_logical,
  			(int)ac->ac_criteria);
  	printk(KERN_ERR "EXT4-fs: %lu scanned, %d found\n", ac->ac_ex_scanned,
  		ac->ac_found);
  	printk(KERN_ERR "EXT4-fs: groups: \n");
  
  	ngroups = ext4_get_groups_count(sb);
  	for (grp_no = 0; grp_no < ngroups; grp_no++) {
  		struct ext4_group_info *grp = ext4_get_group_info(sb, grp_no);
  		struct ext4_prealloc_space *pa;
  		ext4_grpblk_t start;
  
  		/* walk the group's PA list under the group lock */
  		ext4_lock_group(sb, grp_no);
  		list_for_each_entry(pa, &grp->bb_prealloc_list, pa_group_list) {
  			spin_lock(&pa->pa_lock);
  			ext4_get_group_no_and_offset(sb, pa->pa_pstart,
  						     NULL, &start);
  			spin_unlock(&pa->pa_lock);
  			printk(KERN_ERR "PA:%u:%d:%u \n", grp_no,
  			       start, pa->pa_len);
  		}
  		ext4_unlock_group(sb, grp_no);
  
  		/* groups without free blocks get no summary line */
  		if (grp->bb_free != 0)
  			printk(KERN_ERR "%u: %d/%d \n",
  			       grp_no, grp->bb_free, grp->bb_fragments);
  	}
  	printk(KERN_ERR "\n");
  }
  #else
  /* Without CONFIG_EXT4_DEBUG this is a no-op. */
  static inline void ext4_mb_show_ac(struct ext4_allocation_context *ac)
  {
  }
  #endif
  
  /*
   * We use locality group preallocation for small files. The size of the
   * file is taken to be the current size or the resulting size after
   * allocation, whichever is larger.
   *
b713a5ec5   Theodore Ts'o   ext4: remove /pro...
3908
   * One can tune this size via /sys/fs/ext4/<partition>/mb_stream_req
c9de560de   Alex Tomas   ext4: Add multi b...
3909
3910
3911
3912
3913
3914
3915
3916
3917
   */
  static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
  {
  	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
  	int bsbits = ac->ac_sb->s_blocksize_bits;
  	loff_t size, isize;
  
  	if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
  		return;
4ba74d00a   Theodore Ts'o   ext4: Fix bugs in...
3918
3919
  	if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
  		return;
c9de560de   Alex Tomas   ext4: Add multi b...
3920
  	size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len;
50797481a   Theodore Ts'o   ext4: Avoid group...
3921
3922
  	isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1)
  		>> bsbits;
c9de560de   Alex Tomas   ext4: Add multi b...
3923

50797481a   Theodore Ts'o   ext4: Avoid group...
3924
3925
3926
3927
3928
3929
  	if ((size == isize) &&
  	    !ext4_fs_is_busy(sbi) &&
  	    (atomic_read(&ac->ac_inode->i_writecount) == 0)) {
  		ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC;
  		return;
  	}
c9de560de   Alex Tomas   ext4: Add multi b...
3930
  	/* don't use group allocation for large files */
717805773   Theodore Ts'o   ext4: Fix huerist...
3931
  	size = max(size, isize);
cc483f102   Tao Ma   ext4: Fix fencepo...
3932
  	if (size > sbi->s_mb_stream_request) {
4ba74d00a   Theodore Ts'o   ext4: Fix bugs in...
3933
  		ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
c9de560de   Alex Tomas   ext4: Add multi b...
3934
  		return;
4ba74d00a   Theodore Ts'o   ext4: Fix bugs in...
3935
  	}
c9de560de   Alex Tomas   ext4: Add multi b...
3936
3937
3938
3939
3940
3941
3942
  
  	BUG_ON(ac->ac_lg != NULL);
  	/*
  	 * locality group prealloc space are per cpu. The reason for having
  	 * per cpu locality group is to reduce the contention between block
  	 * request from multiple CPUs.
  	 */
ca0c9584b   Christoph Lameter   this_cpu: Straigh...
3943
  	ac->ac_lg = __this_cpu_ptr(sbi->s_locality_groups);
c9de560de   Alex Tomas   ext4: Add multi b...
3944
3945
3946
3947
3948
3949
3950
  
  	/* we're going to use group allocation */
  	ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC;
  
  	/* serialize all allocations in the group */
  	mutex_lock(&ac->ac_lg->lg_mutex);
  }
4ddfef7b4   Eric Sandeen   ext4: reduce mbal...
3951
3952
  static noinline_for_stack int
  ext4_mb_initialize_context(struct ext4_allocation_context *ac,
c9de560de   Alex Tomas   ext4: Add multi b...
3953
3954
3955
3956
3957
3958
  				struct ext4_allocation_request *ar)
  {
  	struct super_block *sb = ar->inode->i_sb;
  	struct ext4_sb_info *sbi = EXT4_SB(sb);
  	struct ext4_super_block *es = sbi->s_es;
  	ext4_group_t group;
498e5f241   Theodore Ts'o   ext4: Change unsi...
3959
3960
  	unsigned int len;
  	ext4_fsblk_t goal;
c9de560de   Alex Tomas   ext4: Add multi b...
3961
3962
3963
3964
3965
3966
3967
3968
3969
3970
3971
3972
3973
3974
3975
3976
3977
  	ext4_grpblk_t block;
  
  	/* we can't allocate > group size */
  	len = ar->len;
  
  	/* just a dirty hack to filter too big requests  */
  	if (len >= EXT4_BLOCKS_PER_GROUP(sb) - 10)
  		len = EXT4_BLOCKS_PER_GROUP(sb) - 10;
  
  	/* start searching from the goal */
  	goal = ar->goal;
  	if (goal < le32_to_cpu(es->s_first_data_block) ||
  			goal >= ext4_blocks_count(es))
  		goal = le32_to_cpu(es->s_first_data_block);
  	ext4_get_group_no_and_offset(sb, goal, &group, &block);
  
  	/* set up allocation goals */
833576b36   Theodore Ts'o   ext4: Fix ext4_mb...
3978
  	memset(ac, 0, sizeof(struct ext4_allocation_context));
c9de560de   Alex Tomas   ext4: Add multi b...
3979
  	ac->ac_b_ex.fe_logical = ar->logical;
c9de560de   Alex Tomas   ext4: Add multi b...
3980
  	ac->ac_status = AC_STATUS_CONTINUE;
c9de560de   Alex Tomas   ext4: Add multi b...
3981
3982
3983
3984
3985
3986
3987
3988
3989
3990
  	ac->ac_sb = sb;
  	ac->ac_inode = ar->inode;
  	ac->ac_o_ex.fe_logical = ar->logical;
  	ac->ac_o_ex.fe_group = group;
  	ac->ac_o_ex.fe_start = block;
  	ac->ac_o_ex.fe_len = len;
  	ac->ac_g_ex.fe_logical = ar->logical;
  	ac->ac_g_ex.fe_group = group;
  	ac->ac_g_ex.fe_start = block;
  	ac->ac_g_ex.fe_len = len;
c9de560de   Alex Tomas   ext4: Add multi b...
3991
  	ac->ac_flags = ar->flags;
c9de560de   Alex Tomas   ext4: Add multi b...
3992
3993
3994
3995
  
  	/* we have to define context: we'll we work with a file or
  	 * locality group. this is a policy, actually */
  	ext4_mb_group_or_file(ac);
6ba495e92   Theodore Ts'o   ext4: Add configu...
3996
  	mb_debug(1, "init ac: %u blocks @ %u, goal %u, flags %x, 2^%d, "
c9de560de   Alex Tomas   ext4: Add multi b...
3997
3998
3999
4000
4001
4002
4003
4004
4005
4006
  			"left: %u/%u, right %u/%u to %swritable
  ",
  			(unsigned) ar->len, (unsigned) ar->logical,
  			(unsigned) ar->goal, ac->ac_flags, ac->ac_2order,
  			(unsigned) ar->lleft, (unsigned) ar->pleft,
  			(unsigned) ar->lright, (unsigned) ar->pright,
  			atomic_read(&ar->inode->i_writecount) ? "" : "non-");
  	return 0;
  
  }
6be2ded1d   Aneesh Kumar K.V   ext4: Don't allow...
4007
4008
4009
4010
4011
4012
4013
4014
4015
4016
  static noinline_for_stack void
  ext4_mb_discard_lg_preallocations(struct super_block *sb,
  					struct ext4_locality_group *lg,
  					int order, int total_entries)
  {
  	ext4_group_t group = 0;
  	struct ext4_buddy e4b;
  	struct list_head discard_list;
  	struct ext4_prealloc_space *pa, *tmp;
  	struct ext4_allocation_context *ac;
6ba495e92   Theodore Ts'o   ext4: Add configu...
4017
4018
  	mb_debug(1, "discard locality group preallocation
  ");
6be2ded1d   Aneesh Kumar K.V   ext4: Don't allow...
4019
4020
4021
  
  	INIT_LIST_HEAD(&discard_list);
  	ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
9bffad1ed   Theodore Ts'o   ext4: convert ins...
4022
4023
  	if (ac)
  		ac->ac_sb = sb;
6be2ded1d   Aneesh Kumar K.V   ext4: Don't allow...
4024
4025
4026
4027
4028
4029
4030
4031
4032
4033
4034
4035
4036
4037
4038
4039
4040
4041
4042
  
  	spin_lock(&lg->lg_prealloc_lock);
  	list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
  						pa_inode_list) {
  		spin_lock(&pa->pa_lock);
  		if (atomic_read(&pa->pa_count)) {
  			/*
  			 * This is the pa that we just used
  			 * for block allocation. So don't
  			 * free that
  			 */
  			spin_unlock(&pa->pa_lock);
  			continue;
  		}
  		if (pa->pa_deleted) {
  			spin_unlock(&pa->pa_lock);
  			continue;
  		}
  		/* only lg prealloc space */
cc0fb9ad7   Aneesh Kumar K.V   ext4: Rename pa_l...
4043
  		BUG_ON(pa->pa_type != MB_GROUP_PA);
6be2ded1d   Aneesh Kumar K.V   ext4: Don't allow...
4044
4045
4046
4047
4048
4049
4050
4051
4052
4053
4054
4055
4056
4057
4058
4059
4060
4061
4062
4063
4064
4065
4066
4067
4068
  
  		/* seems this one can be freed ... */
  		pa->pa_deleted = 1;
  		spin_unlock(&pa->pa_lock);
  
  		list_del_rcu(&pa->pa_inode_list);
  		list_add(&pa->u.pa_tmp_list, &discard_list);
  
  		total_entries--;
  		if (total_entries <= 5) {
  			/*
  			 * we want to keep only 5 entries
  			 * allowing it to grow to 8. This
  			 * mak sure we don't call discard
  			 * soon for this list.
  			 */
  			break;
  		}
  	}
  	spin_unlock(&lg->lg_prealloc_lock);
  
  	list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) {
  
  		ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
  		if (ext4_mb_load_buddy(sb, group, &e4b)) {
12062dddd   Eric Sandeen   ext4: move __func...
4069
4070
  			ext4_error(sb, "Error loading buddy information for %u",
  					group);
6be2ded1d   Aneesh Kumar K.V   ext4: Don't allow...
4071
4072
4073
4074
4075
4076
  			continue;
  		}
  		ext4_lock_group(sb, group);
  		list_del(&pa->pa_group_list);
  		ext4_mb_release_group_pa(&e4b, pa, ac);
  		ext4_unlock_group(sb, group);
e39e07fdf   Jing Zhang   ext4: rename ext4...
4077
  		ext4_mb_unload_buddy(&e4b);
6be2ded1d   Aneesh Kumar K.V   ext4: Don't allow...
4078
4079
4080
4081
4082
4083
4084
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094
4095
4096
4097
4098
4099
4100
4101
4102
4103
4104
4105
4106
4107
4108
4109
4110
  		list_del(&pa->u.pa_tmp_list);
  		call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
  	}
  	if (ac)
  		kmem_cache_free(ext4_ac_cachep, ac);
  }
  
  /*
   * We have incremented pa_count. So it cannot be freed at this
   * point. Also we hold lg_mutex. So no parallel allocation is
   * possible from this lg. That means pa_free cannot be updated.
   *
   * A parallel ext4_mb_discard_group_preallocations is possible,
   * which can cause the lg_prealloc_list to be updated.
   */
  
  /*
   * Put ac->ac_pa back on its locality group's preallocation list and trim
   * that list if it has become too long.
   *
   * The bucket is fls(pa->pa_free) - 1, capped at PREALLOC_TB_SIZE - 1.
   * The only caller in this file invokes this function with pa_free != 0.
   * The PA is inserted in front of the first non-deleted entry with a
   * larger pa_free, or at the tail when there is none.
   *
   * The walk and the insertion modify lg->lg_prealloc_list[], so they run
   * under lg->lg_prealloc_lock.  rcu_read_lock() alone only protects
   * readers; it does not serialize this writer against the other writers,
   * which unlink entries from the same list under lg_prealloc_lock.
   * The lock is dropped before ext4_mb_discard_lg_preallocations() is
   * called, because that function takes the same lock itself.
   */
  static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
  {
  	/* the count starts at 1 to include the PA being added */
  	int order, added = 0, lg_prealloc_count = 1;
  	struct super_block *sb = ac->ac_sb;
  	struct ext4_locality_group *lg = ac->ac_lg;
  	struct ext4_prealloc_space *tmp_pa, *pa = ac->ac_pa;
  
  	order = fls(pa->pa_free) - 1;
  	if (order > PREALLOC_TB_SIZE - 1)
  		/* The max size of hash table is PREALLOC_TB_SIZE */
  		order = PREALLOC_TB_SIZE - 1;
  
  	/* Add the prealloc space to lg */
  	spin_lock(&lg->lg_prealloc_lock);
  	list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order],
  						pa_inode_list) {
  		spin_lock(&tmp_pa->pa_lock);
  		if (tmp_pa->pa_deleted) {
  			spin_unlock(&tmp_pa->pa_lock);
  			continue;
  		}
  		if (!added && pa->pa_free < tmp_pa->pa_free) {
  			/* insert right before the first larger entry */
  			list_add_tail_rcu(&pa->pa_inode_list,
  						&tmp_pa->pa_inode_list);
  			added = 1;
  		}
  		spin_unlock(&tmp_pa->pa_lock);
  		/*
  		 * Do not stop after inserting: the walk also counts the
  		 * live entries of the list.
  		 */
  		lg_prealloc_count++;
  	}
  	if (!added)
  		list_add_tail_rcu(&pa->pa_inode_list,
  					&lg->lg_prealloc_list[order]);
  	spin_unlock(&lg->lg_prealloc_lock);
  
  	/* Now trim the list to be not more than 8 elements */
  	if (lg_prealloc_count > 8)
  		ext4_mb_discard_lg_preallocations(sb, lg,
  						order, lg_prealloc_count);
  }
c9de560de   Alex Tomas   ext4: Add multi b...
4140
4141
4142
4143
4144
  /*
   * release all resources we used in allocation
   */
  static int ext4_mb_release_context(struct ext4_allocation_context *ac)
  {
6be2ded1d   Aneesh Kumar K.V   ext4: Don't allow...
4145
4146
  	struct ext4_prealloc_space *pa = ac->ac_pa;
  	if (pa) {
cc0fb9ad7   Aneesh Kumar K.V   ext4: Rename pa_l...
4147
  		if (pa->pa_type == MB_GROUP_PA) {
c9de560de   Alex Tomas   ext4: Add multi b...
4148
  			/* see comment in ext4_mb_use_group_pa() */
6be2ded1d   Aneesh Kumar K.V   ext4: Don't allow...
4149
4150
4151
4152
4153
4154
  			spin_lock(&pa->pa_lock);
  			pa->pa_pstart += ac->ac_b_ex.fe_len;
  			pa->pa_lstart += ac->ac_b_ex.fe_len;
  			pa->pa_free -= ac->ac_b_ex.fe_len;
  			pa->pa_len -= ac->ac_b_ex.fe_len;
  			spin_unlock(&pa->pa_lock);
c9de560de   Alex Tomas   ext4: Add multi b...
4155
  		}
c9de560de   Alex Tomas   ext4: Add multi b...
4156
  	}
8556e8f3b   Aneesh Kumar K.V   ext4: Don't allow...
4157
4158
  	if (ac->alloc_semp)
  		up_read(ac->alloc_semp);
ba4439165   Aneesh Kumar K.V   ext4: Fix lockdep...
4159
4160
4161
4162
4163
4164
4165
4166
  	if (pa) {
  		/*
  		 * We want to add the pa to the right bucket.
  		 * Remove it from the list and while adding
  		 * make sure the list to which we are adding
  		 * doesn't grow big.  We need to release
  		 * alloc_semp before calling ext4_mb_add_n_trim()
  		 */
cc0fb9ad7   Aneesh Kumar K.V   ext4: Rename pa_l...
4167
  		if ((pa->pa_type == MB_GROUP_PA) && likely(pa->pa_free)) {
ba4439165   Aneesh Kumar K.V   ext4: Fix lockdep...
4168
4169
4170
4171
4172
4173
4174
  			spin_lock(pa->pa_obj_lock);
  			list_del_rcu(&pa->pa_inode_list);
  			spin_unlock(pa->pa_obj_lock);
  			ext4_mb_add_n_trim(ac);
  		}
  		ext4_mb_put_pa(ac, ac->ac_sb, pa);
  	}
c9de560de   Alex Tomas   ext4: Add multi b...
4175
4176
4177
4178
4179
4180
4181
4182
4183
4184
4185
4186
  	if (ac->ac_bitmap_page)
  		page_cache_release(ac->ac_bitmap_page);
  	if (ac->ac_buddy_page)
  		page_cache_release(ac->ac_buddy_page);
  	if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
  		mutex_unlock(&ac->ac_lg->lg_mutex);
  	ext4_mb_collect_stats(ac);
  	return 0;
  }
  
  static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
  {
8df9675f8   Theodore Ts'o   ext4: Avoid races...
4187
  	ext4_group_t i, ngroups = ext4_get_groups_count(sb);
c9de560de   Alex Tomas   ext4: Add multi b...
4188
4189
  	int ret;
  	int freed = 0;
9bffad1ed   Theodore Ts'o   ext4: convert ins...
4190
  	trace_ext4_mb_discard_preallocations(sb, needed);
8df9675f8   Theodore Ts'o   ext4: Avoid races...
4191
  	for (i = 0; i < ngroups && needed > 0; i++) {
c9de560de   Alex Tomas   ext4: Add multi b...
4192
4193
4194
4195
4196
4197
4198
4199
4200
4201
4202
4203
4204
4205
  		ret = ext4_mb_discard_group_preallocations(sb, i, needed);
  		freed += ret;
  		needed -= ret;
  	}
  
  	return freed;
  }
  
  /*
   * Main entry point into mballoc to allocate blocks
   * it tries to use preallocation first, then falls back
   * to usual allocation
   */
  /*
   * Allocate blocks for the request @ar.
   *
   * @handle: journal handle, passed on to ext4_mb_mark_diskspace_used()
   * @ar:     the request; ar->len is updated to the number of blocks
   *          actually allocated (0 on failure) and ar->flags may gain
   *          EXT4_MB_DELALLOC_RESERVED or EXT4_MB_HINT_NOPREALLOC
   * @errp:   receives 0 or a negative errno (-ENOSPC, -EDQUOT, -ENOMEM or
   *          whatever the allocator helpers return)
   *
   * Returns the first allocated block, or 0 when nothing was allocated.
   */
  ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
  				struct ext4_allocation_request *ar, int *errp)
  {
  	struct super_block *sb = ar->inode->i_sb;
  	struct ext4_sb_info *sbi = EXT4_SB(sb);
  	struct ext4_allocation_context *ac = NULL;
  	ext4_fsblk_t block = 0;
  	unsigned int inquota = 0;
  	unsigned int reserv_blks = 0;
  
  	trace_ext4_request_blocks(ar);
  
  	/*
  	 * With delayed allocation the blocks and quota were already reserved
  	 * when the data went into the page cache, so the ENOSPC and EDQUOT
  	 * checks can be skipped.
  	 */
  	if (EXT4_I(ar->inode)->i_delalloc_reserved_flag) {
  		ar->flags |= EXT4_MB_DELALLOC_RESERVED;
  	} else {
  		/*
  		 * Without delayed allocation: claim free blocks first,
  		 * halving the request until the claim succeeds ...
  		 */
  		while (ar->len && ext4_claim_free_blocks(sbi, ar->len)) {
  			/* let others free some space */
  			yield();
  			ar->len >>= 1;
  		}
  		if (!ar->len) {
  			*errp = -ENOSPC;
  			return 0;
  		}
  		reserv_blks = ar->len;
  
  		/* ... then charge quota, shrinking one block at a time */
  		while (ar->len && dquot_alloc_block(ar->inode, ar->len)) {
  			ar->flags |= EXT4_MB_HINT_NOPREALLOC;
  			ar->len--;
  		}
  		inquota = ar->len;
  		if (!ar->len) {
  			*errp = -EDQUOT;
  			goto out;
  		}
  	}
  
  	ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
  	if (!ac) {
  		ar->len = 0;
  		*errp = -ENOMEM;
  		goto out;
  	}
  
  	*errp = ext4_mb_initialize_context(ac, ar);
  	if (*errp) {
  		ar->len = 0;
  		goto out;
  	}
  
  	/* try existing preallocations before the regular allocator */
  	ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
  	if (!ext4_mb_use_preallocated(ac)) {
  		ac->ac_op = EXT4_MB_HISTORY_ALLOC;
  		ext4_mb_normalize_request(ac, ar);
  repeat:
  		/* allocate space in core */
  		*errp = ext4_mb_regular_allocator(ac);
  		if (*errp)
  			goto errout;
  
  		/*
  		 * More was found than originally requested: keep the found
  		 * extent in a preallocation descriptor.
  		 */
  		if (ac->ac_status == AC_STATUS_FOUND &&
  				ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len)
  			ext4_mb_new_preallocation(ac);
  	}
  
  	if (likely(ac->ac_status == AC_STATUS_FOUND)) {
  		*errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks);
  		if (*errp == -EAGAIN) {
  			/*
  			 * drop the reference that we took
  			 * in ext4_mb_use_best_found
  			 */
  			ext4_mb_release_context(ac);
  			ac->ac_b_ex.fe_group = 0;
  			ac->ac_b_ex.fe_start = 0;
  			ac->ac_b_ex.fe_len = 0;
  			ac->ac_status = AC_STATUS_CONTINUE;
  			goto repeat;
  		}
  		if (*errp) {
  errout:
  			/* also the landing point for allocator errors above */
  			ext4_discard_allocated_blocks(ac);
  		} else {
  			block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
  			ar->len = ac->ac_b_ex.fe_len;
  		}
  	} else {
  		/* nothing found: discard preallocations and retry if any freed */
  		if (ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len))
  			goto repeat;
  		*errp = -ENOSPC;
  	}
  
  	if (*errp) {
  		ac->ac_b_ex.fe_len = 0;
  		ar->len = 0;
  		ext4_mb_show_ac(ac);
  	}
  
  	ext4_mb_release_context(ac);
  
  out:
  	if (ac)
  		kmem_cache_free(ext4_ac_cachep, ac);
  
  	/* give back the quota charged beyond what was allocated */
  	if (inquota && ar->len < inquota)
  		dquot_free_block(ar->inode, inquota - ar->len);
  
  	/* nothing allocated and not delalloc: release all reserved blocks */
  	if (!ar->len && !EXT4_I(ar->inode)->i_delalloc_reserved_flag)
  		percpu_counter_sub(&sbi->s_dirtyblocks_counter,
  					reserv_blks);
  
  	trace_ext4_allocate_blocks(ar, (unsigned long long)block);
  
  	return block;
  }
c9de560de   Alex Tomas   ext4: Add multi b...
4330

c894058d6   Aneesh Kumar K.V   ext4: Use an rbtr...
4331
4332
4333
4334
4335
4336
4337
4338
4339
4340
4341
4342
4343
4344
  /*
   * Decide whether two pending-free extents may be coalesced into one.
   *
   * All three conditions must hold:
   *   - both extents were freed under the same transaction id,
   *   - both extents belong to the same block group,
   *   - entry2 starts exactly at the block where entry1 ends
   *     (so entry1 must be the lower of the two).
   *
   * Returns 1 when the extents can be merged, 0 otherwise.
   */
  static int can_merge(struct ext4_free_data *entry1,
  			struct ext4_free_data *entry2)
  {
  	if (entry1->t_tid != entry2->t_tid)
  		return 0;
  	if (entry1->group != entry2->group)
  		return 0;
  	return (entry1->start_blk + entry1->count) == entry2->start_blk;
  }
4ddfef7b4   Eric Sandeen   ext4: reduce mbal...
4345
4346
  /*
   * ext4_mb_free_metadata() -- queue a freed extent on the group's
   * pending-free tree and on the running transaction's private list.
   * @handle:	journal handle; must be valid (checked with BUG_ON)
   * @e4b:	loaded buddy of the group the extent lives in; both its
   *		bitmap and buddy pages must be present (checked with BUG_ON)
   * @new_entry:	caller-allocated descriptor of the extent being freed
   *
   * The extent is inserted into db->bb_free_root ordered by start_blk and
   * then merged with its left and right neighbours when can_merge() allows
   * it; absorbed neighbours are unlinked from the tree and from their
   * transaction list and freed.  Finally @new_entry is added to
   * handle->h_transaction->t_private_list under sbi->s_md_lock.
   *
   * If the extent overlaps an entry that is already in the tree, an error
   * is reported via ext4_grp_locked_error() and @new_entry is released
   * here, since it was never linked anywhere and nobody else would free it.
   *
   * ext4_free_blocks() calls this with the group lock held.
   *
   * Always returns 0.
   */
  static noinline_for_stack int
  ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
  		      struct ext4_free_data *new_entry)
  {
  	ext4_group_t group = e4b->bd_group;
  	ext4_grpblk_t block;
  	struct ext4_free_data *entry;
  	struct ext4_group_info *db = e4b->bd_info;
  	struct super_block *sb = e4b->bd_sb;
  	struct ext4_sb_info *sbi = EXT4_SB(sb);
  	struct rb_node **n = &db->bb_free_root.rb_node, *node;
  	struct rb_node *parent = NULL, *new_node;

  	BUG_ON(!ext4_handle_valid(handle));
  	BUG_ON(e4b->bd_bitmap_page == NULL);
  	BUG_ON(e4b->bd_buddy_page == NULL);

  	new_node = &new_entry->node;
  	block = new_entry->start_blk;

  	if (!*n) {
  		/* first free block extent. We need to
  		 * protect buddy cache from being freed,
  		 * otherwise we'll refresh it from
  		 * on-disk bitmap and lose not-yet-available
  		 * blocks */
  		page_cache_get(e4b->bd_buddy_page);
  		page_cache_get(e4b->bd_bitmap_page);
  	}

  	/* Find the insertion point; any overlap with an existing entry
  	 * means the block is being freed twice. */
  	while (*n) {
  		parent = *n;
  		entry = rb_entry(parent, struct ext4_free_data, node);
  		if (block < entry->start_blk)
  			n = &(*n)->rb_left;
  		else if (block >= (entry->start_blk + entry->count))
  			n = &(*n)->rb_right;
  		else {
  			ext4_grp_locked_error(sb, group, 0,
  				ext4_group_first_block_no(sb, group) + block,
  				"Block already on to-be-freed list");
  			/*
  			 * new_entry has not been linked into the tree or
  			 * any list, so it must be freed here or it leaks.
  			 */
  			kmem_cache_free(ext4_free_ext_cachep, new_entry);
  			return 0;
  		}
  	}

  	rb_link_node(new_node, parent, n);
  	rb_insert_color(new_node, &db->bb_free_root);

  	/* Now try to see the extent can be merged to left and right */
  	node = rb_prev(new_node);
  	if (node) {
  		entry = rb_entry(node, struct ext4_free_data, node);
  		if (can_merge(entry, new_entry)) {
  			/* absorb the left neighbour into new_entry */
  			new_entry->start_blk = entry->start_blk;
  			new_entry->count += entry->count;
  			rb_erase(node, &(db->bb_free_root));
  			spin_lock(&sbi->s_md_lock);
  			list_del(&entry->list);
  			spin_unlock(&sbi->s_md_lock);
  			kmem_cache_free(ext4_free_ext_cachep, entry);
  		}
  	}

  	node = rb_next(new_node);
  	if (node) {
  		entry = rb_entry(node, struct ext4_free_data, node);
  		if (can_merge(new_entry, entry)) {
  			/* absorb the right neighbour into new_entry */
  			new_entry->count += entry->count;
  			rb_erase(node, &(db->bb_free_root));
  			spin_lock(&sbi->s_md_lock);
  			list_del(&entry->list);
  			spin_unlock(&sbi->s_md_lock);
  			kmem_cache_free(ext4_free_ext_cachep, entry);
  		}
  	}
  	/* Add the extent to transaction's private list */
  	spin_lock(&sbi->s_md_lock);
  	list_add(&new_entry->list, &handle->h_transaction->t_private_list);
  	spin_unlock(&sbi->s_md_lock);
  	return 0;
  }
443387113   Theodore Ts'o   ext4: fold ext4_f...
4423
4424
4425
4426
4427
4428
4429
  /**
   * ext4_free_blocks() -- Free given blocks and update quota
   * @handle:		handle for this transaction
   * @inode:		inode
   * @block:		start physical block to free
   * @count:		number of blocks to count
   * @metadata: 		Are these metadata blocks
c9de560de   Alex Tomas   ext4: Add multi b...
4430
   */
443387113   Theodore Ts'o   ext4: fold ext4_f...
4431
  void ext4_free_blocks(handle_t *handle, struct inode *inode,
e6362609b   Theodore Ts'o   ext4: call ext4_f...
4432
4433
  		      struct buffer_head *bh, ext4_fsblk_t block,
  		      unsigned long count, int flags)
c9de560de   Alex Tomas   ext4: Add multi b...
4434
  {
26346ff68   Aneesh Kumar K.V   ext4: Don't panic...
4435
  	struct buffer_head *bitmap_bh = NULL;
c9de560de   Alex Tomas   ext4: Add multi b...
4436
  	struct super_block *sb = inode->i_sb;
256bdb497   Eric Sandeen   ext4: allocate st...
4437
  	struct ext4_allocation_context *ac = NULL;
c9de560de   Alex Tomas   ext4: Add multi b...
4438
  	struct ext4_group_desc *gdp;
443387113   Theodore Ts'o   ext4: fold ext4_f...
4439
  	unsigned long freed = 0;
498e5f241   Theodore Ts'o   ext4: Change unsi...
4440
  	unsigned int overflow;
c9de560de   Alex Tomas   ext4: Add multi b...
4441
4442
4443
4444
4445
4446
4447
  	ext4_grpblk_t bit;
  	struct buffer_head *gd_bh;
  	ext4_group_t block_group;
  	struct ext4_sb_info *sbi;
  	struct ext4_buddy e4b;
  	int err = 0;
  	int ret;
e6362609b   Theodore Ts'o   ext4: call ext4_f...
4448
4449
4450
4451
4452
4453
  	if (bh) {
  		if (block)
  			BUG_ON(block != bh->b_blocknr);
  		else
  			block = bh->b_blocknr;
  	}
c9de560de   Alex Tomas   ext4: Add multi b...
4454

c9de560de   Alex Tomas   ext4: Add multi b...
4455
  	sbi = EXT4_SB(sb);
1f2acb601   Theodore Ts'o   ext4: Add block v...
4456
4457
  	if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
  	    !ext4_data_block_valid(sbi, block, count)) {
12062dddd   Eric Sandeen   ext4: move __func...
4458
  		ext4_error(sb, "Freeing blocks not in datazone - "
1f2acb601   Theodore Ts'o   ext4: Add block v...
4459
  			   "block = %llu, count = %lu", block, count);
c9de560de   Alex Tomas   ext4: Add multi b...
4460
4461
  		goto error_return;
  	}
0610b6e99   Theodore Ts'o   ext4: Fix 64-bit ...
4462
4463
  	ext4_debug("freeing block %llu
  ", block);
e6362609b   Theodore Ts'o   ext4: call ext4_f...
4464
4465
4466
4467
4468
4469
4470
4471
4472
4473
4474
4475
  	trace_ext4_free_blocks(inode, block, count, flags);
  
  	if (flags & EXT4_FREE_BLOCKS_FORGET) {
  		struct buffer_head *tbh = bh;
  		int i;
  
  		BUG_ON(bh && (count > 1));
  
  		for (i = 0; i < count; i++) {
  			if (!bh)
  				tbh = sb_find_get_block(inode->i_sb,
  							block + i);
60e6679e2   Theodore Ts'o   ext4: Drop whites...
4476
  			ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
e6362609b   Theodore Ts'o   ext4: call ext4_f...
4477
4478
4479
  				    inode, tbh, block + i);
  		}
  	}
60e6679e2   Theodore Ts'o   ext4: Drop whites...
4480
  	/*
e6362609b   Theodore Ts'o   ext4: call ext4_f...
4481
4482
4483
4484
4485
4486
4487
4488
  	 * We need to make sure we don't reuse the freed block until
  	 * after the transaction is committed, which we can do by
  	 * treating the block as metadata, below.  We make an
  	 * exception if the inode is to be written in writeback mode
  	 * since writeback mode has weak data consistency guarantees.
  	 */
  	if (!ext4_should_writeback_data(inode))
  		flags |= EXT4_FREE_BLOCKS_METADATA;
c9de560de   Alex Tomas   ext4: Add multi b...
4489

256bdb497   Eric Sandeen   ext4: allocate st...
4490
4491
  	ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
  	if (ac) {
256bdb497   Eric Sandeen   ext4: allocate st...
4492
4493
4494
  		ac->ac_inode = inode;
  		ac->ac_sb = sb;
  	}
c9de560de   Alex Tomas   ext4: Add multi b...
4495
4496
4497
4498
4499
4500
4501
4502
4503
4504
4505
4506
4507
  
  do_more:
  	overflow = 0;
  	ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
  
  	/*
  	 * Check to see if we are freeing blocks across a group
  	 * boundary.
  	 */
  	if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) {
  		overflow = bit + count - EXT4_BLOCKS_PER_GROUP(sb);
  		count -= overflow;
  	}
574ca174c   Theodore Ts'o   ext4: Rename read...
4508
  	bitmap_bh = ext4_read_block_bitmap(sb, block_group);
ce89f46cb   Aneesh Kumar K.V   ext4: Improve err...
4509
4510
  	if (!bitmap_bh) {
  		err = -EIO;
c9de560de   Alex Tomas   ext4: Add multi b...
4511
  		goto error_return;
ce89f46cb   Aneesh Kumar K.V   ext4: Improve err...
4512
  	}
c9de560de   Alex Tomas   ext4: Add multi b...
4513
  	gdp = ext4_get_group_desc(sb, block_group, &gd_bh);
ce89f46cb   Aneesh Kumar K.V   ext4: Improve err...
4514
4515
  	if (!gdp) {
  		err = -EIO;
c9de560de   Alex Tomas   ext4: Add multi b...
4516
  		goto error_return;
ce89f46cb   Aneesh Kumar K.V   ext4: Improve err...
4517
  	}
c9de560de   Alex Tomas   ext4: Add multi b...
4518
4519
4520
4521
4522
4523
4524
  
  	if (in_range(ext4_block_bitmap(sb, gdp), block, count) ||
  	    in_range(ext4_inode_bitmap(sb, gdp), block, count) ||
  	    in_range(block, ext4_inode_table(sb, gdp),
  		      EXT4_SB(sb)->s_itb_per_group) ||
  	    in_range(block + count - 1, ext4_inode_table(sb, gdp),
  		      EXT4_SB(sb)->s_itb_per_group)) {
12062dddd   Eric Sandeen   ext4: move __func...
4525
  		ext4_error(sb, "Freeing blocks in system zone - "
0610b6e99   Theodore Ts'o   ext4: Fix 64-bit ...
4526
  			   "Block = %llu, count = %lu", block, count);
519deca04   Aneesh Kumar K.V   ext4: Retry block...
4527
4528
  		/* err = 0. ext4_std_error should be a no op */
  		goto error_return;
c9de560de   Alex Tomas   ext4: Add multi b...
4529
4530
4531
4532
4533
4534
4535
4536
4537
4538
4539
4540
4541
4542
4543
4544
  	}
  
  	BUFFER_TRACE(bitmap_bh, "getting write access");
  	err = ext4_journal_get_write_access(handle, bitmap_bh);
  	if (err)
  		goto error_return;
  
  	/*
  	 * We are about to modify some metadata.  Call the journal APIs
  	 * to unshare ->b_data if a currently-committing transaction is
  	 * using it
  	 */
  	BUFFER_TRACE(gd_bh, "get_write_access");
  	err = ext4_journal_get_write_access(handle, gd_bh);
  	if (err)
  		goto error_return;
c9de560de   Alex Tomas   ext4: Add multi b...
4545
4546
4547
4548
4549
4550
4551
  #ifdef AGGRESSIVE_CHECK
  	{
  		int i;
  		for (i = 0; i < count; i++)
  			BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
  	}
  #endif
256bdb497   Eric Sandeen   ext4: allocate st...
4552
4553
4554
4555
  	if (ac) {
  		ac->ac_b_ex.fe_group = block_group;
  		ac->ac_b_ex.fe_start = bit;
  		ac->ac_b_ex.fe_len = count;
296c355cd   Theodore Ts'o   ext4: Use tracepo...
4556
  		trace_ext4_mballoc_free(ac);
256bdb497   Eric Sandeen   ext4: allocate st...
4557
  	}
c9de560de   Alex Tomas   ext4: Add multi b...
4558

920313a72   Aneesh Kumar K.V   ext4: Use EXT4_GR...
4559
4560
4561
  	err = ext4_mb_load_buddy(sb, block_group, &e4b);
  	if (err)
  		goto error_return;
e6362609b   Theodore Ts'o   ext4: call ext4_f...
4562
4563
  
  	if ((flags & EXT4_FREE_BLOCKS_METADATA) && ext4_handle_valid(handle)) {
7a2fcbf7f   Aneesh Kumar K.V   ext4: don't use b...
4564
4565
4566
4567
4568
4569
4570
4571
4572
4573
  		struct ext4_free_data *new_entry;
  		/*
  		 * blocks being freed are metadata. these blocks shouldn't
  		 * be used until this transaction is committed
  		 */
  		new_entry  = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
  		new_entry->start_blk = bit;
  		new_entry->group  = block_group;
  		new_entry->count = count;
  		new_entry->t_tid = handle->h_transaction->t_tid;
955ce5f5b   Aneesh Kumar K.V   ext4: Convert ext...
4574

7a2fcbf7f   Aneesh Kumar K.V   ext4: don't use b...
4575
  		ext4_lock_group(sb, block_group);
955ce5f5b   Aneesh Kumar K.V   ext4: Convert ext...
4576
  		mb_clear_bits(bitmap_bh->b_data, bit, count);
7a2fcbf7f   Aneesh Kumar K.V   ext4: don't use b...
4577
  		ext4_mb_free_metadata(handle, &e4b, new_entry);
c9de560de   Alex Tomas   ext4: Add multi b...
4578
  	} else {
7a2fcbf7f   Aneesh Kumar K.V   ext4: don't use b...
4579
4580
4581
4582
  		/* need to update group_info->bb_free and bitmap
  		 * with group lock held. generate_buddy look at
  		 * them with group lock_held
  		 */
955ce5f5b   Aneesh Kumar K.V   ext4: Convert ext...
4583
4584
  		ext4_lock_group(sb, block_group);
  		mb_clear_bits(bitmap_bh->b_data, bit, count);
7e5a8cdd8   Shen Feng   ext4: fix error p...
4585
  		mb_free_blocks(inode, &e4b, bit, count);
c9de560de   Alex Tomas   ext4: Add multi b...
4586
  		ext4_mb_return_to_preallocation(inode, &e4b, block, count);
5c521830c   Jiaying Zhang   ext4: Support dis...
4587
4588
  		if (test_opt(sb, DISCARD))
  			ext4_issue_discard(sb, block_group, bit, count);
c9de560de   Alex Tomas   ext4: Add multi b...
4589
  	}
560671a0d   Aneesh Kumar K.V   ext4: Use high 16...
4590
4591
  	ret = ext4_free_blks_count(sb, gdp) + count;
  	ext4_free_blks_set(sb, gdp, ret);
c9de560de   Alex Tomas   ext4: Add multi b...
4592
  	gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
955ce5f5b   Aneesh Kumar K.V   ext4: Convert ext...
4593
  	ext4_unlock_group(sb, block_group);
c9de560de   Alex Tomas   ext4: Add multi b...
4594
  	percpu_counter_add(&sbi->s_freeblocks_counter, count);
772cb7c83   Jose R. Santos   ext4: New inode a...
4595
4596
  	if (sbi->s_log_groups_per_flex) {
  		ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
9f24e4208   Theodore Ts'o   ext4: Use atomic_...
4597
  		atomic_add(count, &sbi->s_flex_groups[flex_group].free_blocks);
772cb7c83   Jose R. Santos   ext4: New inode a...
4598
  	}
e39e07fdf   Jing Zhang   ext4: rename ext4...
4599
  	ext4_mb_unload_buddy(&e4b);
c9de560de   Alex Tomas   ext4: Add multi b...
4600

443387113   Theodore Ts'o   ext4: fold ext4_f...
4601
  	freed += count;
c9de560de   Alex Tomas   ext4: Add multi b...
4602

7a2fcbf7f   Aneesh Kumar K.V   ext4: don't use b...
4603
4604
4605
  	/* We dirtied the bitmap block */
  	BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
  	err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
c9de560de   Alex Tomas   ext4: Add multi b...
4606
4607
  	/* And the group descriptor block */
  	BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
0390131ba   Frank Mayhar   ext4: Allow ext4 ...
4608
  	ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
c9de560de   Alex Tomas   ext4: Add multi b...
4609
4610
4611
4612
4613
4614
4615
4616
4617
  	if (!err)
  		err = ret;
  
  	if (overflow && !err) {
  		block += count;
  		count = overflow;
  		put_bh(bitmap_bh);
  		goto do_more;
  	}
a0375156c   Theodore Ts'o   ext4: Clean up s_...
4618
  	ext4_mark_super_dirty(sb);
c9de560de   Alex Tomas   ext4: Add multi b...
4619
  error_return:
443387113   Theodore Ts'o   ext4: fold ext4_f...
4620
  	if (freed)
5dd4056db   Christoph Hellwig   dquot: cleanup sp...
4621
  		dquot_free_block(inode, freed);
c9de560de   Alex Tomas   ext4: Add multi b...
4622
4623
  	brelse(bitmap_bh);
  	ext4_std_error(sb, err);
256bdb497   Eric Sandeen   ext4: allocate st...
4624
4625
  	if (ac)
  		kmem_cache_free(ext4_ac_cachep, ac);
c9de560de   Alex Tomas   ext4: Add multi b...
4626
4627
  	return;
  }