Blame view

fs/ext4/mballoc.c 139 KB
c9de560de   Alex Tomas   ext4: Add multi b...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
  /*
   * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
   * Written by Alex Tomas <alex@clusterfs.com>
   *
   * This program is free software; you can redistribute it and/or modify
   * it under the terms of the GNU General Public License version 2 as
   * published by the Free Software Foundation.
   *
   * This program is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   * GNU General Public License for more details.
   *
   * You should have received a copy of the GNU General Public License
   * along with this program; if not, write to the Free Software
   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   */
  
  
  /*
   * mballoc.c contains the multiblocks allocation routines
   */
8f6e39a7a   Mingming Cao   ext4: Move mballo...
23
  #include "mballoc.h"
6ba495e92   Theodore Ts'o   ext4: Add configu...
24
  #include <linux/debugfs.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
25
  #include <linux/slab.h>
9bffad1ed   Theodore Ts'o   ext4: convert ins...
26
  #include <trace/events/ext4.h>
c9de560de   Alex Tomas   ext4: Add multi b...
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
  /*
   * MUSTDO:
   *   - test ext4_ext_search_left() and ext4_ext_search_right()
   *   - search for metadata in few groups
   *
   * TODO v4:
   *   - normalization should take into account whether file is still open
   *   - discard preallocations if no free space left (policy?)
   *   - don't normalize tails
   *   - quota
   *   - reservation for superuser
   *
   * TODO v3:
   *   - bitmap read-ahead (proposed by Oleg Drokin aka green)
   *   - track min/max extents in each group for better group selection
   *   - mb_mark_used() may allocate chunk right after splitting buddy
   *   - tree of groups sorted by number of free blocks
   *   - error handling
   */
  
  /*
   * An allocation request asks for a number of blocks located near the
   * specified goal (block) value.
   *
b713a5ec5   Theodore Ts'o   ext4: remove /pro...
51
52
53
54
55
56
57
58
59
   * During initialization phase of the allocator we decide to use the
   * group preallocation or inode preallocation depending on the size of
   * the file. The size of the file could be the resulting file size we
   * would have after allocation, or the current file size, which ever
   * is larger. If the size is less than sbi->s_mb_stream_request we
   * select to use the group preallocation. The default value of
   * s_mb_stream_request is 16 blocks. This can also be tuned via
   * /sys/fs/ext4/<partition>/mb_stream_req. The value is represented in
   * terms of number of blocks.
c9de560de   Alex Tomas   ext4: Add multi b...
60
61
   *
   * The main motivation for having small file use group preallocation is to
b713a5ec5   Theodore Ts'o   ext4: remove /pro...
62
   * ensure that we have small files closer together on the disk.
c9de560de   Alex Tomas   ext4: Add multi b...
63
   *
b713a5ec5   Theodore Ts'o   ext4: remove /pro...
64
65
66
67
   * First stage the allocator looks at the inode prealloc list,
   * ext4_inode_info->i_prealloc_list, which contains list of prealloc
   * spaces for this particular inode. The inode prealloc space is
   * represented as:
c9de560de   Alex Tomas   ext4: Add multi b...
68
69
70
   *
   * pa_lstart -> the logical start block for this prealloc space
   * pa_pstart -> the physical start block for this prealloc space
53accfa9f   Theodore Ts'o   ext4: teach mball...
71
72
   * pa_len    -> length for this prealloc space (in clusters)
   * pa_free   ->  free space available in this prealloc space (in clusters)
c9de560de   Alex Tomas   ext4: Add multi b...
73
74
75
   *
   * The inode preallocation space is used looking at the _logical_ start
   * block. If only the logical file block falls within the range of prealloc
caaf7a29d   Tao Ma   ext4: Fix a doubl...
76
77
   * space we will consume the particular prealloc space. This makes sure that
   * we have contiguous physical blocks representing the file blocks
c9de560de   Alex Tomas   ext4: Add multi b...
78
79
80
81
82
83
84
   *
   * The important thing to be noted in case of inode prealloc space is that
   * we don't modify the values associated to inode prealloc space except
   * pa_free.
   *
   * If we are not able to find blocks in the inode prealloc space and if we
   * have the group allocation flag set then we look at the locality group
caaf7a29d   Tao Ma   ext4: Fix a doubl...
85
   * prealloc space. These are per CPU prealloc list represented as
c9de560de   Alex Tomas   ext4: Add multi b...
86
87
88
89
90
91
92
   *
   * ext4_sb_info.s_locality_groups[smp_processor_id()]
   *
   * The reason for having a per cpu locality group is to reduce the contention
   * between CPUs. It is possible to get scheduled at this point.
   *
   * The locality group prealloc space is used looking at whether we have
25985edce   Lucas De Marchi   Fix common misspe...
93
   * enough free space (pa_free) within the prealloc space.
c9de560de   Alex Tomas   ext4: Add multi b...
94
95
96
97
98
99
100
101
102
103
104
105
   *
   * If we can't allocate blocks via inode prealloc or/and locality group
   * prealloc then we look at the buddy cache. The buddy cache is represented
   * by ext4_sb_info.s_buddy_cache (struct inode) whose file offset gets
   * mapped to the buddy and bitmap information regarding different
   * groups. The buddy information is attached to buddy cache inode so that
   * we can access them through the page cache. The information regarding
   * each group is loaded via ext4_mb_load_buddy.  The information involves
   * block bitmap and buddy information. The information is stored in the
   * inode as:
   *
   *  {                        page                        }
c3a326a65   Aneesh Kumar K.V   ext4: cleanup mba...
106
   *  [ group 0 bitmap][ group 0 buddy] [group 1][ group 1]...
c9de560de   Alex Tomas   ext4: Add multi b...
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
   *
   *
   * one block each for bitmap and buddy information.  So for each group we
   * take up 2 blocks. A page can contain blocks_per_page (PAGE_CACHE_SIZE /
   * blocksize) blocks.  So it can have information regarding groups_per_page
   * which is blocks_per_page/2
   *
   * The buddy cache inode is not stored on disk. The inode is thrown
   * away when the filesystem is unmounted.
   *
   * We look for count number of blocks in the buddy cache. If we were able
   * to locate that many free blocks we return with additional information
   * regarding rest of the contiguous physical block available
   *
   * Before allocating blocks via buddy cache we normalize the request
   * blocks. This ensures we ask for more blocks than we needed. The extra
   * blocks that we get after allocation are added to the respective prealloc
   * list. In case of inode preallocation we follow a list of heuristics
   * based on file size. This can be found in ext4_mb_normalize_request. If
   * we are doing a group prealloc we try to normalize the request to
27baebb84   Theodore Ts'o   ext4: tune mballo...
127
128
   * sbi->s_mb_group_prealloc.  The default value of s_mb_group_prealloc is
   * dependent on the cluster size; for non-bigalloc file systems, it is
c9de560de   Alex Tomas   ext4: Add multi b...
129
   * 512 blocks. This can be tuned via
d7a1fee13   Dan Ehrenberg   ext4: make the pr...
130
   * /sys/fs/ext4/<partition>/mb_group_prealloc. The value is represented in
c9de560de   Alex Tomas   ext4: Add multi b...
131
132
   * terms of number of blocks. If we have mounted the file system with -o
   * stripe=<value> option the group prealloc request is normalized to the
d7a1fee13   Dan Ehrenberg   ext4: make the pr...
133
134
   * smallest multiple of the stripe value (sbi->s_stripe) which is
   * greater than the default mb_group_prealloc.
c9de560de   Alex Tomas   ext4: Add multi b...
135
   *
d7a1fee13   Dan Ehrenberg   ext4: make the pr...
136
   * The regular allocator (using the buddy cache) supports a few tunables.
c9de560de   Alex Tomas   ext4: Add multi b...
137
   *
b713a5ec5   Theodore Ts'o   ext4: remove /pro...
138
139
140
   * /sys/fs/ext4/<partition>/mb_min_to_scan
   * /sys/fs/ext4/<partition>/mb_max_to_scan
   * /sys/fs/ext4/<partition>/mb_order2_req
c9de560de   Alex Tomas   ext4: Add multi b...
141
   *
b713a5ec5   Theodore Ts'o   ext4: remove /pro...
142
   * The regular allocator uses buddy scan only if the request len is power of
c9de560de   Alex Tomas   ext4: Add multi b...
143
144
   * 2 blocks and the order of allocation is >= sbi->s_mb_order2_reqs. The
   * value of s_mb_order2_reqs can be tuned via
b713a5ec5   Theodore Ts'o   ext4: remove /pro...
145
   * /sys/fs/ext4/<partition>/mb_order2_req.  If the request len is equal to
af901ca18   AndrĂ© Goddard Rosa   tree-wide: fix as...
146
   * stripe size (sbi->s_stripe), we try to search for contiguous block in
b713a5ec5   Theodore Ts'o   ext4: remove /pro...
147
148
149
   * stripe size. This should result in better allocation on RAID setups. If
   * not, we search in the specific group using bitmap for best extents. The
   * tunable min_to_scan and max_to_scan control the behaviour here.
c9de560de   Alex Tomas   ext4: Add multi b...
150
   * min_to_scan indicates how long the mballoc __must__ look for a best
b713a5ec5   Theodore Ts'o   ext4: remove /pro...
151
   * extent and max_to_scan indicates how long the mballoc __can__ look for a
c9de560de   Alex Tomas   ext4: Add multi b...
152
153
154
   * best extent in the found extents. Searching for the blocks starts with
   * the group specified as the goal value in allocation context via
   * ac_g_ex. Each group is first checked based on the criteria whether it
caaf7a29d   Tao Ma   ext4: Fix a doubl...
155
   * can be used for allocation. ext4_mb_good_group explains how the groups are
c9de560de   Alex Tomas   ext4: Add multi b...
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
   * checked.
   *
   * Both the prealloc space are getting populated as above. So for the first
   * request we will hit the buddy cache which will result in this prealloc
   * space getting filled. The prealloc space is then later used for the
   * subsequent request.
   */
  
  /*
   * mballoc operates on the following data:
   *  - on-disk bitmap
   *  - in-core buddy (actually includes buddy and bitmap)
   *  - preallocation descriptors (PAs)
   *
   * there are two types of preallocations:
   *  - inode
   *    assigned to specific inode and can be used for this inode only.
   *    it describes part of inode's space preallocated to specific
   *    physical blocks. any block from that preallocated can be used
   *    independent. the descriptor just tracks number of blocks left
   *    unused. so, before taking some block from descriptor, one must
   *    make sure corresponded logical block isn't allocated yet. this
   *    also means that freeing any block within descriptor's range
   *    must discard all preallocated blocks.
   *  - locality group
   *    assigned to specific locality group which does not translate to
   *    permanent set of inodes: inode can join and leave group. space
   *    from this type of preallocation can be used for any inode. thus
   *    it's consumed from the beginning to the end.
   *
   * relation between them can be expressed as:
   *    in-core buddy = on-disk bitmap + preallocation descriptors
   *
   * this means blocks mballoc considers used are:
   *  - allocated blocks (persistent)
   *  - preallocated blocks (non-persistent)
   *
   * consistency in mballoc world means that at any time a block is either
   * free or used in ALL structures. notice: "any time" should not be read
   * literally -- time is discrete and delimited by locks.
   *
   *  to keep it simple, we don't use block numbers, instead we count number of
   *  blocks: how many blocks marked used/free in on-disk bitmap, buddy and PA.
   *
   * all operations can be expressed as:
   *  - init buddy:			buddy = on-disk + PAs
   *  - new PA:				buddy += N; PA = N
   *  - use inode PA:			on-disk += N; PA -= N
   *  - discard inode PA			buddy -= on-disk - PA; PA = 0
   *  - use locality group PA		on-disk += N; PA -= N
   *  - discard locality group PA		buddy -= PA; PA = 0
   *  note: 'buddy -= on-disk - PA' is used to show that on-disk bitmap
   *        is used in real operation because we can't know actual used
   *        bits from PA, only from on-disk bitmap
   *
   * if we follow this strict logic, then all operations above should be atomic.
   * given some of them can block, we'd have to use something like semaphores
   * killing performance on high-end SMP hardware. let's try to relax it using
   * the following knowledge:
   *  1) if buddy is referenced, it's already initialized
   *  2) while block is used in buddy and the buddy is referenced,
   *     nobody can re-allocate that block
   *  3) we work on bitmaps and '+' actually means 'set bits'. if on-disk has
   *     bit set and PA claims same block, it's OK. IOW, one can set bit in
   *     on-disk bitmap if buddy has same bit set or/and PA covers corresponded
   *     block
   *
   * so, now we're building a concurrency table:
   *  - init buddy vs.
   *    - new PA
   *      blocks for PA are allocated in the buddy, buddy must be referenced
   *      until PA is linked to allocation group to avoid concurrent buddy init
   *    - use inode PA
   *      we need to make sure that either on-disk bitmap or PA has uptodate data
   *      given (3) we care that PA-=N operation doesn't interfere with init
   *    - discard inode PA
   *      the simplest way would be to have buddy initialized by the discard
   *    - use locality group PA
   *      again PA-=N must be serialized with init
   *    - discard locality group PA
   *      the simplest way would be to have buddy initialized by the discard
   *  - new PA vs.
   *    - use inode PA
   *      i_data_sem serializes them
   *    - discard inode PA
   *      discard process must wait until PA isn't used by another process
   *    - use locality group PA
   *      some mutex should serialize them
   *    - discard locality group PA
   *      discard process must wait until PA isn't used by another process
   *  - use inode PA
   *    - use inode PA
   *      i_data_sem or another mutex should serialize them
   *    - discard inode PA
   *      discard process must wait until PA isn't used by another process
   *    - use locality group PA
   *      nothing wrong here -- they're different PAs covering different blocks
   *    - discard locality group PA
   *      discard process must wait until PA isn't used by another process
   *
   * now we're ready to make few consequences:
   *  - PA is referenced and while it is no discard is possible
   *  - PA is referenced until block isn't marked in on-disk bitmap
   *  - PA changes only after on-disk bitmap
   *  - discard must not compete with init. either init is done before
   *    any discard or they're serialized somehow
   *  - buddy init as sum of on-disk bitmap and PAs is done atomically
   *
   * a special case when we've used PA to emptiness. no need to modify buddy
   * in this case, but we should care about concurrent init
   *
   */
  
   /*
   * Logic in few words:
   *
   *  - allocation:
   *    load group
   *    find blocks
   *    mark bits in on-disk bitmap
   *    release group
   *
   *  - use preallocation:
   *    find proper PA (per-inode or group)
   *    load group
   *    mark bits in on-disk bitmap
   *    release group
   *    release PA
   *
   *  - free:
   *    load group
   *    mark bits in on-disk bitmap
   *    release group
   *
   *  - discard preallocations in group:
   *    mark PAs deleted
   *    move them onto local list
   *    load on-disk bitmap
   *    load group
   *    remove PA from object (inode or locality group)
   *    mark free blocks in-core
   *
   *  - discard inode's preallocations:
   */
  
  /*
   * Locking rules
   *
   * Locks:
   *  - bitlock on a group	(group)
   *  - object (inode/locality)	(object)
   *  - per-pa lock		(pa)
   *
   * Paths:
   *  - new pa
   *    object
   *    group
   *
   *  - find and use pa:
   *    pa
   *
   *  - release consumed pa:
   *    pa
   *    group
   *    object
   *
   *  - generate in-core bitmap:
   *    group
   *        pa
   *
   *  - discard all for given object (inode, locality group):
   *    object
   *        pa
   *    group
   *
   *  - discard all for given group:
   *    group
   *        pa
   *    group
   *        object
   *
   */
c3a326a65   Aneesh Kumar K.V   ext4: cleanup mba...
338
339
340
  static struct kmem_cache *ext4_pspace_cachep;
  static struct kmem_cache *ext4_ac_cachep;
  static struct kmem_cache *ext4_free_ext_cachep;
fb1813f4a   Curt Wohlgemuth   ext4: use dedicat...
341
342
343
344
  
  /* We create slab caches for groupinfo data structures based on the
   * superblock block size.  There will be one per mounted filesystem for
   * each unique s_blocksize_bits */
2892c15dd   Eric Sandeen   ext4: make grpinf...
345
  #define NR_GRPINFO_CACHES 8
fb1813f4a   Curt Wohlgemuth   ext4: use dedicat...
346
  static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES];
2892c15dd   Eric Sandeen   ext4: make grpinf...
347
348
349
350
351
  static const char *ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = {
  	"ext4_groupinfo_1k", "ext4_groupinfo_2k", "ext4_groupinfo_4k",
  	"ext4_groupinfo_8k", "ext4_groupinfo_16k", "ext4_groupinfo_32k",
  	"ext4_groupinfo_64k", "ext4_groupinfo_128k"
  };
c3a326a65   Aneesh Kumar K.V   ext4: cleanup mba...
352
353
  static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
  					ext4_group_t group);
7a2fcbf7f   Aneesh Kumar K.V   ext4: don't use b...
354
355
  static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
  						ext4_group_t group);
c3a326a65   Aneesh Kumar K.V   ext4: cleanup mba...
356
  static void release_blocks_on_commit(journal_t *journal, transaction_t *txn);
ffad0a44b   Aneesh Kumar K.V   ext4: ext4_find_n...
357
358
  static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
  {
c9de560de   Alex Tomas   ext4: Add multi b...
359
  #if BITS_PER_LONG == 64
ffad0a44b   Aneesh Kumar K.V   ext4: ext4_find_n...
360
361
  	*bit += ((unsigned long) addr & 7UL) << 3;
  	addr = (void *) ((unsigned long) addr & ~7UL);
c9de560de   Alex Tomas   ext4: Add multi b...
362
  #elif BITS_PER_LONG == 32
ffad0a44b   Aneesh Kumar K.V   ext4: ext4_find_n...
363
364
  	*bit += ((unsigned long) addr & 3UL) << 3;
  	addr = (void *) ((unsigned long) addr & ~3UL);
c9de560de   Alex Tomas   ext4: Add multi b...
365
366
367
  #else
  #error "how many bits you are?!"
  #endif
ffad0a44b   Aneesh Kumar K.V   ext4: ext4_find_n...
368
369
  	return addr;
  }
c9de560de   Alex Tomas   ext4: Add multi b...
370
371
372
373
374
375
376
  
  /*
   * Test bit @bit of the bitmap at @addr and return ext4_test_bit()'s result.
   * @addr does not have to be unsigned-long aligned.
   */
  static inline int mb_test_bit(int bit, void *addr)
  {
  	/*
  	 * ext4_test_bit on architecture like powerpc
  	 * needs unsigned long aligned address
  	 */
  	void *aligned = mb_correct_addr_and_bit(&bit, addr);

  	return ext4_test_bit(bit, aligned);
  }
  
  /*
   * Set bit @bit of the bitmap at @addr.  The address is first rounded down
   * to an unsigned-long boundary and @bit adjusted to match.
   */
  static inline void mb_set_bit(int bit, void *addr)
  {
  	void *aligned = mb_correct_addr_and_bit(&bit, addr);

  	ext4_set_bit(bit, aligned);
  }
c9de560de   Alex Tomas   ext4: Add multi b...
386
387
  /*
   * Clear bit @bit of the bitmap at @addr.  The address is first rounded down
   * to an unsigned-long boundary and @bit adjusted to match.
   */
  static inline void mb_clear_bit(int bit, void *addr)
  {
  	void *aligned = mb_correct_addr_and_bit(&bit, addr);

  	ext4_clear_bit(bit, aligned);
  }
ffad0a44b   Aneesh Kumar K.V   ext4: ext4_find_n...
391
392
  /*
   * Search the bitmap at @addr for the next zero bit at or after @start,
   * looking at no more than @max bits.  The result is expressed relative to
   * the caller's (possibly unaligned) @addr and never exceeds @max.
   */
  static inline int mb_find_next_zero_bit(void *addr, int max, int start)
  {
  	int shift = 0;
  	int found;
  	void *base = mb_correct_addr_and_bit(&shift, addr);

  	/* search in aligned coordinates, then translate the hit back */
  	found = ext4_find_next_zero_bit(base, max + shift, start + shift) - shift;

  	return found > max ? max : found;
  }
  
  /*
   * Search the bitmap at @addr for the next set bit at or after @start,
   * looking at no more than @max bits.  The result is expressed relative to
   * the caller's (possibly unaligned) @addr and never exceeds @max.
   */
  static inline int mb_find_next_bit(void *addr, int max, int start)
  {
  	int shift = 0;
  	int found;
  	void *base = mb_correct_addr_and_bit(&shift, addr);

  	/* search in aligned coordinates, then translate the hit back */
  	found = ext4_find_next_bit(base, max + shift, start + shift) - shift;

  	return found > max ? max : found;
  }
c9de560de   Alex Tomas   ext4: Add multi b...
414
415
416
  static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
  {
  	char *bb;
c9de560de   Alex Tomas   ext4: Add multi b...
417
418
419
420
421
422
423
424
425
  	BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b));
  	BUG_ON(max == NULL);
  
  	if (order > e4b->bd_blkbits + 1) {
  		*max = 0;
  		return NULL;
  	}
  
  	/* at order 0 we see each particular block */
84b775a35   Coly Li   ext4: code cleanu...
426
427
  	if (order == 0) {
  		*max = 1 << (e4b->bd_blkbits + 3);
c9de560de   Alex Tomas   ext4: Add multi b...
428
  		return EXT4_MB_BITMAP(e4b);
84b775a35   Coly Li   ext4: code cleanu...
429
  	}
c9de560de   Alex Tomas   ext4: Add multi b...
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
  
  	bb = EXT4_MB_BUDDY(e4b) + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order];
  	*max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order];
  
  	return bb;
  }
  
  #ifdef DOUBLE_CHECK
  static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
  			   int first, int count)
  {
  	int i;
  	struct super_block *sb = e4b->bd_sb;
  
  	if (unlikely(e4b->bd_info->bb_bitmap == NULL))
  		return;
bc8e67409   Vincent Minet   ext4: Fix spinloc...
446
  	assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
c9de560de   Alex Tomas   ext4: Add multi b...
447
448
449
  	for (i = 0; i < count; i++) {
  		if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) {
  			ext4_fsblk_t blocknr;
5661bd686   Akinobu Mita   ext4: cleanup to ...
450
451
  
  			blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
53accfa9f   Theodore Ts'o   ext4: teach mball...
452
  			blocknr += EXT4_C2B(EXT4_SB(sb), first + i);
5d1b1b3f4   Aneesh Kumar K.V   ext4: fix BUG whe...
453
  			ext4_grp_locked_error(sb, e4b->bd_group,
e29136f80   Theodore Ts'o   ext4: Enhance ext...
454
455
456
457
458
  					      inode ? inode->i_ino : 0,
  					      blocknr,
  					      "freeing block already freed "
  					      "(bit %u)",
  					      first + i);
c9de560de   Alex Tomas   ext4: Add multi b...
459
460
461
462
463
464
465
466
467
468
469
  		}
  		mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
  	}
  }
  
  /*
   * DOUBLE_CHECK helper: set @count bits starting at @first in the group's
   * bb_bitmap copy, BUG()ing if any of them is already set.  Nothing is done
   * when bb_bitmap is NULL.  The group lock is asserted to be held.
   */
  static void mb_mark_used_double(struct ext4_buddy *e4b, int first, int count)
  {
  	int idx;

  	if (unlikely(e4b->bd_info->bb_bitmap == NULL))
  		return;
  	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));

  	for (idx = 0; idx < count; idx++) {
  		int bit = first + idx;

  		BUG_ON(mb_test_bit(bit, e4b->bd_info->bb_bitmap));
  		mb_set_bit(bit, e4b->bd_info->bb_bitmap);
  	}
  }
  
  static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
  {
  	if (memcmp(e4b->bd_info->bb_bitmap, bitmap, e4b->bd_sb->s_blocksize)) {
  		unsigned char *b1, *b2;
  		int i;
  		b1 = (unsigned char *) e4b->bd_info->bb_bitmap;
  		b2 = (unsigned char *) bitmap;
  		for (i = 0; i < e4b->bd_sb->s_blocksize; i++) {
  			if (b1[i] != b2[i]) {
9d8b9ec44   Theodore Ts'o   ext4: use ext4_ms...
486
487
488
489
490
  				ext4_msg(e4b->bd_sb, KERN_ERR,
  					 "corruption in group %u "
  					 "at byte %u(%u): %x in copy != %x "
  					 "on disk/prealloc",
  					 e4b->bd_group, i, i * 8, b1[i], b2[i]);
c9de560de   Alex Tomas   ext4: Add multi b...
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
  				BUG();
  			}
  		}
  	}
  }
  
  #else
  /* DOUBLE_CHECK is not defined: freeing blocks needs no shadow bookkeeping. */
  static inline void mb_free_blocks_double(struct inode *inode,
  				struct ext4_buddy *e4b, int first, int count)
  {
  }
  /* DOUBLE_CHECK is not defined: marking blocks used needs no shadow bookkeeping. */
  static inline void mb_mark_used_double(struct ext4_buddy *e4b,
  						int first, int count)
  {
  }
  /* DOUBLE_CHECK is not defined: there is no shadow bitmap to compare. */
  static inline void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
  {
  }
  #endif
  
  #ifdef AGGRESSIVE_CHECK
  
  /*
   * Assertion used by the AGGRESSIVE_CHECK buddy verifier.  When @assert is
   * false, print the failed expression together with the caller location and
   * BUG().  The expansion site must have `function`, `file` and `line` in
   * scope.  The message ends with an escaped "\n"; the string literal must
   * stay on a single source line.
   */
  #define MB_CHECK_ASSERT(assert)						\
  do {									\
  	if (!(assert)) {						\
  		printk(KERN_EMERG					\
  			"Assertion failure in %s() at %s:%d: \"%s\"\n",	\
  			function, file, line, # assert);		\
  		BUG();							\
  	}								\
  } while (0)
  
  static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
  				const char *function, int line)
  {
  	struct super_block *sb = e4b->bd_sb;
  	int order = e4b->bd_blkbits + 1;
  	int max;
  	int max2;
  	int i;
  	int j;
  	int k;
  	int count;
  	struct ext4_group_info *grp;
  	int fragments = 0;
  	int fstart;
  	struct list_head *cur;
  	void *buddy;
  	void *buddy2;
c9de560de   Alex Tomas   ext4: Add multi b...
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
  	{
  		static int mb_check_counter;
  		if (mb_check_counter++ % 100 != 0)
  			return 0;
  	}
  
  	while (order > 1) {
  		buddy = mb_find_buddy(e4b, order, &max);
  		MB_CHECK_ASSERT(buddy);
  		buddy2 = mb_find_buddy(e4b, order - 1, &max2);
  		MB_CHECK_ASSERT(buddy2);
  		MB_CHECK_ASSERT(buddy != buddy2);
  		MB_CHECK_ASSERT(max * 2 == max2);
  
  		count = 0;
  		for (i = 0; i < max; i++) {
  
  			if (mb_test_bit(i, buddy)) {
  				/* only single bit in buddy2 may be 1 */
  				if (!mb_test_bit(i << 1, buddy2)) {
  					MB_CHECK_ASSERT(
  						mb_test_bit((i<<1)+1, buddy2));
  				} else if (!mb_test_bit((i << 1) + 1, buddy2)) {
  					MB_CHECK_ASSERT(
  						mb_test_bit(i << 1, buddy2));
  				}
  				continue;
  			}
0a10da73e   Robin Dong   ext4: fix a wrong...
572
  			/* both bits in buddy2 must be 1 */
c9de560de   Alex Tomas   ext4: Add multi b...
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
  			MB_CHECK_ASSERT(mb_test_bit(i << 1, buddy2));
  			MB_CHECK_ASSERT(mb_test_bit((i << 1) + 1, buddy2));
  
  			for (j = 0; j < (1 << order); j++) {
  				k = (i * (1 << order)) + j;
  				MB_CHECK_ASSERT(
  					!mb_test_bit(k, EXT4_MB_BITMAP(e4b)));
  			}
  			count++;
  		}
  		MB_CHECK_ASSERT(e4b->bd_info->bb_counters[order] == count);
  		order--;
  	}
  
  	fstart = -1;
  	buddy = mb_find_buddy(e4b, 0, &max);
  	for (i = 0; i < max; i++) {
  		if (!mb_test_bit(i, buddy)) {
  			MB_CHECK_ASSERT(i >= e4b->bd_info->bb_first_free);
  			if (fstart == -1) {
  				fragments++;
  				fstart = i;
  			}
  			continue;
  		}
  		fstart = -1;
  		/* check used bits only */
  		for (j = 0; j < e4b->bd_blkbits + 1; j++) {
  			buddy2 = mb_find_buddy(e4b, j, &max2);
  			k = i >> j;
  			MB_CHECK_ASSERT(k < max2);
  			MB_CHECK_ASSERT(mb_test_bit(k, buddy2));
  		}
  	}
  	MB_CHECK_ASSERT(!EXT4_MB_GRP_NEED_INIT(e4b->bd_info));
  	MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments);
  
  	grp = ext4_get_group_info(sb, e4b->bd_group);
c9de560de   Alex Tomas   ext4: Add multi b...
611
612
613
  	list_for_each(cur, &grp->bb_prealloc_list) {
  		ext4_group_t groupnr;
  		struct ext4_prealloc_space *pa;
60bd63d19   Solofo Ramangalahy   ext4: cleanup for...
614
615
  		pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
  		ext4_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &k);
c9de560de   Alex Tomas   ext4: Add multi b...
616
  		MB_CHECK_ASSERT(groupnr == e4b->bd_group);
60bd63d19   Solofo Ramangalahy   ext4: cleanup for...
617
  		for (i = 0; i < pa->pa_len; i++)
c9de560de   Alex Tomas   ext4: Add multi b...
618
619
620
621
622
623
  			MB_CHECK_ASSERT(mb_test_bit(k + i, buddy));
  	}
  	return 0;
  }
  #undef MB_CHECK_ASSERT
  /* Verify the buddy data of @e4b, passing the call site along for diagnostics. */
  #define mb_check_buddy(e4b)						\
  	__mb_check_buddy(e4b, __FILE__, __func__, __LINE__)
c9de560de   Alex Tomas   ext4: Add multi b...
625
626
627
  #else
  #define mb_check_buddy(e4b)
  #endif
7c7860592   Coly Li   mballoc: add comm...
628
629
630
631
632
633
  /*
   * Divide blocks started from @first with length @len into
   * smaller chunks with power of 2 blocks.
   * Clear the bits in bitmap which the blocks of the chunk(s) covered,
   * then increase bb_counters[] for corresponded chunk size.
   */
c9de560de   Alex Tomas   ext4: Add multi b...
634
  static void ext4_mb_mark_free_simple(struct super_block *sb,
a36b44988   Eric Sandeen   ext4: use ext4_gr...
635
  				void *buddy, ext4_grpblk_t first, ext4_grpblk_t len,
c9de560de   Alex Tomas   ext4: Add multi b...
636
637
638
  					struct ext4_group_info *grp)
  {
  	struct ext4_sb_info *sbi = EXT4_SB(sb);
a36b44988   Eric Sandeen   ext4: use ext4_gr...
639
640
641
  	ext4_grpblk_t min;
  	ext4_grpblk_t max;
  	ext4_grpblk_t chunk;
c9de560de   Alex Tomas   ext4: Add multi b...
642
  	unsigned short border;
7137d7a48   Theodore Ts'o   ext4: convert ins...
643
  	BUG_ON(len > EXT4_CLUSTERS_PER_GROUP(sb));
c9de560de   Alex Tomas   ext4: Add multi b...
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
  
  	border = 2 << sb->s_blocksize_bits;
  
  	while (len > 0) {
  		/* find how many blocks can be covered since this position */
  		max = ffs(first | border) - 1;
  
  		/* find how many blocks of power 2 we need to mark */
  		min = fls(len) - 1;
  
  		if (max < min)
  			min = max;
  		chunk = 1 << min;
  
  		/* mark multiblock chunks only */
  		grp->bb_counters[min]++;
  		if (min > 0)
  			mb_clear_bit(first >> min,
  				     buddy + sbi->s_mb_offsets[min]);
  
  		len -= chunk;
  		first += chunk;
  	}
  }
8a57d9d61   Curt Wohlgemuth   ext4: check for a...
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
  /*
   * Record in grp->bb_largest_free_order the highest order that has a
   * non-zero bb_counters[] entry, scanning from s_blocksize_bits + 1 down to
   * 0.  The field is left at -1 when every counter is zero.
   */
  static void
  mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp)
  {
  	int order;

  	grp->bb_largest_free_order = -1; /* uninit */

  	order = sb->s_blocksize_bits + 1;
  	while (order >= 0) {
  		if (grp->bb_counters[order] > 0) {
  			grp->bb_largest_free_order = order;
  			return;
  		}
  		order--;
  	}
  }
089ceecc1   Eric Sandeen   ext4: mark severa...
688
689
  /*
   * Build the buddy data of @group in @buddy from @bitmap.
   *
   * Every run of zero bits in @bitmap is a free extent: it is counted as one
   * fragment, added to the free total and recorded either through
   * ext4_mb_mark_free_simple() (runs longer than one cluster) or directly in
   * bb_counters[0].  Afterwards bb_first_free, bb_fragments and the largest
   * free order are refreshed, the NEED_INIT bit of the group is cleared and
   * the generation statistics of the super block are updated.
   *
   * If the number of free clusters found in the bitmap differs from
   * grp->bb_free an error is reported and bb_free is overwritten with the
   * value derived from the bitmap.
   */
  static noinline_for_stack
  void ext4_mb_generate_buddy(struct super_block *sb,
  				void *buddy, void *bitmap, ext4_group_t group)
  {
  	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
  	ext4_grpblk_t nr_clusters = EXT4_CLUSTERS_PER_GROUP(sb);
  	ext4_grpblk_t pos = 0;
  	ext4_grpblk_t start;
  	ext4_grpblk_t run;
  	unsigned nr_free = 0;
  	unsigned nr_frags = 0;
  	unsigned long long cycles = get_cycles();

  	/*
  	 * The bitmap handed in is the aggregation of the on-disk bitmap
  	 * and the preallocations; walk it one free run at a time.
  	 */
  	pos = mb_find_next_zero_bit(bitmap, nr_clusters, 0);
  	grp->bb_first_free = pos;
  	while (pos < nr_clusters) {
  		start = pos;
  		nr_frags++;

  		/* the run ends at the next used cluster (or at the end) */
  		pos = mb_find_next_bit(bitmap, nr_clusters, pos);
  		run = pos - start;
  		nr_free += run;

  		if (run <= 1)
  			grp->bb_counters[0]++;
  		else
  			ext4_mb_mark_free_simple(sb, buddy, start, run, grp);

  		/* skip over the used clusters to the next free run */
  		if (pos < nr_clusters)
  			pos = mb_find_next_zero_bit(bitmap, nr_clusters, pos);
  	}
  	grp->bb_fragments = nr_frags;

  	if (nr_free != grp->bb_free) {
  		ext4_grp_locked_error(sb, group, 0, 0,
  				      "%u clusters in bitmap, %u in gd",
  				      nr_free, grp->bb_free);
  		/*
  		 * If we intend to continue, treat the group descriptor
  		 * as corrupt and trust the value found in the bitmap.
  		 */
  		grp->bb_free = nr_free;
  	}
  	mb_set_largest_free_order(sb, grp);

  	clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));

  	/* account the time spent generating this buddy */
  	cycles = get_cycles() - cycles;
  	spin_lock(&EXT4_SB(sb)->s_bal_lock);
  	EXT4_SB(sb)->s_mb_buddies_generated++;
  	EXT4_SB(sb)->s_mb_generation_time += cycles;
  	spin_unlock(&EXT4_SB(sb)->s_bal_lock);
  }
  
  /* The buddy information is attached the buddy cache inode
   * for convenience. The information regarding each group
   * is loaded via ext4_mb_load_buddy. The information involve
   * block bitmap and buddy information. The information are
   * stored in the inode as
   *
   * {                        page                        }
c3a326a65   Aneesh Kumar K.V   ext4: cleanup mba...
748
   * [ group 0 bitmap][ group 0 buddy] [group 1][ group 1]...
c9de560de   Alex Tomas   ext4: Add multi b...
749
750
751
752
753
754
755
   *
   *
   * one block each for bitmap and buddy information.
   * So for each group we take up 2 blocks. A page can
   * contain blocks_per_page (PAGE_CACHE_SIZE / blocksize)  blocks.
   * So it can have information regarding groups_per_page which
   * is blocks_per_page/2
8a57d9d61   Curt Wohlgemuth   ext4: check for a...
756
757
758
   *
   * Locking note:  This routine takes the block group lock of all groups
   * for this page; do not hold this lock when calling this routine!
c9de560de   Alex Tomas   ext4: Add multi b...
759
760
761
762
   */
  
  static int ext4_mb_init_cache(struct page *page, char *incore)
  {
8df9675f8   Theodore Ts'o   ext4: Avoid races...
763
  	ext4_group_t ngroups;
c9de560de   Alex Tomas   ext4: Add multi b...
764
765
766
767
768
769
770
771
772
773
774
775
776
  	int blocksize;
  	int blocks_per_page;
  	int groups_per_page;
  	int err = 0;
  	int i;
  	ext4_group_t first_group;
  	int first_block;
  	struct super_block *sb;
  	struct buffer_head *bhs;
  	struct buffer_head **bh;
  	struct inode *inode;
  	char *data;
  	char *bitmap;
9b8b7d353   Amir Goldstein   ext4: teach ext4_...
777
  	struct ext4_group_info *grinfo;
c9de560de   Alex Tomas   ext4: Add multi b...
778

6ba495e92   Theodore Ts'o   ext4: Add configu...
779
780
  	mb_debug(1, "init page %lu
  ", page->index);
c9de560de   Alex Tomas   ext4: Add multi b...
781
782
783
  
  	inode = page->mapping->host;
  	sb = inode->i_sb;
8df9675f8   Theodore Ts'o   ext4: Avoid races...
784
  	ngroups = ext4_get_groups_count(sb);
c9de560de   Alex Tomas   ext4: Add multi b...
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
  	blocksize = 1 << inode->i_blkbits;
  	blocks_per_page = PAGE_CACHE_SIZE / blocksize;
  
  	groups_per_page = blocks_per_page >> 1;
  	if (groups_per_page == 0)
  		groups_per_page = 1;
  
  	/* allocate buffer_heads to read bitmaps */
  	if (groups_per_page > 1) {
  		err = -ENOMEM;
  		i = sizeof(struct buffer_head *) * groups_per_page;
  		bh = kzalloc(i, GFP_NOFS);
  		if (bh == NULL)
  			goto out;
  	} else
  		bh = &bhs;
  
  	first_group = page->index * blocks_per_page / 2;
  
  	/* read all groups the page covers into the cache */
  	for (i = 0; i < groups_per_page; i++) {
  		struct ext4_group_desc *desc;
8df9675f8   Theodore Ts'o   ext4: Avoid races...
807
  		if (first_group + i >= ngroups)
c9de560de   Alex Tomas   ext4: Add multi b...
808
  			break;
9b8b7d353   Amir Goldstein   ext4: teach ext4_...
809
810
811
812
813
814
815
816
817
818
819
  		grinfo = ext4_get_group_info(sb, first_group + i);
  		/*
  		 * If page is uptodate then we came here after online resize
  		 * which added some new uninitialized group info structs, so
  		 * we must skip all initialized uptodate buddies on the page,
  		 * which may be currently in use by an allocating task.
  		 */
  		if (PageUptodate(page) && !EXT4_MB_GRP_NEED_INIT(grinfo)) {
  			bh[i] = NULL;
  			continue;
  		}
c9de560de   Alex Tomas   ext4: Add multi b...
820
821
822
823
824
825
826
827
828
  		err = -EIO;
  		desc = ext4_get_group_desc(sb, first_group + i, NULL);
  		if (desc == NULL)
  			goto out;
  
  		err = -ENOMEM;
  		bh[i] = sb_getblk(sb, ext4_block_bitmap(sb, desc));
  		if (bh[i] == NULL)
  			goto out;
2ccb5fb9f   Aneesh Kumar K.V   ext4: Use new buf...
829
  		if (bitmap_uptodate(bh[i]))
c9de560de   Alex Tomas   ext4: Add multi b...
830
  			continue;
c806e68f5   Frederic Bohe   ext4: fix initial...
831
  		lock_buffer(bh[i]);
2ccb5fb9f   Aneesh Kumar K.V   ext4: Use new buf...
832
833
834
835
  		if (bitmap_uptodate(bh[i])) {
  			unlock_buffer(bh[i]);
  			continue;
  		}
955ce5f5b   Aneesh Kumar K.V   ext4: Convert ext...
836
  		ext4_lock_group(sb, first_group + i);
c9de560de   Alex Tomas   ext4: Add multi b...
837
838
839
  		if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
  			ext4_init_block_bitmap(sb, bh[i],
  						first_group + i, desc);
2ccb5fb9f   Aneesh Kumar K.V   ext4: Use new buf...
840
  			set_bitmap_uptodate(bh[i]);
c9de560de   Alex Tomas   ext4: Add multi b...
841
  			set_buffer_uptodate(bh[i]);
955ce5f5b   Aneesh Kumar K.V   ext4: Convert ext...
842
  			ext4_unlock_group(sb, first_group + i);
3300beda5   Aneesh Kumar K.V   ext4: code cleanup
843
  			unlock_buffer(bh[i]);
c9de560de   Alex Tomas   ext4: Add multi b...
844
845
  			continue;
  		}
955ce5f5b   Aneesh Kumar K.V   ext4: Convert ext...
846
  		ext4_unlock_group(sb, first_group + i);
2ccb5fb9f   Aneesh Kumar K.V   ext4: Use new buf...
847
848
849
850
851
852
853
854
855
  		if (buffer_uptodate(bh[i])) {
  			/*
  			 * if not uninit if bh is uptodate,
  			 * bitmap is also uptodate
  			 */
  			set_bitmap_uptodate(bh[i]);
  			unlock_buffer(bh[i]);
  			continue;
  		}
c9de560de   Alex Tomas   ext4: Add multi b...
856
  		get_bh(bh[i]);
2ccb5fb9f   Aneesh Kumar K.V   ext4: Use new buf...
857
858
859
860
861
862
863
  		/*
  		 * submit the buffer_head for read. We can
  		 * safely mark the bitmap as uptodate now.
  		 * We do it here so the bitmap uptodate bit
  		 * get set with buffer lock held.
  		 */
  		set_bitmap_uptodate(bh[i]);
c9de560de   Alex Tomas   ext4: Add multi b...
864
865
  		bh[i]->b_end_io = end_buffer_read_sync;
  		submit_bh(READ, bh[i]);
6ba495e92   Theodore Ts'o   ext4: Add configu...
866
867
  		mb_debug(1, "read bitmap for group %u
  ", first_group + i);
c9de560de   Alex Tomas   ext4: Add multi b...
868
869
870
  	}
  
  	/* wait for I/O completion */
9b8b7d353   Amir Goldstein   ext4: teach ext4_...
871
872
873
  	for (i = 0; i < groups_per_page; i++)
  		if (bh[i])
  			wait_on_buffer(bh[i]);
c9de560de   Alex Tomas   ext4: Add multi b...
874
875
  
  	err = -EIO;
9b8b7d353   Amir Goldstein   ext4: teach ext4_...
876
877
  	for (i = 0; i < groups_per_page; i++)
  		if (bh[i] && !buffer_uptodate(bh[i]))
c9de560de   Alex Tomas   ext4: Add multi b...
878
  			goto out;
31b481dc7   Mingming Cao   ext4: Fix ext4_mb...
879
  	err = 0;
c9de560de   Alex Tomas   ext4: Add multi b...
880
881
882
  	first_block = page->index * blocks_per_page;
  	for (i = 0; i < blocks_per_page; i++) {
  		int group;
c9de560de   Alex Tomas   ext4: Add multi b...
883
884
  
  		group = (first_block + i) >> 1;
8df9675f8   Theodore Ts'o   ext4: Avoid races...
885
  		if (group >= ngroups)
c9de560de   Alex Tomas   ext4: Add multi b...
886
  			break;
9b8b7d353   Amir Goldstein   ext4: teach ext4_...
887
888
889
  		if (!bh[group - first_group])
  			/* skip initialized uptodate buddy */
  			continue;
c9de560de   Alex Tomas   ext4: Add multi b...
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
  		/*
  		 * data carry information regarding this
  		 * particular group in the format specified
  		 * above
  		 *
  		 */
  		data = page_address(page) + (i * blocksize);
  		bitmap = bh[group - first_group]->b_data;
  
  		/*
  		 * We place the buddy block and bitmap block
  		 * close together
  		 */
  		if ((first_block + i) & 1) {
  			/* this is block of buddy */
  			BUG_ON(incore == NULL);
6ba495e92   Theodore Ts'o   ext4: Add configu...
906
907
  			mb_debug(1, "put buddy for group %u in page %lu/%x
  ",
c9de560de   Alex Tomas   ext4: Add multi b...
908
  				group, page->index, i * blocksize);
f307333e1   Theodore Ts'o   ext4: Add new tra...
909
  			trace_ext4_mb_buddy_bitmap_load(sb, group);
c9de560de   Alex Tomas   ext4: Add multi b...
910
911
912
  			grinfo = ext4_get_group_info(sb, group);
  			grinfo->bb_fragments = 0;
  			memset(grinfo->bb_counters, 0,
1927805e6   Eric Sandeen   ext4: use variabl...
913
914
  			       sizeof(*grinfo->bb_counters) *
  				(sb->s_blocksize_bits+2));
c9de560de   Alex Tomas   ext4: Add multi b...
915
916
917
  			/*
  			 * incore got set to the group block bitmap below
  			 */
7a2fcbf7f   Aneesh Kumar K.V   ext4: don't use b...
918
  			ext4_lock_group(sb, group);
9b8b7d353   Amir Goldstein   ext4: teach ext4_...
919
920
  			/* init the buddy */
  			memset(data, 0xff, blocksize);
c9de560de   Alex Tomas   ext4: Add multi b...
921
  			ext4_mb_generate_buddy(sb, data, incore, group);
7a2fcbf7f   Aneesh Kumar K.V   ext4: don't use b...
922
  			ext4_unlock_group(sb, group);
c9de560de   Alex Tomas   ext4: Add multi b...
923
924
925
926
  			incore = NULL;
  		} else {
  			/* this is block of bitmap */
  			BUG_ON(incore != NULL);
6ba495e92   Theodore Ts'o   ext4: Add configu...
927
928
  			mb_debug(1, "put bitmap for group %u in page %lu/%x
  ",
c9de560de   Alex Tomas   ext4: Add multi b...
929
  				group, page->index, i * blocksize);
f307333e1   Theodore Ts'o   ext4: Add new tra...
930
  			trace_ext4_mb_bitmap_load(sb, group);
c9de560de   Alex Tomas   ext4: Add multi b...
931
932
933
934
935
936
937
  
  			/* see comments in ext4_mb_put_pa() */
  			ext4_lock_group(sb, group);
  			memcpy(data, bitmap, blocksize);
  
  			/* mark all preallocated blks used in in-core bitmap */
  			ext4_mb_generate_from_pa(sb, data, group);
7a2fcbf7f   Aneesh Kumar K.V   ext4: don't use b...
938
  			ext4_mb_generate_from_freelist(sb, data, group);
c9de560de   Alex Tomas   ext4: Add multi b...
939
940
941
942
943
944
945
946
947
948
949
950
  			ext4_unlock_group(sb, group);
  
  			/* set incore so that the buddy information can be
  			 * generated using this
  			 */
  			incore = data;
  		}
  	}
  	SetPageUptodate(page);
  
  out:
  	if (bh) {
9b8b7d353   Amir Goldstein   ext4: teach ext4_...
951
  		for (i = 0; i < groups_per_page; i++)
c9de560de   Alex Tomas   ext4: Add multi b...
952
953
954
955
956
957
  			brelse(bh[i]);
  		if (bh != &bhs)
  			kfree(bh);
  	}
  	return err;
  }
8a57d9d61   Curt Wohlgemuth   ext4: check for a...
958
  /*
2de8807b2   Amir Goldstein   ext4: synchronize...
959
960
961
962
   * Lock the buddy and bitmap pages. This make sure other parallel init_group
   * on the same buddy page doesn't happen whild holding the buddy page lock.
   * Return locked buddy and bitmap pages on e4b struct. If buddy and bitmap
   * are on the same page e4b->bd_buddy_page is NULL and return value is 0.
eee4adc70   Eric Sandeen   ext4: move ext4_m...
963
   */
2de8807b2   Amir Goldstein   ext4: synchronize...
964
965
  static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
  		ext4_group_t group, struct ext4_buddy *e4b)
eee4adc70   Eric Sandeen   ext4: move ext4_m...
966
  {
2de8807b2   Amir Goldstein   ext4: synchronize...
967
968
  	struct inode *inode = EXT4_SB(sb)->s_buddy_cache;
  	int block, pnum, poff;
eee4adc70   Eric Sandeen   ext4: move ext4_m...
969
  	int blocks_per_page;
2de8807b2   Amir Goldstein   ext4: synchronize...
970
971
972
973
  	struct page *page;
  
  	e4b->bd_buddy_page = NULL;
  	e4b->bd_bitmap_page = NULL;
eee4adc70   Eric Sandeen   ext4: move ext4_m...
974
975
976
977
978
979
980
981
982
  
  	blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
  	/*
  	 * the buddy cache inode stores the block bitmap
  	 * and buddy information in consecutive blocks.
  	 * So for each group we need two blocks.
  	 */
  	block = group * 2;
  	pnum = block / blocks_per_page;
2de8807b2   Amir Goldstein   ext4: synchronize...
983
984
985
986
987
988
989
990
991
992
993
  	poff = block % blocks_per_page;
  	page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
  	if (!page)
  		return -EIO;
  	BUG_ON(page->mapping != inode->i_mapping);
  	e4b->bd_bitmap_page = page;
  	e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
  
  	if (blocks_per_page >= 2) {
  		/* buddy and bitmap are on the same page */
  		return 0;
eee4adc70   Eric Sandeen   ext4: move ext4_m...
994
  	}
2de8807b2   Amir Goldstein   ext4: synchronize...
995
996
997
998
999
1000
1001
1002
1003
1004
  
  	block++;
  	pnum = block / blocks_per_page;
  	poff = block % blocks_per_page;
  	page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
  	if (!page)
  		return -EIO;
  	BUG_ON(page->mapping != inode->i_mapping);
  	e4b->bd_buddy_page = page;
  	return 0;
eee4adc70   Eric Sandeen   ext4: move ext4_m...
1005
  }
2de8807b2   Amir Goldstein   ext4: synchronize...
1006
  static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b)
eee4adc70   Eric Sandeen   ext4: move ext4_m...
1007
  {
2de8807b2   Amir Goldstein   ext4: synchronize...
1008
1009
1010
1011
1012
1013
1014
  	if (e4b->bd_bitmap_page) {
  		unlock_page(e4b->bd_bitmap_page);
  		page_cache_release(e4b->bd_bitmap_page);
  	}
  	if (e4b->bd_buddy_page) {
  		unlock_page(e4b->bd_buddy_page);
  		page_cache_release(e4b->bd_buddy_page);
eee4adc70   Eric Sandeen   ext4: move ext4_m...
1015
  	}
eee4adc70   Eric Sandeen   ext4: move ext4_m...
1016
1017
1018
  }
  
  /*
8a57d9d61   Curt Wohlgemuth   ext4: check for a...
1019
1020
1021
1022
   * Locking note:  This routine calls ext4_mb_init_cache(), which takes the
   * block group lock of all groups for this page; do not hold the BG lock when
   * calling this routine!
   */
b6a758ec3   Aneesh Kumar K.V   ext4: move ext4_m...
1023
1024
1025
  static noinline_for_stack
  int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
  {
b6a758ec3   Aneesh Kumar K.V   ext4: move ext4_m...
1026
  	struct ext4_group_info *this_grp;
2de8807b2   Amir Goldstein   ext4: synchronize...
1027
1028
1029
  	struct ext4_buddy e4b;
  	struct page *page;
  	int ret = 0;
b6a758ec3   Aneesh Kumar K.V   ext4: move ext4_m...
1030
1031
1032
  
  	mb_debug(1, "init group %u
  ", group);
b6a758ec3   Aneesh Kumar K.V   ext4: move ext4_m...
1033
1034
  	this_grp = ext4_get_group_info(sb, group);
  	/*
08c3a8133   Aneesh Kumar K.V   ext4: Clarify the...
1035
1036
1037
1038
  	 * This ensures that we don't reinit the buddy cache
  	 * page which map to the group from which we are already
  	 * allocating. If we are looking at the buddy cache we would
  	 * have taken a reference using ext4_mb_load_buddy and that
2de8807b2   Amir Goldstein   ext4: synchronize...
1039
  	 * would have pinned buddy page to page cache.
b6a758ec3   Aneesh Kumar K.V   ext4: move ext4_m...
1040
  	 */
2de8807b2   Amir Goldstein   ext4: synchronize...
1041
1042
  	ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b);
  	if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) {
b6a758ec3   Aneesh Kumar K.V   ext4: move ext4_m...
1043
1044
1045
1046
  		/*
  		 * somebody initialized the group
  		 * return without doing anything
  		 */
b6a758ec3   Aneesh Kumar K.V   ext4: move ext4_m...
1047
1048
  		goto err;
  	}
2de8807b2   Amir Goldstein   ext4: synchronize...
1049
1050
1051
1052
1053
1054
  
  	page = e4b.bd_bitmap_page;
  	ret = ext4_mb_init_cache(page, NULL);
  	if (ret)
  		goto err;
  	if (!PageUptodate(page)) {
b6a758ec3   Aneesh Kumar K.V   ext4: move ext4_m...
1055
1056
1057
1058
  		ret = -EIO;
  		goto err;
  	}
  	mark_page_accessed(page);
b6a758ec3   Aneesh Kumar K.V   ext4: move ext4_m...
1059

2de8807b2   Amir Goldstein   ext4: synchronize...
1060
  	if (e4b.bd_buddy_page == NULL) {
b6a758ec3   Aneesh Kumar K.V   ext4: move ext4_m...
1061
1062
1063
1064
1065
  		/*
  		 * If both the bitmap and buddy are in
  		 * the same page we don't need to force
  		 * init the buddy
  		 */
2de8807b2   Amir Goldstein   ext4: synchronize...
1066
1067
  		ret = 0;
  		goto err;
b6a758ec3   Aneesh Kumar K.V   ext4: move ext4_m...
1068
  	}
2de8807b2   Amir Goldstein   ext4: synchronize...
1069
1070
1071
1072
1073
1074
  	/* init buddy cache */
  	page = e4b.bd_buddy_page;
  	ret = ext4_mb_init_cache(page, e4b.bd_bitmap);
  	if (ret)
  		goto err;
  	if (!PageUptodate(page)) {
b6a758ec3   Aneesh Kumar K.V   ext4: move ext4_m...
1075
1076
1077
1078
1079
  		ret = -EIO;
  		goto err;
  	}
  	mark_page_accessed(page);
  err:
2de8807b2   Amir Goldstein   ext4: synchronize...
1080
  	ext4_mb_put_buddy_page_lock(&e4b);
b6a758ec3   Aneesh Kumar K.V   ext4: move ext4_m...
1081
1082
  	return ret;
  }
8a57d9d61   Curt Wohlgemuth   ext4: check for a...
1083
1084
1085
1086
1087
  /*
   * Locking note:  This routine calls ext4_mb_init_cache(), which takes the
   * block group lock of all groups for this page; do not hold the BG lock when
   * calling this routine!
   */
4ddfef7b4   Eric Sandeen   ext4: reduce mbal...
1088
1089
1090
  static noinline_for_stack int
  ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
  					struct ext4_buddy *e4b)
c9de560de   Alex Tomas   ext4: Add multi b...
1091
  {
c9de560de   Alex Tomas   ext4: Add multi b...
1092
1093
1094
1095
1096
  	int blocks_per_page;
  	int block;
  	int pnum;
  	int poff;
  	struct page *page;
fdf6c7a76   Shen Feng   ext4: add error p...
1097
  	int ret;
920313a72   Aneesh Kumar K.V   ext4: Use EXT4_GR...
1098
1099
1100
  	struct ext4_group_info *grp;
  	struct ext4_sb_info *sbi = EXT4_SB(sb);
  	struct inode *inode = sbi->s_buddy_cache;
c9de560de   Alex Tomas   ext4: Add multi b...
1101

6ba495e92   Theodore Ts'o   ext4: Add configu...
1102
1103
  	mb_debug(1, "load group %u
  ", group);
c9de560de   Alex Tomas   ext4: Add multi b...
1104
1105
  
  	blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
920313a72   Aneesh Kumar K.V   ext4: Use EXT4_GR...
1106
  	grp = ext4_get_group_info(sb, group);
c9de560de   Alex Tomas   ext4: Add multi b...
1107
1108
  
  	e4b->bd_blkbits = sb->s_blocksize_bits;
529da704a   Tao Ma   ext4: remove unne...
1109
  	e4b->bd_info = grp;
c9de560de   Alex Tomas   ext4: Add multi b...
1110
1111
1112
1113
  	e4b->bd_sb = sb;
  	e4b->bd_group = group;
  	e4b->bd_buddy_page = NULL;
  	e4b->bd_bitmap_page = NULL;
f41c07505   Aneesh Kumar K.V   ext4: check for n...
1114
  	if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
f41c07505   Aneesh Kumar K.V   ext4: check for n...
1115
1116
1117
1118
1119
1120
1121
  		/*
  		 * we need full data about the group
  		 * to make a good selection
  		 */
  		ret = ext4_mb_init_group(sb, group);
  		if (ret)
  			return ret;
f41c07505   Aneesh Kumar K.V   ext4: check for n...
1122
  	}
c9de560de   Alex Tomas   ext4: Add multi b...
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
  	/*
  	 * the buddy cache inode stores the block bitmap
  	 * and buddy information in consecutive blocks.
  	 * So for each group we need two blocks.
  	 */
  	block = group * 2;
  	pnum = block / blocks_per_page;
  	poff = block % blocks_per_page;
  
  	/* we could use find_or_create_page(), but it locks page
  	 * what we'd like to avoid in fast path ... */
  	page = find_get_page(inode->i_mapping, pnum);
  	if (page == NULL || !PageUptodate(page)) {
  		if (page)
920313a72   Aneesh Kumar K.V   ext4: Use EXT4_GR...
1137
1138
1139
1140
1141
1142
1143
1144
  			/*
  			 * drop the page reference and try
  			 * to get the page with lock. If we
  			 * are not uptodate that implies
  			 * somebody just created the page but
  			 * is yet to initialize the same. So
  			 * wait for it to initialize.
  			 */
c9de560de   Alex Tomas   ext4: Add multi b...
1145
1146
1147
1148
1149
  			page_cache_release(page);
  		page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
  		if (page) {
  			BUG_ON(page->mapping != inode->i_mapping);
  			if (!PageUptodate(page)) {
fdf6c7a76   Shen Feng   ext4: add error p...
1150
1151
1152
1153
1154
  				ret = ext4_mb_init_cache(page, NULL);
  				if (ret) {
  					unlock_page(page);
  					goto err;
  				}
c9de560de   Alex Tomas   ext4: Add multi b...
1155
1156
1157
1158
1159
1160
  				mb_cmp_bitmaps(e4b, page_address(page) +
  					       (poff * sb->s_blocksize));
  			}
  			unlock_page(page);
  		}
  	}
fdf6c7a76   Shen Feng   ext4: add error p...
1161
1162
  	if (page == NULL || !PageUptodate(page)) {
  		ret = -EIO;
c9de560de   Alex Tomas   ext4: Add multi b...
1163
  		goto err;
fdf6c7a76   Shen Feng   ext4: add error p...
1164
  	}
c9de560de   Alex Tomas   ext4: Add multi b...
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
  	e4b->bd_bitmap_page = page;
  	e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
  	mark_page_accessed(page);
  
  	block++;
  	pnum = block / blocks_per_page;
  	poff = block % blocks_per_page;
  
  	page = find_get_page(inode->i_mapping, pnum);
  	if (page == NULL || !PageUptodate(page)) {
  		if (page)
  			page_cache_release(page);
  		page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
  		if (page) {
  			BUG_ON(page->mapping != inode->i_mapping);
fdf6c7a76   Shen Feng   ext4: add error p...
1180
1181
1182
1183
1184
1185
1186
  			if (!PageUptodate(page)) {
  				ret = ext4_mb_init_cache(page, e4b->bd_bitmap);
  				if (ret) {
  					unlock_page(page);
  					goto err;
  				}
  			}
c9de560de   Alex Tomas   ext4: Add multi b...
1187
1188
1189
  			unlock_page(page);
  		}
  	}
fdf6c7a76   Shen Feng   ext4: add error p...
1190
1191
  	if (page == NULL || !PageUptodate(page)) {
  		ret = -EIO;
c9de560de   Alex Tomas   ext4: Add multi b...
1192
  		goto err;
fdf6c7a76   Shen Feng   ext4: add error p...
1193
  	}
c9de560de   Alex Tomas   ext4: Add multi b...
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
  	e4b->bd_buddy_page = page;
  	e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize);
  	mark_page_accessed(page);
  
  	BUG_ON(e4b->bd_bitmap_page == NULL);
  	BUG_ON(e4b->bd_buddy_page == NULL);
  
  	return 0;
  
  err:
26626f117   Yang Ruirui   ext4: release pag...
1204
1205
  	if (page)
  		page_cache_release(page);
c9de560de   Alex Tomas   ext4: Add multi b...
1206
1207
1208
1209
1210
1211
  	if (e4b->bd_bitmap_page)
  		page_cache_release(e4b->bd_bitmap_page);
  	if (e4b->bd_buddy_page)
  		page_cache_release(e4b->bd_buddy_page);
  	e4b->bd_buddy = NULL;
  	e4b->bd_bitmap = NULL;
fdf6c7a76   Shen Feng   ext4: add error p...
1212
  	return ret;
c9de560de   Alex Tomas   ext4: Add multi b...
1213
  }
e39e07fdf   Jing Zhang   ext4: rename ext4...
1214
  static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
c9de560de   Alex Tomas   ext4: Add multi b...
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
  {
  	if (e4b->bd_bitmap_page)
  		page_cache_release(e4b->bd_bitmap_page);
  	if (e4b->bd_buddy_page)
  		page_cache_release(e4b->bd_buddy_page);
  }
  
  
  /*
   * Find the order of the buddy chunk that covers @block.
   *
   * The per-order bitmaps in the buddy block are probed from order 1 up to
   * bd_blkbits + 1; the first order whose bit for @block is clear is
   * returned.  Returns 0 when no such order exists.
   */
  static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
  {
  	void *level;
  	int order;

  	BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b));
  	BUG_ON(block >= (1 << (e4b->bd_blkbits + 3)));

  	level = EXT4_MB_BUDDY(e4b);
  	for (order = 1; order <= e4b->bd_blkbits + 1; order++) {
  		/* index of the chunk containing @block at this order */
  		block >>= 1;
  		if (!mb_test_bit(block, level))
  			return order;

  		/* step over this order's bitmap to the next one */
  		level += 1 << (e4b->bd_blkbits - order);
  	}
  	return 0;
  }
955ce5f5b   Aneesh Kumar K.V   ext4: Convert ext...
1243
  static void mb_clear_bits(void *bm, int cur, int len)
c9de560de   Alex Tomas   ext4: Add multi b...
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
  {
  	__u32 *addr;
  
  	len = cur + len;
  	while (cur < len) {
  		if ((cur & 31) == 0 && (len - cur) >= 32) {
  			/* fast path: clear whole word at once */
  			addr = bm + (cur >> 3);
  			*addr = 0;
  			cur += 32;
  			continue;
  		}
955ce5f5b   Aneesh Kumar K.V   ext4: Convert ext...
1256
  		mb_clear_bit(cur, bm);
c9de560de   Alex Tomas   ext4: Add multi b...
1257
1258
1259
  		cur++;
  	}
  }
c3e94d1df   Yongqiang Yang   ext4: let setup_n...
1260
  void ext4_set_bits(void *bm, int cur, int len)
c9de560de   Alex Tomas   ext4: Add multi b...
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
  {
  	__u32 *addr;
  
  	len = cur + len;
  	while (cur < len) {
  		if ((cur & 31) == 0 && (len - cur) >= 32) {
  			/* fast path: set whole word at once */
  			addr = bm + (cur >> 3);
  			*addr = 0xffffffff;
  			cur += 32;
  			continue;
  		}
955ce5f5b   Aneesh Kumar K.V   ext4: Convert ext...
1273
  		mb_set_bit(cur, bm);
c9de560de   Alex Tomas   ext4: Add multi b...
1274
1275
1276
  		cur++;
  	}
  }
7e5a8cdd8   Shen Feng   ext4: fix error p...
1277
  static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
c9de560de   Alex Tomas   ext4: Add multi b...
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
  			  int first, int count)
  {
  	int block = 0;
  	int max = 0;
  	int order;
  	void *buddy;
  	void *buddy2;
  	struct super_block *sb = e4b->bd_sb;
  
  	BUG_ON(first + count > (sb->s_blocksize << 3));
bc8e67409   Vincent Minet   ext4: Fix spinloc...
1288
  	assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
c9de560de   Alex Tomas   ext4: Add multi b...
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
  	mb_check_buddy(e4b);
  	mb_free_blocks_double(inode, e4b, first, count);
  
  	e4b->bd_info->bb_free += count;
  	if (first < e4b->bd_info->bb_first_free)
  		e4b->bd_info->bb_first_free = first;
  
  	/* let's maintain fragments counter */
  	if (first != 0)
  		block = !mb_test_bit(first - 1, EXT4_MB_BITMAP(e4b));
  	if (first + count < EXT4_SB(sb)->s_mb_maxs[0])
  		max = !mb_test_bit(first + count, EXT4_MB_BITMAP(e4b));
  	if (block && max)
  		e4b->bd_info->bb_fragments--;
  	else if (!block && !max)
  		e4b->bd_info->bb_fragments++;
  
  	/* let's maintain buddy itself */
  	while (count-- > 0) {
  		block = first++;
  		order = 0;
  
  		if (!mb_test_bit(block, EXT4_MB_BITMAP(e4b))) {
  			ext4_fsblk_t blocknr;
5661bd686   Akinobu Mita   ext4: cleanup to ...
1313
1314
  
  			blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
53accfa9f   Theodore Ts'o   ext4: teach mball...
1315
  			blocknr += EXT4_C2B(EXT4_SB(sb), block);
5d1b1b3f4   Aneesh Kumar K.V   ext4: fix BUG whe...
1316
  			ext4_grp_locked_error(sb, e4b->bd_group,
e29136f80   Theodore Ts'o   ext4: Enhance ext...
1317
1318
1319
1320
  					      inode ? inode->i_ino : 0,
  					      blocknr,
  					      "freeing already freed block "
  					      "(bit %u)", block);
c9de560de   Alex Tomas   ext4: Add multi b...
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
  		}
  		mb_clear_bit(block, EXT4_MB_BITMAP(e4b));
  		e4b->bd_info->bb_counters[order]++;
  
  		/* start of the buddy */
  		buddy = mb_find_buddy(e4b, order, &max);
  
  		do {
  			block &= ~1UL;
  			if (mb_test_bit(block, buddy) ||
  					mb_test_bit(block + 1, buddy))
  				break;
  
  			/* both the buddies are free, try to coalesce them */
  			buddy2 = mb_find_buddy(e4b, order + 1, &max);
  
  			if (!buddy2)
  				break;
  
  			if (order > 0) {
  				/* for special purposes, we don't set
  				 * free bits in bitmap */
  				mb_set_bit(block, buddy);
  				mb_set_bit(block + 1, buddy);
  			}
  			e4b->bd_info->bb_counters[order]--;
  			e4b->bd_info->bb_counters[order]--;
  
  			block = block >> 1;
  			order++;
  			e4b->bd_info->bb_counters[order]++;
  
  			mb_clear_bit(block, buddy2);
  			buddy = buddy2;
  		} while (1);
  	}
8a57d9d61   Curt Wohlgemuth   ext4: check for a...
1357
  	mb_set_largest_free_order(sb, e4b->bd_info);
c9de560de   Alex Tomas   ext4: Add multi b...
1358
  	mb_check_buddy(e4b);
c9de560de   Alex Tomas   ext4: Add multi b...
1359
1360
1361
1362
1363
1364
1365
  }
  
  /*
   * Describe in @ex the free extent that starts at @block, where @block is
   * an index into the buddy bitmap of order @order.  The group lock must be
   * held (asserted below).
   *
   * If @block is in use, @ex is zeroed and 0 is returned.  Otherwise the
   * extent begins at the requested position and runs to the end of the free
   * chunk covering it; while it is still shorter than @needed, directly
   * following free chunks are appended.  Returns the resulting length,
   * which is also stored in ex->fe_len.
   */
  static int mb_find_extent(struct ext4_buddy *e4b, int order, int block,
  				int needed, struct ext4_free_extent *ex)
  {
  	int next = block;
  	int max;
  	void *buddy;

  	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
  	BUG_ON(ex == NULL);

  	buddy = mb_find_buddy(e4b, order, &max);
  	BUG_ON(buddy == NULL);
  	BUG_ON(block >= max);

  	if (mb_test_bit(block, buddy)) {
  		/* the requested position is not free at all */
  		ex->fe_group = 0;
  		ex->fe_start = 0;
  		ex->fe_len = 0;
  		return 0;
  	}

  	/* FIXME drop order completely ? */
  	if (likely(order == 0)) {
  		/* find the order of the chunk that really covers the block */
  		order = mb_find_order_for_block(e4b, block);
  		block >>= order;
  	}

  	ex->fe_group = e4b->bd_group;
  	ex->fe_start = block << order;
  	ex->fe_len = 1 << order;

  	/* trim the part of the chunk that lies before the given start */
  	next -= ex->fe_start;
  	ex->fe_len -= next;
  	ex->fe_start += next;

  	/* grow the extent with the free chunks that directly follow it */
  	while (needed > ex->fe_len) {
  		if (!mb_find_buddy(e4b, order, &max))
  			break;

  		if (block + 1 >= max)
  			break;

  		next = (block + 1) * (1 << order);
  		if (mb_test_bit(next, EXT4_MB_BITMAP(e4b)))
  			break;

  		order = mb_find_order_for_block(e4b, next);
  		block = next >> order;
  		ex->fe_len += 1 << order;
  	}

  	BUG_ON(ex->fe_start + ex->fe_len > (1 << (e4b->bd_blkbits + 3)));
  	return ex->fe_len;
  }
  
  static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
  {
  	int ord;
  	int mlen = 0;
  	int max = 0;
  	int cur;
  	int start = ex->fe_start;
  	int len = ex->fe_len;
  	unsigned ret = 0;
  	int len0 = len;
  	void *buddy;
  
  	BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3));
  	BUG_ON(e4b->bd_group != ex->fe_group);
bc8e67409   Vincent Minet   ext4: Fix spinloc...
1429
  	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
c9de560de   Alex Tomas   ext4: Add multi b...
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
  	mb_check_buddy(e4b);
  	mb_mark_used_double(e4b, start, len);
  
  	e4b->bd_info->bb_free -= len;
  	if (e4b->bd_info->bb_first_free == start)
  		e4b->bd_info->bb_first_free += len;
  
  	/* let's maintain fragments counter */
  	if (start != 0)
  		mlen = !mb_test_bit(start - 1, EXT4_MB_BITMAP(e4b));
  	if (start + len < EXT4_SB(e4b->bd_sb)->s_mb_maxs[0])
  		max = !mb_test_bit(start + len, EXT4_MB_BITMAP(e4b));
  	if (mlen && max)
  		e4b->bd_info->bb_fragments++;
  	else if (!mlen && !max)
  		e4b->bd_info->bb_fragments--;
  
  	/* let's maintain buddy itself */
  	while (len) {
  		ord = mb_find_order_for_block(e4b, start);
  
  		if (((start >> ord) << ord) == start && len >= (1 << ord)) {
  			/* the whole chunk may be allocated at once! */
  			mlen = 1 << ord;
  			buddy = mb_find_buddy(e4b, ord, &max);
  			BUG_ON((start >> ord) >= max);
  			mb_set_bit(start >> ord, buddy);
  			e4b->bd_info->bb_counters[ord]--;
  			start += mlen;
  			len -= mlen;
  			BUG_ON(len < 0);
  			continue;
  		}
  
  		/* store for history */
  		if (ret == 0)
  			ret = len | (ord << 16);
  
  		/* we have to split large buddy */
  		BUG_ON(ord <= 0);
  		buddy = mb_find_buddy(e4b, ord, &max);
  		mb_set_bit(start >> ord, buddy);
  		e4b->bd_info->bb_counters[ord]--;
  
  		ord--;
  		cur = (start >> ord) & ~1U;
  		buddy = mb_find_buddy(e4b, ord, &max);
  		mb_clear_bit(cur, buddy);
  		mb_clear_bit(cur + 1, buddy);
  		e4b->bd_info->bb_counters[ord]++;
  		e4b->bd_info->bb_counters[ord]++;
  	}
8a57d9d61   Curt Wohlgemuth   ext4: check for a...
1482
  	mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info);
c9de560de   Alex Tomas   ext4: Add multi b...
1483

c3e94d1df   Yongqiang Yang   ext4: let setup_n...
1484
  	ext4_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0);
c9de560de   Alex Tomas   ext4: Add multi b...
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
  	mb_check_buddy(e4b);
  
  	return ret;
  }
  
  /*
   * Must be called under group lock!
   */
  static void ext4_mb_use_best_found(struct ext4_allocation_context *ac,
  					struct ext4_buddy *e4b)
  {
  	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
  	int ret;
  
  	BUG_ON(ac->ac_b_ex.fe_group != e4b->bd_group);
  	BUG_ON(ac->ac_status == AC_STATUS_FOUND);
  
  	ac->ac_b_ex.fe_len = min(ac->ac_b_ex.fe_len, ac->ac_g_ex.fe_len);
  	ac->ac_b_ex.fe_logical = ac->ac_g_ex.fe_logical;
  	ret = mb_mark_used(e4b, &ac->ac_b_ex);
  
  	/* preallocation can change ac_b_ex, thus we store actually
  	 * allocated blocks for history */
  	ac->ac_f_ex = ac->ac_b_ex;
  
  	ac->ac_status = AC_STATUS_FOUND;
  	ac->ac_tail = ret & 0xffff;
  	ac->ac_buddy = ret >> 16;
c3a326a65   Aneesh Kumar K.V   ext4: cleanup mba...
1513
1514
1515
1516
1517
1518
1519
  	/*
  	 * take the page reference. We want the page to be pinned
  	 * so that we don't get a ext4_mb_init_cache_call for this
  	 * group until we update the bitmap. That would mean we
  	 * double allocate blocks. The reference is dropped
  	 * in ext4_mb_release_context
  	 */
c9de560de   Alex Tomas   ext4: Add multi b...
1520
1521
1522
1523
  	ac->ac_bitmap_page = e4b->bd_bitmap_page;
  	get_page(ac->ac_bitmap_page);
  	ac->ac_buddy_page = e4b->bd_buddy_page;
  	get_page(ac->ac_buddy_page);
c9de560de   Alex Tomas   ext4: Add multi b...
1524
  	/* store last allocated for subsequent stream allocation */
4ba74d00a   Theodore Ts'o   ext4: Fix bugs in...
1525
  	if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
c9de560de   Alex Tomas   ext4: Add multi b...
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
  		spin_lock(&sbi->s_md_lock);
  		sbi->s_mb_last_group = ac->ac_f_ex.fe_group;
  		sbi->s_mb_last_start = ac->ac_f_ex.fe_start;
  		spin_unlock(&sbi->s_md_lock);
  	}
  }
  
  /*
   * regular allocator, for general-purpose allocation
   */
  
  static void ext4_mb_check_limits(struct ext4_allocation_context *ac,
  					struct ext4_buddy *e4b,
  					int finish_group)
  {
  	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
  	struct ext4_free_extent *bex = &ac->ac_b_ex;
  	struct ext4_free_extent *gex = &ac->ac_g_ex;
  	struct ext4_free_extent ex;
  	int max;
032115fce   Aneesh Kumar K.V   ext4: Don't overw...
1546
1547
  	if (ac->ac_status == AC_STATUS_FOUND)
  		return;
c9de560de   Alex Tomas   ext4: Add multi b...
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
  	/*
  	 * We don't want to scan for a whole year
  	 */
  	if (ac->ac_found > sbi->s_mb_max_to_scan &&
  			!(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
  		ac->ac_status = AC_STATUS_BREAK;
  		return;
  	}
  
  	/*
  	 * Haven't found good chunk so far, let's continue
  	 */
  	if (bex->fe_len < gex->fe_len)
  		return;
  
  	if ((finish_group || ac->ac_found > sbi->s_mb_min_to_scan)
  			&& bex->fe_group == e4b->bd_group) {
  		/* recheck chunk's availability - we don't know
  		 * when it was found (within this lock-unlock
  		 * period or not) */
  		max = mb_find_extent(e4b, 0, bex->fe_start, gex->fe_len, &ex);
  		if (max >= gex->fe_len) {
  			ext4_mb_use_best_found(ac, e4b);
  			return;
  		}
  	}
  }
  
  /*
   * The routine checks whether found extent is good enough. If it is,
   * then the extent gets marked used and flag is set to the context
   * to stop scanning. Otherwise, the extent is compared with the
   * previous found extent and if new one is better, then it's stored
   * in the context. Later, the best found extent will be used, if
   * mballoc can't find good enough extent.
   *
   * FIXME: real allocation policy is to be designed yet!
   */
  static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
  					struct ext4_free_extent *ex,
  					struct ext4_buddy *e4b)
  {
  	struct ext4_free_extent *bex = &ac->ac_b_ex;
  	struct ext4_free_extent *gex = &ac->ac_g_ex;
  
  	BUG_ON(ex->fe_len <= 0);
7137d7a48   Theodore Ts'o   ext4: convert ins...
1594
1595
  	BUG_ON(ex->fe_len > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
  	BUG_ON(ex->fe_start >= EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
c9de560de   Alex Tomas   ext4: Add multi b...
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
  	BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);
  
  	ac->ac_found++;
  
  	/*
  	 * The special case - take what you catch first
  	 */
  	if (unlikely(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
  		*bex = *ex;
  		ext4_mb_use_best_found(ac, e4b);
  		return;
  	}
  
  	/*
  	 * Let's check whether the chuck is good enough
  	 */
  	if (ex->fe_len == gex->fe_len) {
  		*bex = *ex;
  		ext4_mb_use_best_found(ac, e4b);
  		return;
  	}
  
  	/*
  	 * If this is first found extent, just store it in the context
  	 */
  	if (bex->fe_len == 0) {
  		*bex = *ex;
  		return;
  	}
  
  	/*
  	 * If new found extent is better, store it in the context
  	 */
  	if (bex->fe_len < gex->fe_len) {
  		/* if the request isn't satisfied, any found extent
  		 * larger than previous best one is better */
  		if (ex->fe_len > bex->fe_len)
  			*bex = *ex;
  	} else if (ex->fe_len > gex->fe_len) {
  		/* if the request is satisfied, then we try to find
  		 * an extent that still satisfy the request, but is
  		 * smaller than previous one */
  		if (ex->fe_len < bex->fe_len)
  			*bex = *ex;
  	}
  
  	ext4_mb_check_limits(ac, e4b, 0);
  }
089ceecc1   Eric Sandeen   ext4: mark severa...
1644
1645
  static noinline_for_stack
  int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
c9de560de   Alex Tomas   ext4: Add multi b...
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
  					struct ext4_buddy *e4b)
  {
  	struct ext4_free_extent ex = ac->ac_b_ex;
  	ext4_group_t group = ex.fe_group;
  	int max;
  	int err;
  
  	BUG_ON(ex.fe_len <= 0);
  	err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
  	if (err)
  		return err;
  
  	ext4_lock_group(ac->ac_sb, group);
  	max = mb_find_extent(e4b, 0, ex.fe_start, ex.fe_len, &ex);
  
  	if (max > 0) {
  		ac->ac_b_ex = ex;
  		ext4_mb_use_best_found(ac, e4b);
  	}
  
  	ext4_unlock_group(ac->ac_sb, group);
e39e07fdf   Jing Zhang   ext4: rename ext4...
1667
  	ext4_mb_unload_buddy(e4b);
c9de560de   Alex Tomas   ext4: Add multi b...
1668
1669
1670
  
  	return 0;
  }
089ceecc1   Eric Sandeen   ext4: mark severa...
1671
1672
  static noinline_for_stack
  int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
c9de560de   Alex Tomas   ext4: Add multi b...
1673
1674
1675
1676
1677
1678
  				struct ext4_buddy *e4b)
  {
  	ext4_group_t group = ac->ac_g_ex.fe_group;
  	int max;
  	int err;
  	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
c9de560de   Alex Tomas   ext4: Add multi b...
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
  	struct ext4_free_extent ex;
  
  	if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL))
  		return 0;
  
  	err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
  	if (err)
  		return err;
  
  	ext4_lock_group(ac->ac_sb, group);
  	max = mb_find_extent(e4b, 0, ac->ac_g_ex.fe_start,
  			     ac->ac_g_ex.fe_len, &ex);
  
  	if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
  		ext4_fsblk_t start;
5661bd686   Akinobu Mita   ext4: cleanup to ...
1694
1695
  		start = ext4_group_first_block_no(ac->ac_sb, e4b->bd_group) +
  			ex.fe_start;
c9de560de   Alex Tomas   ext4: Add multi b...
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
  		/* use do_div to get remainder (would be 64-bit modulo) */
  		if (do_div(start, sbi->s_stripe) == 0) {
  			ac->ac_found++;
  			ac->ac_b_ex = ex;
  			ext4_mb_use_best_found(ac, e4b);
  		}
  	} else if (max >= ac->ac_g_ex.fe_len) {
  		BUG_ON(ex.fe_len <= 0);
  		BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
  		BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
  		ac->ac_found++;
  		ac->ac_b_ex = ex;
  		ext4_mb_use_best_found(ac, e4b);
  	} else if (max > 0 && (ac->ac_flags & EXT4_MB_HINT_MERGE)) {
  		/* Sometimes, caller may want to merge even small
  		 * number of blocks to an existing extent */
  		BUG_ON(ex.fe_len <= 0);
  		BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
  		BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
  		ac->ac_found++;
  		ac->ac_b_ex = ex;
  		ext4_mb_use_best_found(ac, e4b);
  	}
  	ext4_unlock_group(ac->ac_sb, group);
e39e07fdf   Jing Zhang   ext4: rename ext4...
1720
  	ext4_mb_unload_buddy(e4b);
c9de560de   Alex Tomas   ext4: Add multi b...
1721
1722
1723
1724
1725
1726
1727
1728
  
  	return 0;
  }
  
  /*
   * The routine scans buddy structures (not bitmap!) from given order
   * to max order and tries to find big enough chunk to satisfy the request
   */
089ceecc1   Eric Sandeen   ext4: mark severa...
1729
1730
  static noinline_for_stack
  void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
c9de560de   Alex Tomas   ext4: Add multi b...
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
  					struct ext4_buddy *e4b)
  {
  	struct super_block *sb = ac->ac_sb;
  	struct ext4_group_info *grp = e4b->bd_info;
  	void *buddy;
  	int i;
  	int k;
  	int max;
  
  	BUG_ON(ac->ac_2order <= 0);
  	for (i = ac->ac_2order; i <= sb->s_blocksize_bits + 1; i++) {
  		if (grp->bb_counters[i] == 0)
  			continue;
  
  		buddy = mb_find_buddy(e4b, i, &max);
  		BUG_ON(buddy == NULL);
ffad0a44b   Aneesh Kumar K.V   ext4: ext4_find_n...
1747
  		k = mb_find_next_zero_bit(buddy, max, 0);
c9de560de   Alex Tomas   ext4: Add multi b...
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
  		BUG_ON(k >= max);
  
  		ac->ac_found++;
  
  		ac->ac_b_ex.fe_len = 1 << i;
  		ac->ac_b_ex.fe_start = k << i;
  		ac->ac_b_ex.fe_group = e4b->bd_group;
  
  		ext4_mb_use_best_found(ac, e4b);
  
  		BUG_ON(ac->ac_b_ex.fe_len != ac->ac_g_ex.fe_len);
  
  		if (EXT4_SB(sb)->s_mb_stats)
  			atomic_inc(&EXT4_SB(sb)->s_bal_2orders);
  
  		break;
  	}
  }
  
  /*
   * The routine scans the group and measures all found extents.
   * In order to optimize scanning, caller must pass number of
   * free blocks in the group, so the routine can know upper limit.
   */
089ceecc1   Eric Sandeen   ext4: mark severa...
1772
1773
  static noinline_for_stack
  void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
c9de560de   Alex Tomas   ext4: Add multi b...
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
  					struct ext4_buddy *e4b)
  {
  	struct super_block *sb = ac->ac_sb;
  	void *bitmap = EXT4_MB_BITMAP(e4b);
  	struct ext4_free_extent ex;
  	int i;
  	int free;
  
  	free = e4b->bd_info->bb_free;
  	BUG_ON(free <= 0);
  
  	i = e4b->bd_info->bb_first_free;
  
  	while (free && ac->ac_status == AC_STATUS_CONTINUE) {
ffad0a44b   Aneesh Kumar K.V   ext4: ext4_find_n...
1788
  		i = mb_find_next_zero_bit(bitmap,
7137d7a48   Theodore Ts'o   ext4: convert ins...
1789
1790
  						EXT4_CLUSTERS_PER_GROUP(sb), i);
  		if (i >= EXT4_CLUSTERS_PER_GROUP(sb)) {
26346ff68   Aneesh Kumar K.V   ext4: Don't panic...
1791
  			/*
e56eb6590   Aneesh Kumar K.V   ext4: Don't claim...
1792
  			 * IF we have corrupt bitmap, we won't find any
26346ff68   Aneesh Kumar K.V   ext4: Don't panic...
1793
1794
1795
  			 * free blocks even though group info says we
  			 * we have free blocks
  			 */
e29136f80   Theodore Ts'o   ext4: Enhance ext...
1796
  			ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
53accfa9f   Theodore Ts'o   ext4: teach mball...
1797
  					"%d free clusters as per "
fde4d95ad   Theodore Ts'o   ext4: remove extr...
1798
  					"group info. But bitmap says 0",
26346ff68   Aneesh Kumar K.V   ext4: Don't panic...
1799
  					free);
c9de560de   Alex Tomas   ext4: Add multi b...
1800
1801
1802
1803
1804
  			break;
  		}
  
  		mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex);
  		BUG_ON(ex.fe_len <= 0);
26346ff68   Aneesh Kumar K.V   ext4: Don't panic...
1805
  		if (free < ex.fe_len) {
e29136f80   Theodore Ts'o   ext4: Enhance ext...
1806
  			ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
53accfa9f   Theodore Ts'o   ext4: teach mball...
1807
  					"%d free clusters as per "
fde4d95ad   Theodore Ts'o   ext4: remove extr...
1808
  					"group info. But got %d blocks",
26346ff68   Aneesh Kumar K.V   ext4: Don't panic...
1809
  					free, ex.fe_len);
e56eb6590   Aneesh Kumar K.V   ext4: Don't claim...
1810
1811
1812
1813
1814
1815
  			/*
  			 * The number of free blocks differs. This mostly
  			 * indicate that the bitmap is corrupt. So exit
  			 * without claiming the space.
  			 */
  			break;
26346ff68   Aneesh Kumar K.V   ext4: Don't panic...
1816
  		}
c9de560de   Alex Tomas   ext4: Add multi b...
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
  
  		ext4_mb_measure_extent(ac, &ex, e4b);
  
  		i += ex.fe_len;
  		free -= ex.fe_len;
  	}
  
  	ext4_mb_check_limits(ac, e4b, 1);
  }
  
  /*
   * This is a special case for storages like raid5
506bf2d82   Eric Sandeen   ext4: allocate st...
1829
   * we try to find stripe-aligned chunks for stripe-size-multiple requests
c9de560de   Alex Tomas   ext4: Add multi b...
1830
   */
089ceecc1   Eric Sandeen   ext4: mark severa...
1831
1832
  static noinline_for_stack
  void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
c9de560de   Alex Tomas   ext4: Add multi b...
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
  				 struct ext4_buddy *e4b)
  {
  	struct super_block *sb = ac->ac_sb;
  	struct ext4_sb_info *sbi = EXT4_SB(sb);
  	void *bitmap = EXT4_MB_BITMAP(e4b);
  	struct ext4_free_extent ex;
  	ext4_fsblk_t first_group_block;
  	ext4_fsblk_t a;
  	ext4_grpblk_t i;
  	int max;
  
  	BUG_ON(sbi->s_stripe == 0);
  
  	/* find first stripe-aligned block in group */
5661bd686   Akinobu Mita   ext4: cleanup to ...
1847
  	first_group_block = ext4_group_first_block_no(sb, e4b->bd_group);
c9de560de   Alex Tomas   ext4: Add multi b...
1848
1849
1850
  	a = first_group_block + sbi->s_stripe - 1;
  	do_div(a, sbi->s_stripe);
  	i = (a * sbi->s_stripe) - first_group_block;
7137d7a48   Theodore Ts'o   ext4: convert ins...
1851
  	while (i < EXT4_CLUSTERS_PER_GROUP(sb)) {
c9de560de   Alex Tomas   ext4: Add multi b...
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
  		if (!mb_test_bit(i, bitmap)) {
  			max = mb_find_extent(e4b, 0, i, sbi->s_stripe, &ex);
  			if (max >= sbi->s_stripe) {
  				ac->ac_found++;
  				ac->ac_b_ex = ex;
  				ext4_mb_use_best_found(ac, e4b);
  				break;
  			}
  		}
  		i += sbi->s_stripe;
  	}
  }
8a57d9d61   Curt Wohlgemuth   ext4: check for a...
1864
  /* This is now called BEFORE we load the buddy bitmap. */
c9de560de   Alex Tomas   ext4: Add multi b...
1865
1866
1867
1868
  /*
   * Tell whether @group is worth scanning for the request in @ac under
   * criterion @cr (0 = strictest ... 3 = accept anything with free space).
   * Returns 1 if the group qualifies, 0 otherwise (including when the
   * group info could not be initialized).
   */
  static int ext4_mb_good_group(struct ext4_allocation_context *ac,
  				ext4_group_t group, int cr)
  {
  	unsigned free, fragments;
  	int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
  	struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
  
  	BUG_ON(cr < 0 || cr >= 4);
  
  	/* We only do this if the grp has never been initialized */
  	if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
  		int ret = ext4_mb_init_group(ac->ac_sb, group);
  		if (ret)
  			return 0;
  	}
  
  	free = grp->bb_free;
  	fragments = grp->bb_fragments;
  	if (free == 0 || fragments == 0)
  		return 0;
  
  	switch (cr) {
  	case 0:
  		BUG_ON(ac->ac_2order == 0);
  
  		/* need a free buddy chunk of at least the requested order */
  		if (grp->bb_largest_free_order < ac->ac_2order)
  			return 0;
  
  		/* Avoid using the first bg of a flexgroup for data files */
  		if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
  		    (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) &&
  		    ((group % flex_size) == 0))
  			return 0;
  
  		return 1;
  	case 1:
  		/* average free fragment is large enough for the request */
  		return (free / fragments) >= ac->ac_g_ex.fe_len;
  	case 2:
  		/* total free space is large enough for the request */
  		return free >= ac->ac_g_ex.fe_len;
  	case 3:
  		return 1;
  	default:
  		BUG();
  	}
  
  	return 0;
  }
4ddfef7b4   Eric Sandeen   ext4: reduce mbal...
1916
1917
  static noinline_for_stack int
  ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
c9de560de   Alex Tomas   ext4: Add multi b...
1918
  {
8df9675f8   Theodore Ts'o   ext4: Avoid races...
1919
  	ext4_group_t ngroups, group, i;
c9de560de   Alex Tomas   ext4: Add multi b...
1920
1921
  	int cr;
  	int err = 0;
c9de560de   Alex Tomas   ext4: Add multi b...
1922
1923
1924
  	struct ext4_sb_info *sbi;
  	struct super_block *sb;
  	struct ext4_buddy e4b;
c9de560de   Alex Tomas   ext4: Add multi b...
1925
1926
1927
  
  	sb = ac->ac_sb;
  	sbi = EXT4_SB(sb);
8df9675f8   Theodore Ts'o   ext4: Avoid races...
1928
  	ngroups = ext4_get_groups_count(sb);
fb0a387dc   Eric Sandeen   ext4: limit block...
1929
  	/* non-extent files are limited to low blocks/groups */
12e9b8920   Dmitry Monakhov   ext4: Use bitops ...
1930
  	if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)))
fb0a387dc   Eric Sandeen   ext4: limit block...
1931
  		ngroups = sbi->s_blockfile_groups;
c9de560de   Alex Tomas   ext4: Add multi b...
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
  	BUG_ON(ac->ac_status == AC_STATUS_FOUND);
  
  	/* first, try the goal */
  	err = ext4_mb_find_by_goal(ac, &e4b);
  	if (err || ac->ac_status == AC_STATUS_FOUND)
  		goto out;
  
  	if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
  		goto out;
  
  	/*
  	 * ac->ac2_order is set only if the fe_len is a power of 2
  	 * if ac2_order is set we also set criteria to 0 so that we
  	 * try exact allocation using buddy.
  	 */
  	i = fls(ac->ac_g_ex.fe_len);
  	ac->ac_2order = 0;
  	/*
  	 * We search using buddy data only if the order of the request
  	 * is greater than equal to the sbi_s_mb_order2_reqs
b713a5ec5   Theodore Ts'o   ext4: remove /pro...
1952
  	 * You can tune it via /sys/fs/ext4/<partition>/mb_order2_req
c9de560de   Alex Tomas   ext4: Add multi b...
1953
1954
1955
1956
1957
1958
1959
1960
  	 */
  	if (i >= sbi->s_mb_order2_reqs) {
  		/*
  		 * This should tell if fe_len is exactly power of 2
  		 */
  		if ((ac->ac_g_ex.fe_len & (~(1 << (i - 1)))) == 0)
  			ac->ac_2order = i - 1;
  	}
4ba74d00a   Theodore Ts'o   ext4: Fix bugs in...
1961
1962
  	/* if stream allocation is enabled, use global goal */
  	if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
c9de560de   Alex Tomas   ext4: Add multi b...
1963
1964
1965
1966
1967
1968
  		/* TBD: may be hot point */
  		spin_lock(&sbi->s_md_lock);
  		ac->ac_g_ex.fe_group = sbi->s_mb_last_group;
  		ac->ac_g_ex.fe_start = sbi->s_mb_last_start;
  		spin_unlock(&sbi->s_md_lock);
  	}
4ba74d00a   Theodore Ts'o   ext4: Fix bugs in...
1969

c9de560de   Alex Tomas   ext4: Add multi b...
1970
1971
1972
1973
1974
1975
1976
1977
1978
  	/* Let's just scan groups to find more-less suitable blocks */
  	cr = ac->ac_2order ? 0 : 1;
  	/*
  	 * cr == 0 try to get exact allocation,
  	 * cr == 3  try to get anything
  	 */
  repeat:
  	for (; cr < 4 && ac->ac_status == AC_STATUS_CONTINUE; cr++) {
  		ac->ac_criteria = cr;
ed8f9c751   Aneesh Kumar K.V   ext4: start searc...
1979
1980
1981
1982
1983
  		/*
  		 * searching for the right group start
  		 * from the goal value specified
  		 */
  		group = ac->ac_g_ex.fe_group;
8df9675f8   Theodore Ts'o   ext4: Avoid races...
1984
  		for (i = 0; i < ngroups; group++, i++) {
8df9675f8   Theodore Ts'o   ext4: Avoid races...
1985
  			if (group == ngroups)
c9de560de   Alex Tomas   ext4: Add multi b...
1986
  				group = 0;
8a57d9d61   Curt Wohlgemuth   ext4: check for a...
1987
1988
  			/* This now checks without needing the buddy page */
  			if (!ext4_mb_good_group(ac, group, cr))
c9de560de   Alex Tomas   ext4: Add multi b...
1989
  				continue;
c9de560de   Alex Tomas   ext4: Add multi b...
1990
1991
1992
1993
1994
  			err = ext4_mb_load_buddy(sb, group, &e4b);
  			if (err)
  				goto out;
  
  			ext4_lock_group(sb, group);
8a57d9d61   Curt Wohlgemuth   ext4: check for a...
1995
1996
1997
1998
1999
  
  			/*
  			 * We need to check again after locking the
  			 * block group
  			 */
c9de560de   Alex Tomas   ext4: Add multi b...
2000
  			if (!ext4_mb_good_group(ac, group, cr)) {
c9de560de   Alex Tomas   ext4: Add multi b...
2001
  				ext4_unlock_group(sb, group);
e39e07fdf   Jing Zhang   ext4: rename ext4...
2002
  				ext4_mb_unload_buddy(&e4b);
c9de560de   Alex Tomas   ext4: Add multi b...
2003
2004
2005
2006
  				continue;
  			}
  
  			ac->ac_groups_scanned++;
75507efb1   Theodore Ts'o   ext4: Don't avoid...
2007
  			if (cr == 0)
c9de560de   Alex Tomas   ext4: Add multi b...
2008
  				ext4_mb_simple_scan_group(ac, &e4b);
506bf2d82   Eric Sandeen   ext4: allocate st...
2009
2010
  			else if (cr == 1 && sbi->s_stripe &&
  					!(ac->ac_g_ex.fe_len % sbi->s_stripe))
c9de560de   Alex Tomas   ext4: Add multi b...
2011
2012
2013
2014
2015
  				ext4_mb_scan_aligned(ac, &e4b);
  			else
  				ext4_mb_complex_scan_group(ac, &e4b);
  
  			ext4_unlock_group(sb, group);
e39e07fdf   Jing Zhang   ext4: rename ext4...
2016
  			ext4_mb_unload_buddy(&e4b);
c9de560de   Alex Tomas   ext4: Add multi b...
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
  
  			if (ac->ac_status != AC_STATUS_CONTINUE)
  				break;
  		}
  	}
  
  	if (ac->ac_b_ex.fe_len > 0 && ac->ac_status != AC_STATUS_FOUND &&
  	    !(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
  		/*
  		 * We've been searching too long. Let's try to allocate
  		 * the best chunk we've found so far
  		 */
  
  		ext4_mb_try_best_found(ac, &e4b);
  		if (ac->ac_status != AC_STATUS_FOUND) {
  			/*
  			 * Someone more lucky has already allocated it.
  			 * The only thing we can do is just take first
  			 * found block(s)
  			printk(KERN_DEBUG "EXT4-fs: someone won our chunk
  ");
  			 */
  			ac->ac_b_ex.fe_group = 0;
  			ac->ac_b_ex.fe_start = 0;
  			ac->ac_b_ex.fe_len = 0;
  			ac->ac_status = AC_STATUS_CONTINUE;
  			ac->ac_flags |= EXT4_MB_HINT_FIRST;
  			cr = 3;
  			atomic_inc(&sbi->s_mb_lost_chunks);
  			goto repeat;
  		}
  	}
  out:
  	return err;
  }
c9de560de   Alex Tomas   ext4: Add multi b...
2052
2053
2054
  /*
   * seq_file ->start: map position *pos to an iterator cookie.  The cookie
   * is the group number plus one, so that group 0 is not confused with the
   * NULL that signals the end of the sequence.
   */
  static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
  {
  	struct super_block *sb = seq->private;
  	ext4_group_t cookie;
  
  	if (*pos >= 0 && *pos < ext4_get_groups_count(sb)) {
  		cookie = *pos + 1;
  		return (void *) ((unsigned long) cookie);
  	}
  
  	return NULL;
  }
  
  /*
   * seq_file ->next: advance *pos and return the cookie (group number plus
   * one) for the new position, or NULL once past the last group.
   */
  static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
  {
  	struct super_block *sb = seq->private;
  	ext4_group_t cookie;
  
  	(*pos)++;
  	if (*pos >= 0 && *pos < ext4_get_groups_count(sb)) {
  		cookie = *pos + 1;
  		return (void *) ((unsigned long) cookie);
  	}
  
  	return NULL;
  }
  
  static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
  {
  	struct super_block *sb = seq->private;
a9df9a491   Theodore Ts'o   ext4: Make ext4_g...
2077
  	ext4_group_t group = (ext4_group_t) ((unsigned long) v);
c9de560de   Alex Tomas   ext4: Add multi b...
2078
2079
2080
2081
2082
  	int i;
  	int err;
  	struct ext4_buddy e4b;
  	struct sg {
  		struct ext4_group_info info;
a36b44988   Eric Sandeen   ext4: use ext4_gr...
2083
  		ext4_grpblk_t counters[16];
c9de560de   Alex Tomas   ext4: Add multi b...
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
  	} sg;
  
  	group--;
  	if (group == 0)
  		seq_printf(seq, "#%-5s: %-5s %-5s %-5s "
  				"[ %-5s %-5s %-5s %-5s %-5s %-5s %-5s "
  				  "%-5s %-5s %-5s %-5s %-5s %-5s %-5s ]
  ",
  			   "group", "free", "frags", "first",
  			   "2^0", "2^1", "2^2", "2^3", "2^4", "2^5", "2^6",
  			   "2^7", "2^8", "2^9", "2^10", "2^11", "2^12", "2^13");
  
  	i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
  		sizeof(struct ext4_group_info);
  	err = ext4_mb_load_buddy(sb, group, &e4b);
  	if (err) {
a9df9a491   Theodore Ts'o   ext4: Make ext4_g...
2100
2101
  		seq_printf(seq, "#%-5u: I/O error
  ", group);
c9de560de   Alex Tomas   ext4: Add multi b...
2102
2103
2104
2105
2106
  		return 0;
  	}
  	ext4_lock_group(sb, group);
  	memcpy(&sg, ext4_get_group_info(sb, group), i);
  	ext4_unlock_group(sb, group);
e39e07fdf   Jing Zhang   ext4: rename ext4...
2107
  	ext4_mb_unload_buddy(&e4b);
c9de560de   Alex Tomas   ext4: Add multi b...
2108

a9df9a491   Theodore Ts'o   ext4: Make ext4_g...
2109
  	seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
c9de560de   Alex Tomas   ext4: Add multi b...
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
  			sg.info.bb_fragments, sg.info.bb_first_free);
  	for (i = 0; i <= 13; i++)
  		seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ?
  				sg.info.bb_counters[i] : 0);
  	seq_printf(seq, " ]
  ");
  
  	return 0;
  }
  
  /* seq_file ->stop callback; the body is empty, nothing is released here. */
  static void ext4_mb_seq_groups_stop(struct seq_file *seq, void *v)
  {
  }
7f1346a9d   Tobias Klauser   ext4: Declare seq...
2123
  static const struct seq_operations ext4_mb_seq_groups_ops = {
c9de560de   Alex Tomas   ext4: Add multi b...
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
  	.start  = ext4_mb_seq_groups_start,
  	.next   = ext4_mb_seq_groups_next,
  	.stop   = ext4_mb_seq_groups_stop,
  	.show   = ext4_mb_seq_groups_show,
  };
  
  static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file)
  {
  	struct super_block *sb = PDE(inode)->data;
  	int rc;
  
  	rc = seq_open(file, &ext4_mb_seq_groups_ops);
  	if (rc == 0) {
a271fe852   Joe Perches   ext4: Remove unne...
2137
  		struct seq_file *m = file->private_data;
c9de560de   Alex Tomas   ext4: Add multi b...
2138
2139
2140
2141
2142
  		m->private = sb;
  	}
  	return rc;
  
  }
7f1346a9d   Tobias Klauser   ext4: Declare seq...
2143
  static const struct file_operations ext4_mb_seq_groups_fops = {
c9de560de   Alex Tomas   ext4: Add multi b...
2144
2145
2146
2147
2148
2149
  	.owner		= THIS_MODULE,
  	.open		= ext4_mb_seq_groups_open,
  	.read		= seq_read,
  	.llseek		= seq_lseek,
  	.release	= seq_release,
  };
fb1813f4a   Curt Wohlgemuth   ext4: use dedicat...
2150
2151
2152
2153
2154
2155
2156
2157
  /*
   * Return the slab cache used for ext4_group_info objects of a filesystem
   * with the given block size (log2).  The caches are indexed relative to
   * EXT4_MIN_BLOCK_LOG_SIZE; a missing cache is a BUG.
   */
  static struct kmem_cache *get_groupinfo_cache(int blocksize_bits)
  {
  	struct kmem_cache *cachep;
  
  	cachep = ext4_groupinfo_caches[blocksize_bits -
  				       EXT4_MIN_BLOCK_LOG_SIZE];
  	BUG_ON(!cachep);
  
  	return cachep;
  }
5f21b0e64   Frederic Bohe   ext4: fix online ...
2158
2159
  
  /* Create and initialize ext4_group_info data for the given group. */
920313a72   Aneesh Kumar K.V   ext4: Use EXT4_GR...
2160
  int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
5f21b0e64   Frederic Bohe   ext4: fix online ...
2161
2162
  			  struct ext4_group_desc *desc)
  {
fb1813f4a   Curt Wohlgemuth   ext4: use dedicat...
2163
  	int i;
5f21b0e64   Frederic Bohe   ext4: fix online ...
2164
2165
2166
  	int metalen = 0;
  	struct ext4_sb_info *sbi = EXT4_SB(sb);
  	struct ext4_group_info **meta_group_info;
fb1813f4a   Curt Wohlgemuth   ext4: use dedicat...
2167
  	struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
5f21b0e64   Frederic Bohe   ext4: fix online ...
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
  
  	/*
  	 * First check if this group is the first of a reserved block.
  	 * If it's true, we have to allocate a new table of pointers
  	 * to ext4_group_info structures
  	 */
  	if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
  		metalen = sizeof(*meta_group_info) <<
  			EXT4_DESC_PER_BLOCK_BITS(sb);
  		meta_group_info = kmalloc(metalen, GFP_KERNEL);
  		if (meta_group_info == NULL) {
9d8b9ec44   Theodore Ts'o   ext4: use ext4_ms...
2179
2180
  			ext4_msg(sb, KERN_ERR, "EXT4-fs: can't allocate mem "
  				 "for a buddy group");
5f21b0e64   Frederic Bohe   ext4: fix online ...
2181
2182
2183
2184
2185
  			goto exit_meta_group_info;
  		}
  		sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] =
  			meta_group_info;
  	}
5f21b0e64   Frederic Bohe   ext4: fix online ...
2186
2187
2188
  	meta_group_info =
  		sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)];
  	i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
fb1813f4a   Curt Wohlgemuth   ext4: use dedicat...
2189
  	meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL);
5f21b0e64   Frederic Bohe   ext4: fix online ...
2190
  	if (meta_group_info[i] == NULL) {
9d8b9ec44   Theodore Ts'o   ext4: use ext4_ms...
2191
  		ext4_msg(sb, KERN_ERR, "EXT4-fs: can't allocate buddy mem");
5f21b0e64   Frederic Bohe   ext4: fix online ...
2192
2193
  		goto exit_group_info;
  	}
fb1813f4a   Curt Wohlgemuth   ext4: use dedicat...
2194
  	memset(meta_group_info[i], 0, kmem_cache_size(cachep));
5f21b0e64   Frederic Bohe   ext4: fix online ...
2195
2196
2197
2198
2199
2200
2201
2202
2203
  	set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
  		&(meta_group_info[i]->bb_state));
  
  	/*
  	 * initialize bb_free to be able to skip
  	 * empty groups without initialization
  	 */
  	if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
  		meta_group_info[i]->bb_free =
cff1dfd76   Theodore Ts'o   ext4: rename ext4...
2204
  			ext4_free_clusters_after_init(sb, group, desc);
5f21b0e64   Frederic Bohe   ext4: fix online ...
2205
2206
  	} else {
  		meta_group_info[i]->bb_free =
021b65bb1   Theodore Ts'o   ext4: Rename ext4...
2207
  			ext4_free_group_clusters(sb, desc);
5f21b0e64   Frederic Bohe   ext4: fix online ...
2208
2209
2210
  	}
  
  	INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
920313a72   Aneesh Kumar K.V   ext4: Use EXT4_GR...
2211
  	init_rwsem(&meta_group_info[i]->alloc_sem);
64e290ec6   Venkatesh Pallipadi   ext4: fix up rb_r...
2212
  	meta_group_info[i]->bb_free_root = RB_ROOT;
8a57d9d61   Curt Wohlgemuth   ext4: check for a...
2213
  	meta_group_info[i]->bb_largest_free_order = -1;  /* uninit */
5f21b0e64   Frederic Bohe   ext4: fix online ...
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
  
  #ifdef DOUBLE_CHECK
  	{
  		struct buffer_head *bh;
  		meta_group_info[i]->bb_bitmap =
  			kmalloc(sb->s_blocksize, GFP_KERNEL);
  		BUG_ON(meta_group_info[i]->bb_bitmap == NULL);
  		bh = ext4_read_block_bitmap(sb, group);
  		BUG_ON(bh == NULL);
  		memcpy(meta_group_info[i]->bb_bitmap, bh->b_data,
  			sb->s_blocksize);
  		put_bh(bh);
  	}
  #endif
  
  	return 0;
  
  exit_group_info:
  	/* If a meta_group_info table has been allocated, release it now */
caaf7a29d   Tao Ma   ext4: Fix a doubl...
2233
  	if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
5f21b0e64   Frederic Bohe   ext4: fix online ...
2234
  		kfree(sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]);
caaf7a29d   Tao Ma   ext4: Fix a doubl...
2235
2236
  		sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = NULL;
  	}
5f21b0e64   Frederic Bohe   ext4: fix online ...
2237
2238
2239
  exit_meta_group_info:
  	return -ENOMEM;
  } /* ext4_mb_add_groupinfo */
c9de560de   Alex Tomas   ext4: Add multi b...
2240
2241
  static int ext4_mb_init_backend(struct super_block *sb)
  {
8df9675f8   Theodore Ts'o   ext4: Avoid races...
2242
  	ext4_group_t ngroups = ext4_get_groups_count(sb);
c9de560de   Alex Tomas   ext4: Add multi b...
2243
  	ext4_group_t i;
c9de560de   Alex Tomas   ext4: Add multi b...
2244
  	struct ext4_sb_info *sbi = EXT4_SB(sb);
5f21b0e64   Frederic Bohe   ext4: fix online ...
2245
2246
2247
2248
  	struct ext4_super_block *es = sbi->s_es;
  	int num_meta_group_infos;
  	int num_meta_group_infos_max;
  	int array_size;
5f21b0e64   Frederic Bohe   ext4: fix online ...
2249
  	struct ext4_group_desc *desc;
fb1813f4a   Curt Wohlgemuth   ext4: use dedicat...
2250
  	struct kmem_cache *cachep;
5f21b0e64   Frederic Bohe   ext4: fix online ...
2251
2252
  
  	/* This is the number of blocks used by GDT */
8df9675f8   Theodore Ts'o   ext4: Avoid races...
2253
  	num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) -
5f21b0e64   Frederic Bohe   ext4: fix online ...
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
  				1) >> EXT4_DESC_PER_BLOCK_BITS(sb);
  
  	/*
  	 * This is the total number of blocks used by GDT including
  	 * the number of reserved blocks for GDT.
  	 * The s_group_info array is allocated with this value
  	 * to allow a clean online resize without a complex
  	 * manipulation of pointer.
  	 * The drawback is the unused memory when no resize
  	 * occurs but it's very low in terms of pages
  	 * (see comments below)
  	 * Need to handle this properly when META_BG resizing is allowed
  	 */
  	num_meta_group_infos_max = num_meta_group_infos +
  				le16_to_cpu(es->s_reserved_gdt_blocks);
c9de560de   Alex Tomas   ext4: Add multi b...
2269

5f21b0e64   Frederic Bohe   ext4: fix online ...
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
  	/*
  	 * array_size is the size of s_group_info array. We round it
  	 * to the next power of two because this approximation is done
  	 * internally by kmalloc so we can have some more memory
  	 * for free here (e.g. may be used for META_BG resize).
  	 */
  	array_size = 1;
  	while (array_size < sizeof(*sbi->s_group_info) *
  	       num_meta_group_infos_max)
  		array_size = array_size << 1;
c9de560de   Alex Tomas   ext4: Add multi b...
2280
2281
2282
  	/* An 8TB filesystem with 64-bit pointers requires a 4096 byte
  	 * kmalloc. A 128kb malloc should suffice for a 256TB filesystem.
  	 * So a two level scheme suffices for now. */
f18a5f21c   Theodore Ts'o   ext4: use ext4_kv...
2283
  	sbi->s_group_info = ext4_kvzalloc(array_size, GFP_KERNEL);
c9de560de   Alex Tomas   ext4: Add multi b...
2284
  	if (sbi->s_group_info == NULL) {
9d8b9ec44   Theodore Ts'o   ext4: use ext4_ms...
2285
  		ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group");
c9de560de   Alex Tomas   ext4: Add multi b...
2286
2287
2288
2289
  		return -ENOMEM;
  	}
  	sbi->s_buddy_cache = new_inode(sb);
  	if (sbi->s_buddy_cache == NULL) {
9d8b9ec44   Theodore Ts'o   ext4: use ext4_ms...
2290
  		ext4_msg(sb, KERN_ERR, "can't get new inode");
c9de560de   Alex Tomas   ext4: Add multi b...
2291
2292
  		goto err_freesgi;
  	}
48e6061bf   Yu Jian   ext4: use EXT4_BA...
2293
2294
2295
2296
2297
  	/* To avoid potentially colliding with an valid on-disk inode number,
  	 * use EXT4_BAD_INO for the buddy cache inode number.  This inode is
  	 * not in the inode hash, so it should never be found by iget(), but
  	 * this will avoid confusion if it ever shows up during debugging. */
  	sbi->s_buddy_cache->i_ino = EXT4_BAD_INO;
c9de560de   Alex Tomas   ext4: Add multi b...
2298
  	EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
8df9675f8   Theodore Ts'o   ext4: Avoid races...
2299
  	for (i = 0; i < ngroups; i++) {
c9de560de   Alex Tomas   ext4: Add multi b...
2300
2301
  		desc = ext4_get_group_desc(sb, i, NULL);
  		if (desc == NULL) {
9d8b9ec44   Theodore Ts'o   ext4: use ext4_ms...
2302
  			ext4_msg(sb, KERN_ERR, "can't read descriptor %u", i);
c9de560de   Alex Tomas   ext4: Add multi b...
2303
2304
  			goto err_freebuddy;
  		}
5f21b0e64   Frederic Bohe   ext4: fix online ...
2305
2306
  		if (ext4_mb_add_groupinfo(sb, i, desc) != 0)
  			goto err_freebuddy;
c9de560de   Alex Tomas   ext4: Add multi b...
2307
2308
2309
2310
2311
  	}
  
  	return 0;
  
  err_freebuddy:
fb1813f4a   Curt Wohlgemuth   ext4: use dedicat...
2312
  	cachep = get_groupinfo_cache(sb->s_blocksize_bits);
f1fa3342e   Roel Kluin   ext4: fix hot spi...
2313
  	while (i-- > 0)
fb1813f4a   Curt Wohlgemuth   ext4: use dedicat...
2314
  		kmem_cache_free(cachep, ext4_get_group_info(sb, i));
c9de560de   Alex Tomas   ext4: Add multi b...
2315
  	i = num_meta_group_infos;
f1fa3342e   Roel Kluin   ext4: fix hot spi...
2316
  	while (i-- > 0)
c9de560de   Alex Tomas   ext4: Add multi b...
2317
2318
2319
  		kfree(sbi->s_group_info[i]);
  	iput(sbi->s_buddy_cache);
  err_freesgi:
f18a5f21c   Theodore Ts'o   ext4: use ext4_kv...
2320
  	ext4_kvfree(sbi->s_group_info);
c9de560de   Alex Tomas   ext4: Add multi b...
2321
2322
  	return -ENOMEM;
  }
2892c15dd   Eric Sandeen   ext4: make grpinf...
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
  /*
   * Destroy every group-info slab cache that currently exists and reset
   * all NR_GRPINFO_CACHES slots of ext4_groupinfo_caches to NULL.
   */
  static void ext4_groupinfo_destroy_slabs(void)
  {
  	int idx = 0;

  	while (idx < NR_GRPINFO_CACHES) {
  		struct kmem_cache *cache = ext4_groupinfo_caches[idx];

  		if (cache != NULL)
  			kmem_cache_destroy(cache);
  		ext4_groupinfo_caches[idx] = NULL;
  		idx++;
  	}
  }
  
  /*
   * Make sure the group-info slab cache matching @size exists.
   *
   * @size is converted to a bit count with order_base_2(); that count
   * selects the slot in ext4_groupinfo_caches and determines how many
   * bb_counters[] entries one slab object holds.  Creation is serialized
   * by a function-local mutex, and a cache that already exists is reused.
   *
   * Returns 0 if the cache exists or was created, -EINVAL if @size maps
   * beyond the last cache slot, -ENOMEM if the cache cannot be created.
   */
  static int ext4_groupinfo_create_slab(size_t size)
  {
  	static DEFINE_MUTEX(ext4_grpinfo_slab_create_mutex);
  	int slab_size;
  	int blocksize_bits = order_base_2(size);
  	int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
  	struct kmem_cache *cachep;

  	if (cache_index >= NR_GRPINFO_CACHES)
  		return -EINVAL;

  	/* sizes below the minimum share the first slot */
  	if (unlikely(cache_index < 0))
  		cache_index = 0;

  	mutex_lock(&ext4_grpinfo_slab_create_mutex);
  	if (ext4_groupinfo_caches[cache_index]) {
  		mutex_unlock(&ext4_grpinfo_slab_create_mutex);
  		return 0;	/* Already created */
  	}

  	/* object size: the struct up to and including its last counter */
  	slab_size = offsetof(struct ext4_group_info,
  				bb_counters[blocksize_bits + 2]);

  	cachep = kmem_cache_create(ext4_groupinfo_slab_names[cache_index],
  					slab_size, 0, SLAB_RECLAIM_ACCOUNT,
  					NULL);

  	/* publish the result (possibly NULL) before dropping the mutex */
  	ext4_groupinfo_caches[cache_index] = cachep;

  	mutex_unlock(&ext4_grpinfo_slab_create_mutex);
  	if (!cachep) {
  		printk(KERN_EMERG
  		       "EXT4-fs: no memory for groupinfo slab cache\n");
  		return -ENOMEM;
  	}

  	return 0;
  }
c9de560de   Alex Tomas   ext4: Add multi b...
2370
2371
2372
  int ext4_mb_init(struct super_block *sb, int needs_recovery)
  {
  	struct ext4_sb_info *sbi = EXT4_SB(sb);
6be2ded1d   Aneesh Kumar K.V   ext4: Don't allow...
2373
  	unsigned i, j;
c9de560de   Alex Tomas   ext4: Add multi b...
2374
2375
  	unsigned offset;
  	unsigned max;
74767c5a2   Shen Feng   ext4: miscellaneo...
2376
  	int ret;
c9de560de   Alex Tomas   ext4: Add multi b...
2377

1927805e6   Eric Sandeen   ext4: use variabl...
2378
  	i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets);
c9de560de   Alex Tomas   ext4: Add multi b...
2379
2380
2381
  
  	sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL);
  	if (sbi->s_mb_offsets == NULL) {
fb1813f4a   Curt Wohlgemuth   ext4: use dedicat...
2382
2383
  		ret = -ENOMEM;
  		goto out;
c9de560de   Alex Tomas   ext4: Add multi b...
2384
  	}
ff7ef329b   Yasunori Goto   ext4: Widen type ...
2385

1927805e6   Eric Sandeen   ext4: use variabl...
2386
  	i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs);
c9de560de   Alex Tomas   ext4: Add multi b...
2387
2388
  	sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
  	if (sbi->s_mb_maxs == NULL) {
fb1813f4a   Curt Wohlgemuth   ext4: use dedicat...
2389
2390
2391
  		ret = -ENOMEM;
  		goto out;
  	}
2892c15dd   Eric Sandeen   ext4: make grpinf...
2392
2393
2394
  	ret = ext4_groupinfo_create_slab(sb->s_blocksize);
  	if (ret < 0)
  		goto out;
c9de560de   Alex Tomas   ext4: Add multi b...
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
  
  	/* order 0 is regular bitmap */
  	sbi->s_mb_maxs[0] = sb->s_blocksize << 3;
  	sbi->s_mb_offsets[0] = 0;
  
  	i = 1;
  	offset = 0;
  	max = sb->s_blocksize << 2;
  	do {
  		sbi->s_mb_offsets[i] = offset;
  		sbi->s_mb_maxs[i] = max;
  		offset += 1 << (sb->s_blocksize_bits - i);
  		max = max >> 1;
  		i++;
  	} while (i <= sb->s_blocksize_bits + 1);
c9de560de   Alex Tomas   ext4: Add multi b...
2410
  	spin_lock_init(&sbi->s_md_lock);
c9de560de   Alex Tomas   ext4: Add multi b...
2411
2412
2413
2414
2415
2416
2417
  	spin_lock_init(&sbi->s_bal_lock);
  
  	sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
  	sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN;
  	sbi->s_mb_stats = MB_DEFAULT_STATS;
  	sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
  	sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
27baebb84   Theodore Ts'o   ext4: tune mballo...
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
  	/*
  	 * The default group preallocation is 512, which for 4k block
  	 * sizes translates to 2 megabytes.  However for bigalloc file
  	 * systems, this is probably too big (i.e, if the cluster size
  	 * is 1 megabyte, then group preallocation size becomes half a
  	 * gigabyte!).  As a default, we will keep a two megabyte
  	 * group pralloc size for cluster sizes up to 64k, and after
  	 * that, we will force a minimum group preallocation size of
  	 * 32 clusters.  This translates to 8 megs when the cluster
  	 * size is 256k, and 32 megs when the cluster size is 1 meg,
  	 * which seems reasonable as a default.
  	 */
  	sbi->s_mb_group_prealloc = max(MB_DEFAULT_GROUP_PREALLOC >>
  				       sbi->s_cluster_bits, 32);
d7a1fee13   Dan Ehrenberg   ext4: make the pr...
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
  	/*
  	 * If there is a s_stripe > 1, then we set the s_mb_group_prealloc
  	 * to the lowest multiple of s_stripe which is bigger than
  	 * the s_mb_group_prealloc as determined above. We want
  	 * the preallocation size to be an exact multiple of the
  	 * RAID stripe size so that preallocations don't fragment
  	 * the stripes.
  	 */
  	if (sbi->s_stripe > 1) {
  		sbi->s_mb_group_prealloc = roundup(
  			sbi->s_mb_group_prealloc, sbi->s_stripe);
  	}
c9de560de   Alex Tomas   ext4: Add multi b...
2444

730c213c7   Eric Sandeen   ext4: use percpu ...
2445
  	sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
c9de560de   Alex Tomas   ext4: Add multi b...
2446
  	if (sbi->s_locality_groups == NULL) {
fb1813f4a   Curt Wohlgemuth   ext4: use dedicat...
2447
  		ret = -ENOMEM;
7aa0baeab   Tao Ma   ext4: Free resour...
2448
  		goto out_free_groupinfo_slab;
c9de560de   Alex Tomas   ext4: Add multi b...
2449
  	}
730c213c7   Eric Sandeen   ext4: use percpu ...
2450
  	for_each_possible_cpu(i) {
c9de560de   Alex Tomas   ext4: Add multi b...
2451
  		struct ext4_locality_group *lg;
730c213c7   Eric Sandeen   ext4: use percpu ...
2452
  		lg = per_cpu_ptr(sbi->s_locality_groups, i);
c9de560de   Alex Tomas   ext4: Add multi b...
2453
  		mutex_init(&lg->lg_mutex);
6be2ded1d   Aneesh Kumar K.V   ext4: Don't allow...
2454
2455
  		for (j = 0; j < PREALLOC_TB_SIZE; j++)
  			INIT_LIST_HEAD(&lg->lg_prealloc_list[j]);
c9de560de   Alex Tomas   ext4: Add multi b...
2456
2457
  		spin_lock_init(&lg->lg_prealloc_lock);
  	}
79a77c5ac   Yu Jian   ext4: prevent mem...
2458
2459
  	/* init file for buddy data */
  	ret = ext4_mb_init_backend(sb);
7aa0baeab   Tao Ma   ext4: Free resour...
2460
2461
  	if (ret != 0)
  		goto out_free_locality_groups;
79a77c5ac   Yu Jian   ext4: prevent mem...
2462

296c355cd   Theodore Ts'o   ext4: Use tracepo...
2463
2464
2465
  	if (sbi->s_proc)
  		proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
  				 &ext4_mb_seq_groups_fops, sb);
c9de560de   Alex Tomas   ext4: Add multi b...
2466

0390131ba   Frank Mayhar   ext4: Allow ext4 ...
2467
2468
  	if (sbi->s_journal)
  		sbi->s_journal->j_commit_callback = release_blocks_on_commit;
7aa0baeab   Tao Ma   ext4: Free resour...
2469
2470
2471
2472
2473
2474
2475
2476
  
  	return 0;
  
  out_free_locality_groups:
  	free_percpu(sbi->s_locality_groups);
  	sbi->s_locality_groups = NULL;
  out_free_groupinfo_slab:
  	ext4_groupinfo_destroy_slabs();
fb1813f4a   Curt Wohlgemuth   ext4: use dedicat...
2477
  out:
7aa0baeab   Tao Ma   ext4: Free resour...
2478
2479
2480
2481
  	kfree(sbi->s_mb_offsets);
  	sbi->s_mb_offsets = NULL;
  	kfree(sbi->s_mb_maxs);
  	sbi->s_mb_maxs = NULL;
fb1813f4a   Curt Wohlgemuth   ext4: use dedicat...
2482
  	return ret;
c9de560de   Alex Tomas   ext4: Add multi b...
2483
  }
955ce5f5b   Aneesh Kumar K.V   ext4: Convert ext...
2484
  /* need to called with the ext4 group lock held */
c9de560de   Alex Tomas   ext4: Add multi b...
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
  static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
  {
  	struct ext4_prealloc_space *pa;
  	struct list_head *cur, *tmp;
  	int count = 0;
  
  	list_for_each_safe(cur, tmp, &grp->bb_prealloc_list) {
  		pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
  		list_del(&pa->pa_group_list);
  		count++;
688f05a01   Aneesh Kumar K.V   ext4: Free ext4_p...
2495
  		kmem_cache_free(ext4_pspace_cachep, pa);
c9de560de   Alex Tomas   ext4: Add multi b...
2496
2497
  	}
  	if (count)
6ba495e92   Theodore Ts'o   ext4: Add configu...
2498
2499
  		mb_debug(1, "mballoc: %u PAs left
  ", count);
c9de560de   Alex Tomas   ext4: Add multi b...
2500
2501
2502
2503
2504
  
  }
  
  int ext4_mb_release(struct super_block *sb)
  {
8df9675f8   Theodore Ts'o   ext4: Avoid races...
2505
  	ext4_group_t ngroups = ext4_get_groups_count(sb);
c9de560de   Alex Tomas   ext4: Add multi b...
2506
2507
2508
2509
  	ext4_group_t i;
  	int num_meta_group_infos;
  	struct ext4_group_info *grinfo;
  	struct ext4_sb_info *sbi = EXT4_SB(sb);
fb1813f4a   Curt Wohlgemuth   ext4: use dedicat...
2510
  	struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
c9de560de   Alex Tomas   ext4: Add multi b...
2511

c9de560de   Alex Tomas   ext4: Add multi b...
2512
  	if (sbi->s_group_info) {
8df9675f8   Theodore Ts'o   ext4: Avoid races...
2513
  		for (i = 0; i < ngroups; i++) {
c9de560de   Alex Tomas   ext4: Add multi b...
2514
2515
2516
2517
2518
2519
2520
  			grinfo = ext4_get_group_info(sb, i);
  #ifdef DOUBLE_CHECK
  			kfree(grinfo->bb_bitmap);
  #endif
  			ext4_lock_group(sb, i);
  			ext4_mb_cleanup_pa(grinfo);
  			ext4_unlock_group(sb, i);
fb1813f4a   Curt Wohlgemuth   ext4: use dedicat...
2521
  			kmem_cache_free(cachep, grinfo);
c9de560de   Alex Tomas   ext4: Add multi b...
2522
  		}
8df9675f8   Theodore Ts'o   ext4: Avoid races...
2523
  		num_meta_group_infos = (ngroups +
c9de560de   Alex Tomas   ext4: Add multi b...
2524
2525
2526
2527
  				EXT4_DESC_PER_BLOCK(sb) - 1) >>
  			EXT4_DESC_PER_BLOCK_BITS(sb);
  		for (i = 0; i < num_meta_group_infos; i++)
  			kfree(sbi->s_group_info[i]);
f18a5f21c   Theodore Ts'o   ext4: use ext4_kv...
2528
  		ext4_kvfree(sbi->s_group_info);
c9de560de   Alex Tomas   ext4: Add multi b...
2529
2530
2531
2532
2533
2534
  	}
  	kfree(sbi->s_mb_offsets);
  	kfree(sbi->s_mb_maxs);
  	if (sbi->s_buddy_cache)
  		iput(sbi->s_buddy_cache);
  	if (sbi->s_mb_stats) {
9d8b9ec44   Theodore Ts'o   ext4: use ext4_ms...
2535
2536
  		ext4_msg(sb, KERN_INFO,
  		       "mballoc: %u blocks %u reqs (%u success)",
c9de560de   Alex Tomas   ext4: Add multi b...
2537
2538
2539
  				atomic_read(&sbi->s_bal_allocated),
  				atomic_read(&sbi->s_bal_reqs),
  				atomic_read(&sbi->s_bal_success));
9d8b9ec44   Theodore Ts'o   ext4: use ext4_ms...
2540
2541
2542
  		ext4_msg(sb, KERN_INFO,
  		      "mballoc: %u extents scanned, %u goal hits, "
  				"%u 2^N hits, %u breaks, %u lost",
c9de560de   Alex Tomas   ext4: Add multi b...
2543
2544
2545
2546
2547
  				atomic_read(&sbi->s_bal_ex_scanned),
  				atomic_read(&sbi->s_bal_goals),
  				atomic_read(&sbi->s_bal_2orders),
  				atomic_read(&sbi->s_bal_breaks),
  				atomic_read(&sbi->s_mb_lost_chunks));
9d8b9ec44   Theodore Ts'o   ext4: use ext4_ms...
2548
2549
  		ext4_msg(sb, KERN_INFO,
  		       "mballoc: %lu generated and it took %Lu",
ced156e46   Tao Ma   ext4: don't incre...
2550
  				sbi->s_mb_buddies_generated,
c9de560de   Alex Tomas   ext4: Add multi b...
2551
  				sbi->s_mb_generation_time);
9d8b9ec44   Theodore Ts'o   ext4: use ext4_ms...
2552
2553
  		ext4_msg(sb, KERN_INFO,
  		       "mballoc: %u preallocated, %u discarded",
c9de560de   Alex Tomas   ext4: Add multi b...
2554
2555
2556
  				atomic_read(&sbi->s_mb_preallocated),
  				atomic_read(&sbi->s_mb_discarded));
  	}
730c213c7   Eric Sandeen   ext4: use percpu ...
2557
  	free_percpu(sbi->s_locality_groups);
296c355cd   Theodore Ts'o   ext4: Use tracepo...
2558
2559
  	if (sbi->s_proc)
  		remove_proc_entry("mb_groups", sbi->s_proc);
c9de560de   Alex Tomas   ext4: Add multi b...
2560
2561
2562
  
  	return 0;
  }
77ca6cdf0   Lukas Czerner   ext4: Use return ...
2563
  static inline int ext4_issue_discard(struct super_block *sb,
84130193e   Theodore Ts'o   ext4: teach ext4_...
2564
  		ext4_group_t block_group, ext4_grpblk_t cluster, int count)
5c521830c   Jiaying Zhang   ext4: Support dis...
2565
  {
5c521830c   Jiaying Zhang   ext4: Support dis...
2566
  	ext4_fsblk_t discard_block;
84130193e   Theodore Ts'o   ext4: teach ext4_...
2567
2568
2569
  	discard_block = (EXT4_C2B(EXT4_SB(sb), cluster) +
  			 ext4_group_first_block_no(sb, block_group));
  	count = EXT4_C2B(EXT4_SB(sb), count);
5c521830c   Jiaying Zhang   ext4: Support dis...
2570
2571
  	trace_ext4_discard_blocks(sb,
  			(unsigned long long) discard_block, count);
932596366   Lukas Czerner   ext4: remove warn...
2572
  	return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
5c521830c   Jiaying Zhang   ext4: Support dis...
2573
  }
3e624fc72   Theodore Ts'o   ext4: Replace hac...
2574
2575
2576
2577
2578
  /*
   * This function is called by the jbd2 layer once the commit has finished,
   * so we know we can free the blocks that were released with that commit.
   *
   * Every ext4_free_data entry on txn->t_private_list is optionally
   * discarded, removed from its group's rb tree, returned to the buddy
   * via mb_free_blocks() and then freed.
   */
  static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
  {
  	struct super_block *sb = journal->j_private;
  	struct ext4_buddy e4b;
  	struct ext4_group_info *db;
  	int err, count = 0, count2 = 0;
  	struct ext4_free_data *entry;
  	struct list_head *l, *ltmp;

  	/* _safe iteration: the entry is freed at the end of each pass */
  	list_for_each_safe(l, ltmp, &txn->t_private_list) {
  		entry = list_entry(l, struct ext4_free_data, list);

  		mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
  			 entry->count, entry->group, entry);

  		/* the discard result is not checked here */
  		if (test_opt(sb, DISCARD))
  			ext4_issue_discard(sb, entry->group,
  					   entry->start_cluster, entry->count);

  		err = ext4_mb_load_buddy(sb, entry->group, &e4b);
  		/* we expect to find existing buddy because it's pinned */
  		BUG_ON(err != 0);

  		db = e4b.bd_info;
  		/* there are blocks to put in buddy to make them really free */
  		count += entry->count;
  		count2++;
  		ext4_lock_group(sb, entry->group);
  		/* Take it out of per group rb tree */
  		rb_erase(&entry->node, &(db->bb_free_root));
  		mb_free_blocks(NULL, &e4b, entry->start_cluster, entry->count);

  		/*
  		 * Clear the trimmed flag for the group so that the next
  		 * ext4_trim_fs can trim it.
  		 * If the volume is mounted with -o discard, online discard
  		 * is supported and the free blocks will be trimmed online.
  		 */
  		if (!test_opt(sb, DISCARD))
  			EXT4_MB_GRP_CLEAR_TRIMMED(db);

  		if (!db->bb_free_root.rb_node) {
  			/* No more items in the per group rb tree
  			 * balance refcounts from ext4_mb_free_metadata()
  			 */
  			page_cache_release(e4b.bd_buddy_page);
  			page_cache_release(e4b.bd_bitmap_page);
  		}
  		ext4_unlock_group(sb, entry->group);
  		kmem_cache_free(ext4_free_ext_cachep, entry);
  		ext4_mb_unload_buddy(&e4b);
  	}

  	mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
  }
6ba495e92   Theodore Ts'o   ext4: Add configu...
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
  #ifdef CONFIG_EXT4_DEBUG
  u8 mb_enable_debug __read_mostly;
  
  static struct dentry *debugfs_dir;
  static struct dentry *debugfs_debug;
  
  /*
   * Create the "ext4" debugfs directory and, if that worked, the
   * "mballoc-debug" u8 file inside it, which is backed by
   * mb_enable_debug.
   */
  static void __init ext4_create_debugfs_entry(void)
  {
  	debugfs_dir = debugfs_create_dir("ext4", NULL);
  	if (!debugfs_dir)
  		return;

  	debugfs_debug = debugfs_create_u8("mballoc-debug", S_IRUGO | S_IWUSR,
  					  debugfs_dir, &mb_enable_debug);
  }
  
  /*
   * Remove the debugfs entries recorded in debugfs_debug and debugfs_dir:
   * the file first, then the directory that contains it.
   */
  static void ext4_remove_debugfs_entry(void)
  {
  	debugfs_remove(debugfs_debug);
  	debugfs_remove(debugfs_dir);
  }
  
  #else
  
  /* Stub used when CONFIG_EXT4_DEBUG is not defined: nothing to create. */
  static void __init ext4_create_debugfs_entry(void)
  {
  }
  
  /* Stub used when CONFIG_EXT4_DEBUG is not defined: nothing to remove. */
  static void ext4_remove_debugfs_entry(void)
  {
  }
  
  #endif
5dabfc78d   Theodore Ts'o   ext4: rename {exi...
2665
  /*
   * Create the three slab caches used by the allocator (preallocation
   * spaces, allocation contexts, free-data entries) and the debugfs
   * entry.
   *
   * Returns 0 on success.  If a cache cannot be created, the caches
   * created before it are destroyed again and -ENOMEM is returned.
   */
  int __init ext4_init_mballoc(void)
  {
  	ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space,
  					SLAB_RECLAIM_ACCOUNT);
  	if (!ext4_pspace_cachep)
  		return -ENOMEM;

  	ext4_ac_cachep = KMEM_CACHE(ext4_allocation_context,
  				    SLAB_RECLAIM_ACCOUNT);
  	if (!ext4_ac_cachep)
  		goto undo;

  	ext4_free_ext_cachep = KMEM_CACHE(ext4_free_data,
  					  SLAB_RECLAIM_ACCOUNT);
  	if (!ext4_free_ext_cachep)
  		goto undo;

  	ext4_create_debugfs_entry();
  	return 0;

  undo:
  	/* ext4_ac_cachep is NULL here when it was the one that failed */
  	kmem_cache_destroy(ext4_pspace_cachep);
  	if (ext4_ac_cachep)
  		kmem_cache_destroy(ext4_ac_cachep);
  	return -ENOMEM;
  }
5dabfc78d   Theodore Ts'o   ext4: rename {exi...
2688
  void ext4_exit_mballoc(void)
c9de560de   Alex Tomas   ext4: Add multi b...
2689
  {
60e6679e2   Theodore Ts'o   ext4: Drop whites...
2690
  	/*
3e03f9ca6   Jesper Dangaard Brouer   ext4: Use rcu_bar...
2691
2692
2693
2694
  	 * Wait for completion of call_rcu()'s on ext4_pspace_cachep
  	 * before destroying the slab cache.
  	 */
  	rcu_barrier();
c9de560de   Alex Tomas   ext4: Add multi b...
2695
  	kmem_cache_destroy(ext4_pspace_cachep);
256bdb497   Eric Sandeen   ext4: allocate st...
2696
  	kmem_cache_destroy(ext4_ac_cachep);
c894058d6   Aneesh Kumar K.V   ext4: Use an rbtr...
2697
  	kmem_cache_destroy(ext4_free_ext_cachep);
2892c15dd   Eric Sandeen   ext4: make grpinf...
2698
  	ext4_groupinfo_destroy_slabs();
6ba495e92   Theodore Ts'o   ext4: Add configu...
2699
  	ext4_remove_debugfs_entry();
c9de560de   Alex Tomas   ext4: Add multi b...
2700
2701
2702
2703
  }
  
  
  /*
73b2c7165   Uwe Kleine-König   fix comment typo ...
2704
   * Check quota and mark chosen space (ac->ac_b_ex) non-free in bitmaps
c9de560de   Alex Tomas   ext4: Add multi b...
2705
2706
   * Returns 0 if success or error code
   */
4ddfef7b4   Eric Sandeen   ext4: reduce mbal...
2707
2708
  static noinline_for_stack int
  ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
53accfa9f   Theodore Ts'o   ext4: teach mball...
2709
  				handle_t *handle, unsigned int reserv_clstrs)
c9de560de   Alex Tomas   ext4: Add multi b...
2710
2711
  {
  	struct buffer_head *bitmap_bh = NULL;
c9de560de   Alex Tomas   ext4: Add multi b...
2712
2713
2714
2715
2716
  	struct ext4_group_desc *gdp;
  	struct buffer_head *gdp_bh;
  	struct ext4_sb_info *sbi;
  	struct super_block *sb;
  	ext4_fsblk_t block;
519deca04   Aneesh Kumar K.V   ext4: Retry block...
2717
  	int err, len;
c9de560de   Alex Tomas   ext4: Add multi b...
2718
2719
2720
2721
2722
2723
  
  	BUG_ON(ac->ac_status != AC_STATUS_FOUND);
  	BUG_ON(ac->ac_b_ex.fe_len <= 0);
  
  	sb = ac->ac_sb;
  	sbi = EXT4_SB(sb);
c9de560de   Alex Tomas   ext4: Add multi b...
2724
2725
  
  	err = -EIO;
574ca174c   Theodore Ts'o   ext4: Rename read...
2726
  	bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group);
c9de560de   Alex Tomas   ext4: Add multi b...
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
  	if (!bitmap_bh)
  		goto out_err;
  
  	err = ext4_journal_get_write_access(handle, bitmap_bh);
  	if (err)
  		goto out_err;
  
  	err = -EIO;
  	gdp = ext4_get_group_desc(sb, ac->ac_b_ex.fe_group, &gdp_bh);
  	if (!gdp)
  		goto out_err;
a9df9a491   Theodore Ts'o   ext4: Make ext4_g...
2738
2739
  	ext4_debug("using block group %u(%d)
  ", ac->ac_b_ex.fe_group,
021b65bb1   Theodore Ts'o   ext4: Rename ext4...
2740
  			ext4_free_group_clusters(sb, gdp));
03cddb80e   Aneesh Kumar K.V   ext4: Fix use of ...
2741

c9de560de   Alex Tomas   ext4: Add multi b...
2742
2743
2744
  	err = ext4_journal_get_write_access(handle, gdp_bh);
  	if (err)
  		goto out_err;
bda00de7e   Akinobu Mita   ext4: cleanup to ...
2745
  	block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
c9de560de   Alex Tomas   ext4: Add multi b...
2746

53accfa9f   Theodore Ts'o   ext4: teach mball...
2747
  	len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
6fd058f77   Theodore Ts'o   ext4: Add a compr...
2748
  	if (!ext4_data_block_valid(sbi, block, len)) {
12062dddd   Eric Sandeen   ext4: move __func...
2749
  		ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
6fd058f77   Theodore Ts'o   ext4: Add a compr...
2750
2751
  			   "fs metadata
  ", block, block+len);
519deca04   Aneesh Kumar K.V   ext4: Retry block...
2752
2753
2754
2755
  		/* File system mounted not to panic on error
  		 * Fix the bitmap and repeat the block allocation
  		 * We leak some of the blocks here.
  		 */
955ce5f5b   Aneesh Kumar K.V   ext4: Convert ext...
2756
  		ext4_lock_group(sb, ac->ac_b_ex.fe_group);
c3e94d1df   Yongqiang Yang   ext4: let setup_n...
2757
2758
  		ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
  			      ac->ac_b_ex.fe_len);
955ce5f5b   Aneesh Kumar K.V   ext4: Convert ext...
2759
  		ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
0390131ba   Frank Mayhar   ext4: Allow ext4 ...
2760
  		err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
519deca04   Aneesh Kumar K.V   ext4: Retry block...
2761
2762
2763
  		if (!err)
  			err = -EAGAIN;
  		goto out_err;
c9de560de   Alex Tomas   ext4: Add multi b...
2764
  	}
955ce5f5b   Aneesh Kumar K.V   ext4: Convert ext...
2765
2766
  
  	ext4_lock_group(sb, ac->ac_b_ex.fe_group);
c9de560de   Alex Tomas   ext4: Add multi b...
2767
2768
2769
2770
2771
2772
2773
2774
2775
  #ifdef AGGRESSIVE_CHECK
  	{
  		int i;
  		for (i = 0; i < ac->ac_b_ex.fe_len; i++) {
  			BUG_ON(mb_test_bit(ac->ac_b_ex.fe_start + i,
  						bitmap_bh->b_data));
  		}
  	}
  #endif
c3e94d1df   Yongqiang Yang   ext4: let setup_n...
2776
2777
  	ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
  		      ac->ac_b_ex.fe_len);
c9de560de   Alex Tomas   ext4: Add multi b...
2778
2779
  	if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
  		gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
021b65bb1   Theodore Ts'o   ext4: Rename ext4...
2780
  		ext4_free_group_clusters_set(sb, gdp,
cff1dfd76   Theodore Ts'o   ext4: rename ext4...
2781
  					     ext4_free_clusters_after_init(sb,
021b65bb1   Theodore Ts'o   ext4: Rename ext4...
2782
  						ac->ac_b_ex.fe_group, gdp));
c9de560de   Alex Tomas   ext4: Add multi b...
2783
  	}
021b65bb1   Theodore Ts'o   ext4: Rename ext4...
2784
2785
  	len = ext4_free_group_clusters(sb, gdp) - ac->ac_b_ex.fe_len;
  	ext4_free_group_clusters_set(sb, gdp, len);
c9de560de   Alex Tomas   ext4: Add multi b...
2786
  	gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
955ce5f5b   Aneesh Kumar K.V   ext4: Convert ext...
2787
2788
  
  	ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
570426518   Theodore Ts'o   ext4: convert s_{...
2789
  	percpu_counter_sub(&sbi->s_freeclusters_counter, ac->ac_b_ex.fe_len);
d2a176379   Mingming Cao   ext4: delayed all...
2790
  	/*
6bc6e63fc   Aneesh Kumar K.V   ext4: Add percpu ...
2791
  	 * Now reduce the dirty block count also. Should not go negative
d2a176379   Mingming Cao   ext4: delayed all...
2792
  	 */
6bc6e63fc   Aneesh Kumar K.V   ext4: Add percpu ...
2793
2794
  	if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
  		/* release all the reserved blocks if non delalloc */
570426518   Theodore Ts'o   ext4: convert s_{...
2795
2796
  		percpu_counter_sub(&sbi->s_dirtyclusters_counter,
  				   reserv_clstrs);
c9de560de   Alex Tomas   ext4: Add multi b...
2797

772cb7c83   Jose R. Santos   ext4: New inode a...
2798
2799
2800
  	if (sbi->s_log_groups_per_flex) {
  		ext4_group_t flex_group = ext4_flex_group(sbi,
  							  ac->ac_b_ex.fe_group);
9f24e4208   Theodore Ts'o   ext4: Use atomic_...
2801
  		atomic_sub(ac->ac_b_ex.fe_len,
24aaa8ef4   Theodore Ts'o   ext4: convert the...
2802
  			   &sbi->s_flex_groups[flex_group].free_clusters);
772cb7c83   Jose R. Santos   ext4: New inode a...
2803
  	}
0390131ba   Frank Mayhar   ext4: Allow ext4 ...
2804
  	err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
c9de560de   Alex Tomas   ext4: Add multi b...
2805
2806
  	if (err)
  		goto out_err;
0390131ba   Frank Mayhar   ext4: Allow ext4 ...
2807
  	err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh);
c9de560de   Alex Tomas   ext4: Add multi b...
2808
2809
  
  out_err:
a0375156c   Theodore Ts'o   ext4: Clean up s_...
2810
  	ext4_mark_super_dirty(sb);
42a10add8   Aneesh Kumar K.V   ext4: Fix null bh...
2811
  	brelse(bitmap_bh);
c9de560de   Alex Tomas   ext4: Add multi b...
2812
2813
2814
2815
2816
  	return err;
  }
  
  /*
   * here we normalize request for locality group
d7a1fee13   Dan Ehrenberg   ext4: make the pr...
2817
2818
2819
   * Group request are normalized to s_mb_group_prealloc, which goes to
   * s_strip if we set the same via mount option.
   * s_mb_group_prealloc can be configured via
b713a5ec5   Theodore Ts'o   ext4: remove /pro...
2820
   * /sys/fs/ext4/<partition>/mb_group_prealloc
c9de560de   Alex Tomas   ext4: Add multi b...
2821
2822
2823
2824
2825
2826
2827
2828
2829
   *
   * XXX: should we try to preallocate more than the group has now?
   */
  static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
  {
  	struct super_block *sb = ac->ac_sb;
  	struct ext4_locality_group *lg = ac->ac_lg;
  
  	BUG_ON(lg == NULL);
d7a1fee13   Dan Ehrenberg   ext4: make the pr...
2830
  	ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
6ba495e92   Theodore Ts'o   ext4: Add configu...
2831
2832
  	mb_debug(1, "#%u: goal %u blocks for locality group
  ",
c9de560de   Alex Tomas   ext4: Add multi b...
2833
2834
2835
2836
2837
2838
2839
  		current->pid, ac->ac_g_ex.fe_len);
  }
  
  /*
   * Normalization means making request better in terms of
   * size and alignment
   */
4ddfef7b4   Eric Sandeen   ext4: reduce mbal...
2840
2841
  static noinline_for_stack void
  ext4_mb_normalize_request(struct ext4_allocation_context *ac,
c9de560de   Alex Tomas   ext4: Add multi b...
2842
2843
  				struct ext4_allocation_request *ar)
  {
53accfa9f   Theodore Ts'o   ext4: teach mball...
2844
  	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
c9de560de   Alex Tomas   ext4: Add multi b...
2845
2846
  	int bsbits, max;
  	ext4_lblk_t end;
c9de560de   Alex Tomas   ext4: Add multi b...
2847
  	loff_t size, orig_size, start_off;
5a0790c2c   Andi Kleen   ext4: remove init...
2848
  	ext4_lblk_t start;
c9de560de   Alex Tomas   ext4: Add multi b...
2849
  	struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
9a0762c5a   Aneesh Kumar K.V   ext4: Convert li...
2850
  	struct ext4_prealloc_space *pa;
c9de560de   Alex Tomas   ext4: Add multi b...
2851
2852
2853
2854
2855
2856
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
2874
  
  	/* do normalize only data requests, metadata requests
  	   do not need preallocation */
  	if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
  		return;
  
  	/* sometime caller may want exact blocks */
  	if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
  		return;
  
  	/* caller may indicate that preallocation isn't
  	 * required (it's a tail, for example) */
  	if (ac->ac_flags & EXT4_MB_HINT_NOPREALLOC)
  		return;
  
  	if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC) {
  		ext4_mb_normalize_group_request(ac);
  		return ;
  	}
  
  	bsbits = ac->ac_sb->s_blocksize_bits;
  
  	/* first, let's learn actual file size
  	 * given current request is allocated */
53accfa9f   Theodore Ts'o   ext4: teach mball...
2875
  	size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
c9de560de   Alex Tomas   ext4: Add multi b...
2876
2877
2878
  	size = size << bsbits;
  	if (size < i_size_read(ac->ac_inode))
  		size = i_size_read(ac->ac_inode);
5a0790c2c   Andi Kleen   ext4: remove init...
2879
  	orig_size = size;
c9de560de   Alex Tomas   ext4: Add multi b...
2880

1930479c4   Valerie Clement   ext4: mballoc fix...
2881
2882
  	/* max size of free chunks */
  	max = 2 << bsbits;
c9de560de   Alex Tomas   ext4: Add multi b...
2883

1930479c4   Valerie Clement   ext4: mballoc fix...
2884
2885
  #define NRL_CHECK_SIZE(req, size, max, chunk_size)	\
  		(req <= (size) || max <= (chunk_size))
c9de560de   Alex Tomas   ext4: Add multi b...
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
2898
2899
2900
2901
2902
2903
  
  	/* first, try to predict filesize */
  	/* XXX: should this table be tunable? */
  	start_off = 0;
  	if (size <= 16 * 1024) {
  		size = 16 * 1024;
  	} else if (size <= 32 * 1024) {
  		size = 32 * 1024;
  	} else if (size <= 64 * 1024) {
  		size = 64 * 1024;
  	} else if (size <= 128 * 1024) {
  		size = 128 * 1024;
  	} else if (size <= 256 * 1024) {
  		size = 256 * 1024;
  	} else if (size <= 512 * 1024) {
  		size = 512 * 1024;
  	} else if (size <= 1024 * 1024) {
  		size = 1024 * 1024;
1930479c4   Valerie Clement   ext4: mballoc fix...
2904
  	} else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) {
c9de560de   Alex Tomas   ext4: Add multi b...
2905
  		start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
1930479c4   Valerie Clement   ext4: mballoc fix...
2906
2907
2908
  						(21 - bsbits)) << 21;
  		size = 2 * 1024 * 1024;
  	} else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, 4 * 1024)) {
c9de560de   Alex Tomas   ext4: Add multi b...
2909
2910
2911
2912
  		start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
  							(22 - bsbits)) << 22;
  		size = 4 * 1024 * 1024;
  	} else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len,
1930479c4   Valerie Clement   ext4: mballoc fix...
2913
  					(8<<20)>>bsbits, max, 8 * 1024)) {
c9de560de   Alex Tomas   ext4: Add multi b...
2914
2915
2916
2917
2918
2919
2920
  		start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
  							(23 - bsbits)) << 23;
  		size = 8 * 1024 * 1024;
  	} else {
  		start_off = (loff_t)ac->ac_o_ex.fe_logical << bsbits;
  		size	  = ac->ac_o_ex.fe_len << bsbits;
  	}
5a0790c2c   Andi Kleen   ext4: remove init...
2921
2922
  	size = size >> bsbits;
  	start = start_off >> bsbits;
c9de560de   Alex Tomas   ext4: Add multi b...
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
  
  	/* don't cover already allocated blocks in selected range */
  	if (ar->pleft && start <= ar->lleft) {
  		size -= ar->lleft + 1 - start;
  		start = ar->lleft + 1;
  	}
  	if (ar->pright && start + size - 1 >= ar->lright)
  		size -= start + size - ar->lright;
  
  	end = start + size;
  
  	/* check we don't cross already preallocated blocks */
  	rcu_read_lock();
9a0762c5a   Aneesh Kumar K.V   ext4: Convert li...
2936
  	list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
498e5f241   Theodore Ts'o   ext4: Change unsi...
2937
  		ext4_lblk_t pa_end;
c9de560de   Alex Tomas   ext4: Add multi b...
2938

c9de560de   Alex Tomas   ext4: Add multi b...
2939
2940
2941
2942
2943
2944
2945
  		if (pa->pa_deleted)
  			continue;
  		spin_lock(&pa->pa_lock);
  		if (pa->pa_deleted) {
  			spin_unlock(&pa->pa_lock);
  			continue;
  		}
53accfa9f   Theodore Ts'o   ext4: teach mball...
2946
2947
  		pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
  						  pa->pa_len);
c9de560de   Alex Tomas   ext4: Add multi b...
2948
2949
2950
2951
  
  		/* PA must not overlap original request */
  		BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end ||
  			ac->ac_o_ex.fe_logical < pa->pa_lstart));
38877f4e8   Eric Sandeen   simplify some log...
2952
2953
  		/* skip PAs this normalized request doesn't overlap with */
  		if (pa->pa_lstart >= end || pa_end <= start) {
c9de560de   Alex Tomas   ext4: Add multi b...
2954
2955
2956
2957
  			spin_unlock(&pa->pa_lock);
  			continue;
  		}
  		BUG_ON(pa->pa_lstart <= start && pa_end >= end);
38877f4e8   Eric Sandeen   simplify some log...
2958
  		/* adjust start or end to be adjacent to this pa */
c9de560de   Alex Tomas   ext4: Add multi b...
2959
2960
2961
  		if (pa_end <= ac->ac_o_ex.fe_logical) {
  			BUG_ON(pa_end < start);
  			start = pa_end;
38877f4e8   Eric Sandeen   simplify some log...
2962
  		} else if (pa->pa_lstart > ac->ac_o_ex.fe_logical) {
c9de560de   Alex Tomas   ext4: Add multi b...
2963
2964
2965
2966
2967
2968
2969
2970
2971
2972
  			BUG_ON(pa->pa_lstart > end);
  			end = pa->pa_lstart;
  		}
  		spin_unlock(&pa->pa_lock);
  	}
  	rcu_read_unlock();
  	size = end - start;
  
  	/* XXX: extra loop to check we really don't overlap preallocations */
  	rcu_read_lock();
9a0762c5a   Aneesh Kumar K.V   ext4: Convert li...
2973
  	list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
498e5f241   Theodore Ts'o   ext4: Change unsi...
2974
  		ext4_lblk_t pa_end;
53accfa9f   Theodore Ts'o   ext4: teach mball...
2975

c9de560de   Alex Tomas   ext4: Add multi b...
2976
2977
  		spin_lock(&pa->pa_lock);
  		if (pa->pa_deleted == 0) {
53accfa9f   Theodore Ts'o   ext4: teach mball...
2978
2979
  			pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
  							  pa->pa_len);
c9de560de   Alex Tomas   ext4: Add multi b...
2980
2981
2982
2983
2984
2985
2986
2987
  			BUG_ON(!(start >= pa_end || end <= pa->pa_lstart));
  		}
  		spin_unlock(&pa->pa_lock);
  	}
  	rcu_read_unlock();
  
  	if (start + size <= ac->ac_o_ex.fe_logical &&
  			start > ac->ac_o_ex.fe_logical) {
9d8b9ec44   Theodore Ts'o   ext4: use ext4_ms...
2988
2989
2990
2991
  		ext4_msg(ac->ac_sb, KERN_ERR,
  			 "start %lu, size %lu, fe_logical %lu",
  			 (unsigned long) start, (unsigned long) size,
  			 (unsigned long) ac->ac_o_ex.fe_logical);
c9de560de   Alex Tomas   ext4: Add multi b...
2992
2993
2994
  	}
  	BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
  			start > ac->ac_o_ex.fe_logical);
7137d7a48   Theodore Ts'o   ext4: convert ins...
2995
  	BUG_ON(size <= 0 || size > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
c9de560de   Alex Tomas   ext4: Add multi b...
2996
2997
2998
2999
3000
3001
  
  	/* now prepare goal request */
  
  	/* XXX: is it better to align blocks WRT to logical
  	 * placement or satisfy big request as is */
  	ac->ac_g_ex.fe_logical = start;
53accfa9f   Theodore Ts'o   ext4: teach mball...
3002
  	ac->ac_g_ex.fe_len = EXT4_NUM_B2C(sbi, size);
c9de560de   Alex Tomas   ext4: Add multi b...
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
  
  	/* define goal start in order to merge */
  	if (ar->pright && (ar->lright == (start + size))) {
  		/* merge to the right */
  		ext4_get_group_no_and_offset(ac->ac_sb, ar->pright - size,
  						&ac->ac_f_ex.fe_group,
  						&ac->ac_f_ex.fe_start);
  		ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
  	}
  	if (ar->pleft && (ar->lleft + 1 == start)) {
  		/* merge to the left */
  		ext4_get_group_no_and_offset(ac->ac_sb, ar->pleft + 1,
  						&ac->ac_f_ex.fe_group,
  						&ac->ac_f_ex.fe_start);
  		ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
  	}
6ba495e92   Theodore Ts'o   ext4: Add configu...
3019
3020
  	mb_debug(1, "goal: %u(was %u) blocks at %u
  ", (unsigned) size,
c9de560de   Alex Tomas   ext4: Add multi b...
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
  		(unsigned) orig_size, (unsigned) start);
  }
  
  static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
  {
  	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
  
  	if (sbi->s_mb_stats && ac->ac_g_ex.fe_len > 1) {
  		atomic_inc(&sbi->s_bal_reqs);
  		atomic_add(ac->ac_b_ex.fe_len, &sbi->s_bal_allocated);
291dae472   Curt Wohlgemuth   ext4: Fix for ext...
3031
  		if (ac->ac_b_ex.fe_len >= ac->ac_o_ex.fe_len)
c9de560de   Alex Tomas   ext4: Add multi b...
3032
3033
3034
3035
3036
3037
3038
3039
  			atomic_inc(&sbi->s_bal_success);
  		atomic_add(ac->ac_found, &sbi->s_bal_ex_scanned);
  		if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start &&
  				ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group)
  			atomic_inc(&sbi->s_bal_goals);
  		if (ac->ac_found > sbi->s_mb_max_to_scan)
  			atomic_inc(&sbi->s_bal_breaks);
  	}
296c355cd   Theodore Ts'o   ext4: Use tracepo...
3040
3041
3042
3043
  	if (ac->ac_op == EXT4_MB_HISTORY_ALLOC)
  		trace_ext4_mballoc_alloc(ac);
  	else
  		trace_ext4_mballoc_prealloc(ac);
c9de560de   Alex Tomas   ext4: Add multi b...
3044
3045
3046
  }
  
  /*
b844167ed   Curt Wohlgemuth   ext4: remove bloc...
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064
   * Called on failure; free up any blocks from the inode PA for this
   * context.  We don't need this for MB_GROUP_PA because we only change
   * pa_free in ext4_mb_release_context(), but on failure, we've already
   * zeroed out ac->ac_b_ex.fe_len, so group_pa->pa_free is not changed.
   */
  static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
  {
  	struct ext4_prealloc_space *pa = ac->ac_pa;
  	int len;
  
  	if (pa && pa->pa_type == MB_INODE_PA) {
  		len = ac->ac_b_ex.fe_len;
  		pa->pa_free += len;
  	}
  
  }
  
  /*
   * use blocks preallocated to inode
   *
   * The physical start is derived from the offset of the requested
   * logical block inside the preallocation.  The range is cut at the end
   * of the preallocation, recorded in ac->ac_b_ex, and its length is
   * subtracted from pa->pa_free.  The context is marked AC_STATUS_FOUND
   * and remembers @pa in ac->ac_pa.
   */
  static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
  				struct ext4_prealloc_space *pa)
  {
  	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
  	ext4_fsblk_t first;
  	ext4_fsblk_t pa_limit;
  	ext4_fsblk_t last;
  	int nr;

  	/* found preallocated blocks, use them */
  	first = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart);

  	/* stop at whichever comes first: end of the PA or of the request */
  	pa_limit = pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len);
  	last = first + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
  	if (pa_limit < last)
  		last = pa_limit;

  	nr = EXT4_NUM_B2C(sbi, last - first);
  	ext4_get_group_no_and_offset(ac->ac_sb, first, &ac->ac_b_ex.fe_group,
  					&ac->ac_b_ex.fe_start);
  	ac->ac_b_ex.fe_len = nr;
  	ac->ac_status = AC_STATUS_FOUND;
  	ac->ac_pa = pa;

  	BUG_ON(first < pa->pa_pstart);
  	BUG_ON(last > pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len));
  	BUG_ON(pa->pa_free < nr);
  	pa->pa_free -= nr;

  	mb_debug(1, "use %llu/%u from inode pa %p\n", first, nr, pa);
  }
  
  /*
   * use blocks preallocated to locality group
   */
  static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
  				struct ext4_prealloc_space *pa)
  {
03cddb80e   Aneesh Kumar K.V   ext4: Fix use of ...
3100
  	unsigned int len = ac->ac_o_ex.fe_len;
6be2ded1d   Aneesh Kumar K.V   ext4: Don't allow...
3101

c9de560de   Alex Tomas   ext4: Add multi b...
3102
3103
3104
3105
3106
3107
3108
3109
  	ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart,
  					&ac->ac_b_ex.fe_group,
  					&ac->ac_b_ex.fe_start);
  	ac->ac_b_ex.fe_len = len;
  	ac->ac_status = AC_STATUS_FOUND;
  	ac->ac_pa = pa;
  
  	/* we don't correct pa_pstart or pa_plen here to avoid
26346ff68   Aneesh Kumar K.V   ext4: Don't panic...
3110
  	 * possible race when the group is being loaded concurrently
c9de560de   Alex Tomas   ext4: Add multi b...
3111
  	 * instead we correct pa later, after blocks are marked
26346ff68   Aneesh Kumar K.V   ext4: Don't panic...
3112
3113
  	 * in on-disk bitmap -- see ext4_mb_release_context()
  	 * Other CPUs are prevented from allocating from this pa by lg_mutex
c9de560de   Alex Tomas   ext4: Add multi b...
3114
  	 */
6ba495e92   Theodore Ts'o   ext4: Add configu...
3115
3116
  	mb_debug(1, "use %u/%u from group pa %p
  ", pa->pa_lstart-len, len, pa);
c9de560de   Alex Tomas   ext4: Add multi b...
3117
3118
3119
  }
  
  /*
   * Return the prealloc space that have minimal distance
   * from the goal block. @cpa is the prealloc
   * space that is having currently known minimal distance
   * from the goal block.
   *
   * @goal_block: physical block the caller would like to be close to
   * @pa:         candidate preallocation
   * @cpa:        best candidate found so far, or NULL if there is none
   *
   * The returned preallocation carries one reference (pa_count) taken on
   * behalf of the caller: when @pa replaces @cpa the reference on @cpa is
   * dropped and one on @pa is taken; when @cpa is kept nothing changes.
   * On a tie the already selected @cpa wins.
   */
  static struct ext4_prealloc_space *
  ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
  			struct ext4_prealloc_space *pa,
  			struct ext4_prealloc_space *cpa)
  {
  	ext4_fsblk_t cur_distance, new_distance;

  	if (cpa == NULL) {
  		atomic_inc(&pa->pa_count);
  		return pa;
  	}

  	/*
  	 * Block numbers are unsigned 64-bit values, so compute the
  	 * distance by subtracting the smaller from the larger.  Feeding
  	 * the wrapped difference to abs() depends on how that macro
  	 * treats unsigned and 64-bit operands.
  	 */
  	if (goal_block > cpa->pa_pstart)
  		cur_distance = goal_block - cpa->pa_pstart;
  	else
  		cur_distance = cpa->pa_pstart - goal_block;

  	if (goal_block > pa->pa_pstart)
  		new_distance = goal_block - pa->pa_pstart;
  	else
  		new_distance = pa->pa_pstart - goal_block;

  	if (cur_distance <= new_distance)
  		return cpa;

  	/* drop the previous reference */
  	atomic_dec(&cpa->pa_count);
  	atomic_inc(&pa->pa_count);
  	return pa;
  }
  
  /*
c9de560de   Alex Tomas   ext4: Add multi b...
3148
3149
   * search goal blocks in preallocated space
   */
4ddfef7b4   Eric Sandeen   ext4: reduce mbal...
3150
3151
  static noinline_for_stack int
  ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
c9de560de   Alex Tomas   ext4: Add multi b...
3152
  {
53accfa9f   Theodore Ts'o   ext4: teach mball...
3153
  	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
6be2ded1d   Aneesh Kumar K.V   ext4: Don't allow...
3154
  	int order, i;
c9de560de   Alex Tomas   ext4: Add multi b...
3155
3156
  	struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
  	struct ext4_locality_group *lg;
5e745b041   Aneesh Kumar K.V   ext4: Fix small f...
3157
3158
  	struct ext4_prealloc_space *pa, *cpa = NULL;
  	ext4_fsblk_t goal_block;
c9de560de   Alex Tomas   ext4: Add multi b...
3159
3160
3161
3162
3163
3164
3165
  
  	/* only data can be preallocated */
  	if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
  		return 0;
  
  	/* first, try per-file preallocation */
  	rcu_read_lock();
9a0762c5a   Aneesh Kumar K.V   ext4: Convert li...
3166
  	list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
c9de560de   Alex Tomas   ext4: Add multi b...
3167
3168
3169
3170
  
  		/* all fields in this condition don't change,
  		 * so we can skip locking for them */
  		if (ac->ac_o_ex.fe_logical < pa->pa_lstart ||
53accfa9f   Theodore Ts'o   ext4: teach mball...
3171
3172
  		    ac->ac_o_ex.fe_logical >= (pa->pa_lstart +
  					       EXT4_C2B(sbi, pa->pa_len)))
c9de560de   Alex Tomas   ext4: Add multi b...
3173
  			continue;
fb0a387dc   Eric Sandeen   ext4: limit block...
3174
  		/* non-extent files can't have physical blocks past 2^32 */
12e9b8920   Dmitry Monakhov   ext4: Use bitops ...
3175
  		if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) &&
53accfa9f   Theodore Ts'o   ext4: teach mball...
3176
3177
  		    (pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len) >
  		     EXT4_MAX_BLOCK_FILE_PHYS))
fb0a387dc   Eric Sandeen   ext4: limit block...
3178
  			continue;
c9de560de   Alex Tomas   ext4: Add multi b...
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200
  		/* found preallocated blocks, use them */
  		spin_lock(&pa->pa_lock);
  		if (pa->pa_deleted == 0 && pa->pa_free) {
  			atomic_inc(&pa->pa_count);
  			ext4_mb_use_inode_pa(ac, pa);
  			spin_unlock(&pa->pa_lock);
  			ac->ac_criteria = 10;
  			rcu_read_unlock();
  			return 1;
  		}
  		spin_unlock(&pa->pa_lock);
  	}
  	rcu_read_unlock();
  
  	/* can we use group allocation? */
  	if (!(ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC))
  		return 0;
  
  	/* inode may have no locality group for some reason */
  	lg = ac->ac_lg;
  	if (lg == NULL)
  		return 0;
6be2ded1d   Aneesh Kumar K.V   ext4: Don't allow...
3201
3202
3203
3204
  	order  = fls(ac->ac_o_ex.fe_len) - 1;
  	if (order > PREALLOC_TB_SIZE - 1)
  		/* The max size of hash table is PREALLOC_TB_SIZE */
  		order = PREALLOC_TB_SIZE - 1;
bda00de7e   Akinobu Mita   ext4: cleanup to ...
3205
  	goal_block = ext4_grp_offs_to_block(ac->ac_sb, &ac->ac_g_ex);
5e745b041   Aneesh Kumar K.V   ext4: Fix small f...
3206
3207
3208
3209
  	/*
  	 * search for the prealloc space that is having
  	 * minimal distance from the goal block.
  	 */
6be2ded1d   Aneesh Kumar K.V   ext4: Don't allow...
3210
3211
3212
3213
3214
3215
3216
  	for (i = order; i < PREALLOC_TB_SIZE; i++) {
  		rcu_read_lock();
  		list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i],
  					pa_inode_list) {
  			spin_lock(&pa->pa_lock);
  			if (pa->pa_deleted == 0 &&
  					pa->pa_free >= ac->ac_o_ex.fe_len) {
5e745b041   Aneesh Kumar K.V   ext4: Fix small f...
3217
3218
3219
  
  				cpa = ext4_mb_check_group_pa(goal_block,
  								pa, cpa);
6be2ded1d   Aneesh Kumar K.V   ext4: Don't allow...
3220
  			}
c9de560de   Alex Tomas   ext4: Add multi b...
3221
  			spin_unlock(&pa->pa_lock);
c9de560de   Alex Tomas   ext4: Add multi b...
3222
  		}
6be2ded1d   Aneesh Kumar K.V   ext4: Don't allow...
3223
  		rcu_read_unlock();
c9de560de   Alex Tomas   ext4: Add multi b...
3224
  	}
5e745b041   Aneesh Kumar K.V   ext4: Fix small f...
3225
3226
3227
3228
3229
  	if (cpa) {
  		ext4_mb_use_group_pa(ac, cpa);
  		ac->ac_criteria = 20;
  		return 1;
  	}
c9de560de   Alex Tomas   ext4: Add multi b...
3230
3231
3232
3233
  	return 0;
  }
  
  /*
   * the function goes through all block freed in the group
   * but not yet committed and marks them used in in-core bitmap.
   * buddy must be generated from this bitmap
   * Need to be called with the ext4 group lock held
   *
   * Every entry of the group's bb_free_root tree contributes the range
   * [start_cluster, start_cluster + count) to @bitmap.
   */
  static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
  						ext4_group_t group)
  {
  	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
  	struct ext4_free_data *entry;
  	struct rb_node *n;

  	for (n = rb_first(&(grp->bb_free_root)); n; n = rb_next(n)) {
  		entry = rb_entry(n, struct ext4_free_data, node);
  		ext4_set_bits(bitmap, entry->start_cluster, entry->count);
  	}
  }
  
  /*
c9de560de   Alex Tomas   ext4: Add multi b...
3258
3259
   * the function goes through all preallocation in this group and marks them
   * used in in-core bitmap. buddy must be generated from this bitmap
955ce5f5b   Aneesh Kumar K.V   ext4: Convert ext...
3260
   * Need to be called with ext4 group lock held
c9de560de   Alex Tomas   ext4: Add multi b...
3261
   */
089ceecc1   Eric Sandeen   ext4: mark severa...
3262
3263
  static noinline_for_stack
  void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
c9de560de   Alex Tomas   ext4: Add multi b...
3264
3265
3266
3267
3268
3269
3270
3271
  					ext4_group_t group)
  {
  	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
  	struct ext4_prealloc_space *pa;
  	struct list_head *cur;
  	ext4_group_t groupnr;
  	ext4_grpblk_t start;
  	int preallocated = 0;
c9de560de   Alex Tomas   ext4: Add multi b...
3272
3273
3274
3275
3276
3277
3278
3279
3280
3281
3282
3283
3284
3285
3286
3287
3288
3289
3290
3291
  	int len;
  
  	/* all form of preallocation discards first load group,
  	 * so the only competing code is preallocation use.
  	 * we don't need any locking here
  	 * notice we do NOT ignore preallocations with pa_deleted
  	 * otherwise we could leave used blocks available for
  	 * allocation in buddy when concurrent ext4_mb_put_pa()
  	 * is dropping preallocation
  	 */
  	list_for_each(cur, &grp->bb_prealloc_list) {
  		pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
  		spin_lock(&pa->pa_lock);
  		ext4_get_group_no_and_offset(sb, pa->pa_pstart,
  					     &groupnr, &start);
  		len = pa->pa_len;
  		spin_unlock(&pa->pa_lock);
  		if (unlikely(len == 0))
  			continue;
  		BUG_ON(groupnr != group);
c3e94d1df   Yongqiang Yang   ext4: let setup_n...
3292
  		ext4_set_bits(bitmap, start, len);
c9de560de   Alex Tomas   ext4: Add multi b...
3293
  		preallocated += len;
c9de560de   Alex Tomas   ext4: Add multi b...
3294
  	}
6ba495e92   Theodore Ts'o   ext4: Add configu...
3295
3296
  	mb_debug(1, "prellocated %u for group %u
  ", preallocated, group);
c9de560de   Alex Tomas   ext4: Add multi b...
3297
3298
3299
3300
3301
3302
3303
3304
3305
3306
3307
3308
3309
3310
3311
3312
  }
  
  /*
   * RCU callback: recover the preallocation descriptor that embeds @head
   * (as u.pa_rcu) and return it to ext4_pspace_cachep.
   */
  static void ext4_mb_pa_callback(struct rcu_head *head)
  {
  	struct ext4_prealloc_space *pa =
  		container_of(head, struct ext4_prealloc_space, u.pa_rcu);

  	kmem_cache_free(ext4_pspace_cachep, pa);
  }
  
  /*
   * drops a reference to preallocated space descriptor
   * if this was the last reference and the space is consumed
   */
  static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
  			struct super_block *sb, struct ext4_prealloc_space *pa)
  {
a9df9a491   Theodore Ts'o   ext4: Make ext4_g...
3313
  	ext4_group_t grp;
d33a1976f   Eric Sandeen   ext4: fix bb_prea...
3314
  	ext4_fsblk_t grp_blk;
c9de560de   Alex Tomas   ext4: Add multi b...
3315
3316
3317
3318
3319
3320
3321
3322
3323
3324
3325
3326
3327
  
  	if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0)
  		return;
  
  	/* in this short window concurrent discard can set pa_deleted */
  	spin_lock(&pa->pa_lock);
  	if (pa->pa_deleted == 1) {
  		spin_unlock(&pa->pa_lock);
  		return;
  	}
  
  	pa->pa_deleted = 1;
  	spin_unlock(&pa->pa_lock);
d33a1976f   Eric Sandeen   ext4: fix bb_prea...
3328
  	grp_blk = pa->pa_pstart;
60e6679e2   Theodore Ts'o   ext4: Drop whites...
3329
  	/*
cc0fb9ad7   Aneesh Kumar K.V   ext4: Rename pa_l...
3330
3331
3332
3333
  	 * If doing group-based preallocation, pa_pstart may be in the
  	 * next group when pa is used up
  	 */
  	if (pa->pa_type == MB_GROUP_PA)
d33a1976f   Eric Sandeen   ext4: fix bb_prea...
3334
3335
3336
  		grp_blk--;
  
  	ext4_get_group_no_and_offset(sb, grp_blk, &grp, NULL);
c9de560de   Alex Tomas   ext4: Add multi b...
3337
3338
3339
3340
3341
3342
3343
3344
3345
3346
3347
3348
3349
3350
3351
3352
3353
3354
3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
  
  	/*
  	 * possible race:
  	 *
  	 *  P1 (buddy init)			P2 (regular allocation)
  	 *					find block B in PA
  	 *  copy on-disk bitmap to buddy
  	 *  					mark B in on-disk bitmap
  	 *					drop PA from group
  	 *  mark all PAs in buddy
  	 *
  	 * thus, P1 initializes buddy with B available. to prevent this
  	 * we make "copy" and "mark all PAs" atomic and serialize "drop PA"
  	 * against that pair
  	 */
  	ext4_lock_group(sb, grp);
  	list_del(&pa->pa_group_list);
  	ext4_unlock_group(sb, grp);
  
  	spin_lock(pa->pa_obj_lock);
  	list_del_rcu(&pa->pa_inode_list);
  	spin_unlock(pa->pa_obj_lock);
  
  	call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
  }
  
  /*
   * creates new preallocated space for given inode
   */
4ddfef7b4   Eric Sandeen   ext4: reduce mbal...
3366
3367
  static noinline_for_stack int
  ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
c9de560de   Alex Tomas   ext4: Add multi b...
3368
3369
  {
  	struct super_block *sb = ac->ac_sb;
53accfa9f   Theodore Ts'o   ext4: teach mball...
3370
  	struct ext4_sb_info *sbi = EXT4_SB(sb);
c9de560de   Alex Tomas   ext4: Add multi b...
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393
3394
3395
3396
3397
3398
3399
3400
3401
  	struct ext4_prealloc_space *pa;
  	struct ext4_group_info *grp;
  	struct ext4_inode_info *ei;
  
  	/* preallocate only when found space is larger then requested */
  	BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
  	BUG_ON(ac->ac_status != AC_STATUS_FOUND);
  	BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
  
  	pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS);
  	if (pa == NULL)
  		return -ENOMEM;
  
  	if (ac->ac_b_ex.fe_len < ac->ac_g_ex.fe_len) {
  		int winl;
  		int wins;
  		int win;
  		int offs;
  
  		/* we can't allocate as much as normalizer wants.
  		 * so, found space must get proper lstart
  		 * to cover original request */
  		BUG_ON(ac->ac_g_ex.fe_logical > ac->ac_o_ex.fe_logical);
  		BUG_ON(ac->ac_g_ex.fe_len < ac->ac_o_ex.fe_len);
  
  		/* we're limited by original request in that
  		 * logical block must be covered any way
  		 * winl is window we can move our chunk within */
  		winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical;
  
  		/* also, we should cover whole original request */
53accfa9f   Theodore Ts'o   ext4: teach mball...
3402
  		wins = EXT4_C2B(sbi, ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len);
c9de560de   Alex Tomas   ext4: Add multi b...
3403
3404
3405
  
  		/* the smallest one defines real window */
  		win = min(winl, wins);
53accfa9f   Theodore Ts'o   ext4: teach mball...
3406
3407
  		offs = ac->ac_o_ex.fe_logical %
  			EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
c9de560de   Alex Tomas   ext4: Add multi b...
3408
3409
  		if (offs && offs < win)
  			win = offs;
53accfa9f   Theodore Ts'o   ext4: teach mball...
3410
3411
  		ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical -
  			EXT4_B2C(sbi, win);
c9de560de   Alex Tomas   ext4: Add multi b...
3412
3413
3414
3415
3416
3417
3418
3419
3420
3421
3422
3423
3424
3425
  		BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical);
  		BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len);
  	}
  
  	/* preallocation can change ac_b_ex, thus we store actually
  	 * allocated blocks for history */
  	ac->ac_f_ex = ac->ac_b_ex;
  
  	pa->pa_lstart = ac->ac_b_ex.fe_logical;
  	pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
  	pa->pa_len = ac->ac_b_ex.fe_len;
  	pa->pa_free = pa->pa_len;
  	atomic_set(&pa->pa_count, 1);
  	spin_lock_init(&pa->pa_lock);
d794bf8e0   Aneesh Kumar K.V   ext4: Initialize ...
3426
3427
  	INIT_LIST_HEAD(&pa->pa_inode_list);
  	INIT_LIST_HEAD(&pa->pa_group_list);
c9de560de   Alex Tomas   ext4: Add multi b...
3428
  	pa->pa_deleted = 0;
cc0fb9ad7   Aneesh Kumar K.V   ext4: Rename pa_l...
3429
  	pa->pa_type = MB_INODE_PA;
c9de560de   Alex Tomas   ext4: Add multi b...
3430

6ba495e92   Theodore Ts'o   ext4: Add configu...
3431
3432
  	mb_debug(1, "new inode pa %p: %llu/%u for %u
  ", pa,
c9de560de   Alex Tomas   ext4: Add multi b...
3433
  			pa->pa_pstart, pa->pa_len, pa->pa_lstart);
9bffad1ed   Theodore Ts'o   ext4: convert ins...
3434
  	trace_ext4_mb_new_inode_pa(ac, pa);
c9de560de   Alex Tomas   ext4: Add multi b...
3435
3436
  
  	ext4_mb_use_inode_pa(ac, pa);
53accfa9f   Theodore Ts'o   ext4: teach mball...
3437
  	atomic_add(pa->pa_free, &sbi->s_mb_preallocated);
c9de560de   Alex Tomas   ext4: Add multi b...
3438
3439
3440
3441
3442
3443
3444
3445
3446
3447
3448
3449
3450
3451
3452
3453
3454
3455
3456
3457
3458
  
  	ei = EXT4_I(ac->ac_inode);
  	grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
  
  	pa->pa_obj_lock = &ei->i_prealloc_lock;
  	pa->pa_inode = ac->ac_inode;
  
  	ext4_lock_group(sb, ac->ac_b_ex.fe_group);
  	list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
  	ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
  
  	spin_lock(pa->pa_obj_lock);
  	list_add_rcu(&pa->pa_inode_list, &ei->i_prealloc_list);
  	spin_unlock(pa->pa_obj_lock);
  
  	return 0;
  }
  
  /*
   * creates new preallocated space for locality group inodes belongs to
   */
4ddfef7b4   Eric Sandeen   ext4: reduce mbal...
3459
3460
  static noinline_for_stack int
  ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
c9de560de   Alex Tomas   ext4: Add multi b...
3461
3462
3463
3464
3465
3466
3467
3468
3469
3470
3471
3472
3473
3474
3475
3476
3477
3478
3479
3480
3481
3482
3483
3484
3485
3486
  {
  	struct super_block *sb = ac->ac_sb;
  	struct ext4_locality_group *lg;
  	struct ext4_prealloc_space *pa;
  	struct ext4_group_info *grp;
  
  	/* preallocate only when found space is larger then requested */
  	BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
  	BUG_ON(ac->ac_status != AC_STATUS_FOUND);
  	BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
  
  	BUG_ON(ext4_pspace_cachep == NULL);
  	pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS);
  	if (pa == NULL)
  		return -ENOMEM;
  
  	/* preallocation can change ac_b_ex, thus we store actually
  	 * allocated blocks for history */
  	ac->ac_f_ex = ac->ac_b_ex;
  
  	pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
  	pa->pa_lstart = pa->pa_pstart;
  	pa->pa_len = ac->ac_b_ex.fe_len;
  	pa->pa_free = pa->pa_len;
  	atomic_set(&pa->pa_count, 1);
  	spin_lock_init(&pa->pa_lock);
6be2ded1d   Aneesh Kumar K.V   ext4: Don't allow...
3487
  	INIT_LIST_HEAD(&pa->pa_inode_list);
d794bf8e0   Aneesh Kumar K.V   ext4: Initialize ...
3488
  	INIT_LIST_HEAD(&pa->pa_group_list);
c9de560de   Alex Tomas   ext4: Add multi b...
3489
  	pa->pa_deleted = 0;
cc0fb9ad7   Aneesh Kumar K.V   ext4: Rename pa_l...
3490
  	pa->pa_type = MB_GROUP_PA;
c9de560de   Alex Tomas   ext4: Add multi b...
3491

6ba495e92   Theodore Ts'o   ext4: Add configu...
3492
3493
  	mb_debug(1, "new group pa %p: %llu/%u for %u
  ", pa,
9bffad1ed   Theodore Ts'o   ext4: convert ins...
3494
3495
  			pa->pa_pstart, pa->pa_len, pa->pa_lstart);
  	trace_ext4_mb_new_group_pa(ac, pa);
c9de560de   Alex Tomas   ext4: Add multi b...
3496
3497
3498
3499
3500
3501
3502
3503
3504
3505
3506
3507
3508
3509
  
  	ext4_mb_use_group_pa(ac, pa);
  	atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
  
  	grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
  	lg = ac->ac_lg;
  	BUG_ON(lg == NULL);
  
  	pa->pa_obj_lock = &lg->lg_prealloc_lock;
  	pa->pa_inode = NULL;
  
  	ext4_lock_group(sb, ac->ac_b_ex.fe_group);
  	list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
  	ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
6be2ded1d   Aneesh Kumar K.V   ext4: Don't allow...
3510
3511
3512
3513
  	/*
  	 * We will later add the new pa to the right bucket
  	 * after updating the pa_free in ext4_mb_release_context
  	 */
c9de560de   Alex Tomas   ext4: Add multi b...
3514
3515
3516
3517
3518
3519
3520
3521
3522
3523
3524
3525
3526
3527
3528
3529
3530
3531
3532
3533
3534
3535
  	return 0;
  }
  
  /*
   * Builds a preallocation for the extent just found: a locality-group PA
   * when EXT4_MB_HINT_GROUP_ALLOC is set in ac->ac_flags, an inode PA
   * otherwise.  Returns whatever the selected constructor returns.
   */
  static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
  {
  	if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
  		return ext4_mb_new_group_pa(ac);

  	return ext4_mb_new_inode_pa(ac);
  }
  
  /*
   * finds all unused blocks in on-disk bitmap, frees them in
   * in-core bitmap and buddy.
   * @pa must be unlinked from inode and group lists, so that
   * nobody else can find/use it.
   * the caller MUST hold group/inode locks.
   * TODO: optimize the case when there are no in-core structures yet
   */
4ddfef7b4   Eric Sandeen   ext4: reduce mbal...
3536
3537
  static noinline_for_stack int
  ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3e1e5f501   Eric Sandeen   ext4: don't use e...
3538
  			struct ext4_prealloc_space *pa)
c9de560de   Alex Tomas   ext4: Add multi b...
3539
  {
c9de560de   Alex Tomas   ext4: Add multi b...
3540
3541
  	struct super_block *sb = e4b->bd_sb;
  	struct ext4_sb_info *sbi = EXT4_SB(sb);
498e5f241   Theodore Ts'o   ext4: Change unsi...
3542
3543
  	unsigned int end;
  	unsigned int next;
c9de560de   Alex Tomas   ext4: Add multi b...
3544
3545
  	ext4_group_t group;
  	ext4_grpblk_t bit;
ba80b1019   Theodore Ts'o   ext4: Add markers...
3546
  	unsigned long long grp_blk_start;
c9de560de   Alex Tomas   ext4: Add multi b...
3547
3548
3549
3550
3551
  	int err = 0;
  	int free = 0;
  
  	BUG_ON(pa->pa_deleted == 0);
  	ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
53accfa9f   Theodore Ts'o   ext4: teach mball...
3552
  	grp_blk_start = pa->pa_pstart - EXT4_C2B(sbi, bit);
c9de560de   Alex Tomas   ext4: Add multi b...
3553
3554
  	BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
  	end = bit + pa->pa_len;
c9de560de   Alex Tomas   ext4: Add multi b...
3555
  	while (bit < end) {
ffad0a44b   Aneesh Kumar K.V   ext4: ext4_find_n...
3556
  		bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit);
c9de560de   Alex Tomas   ext4: Add multi b...
3557
3558
  		if (bit >= end)
  			break;
ffad0a44b   Aneesh Kumar K.V   ext4: ext4_find_n...
3559
  		next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
6ba495e92   Theodore Ts'o   ext4: Add configu...
3560
3561
  		mb_debug(1, "    free preallocated %u/%u in group %u
  ",
5a0790c2c   Andi Kleen   ext4: remove init...
3562
3563
  			 (unsigned) ext4_group_first_block_no(sb, group) + bit,
  			 (unsigned) next - bit, (unsigned) group);
c9de560de   Alex Tomas   ext4: Add multi b...
3564
  		free += next - bit;
3e1e5f501   Eric Sandeen   ext4: don't use e...
3565
  		trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit);
53accfa9f   Theodore Ts'o   ext4: teach mball...
3566
3567
  		trace_ext4_mb_release_inode_pa(pa, (grp_blk_start +
  						    EXT4_C2B(sbi, bit)),
a9c667f8f   Lukas Czerner   ext4: fixed trace...
3568
  					       next - bit);
c9de560de   Alex Tomas   ext4: Add multi b...
3569
3570
3571
3572
  		mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
  		bit = next + 1;
  	}
  	if (free != pa->pa_free) {
9d8b9ec44   Theodore Ts'o   ext4: use ext4_ms...
3573
3574
3575
3576
3577
  		ext4_msg(e4b->bd_sb, KERN_CRIT,
  			 "pa %p: logic %lu, phys. %lu, len %lu",
  			 pa, (unsigned long) pa->pa_lstart,
  			 (unsigned long) pa->pa_pstart,
  			 (unsigned long) pa->pa_len);
e29136f80   Theodore Ts'o   ext4: Enhance ext...
3578
  		ext4_grp_locked_error(sb, group, 0, 0, "free %u, pa_free %u",
5d1b1b3f4   Aneesh Kumar K.V   ext4: fix BUG whe...
3579
  					free, pa->pa_free);
e56eb6590   Aneesh Kumar K.V   ext4: Don't claim...
3580
3581
3582
3583
  		/*
  		 * pa is already deleted so we use the value obtained
  		 * from the bitmap and continue.
  		 */
c9de560de   Alex Tomas   ext4: Add multi b...
3584
  	}
c9de560de   Alex Tomas   ext4: Add multi b...
3585
3586
3587
3588
  	atomic_add(free, &sbi->s_mb_discarded);
  
  	return err;
  }
4ddfef7b4   Eric Sandeen   ext4: reduce mbal...
3589
3590
  static noinline_for_stack int
  ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3e1e5f501   Eric Sandeen   ext4: don't use e...
3591
  				struct ext4_prealloc_space *pa)
c9de560de   Alex Tomas   ext4: Add multi b...
3592
  {
c9de560de   Alex Tomas   ext4: Add multi b...
3593
3594
3595
  	struct super_block *sb = e4b->bd_sb;
  	ext4_group_t group;
  	ext4_grpblk_t bit;
60e07cf51   Yongqiang Yang   ext4: do not refe...
3596
  	trace_ext4_mb_release_group_pa(sb, pa);
c9de560de   Alex Tomas   ext4: Add multi b...
3597
3598
3599
3600
3601
  	BUG_ON(pa->pa_deleted == 0);
  	ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
  	BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
  	mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
  	atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
3e1e5f501   Eric Sandeen   ext4: don't use e...
3602
  	trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len);
c9de560de   Alex Tomas   ext4: Add multi b...
3603
3604
3605
3606
3607
3608
3609
3610
3611
3612
3613
3614
3615
  
  	return 0;
  }
  
  /*
   * releases all preallocations in given group
   *
   * first, we need to decide discard policy:
   * - when do we discard
   *   1) ENOSPC
   * - how many do we discard
   *   1) how many requested
   */
4ddfef7b4   Eric Sandeen   ext4: reduce mbal...
3616
3617
  static noinline_for_stack int
  ext4_mb_discard_group_preallocations(struct super_block *sb,
c9de560de   Alex Tomas   ext4: Add multi b...
3618
3619
3620
3621
3622
3623
3624
3625
3626
3627
  					ext4_group_t group, int needed)
  {
  	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
  	struct buffer_head *bitmap_bh = NULL;
  	struct ext4_prealloc_space *pa, *tmp;
  	struct list_head list;
  	struct ext4_buddy e4b;
  	int err;
  	int busy = 0;
  	int free = 0;
6ba495e92   Theodore Ts'o   ext4: Add configu...
3628
3629
  	mb_debug(1, "discard preallocation for group %u
  ", group);
c9de560de   Alex Tomas   ext4: Add multi b...
3630
3631
3632
  
  	if (list_empty(&grp->bb_prealloc_list))
  		return 0;
574ca174c   Theodore Ts'o   ext4: Rename read...
3633
  	bitmap_bh = ext4_read_block_bitmap(sb, group);
c9de560de   Alex Tomas   ext4: Add multi b...
3634
  	if (bitmap_bh == NULL) {
12062dddd   Eric Sandeen   ext4: move __func...
3635
  		ext4_error(sb, "Error reading block bitmap for %u", group);
ce89f46cb   Aneesh Kumar K.V   ext4: Improve err...
3636
  		return 0;
c9de560de   Alex Tomas   ext4: Add multi b...
3637
3638
3639
  	}
  
  	err = ext4_mb_load_buddy(sb, group, &e4b);
ce89f46cb   Aneesh Kumar K.V   ext4: Improve err...
3640
  	if (err) {
12062dddd   Eric Sandeen   ext4: move __func...
3641
  		ext4_error(sb, "Error loading buddy information for %u", group);
ce89f46cb   Aneesh Kumar K.V   ext4: Improve err...
3642
3643
3644
  		put_bh(bitmap_bh);
  		return 0;
  	}
c9de560de   Alex Tomas   ext4: Add multi b...
3645
3646
  
  	if (needed == 0)
7137d7a48   Theodore Ts'o   ext4: convert ins...
3647
  		needed = EXT4_CLUSTERS_PER_GROUP(sb) + 1;
c9de560de   Alex Tomas   ext4: Add multi b...
3648

c9de560de   Alex Tomas   ext4: Add multi b...
3649
  	INIT_LIST_HEAD(&list);
c9de560de   Alex Tomas   ext4: Add multi b...
3650
3651
3652
3653
3654
3655
3656
3657
3658
3659
3660
3661
3662
3663
3664
3665
3666
3667
3668
3669
3670
3671
3672
3673
3674
3675
3676
3677
3678
3679
3680
3681
3682
3683
3684
3685
3686
3687
3688
3689
3690
3691
3692
3693
3694
3695
3696
3697
3698
3699
3700
3701
  repeat:
  	ext4_lock_group(sb, group);
  	list_for_each_entry_safe(pa, tmp,
  				&grp->bb_prealloc_list, pa_group_list) {
  		spin_lock(&pa->pa_lock);
  		if (atomic_read(&pa->pa_count)) {
  			spin_unlock(&pa->pa_lock);
  			busy = 1;
  			continue;
  		}
  		if (pa->pa_deleted) {
  			spin_unlock(&pa->pa_lock);
  			continue;
  		}
  
  		/* seems this one can be freed ... */
  		pa->pa_deleted = 1;
  
  		/* we can trust pa_free ... */
  		free += pa->pa_free;
  
  		spin_unlock(&pa->pa_lock);
  
  		list_del(&pa->pa_group_list);
  		list_add(&pa->u.pa_tmp_list, &list);
  	}
  
  	/* if we still need more blocks and some PAs were used, try again */
  	if (free < needed && busy) {
  		busy = 0;
  		ext4_unlock_group(sb, group);
  		/*
  		 * Yield the CPU here so that we don't get soft lockup
  		 * in non preempt case.
  		 */
  		yield();
  		goto repeat;
  	}
  
  	/* found anything to free? */
  	if (list_empty(&list)) {
  		BUG_ON(free != 0);
  		goto out;
  	}
  
  	/* now free all selected PAs */
  	list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
  
  		/* remove from object (inode or locality group) */
  		spin_lock(pa->pa_obj_lock);
  		list_del_rcu(&pa->pa_inode_list);
  		spin_unlock(pa->pa_obj_lock);
cc0fb9ad7   Aneesh Kumar K.V   ext4: Rename pa_l...
3702
  		if (pa->pa_type == MB_GROUP_PA)
3e1e5f501   Eric Sandeen   ext4: don't use e...
3703
  			ext4_mb_release_group_pa(&e4b, pa);
c9de560de   Alex Tomas   ext4: Add multi b...
3704
  		else
3e1e5f501   Eric Sandeen   ext4: don't use e...
3705
  			ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
c9de560de   Alex Tomas   ext4: Add multi b...
3706
3707
3708
3709
3710
3711
3712
  
  		list_del(&pa->u.pa_tmp_list);
  		call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
  	}
  
  out:
  	ext4_unlock_group(sb, group);
e39e07fdf   Jing Zhang   ext4: rename ext4...
3713
  	ext4_mb_unload_buddy(&e4b);
c9de560de   Alex Tomas   ext4: Add multi b...
3714
3715
3716
3717
3718
3719
3720
3721
3722
3723
3724
3725
3726
  	put_bh(bitmap_bh);
  	return free;
  }
  
  /*
   * releases all non-used preallocated blocks for given inode
   *
   * It's important to discard preallocations under i_data_sem
   * We don't want another block to be served from the prealloc
   * space when we are discarding the inode prealloc space.
   *
   * FIXME!! Make sure it is valid at all the call sites
   */
c2ea3fde6   Theodore Ts'o   ext4: Remove old ...
3727
  void ext4_discard_preallocations(struct inode *inode)
c9de560de   Alex Tomas   ext4: Add multi b...
3728
3729
3730
3731
3732
3733
3734
3735
3736
  {
  	struct ext4_inode_info *ei = EXT4_I(inode);
  	struct super_block *sb = inode->i_sb;
  	struct buffer_head *bitmap_bh = NULL;
  	struct ext4_prealloc_space *pa, *tmp;
  	ext4_group_t group = 0;
  	struct list_head list;
  	struct ext4_buddy e4b;
  	int err;
c2ea3fde6   Theodore Ts'o   ext4: Remove old ...
3737
  	if (!S_ISREG(inode->i_mode)) {
c9de560de   Alex Tomas   ext4: Add multi b...
3738
3739
3740
  		/*BUG_ON(!list_empty(&ei->i_prealloc_list));*/
  		return;
  	}
6ba495e92   Theodore Ts'o   ext4: Add configu...
3741
3742
  	mb_debug(1, "discard preallocation for inode %lu
  ", inode->i_ino);
9bffad1ed   Theodore Ts'o   ext4: convert ins...
3743
  	trace_ext4_discard_preallocations(inode);
c9de560de   Alex Tomas   ext4: Add multi b...
3744
3745
3746
3747
3748
3749
3750
3751
3752
3753
3754
3755
3756
3757
3758
3759
  
  	INIT_LIST_HEAD(&list);
  
  repeat:
  	/* first, collect all pa's in the inode */
  	spin_lock(&ei->i_prealloc_lock);
  	while (!list_empty(&ei->i_prealloc_list)) {
  		pa = list_entry(ei->i_prealloc_list.next,
  				struct ext4_prealloc_space, pa_inode_list);
  		BUG_ON(pa->pa_obj_lock != &ei->i_prealloc_lock);
  		spin_lock(&pa->pa_lock);
  		if (atomic_read(&pa->pa_count)) {
  			/* this shouldn't happen often - nobody should
  			 * use preallocation while we're discarding it */
  			spin_unlock(&pa->pa_lock);
  			spin_unlock(&ei->i_prealloc_lock);
9d8b9ec44   Theodore Ts'o   ext4: use ext4_ms...
3760
3761
  			ext4_msg(sb, KERN_ERR,
  				 "uh-oh! used pa while discarding");
c9de560de   Alex Tomas   ext4: Add multi b...
3762
3763
3764
3765
3766
3767
3768
3769
3770
3771
3772
3773
3774
3775
3776
3777
3778
3779
3780
3781
3782
3783
3784
3785
3786
3787
3788
3789
3790
3791
3792
3793
3794
3795
3796
  			WARN_ON(1);
  			schedule_timeout_uninterruptible(HZ);
  			goto repeat;
  
  		}
  		if (pa->pa_deleted == 0) {
  			pa->pa_deleted = 1;
  			spin_unlock(&pa->pa_lock);
  			list_del_rcu(&pa->pa_inode_list);
  			list_add(&pa->u.pa_tmp_list, &list);
  			continue;
  		}
  
  		/* someone is deleting pa right now */
  		spin_unlock(&pa->pa_lock);
  		spin_unlock(&ei->i_prealloc_lock);
  
  		/* we have to wait here because pa_deleted
  		 * doesn't mean pa is already unlinked from
  		 * the list. as we might be called from
  		 * ->clear_inode() the inode will get freed
  		 * and concurrent thread which is unlinking
  		 * pa from inode's list may access already
  		 * freed memory, bad-bad-bad */
  
  		/* XXX: if this happens too often, we can
  		 * add a flag to force wait only in case
  		 * of ->clear_inode(), but not in case of
  		 * regular truncate */
  		schedule_timeout_uninterruptible(HZ);
  		goto repeat;
  	}
  	spin_unlock(&ei->i_prealloc_lock);
  
  	list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
cc0fb9ad7   Aneesh Kumar K.V   ext4: Rename pa_l...
3797
  		BUG_ON(pa->pa_type != MB_INODE_PA);
c9de560de   Alex Tomas   ext4: Add multi b...
3798
3799
3800
  		ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
  
  		err = ext4_mb_load_buddy(sb, group, &e4b);
ce89f46cb   Aneesh Kumar K.V   ext4: Improve err...
3801
  		if (err) {
12062dddd   Eric Sandeen   ext4: move __func...
3802
3803
  			ext4_error(sb, "Error loading buddy information for %u",
  					group);
ce89f46cb   Aneesh Kumar K.V   ext4: Improve err...
3804
3805
  			continue;
  		}
c9de560de   Alex Tomas   ext4: Add multi b...
3806

574ca174c   Theodore Ts'o   ext4: Rename read...
3807
  		bitmap_bh = ext4_read_block_bitmap(sb, group);
c9de560de   Alex Tomas   ext4: Add multi b...
3808
  		if (bitmap_bh == NULL) {
12062dddd   Eric Sandeen   ext4: move __func...
3809
3810
  			ext4_error(sb, "Error reading block bitmap for %u",
  					group);
e39e07fdf   Jing Zhang   ext4: rename ext4...
3811
  			ext4_mb_unload_buddy(&e4b);
ce89f46cb   Aneesh Kumar K.V   ext4: Improve err...
3812
  			continue;
c9de560de   Alex Tomas   ext4: Add multi b...
3813
3814
3815
3816
  		}
  
  		ext4_lock_group(sb, group);
  		list_del(&pa->pa_group_list);
3e1e5f501   Eric Sandeen   ext4: don't use e...
3817
  		ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
c9de560de   Alex Tomas   ext4: Add multi b...
3818
  		ext4_unlock_group(sb, group);
e39e07fdf   Jing Zhang   ext4: rename ext4...
3819
  		ext4_mb_unload_buddy(&e4b);
c9de560de   Alex Tomas   ext4: Add multi b...
3820
3821
3822
3823
3824
3825
  		put_bh(bitmap_bh);
  
  		list_del(&pa->u.pa_tmp_list);
  		call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
  	}
  }
6ba495e92   Theodore Ts'o   ext4: Add configu...
3826
  #ifdef CONFIG_EXT4_DEBUG
  /*
   * Debug helper: dumps the allocation context (original, goal and best
   * extents, status, flags) followed by, for every block group, its
   * preallocations and its free/fragment counters.
   *
   * Prints nothing when mb_enable_debug is off or the filesystem has
   * EXT4_MF_FS_ABORTED set in s_mount_flags.
   */
  static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
  {
  	struct super_block *sb = ac->ac_sb;
  	ext4_group_t ngroups, i;

  	if (!mb_enable_debug ||
  	    (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED))
  		return;

  	ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: Can't allocate:"
  			" Allocation context details:");
  	ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: status %d flags %d",
  			ac->ac_status, ac->ac_flags);
  	ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: orig %lu/%lu/%lu@%lu, "
  			"goal %lu/%lu/%lu@%lu, "
  			"best %lu/%lu/%lu@%lu cr %d",
  			(unsigned long)ac->ac_o_ex.fe_group,
  			(unsigned long)ac->ac_o_ex.fe_start,
  			(unsigned long)ac->ac_o_ex.fe_len,
  			(unsigned long)ac->ac_o_ex.fe_logical,
  			(unsigned long)ac->ac_g_ex.fe_group,
  			(unsigned long)ac->ac_g_ex.fe_start,
  			(unsigned long)ac->ac_g_ex.fe_len,
  			(unsigned long)ac->ac_g_ex.fe_logical,
  			(unsigned long)ac->ac_b_ex.fe_group,
  			(unsigned long)ac->ac_b_ex.fe_start,
  			(unsigned long)ac->ac_b_ex.fe_len,
  			(unsigned long)ac->ac_b_ex.fe_logical,
  			(int)ac->ac_criteria);
  	ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: %lu scanned, %d found",
  		 ac->ac_ex_scanned, ac->ac_found);
  	ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: groups: ");
  	ngroups = ext4_get_groups_count(sb);
  	for (i = 0; i < ngroups; i++) {
  		struct ext4_group_info *grp = ext4_get_group_info(sb, i);
  		struct ext4_prealloc_space *pa;
  		ext4_grpblk_t start;
  		struct list_head *cur;

  		/* the group's PA list is walked under the group lock */
  		ext4_lock_group(sb, i);
  		list_for_each(cur, &grp->bb_prealloc_list) {
  			pa = list_entry(cur, struct ext4_prealloc_space,
  					pa_group_list);
  			spin_lock(&pa->pa_lock);
  			ext4_get_group_no_and_offset(sb, pa->pa_pstart,
  						     NULL, &start);
  			spin_unlock(&pa->pa_lock);
  			printk(KERN_ERR "PA:%u:%d:%u \n", i,
  			       start, pa->pa_len);
  		}
  		ext4_unlock_group(sb, i);

  		/* groups without free space are not listed */
  		if (grp->bb_free == 0)
  			continue;
  		printk(KERN_ERR "%u: %d/%d \n",
  		       i, grp->bb_free, grp->bb_fragments);
  	}
  	printk(KERN_ERR "\n");
  }
  #else
  /* Debugging disabled: dumping the allocation context is a no-op. */
  static inline void ext4_mb_show_ac(struct ext4_allocation_context *ac)
  {
  	return;
  }
  #endif
  
  /*
   * We use locality group preallocation for small size file. The size of the
   * file is determined by the current size or the resulting size after
   * allocation which ever is larger
   *
b713a5ec5   Theodore Ts'o   ext4: remove /pro...
3899
   * One can tune this size via /sys/fs/ext4/<partition>/mb_stream_req
c9de560de   Alex Tomas   ext4: Add multi b...
3900
3901
3902
3903
3904
3905
3906
3907
3908
   */
  static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
  {
  	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
  	int bsbits = ac->ac_sb->s_blocksize_bits;
  	loff_t size, isize;
  
  	if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
  		return;
4ba74d00a   Theodore Ts'o   ext4: Fix bugs in...
3909
3910
  	if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
  		return;
53accfa9f   Theodore Ts'o   ext4: teach mball...
3911
  	size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
50797481a   Theodore Ts'o   ext4: Avoid group...
3912
3913
  	isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1)
  		>> bsbits;
c9de560de   Alex Tomas   ext4: Add multi b...
3914

50797481a   Theodore Ts'o   ext4: Avoid group...
3915
3916
3917
3918
3919
3920
  	if ((size == isize) &&
  	    !ext4_fs_is_busy(sbi) &&
  	    (atomic_read(&ac->ac_inode->i_writecount) == 0)) {
  		ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC;
  		return;
  	}
ebbe02779   Robin Dong   ext4: use stream-...
3921
3922
3923
3924
  	if (sbi->s_mb_group_prealloc <= 0) {
  		ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
  		return;
  	}
c9de560de   Alex Tomas   ext4: Add multi b...
3925
  	/* don't use group allocation for large files */
717805773   Theodore Ts'o   ext4: Fix huerist...
3926
  	size = max(size, isize);
cc483f102   Tao Ma   ext4: Fix fencepo...
3927
  	if (size > sbi->s_mb_stream_request) {
4ba74d00a   Theodore Ts'o   ext4: Fix bugs in...
3928
  		ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
c9de560de   Alex Tomas   ext4: Add multi b...
3929
  		return;
4ba74d00a   Theodore Ts'o   ext4: Fix bugs in...
3930
  	}
c9de560de   Alex Tomas   ext4: Add multi b...
3931
3932
3933
3934
3935
3936
3937
  
  	BUG_ON(ac->ac_lg != NULL);
  	/*
  	 * locality group prealloc space are per cpu. The reason for having
  	 * per cpu locality group is to reduce the contention between block
  	 * request from multiple CPUs.
  	 */
ca0c9584b   Christoph Lameter   this_cpu: Straigh...
3938
  	ac->ac_lg = __this_cpu_ptr(sbi->s_locality_groups);
c9de560de   Alex Tomas   ext4: Add multi b...
3939
3940
3941
3942
3943
3944
3945
  
  	/* we're going to use group allocation */
  	ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC;
  
  	/* serialize all allocations in the group */
  	mutex_lock(&ac->ac_lg->lg_mutex);
  }
4ddfef7b4   Eric Sandeen   ext4: reduce mbal...
3946
3947
  static noinline_for_stack int
  ext4_mb_initialize_context(struct ext4_allocation_context *ac,
c9de560de   Alex Tomas   ext4: Add multi b...
3948
3949
3950
3951
3952
3953
  				struct ext4_allocation_request *ar)
  {
  	struct super_block *sb = ar->inode->i_sb;
  	struct ext4_sb_info *sbi = EXT4_SB(sb);
  	struct ext4_super_block *es = sbi->s_es;
  	ext4_group_t group;
498e5f241   Theodore Ts'o   ext4: Change unsi...
3954
3955
  	unsigned int len;
  	ext4_fsblk_t goal;
c9de560de   Alex Tomas   ext4: Add multi b...
3956
3957
3958
3959
3960
3961
  	ext4_grpblk_t block;
  
  	/* we can't allocate > group size */
  	len = ar->len;
  
  	/* just a dirty hack to filter too big requests  */
7137d7a48   Theodore Ts'o   ext4: convert ins...
3962
3963
  	if (len >= EXT4_CLUSTERS_PER_GROUP(sb) - 10)
  		len = EXT4_CLUSTERS_PER_GROUP(sb) - 10;
c9de560de   Alex Tomas   ext4: Add multi b...
3964
3965
3966
3967
3968
3969
3970
3971
3972
  
  	/* start searching from the goal */
  	goal = ar->goal;
  	if (goal < le32_to_cpu(es->s_first_data_block) ||
  			goal >= ext4_blocks_count(es))
  		goal = le32_to_cpu(es->s_first_data_block);
  	ext4_get_group_no_and_offset(sb, goal, &group, &block);
  
  	/* set up allocation goals */
833576b36   Theodore Ts'o   ext4: Fix ext4_mb...
3973
  	memset(ac, 0, sizeof(struct ext4_allocation_context));
53accfa9f   Theodore Ts'o   ext4: teach mball...
3974
  	ac->ac_b_ex.fe_logical = ar->logical & ~(sbi->s_cluster_ratio - 1);
c9de560de   Alex Tomas   ext4: Add multi b...
3975
  	ac->ac_status = AC_STATUS_CONTINUE;
c9de560de   Alex Tomas   ext4: Add multi b...
3976
3977
  	ac->ac_sb = sb;
  	ac->ac_inode = ar->inode;
53accfa9f   Theodore Ts'o   ext4: teach mball...
3978
  	ac->ac_o_ex.fe_logical = ac->ac_b_ex.fe_logical;
c9de560de   Alex Tomas   ext4: Add multi b...
3979
3980
3981
  	ac->ac_o_ex.fe_group = group;
  	ac->ac_o_ex.fe_start = block;
  	ac->ac_o_ex.fe_len = len;
53accfa9f   Theodore Ts'o   ext4: teach mball...
3982
  	ac->ac_g_ex = ac->ac_o_ex;
c9de560de   Alex Tomas   ext4: Add multi b...
3983
  	ac->ac_flags = ar->flags;
c9de560de   Alex Tomas   ext4: Add multi b...
3984
3985
3986
3987
  
  	/* we have to define context: we'll we work with a file or
  	 * locality group. this is a policy, actually */
  	ext4_mb_group_or_file(ac);
6ba495e92   Theodore Ts'o   ext4: Add configu...
3988
  	mb_debug(1, "init ac: %u blocks @ %u, goal %u, flags %x, 2^%d, "
c9de560de   Alex Tomas   ext4: Add multi b...
3989
3990
3991
3992
3993
3994
3995
3996
3997
3998
  			"left: %u/%u, right %u/%u to %swritable
  ",
  			(unsigned) ar->len, (unsigned) ar->logical,
  			(unsigned) ar->goal, ac->ac_flags, ac->ac_2order,
  			(unsigned) ar->lleft, (unsigned) ar->pleft,
  			(unsigned) ar->lright, (unsigned) ar->pright,
  			atomic_read(&ar->inode->i_writecount) ? "" : "non-");
  	return 0;
  
  }
6be2ded1d   Aneesh Kumar K.V   ext4: Don't allow...
3999
4000
4001
4002
4003
4004
4005
4006
4007
  static noinline_for_stack void
  ext4_mb_discard_lg_preallocations(struct super_block *sb,
  					struct ext4_locality_group *lg,
  					int order, int total_entries)
  {
  	ext4_group_t group = 0;
  	struct ext4_buddy e4b;
  	struct list_head discard_list;
  	struct ext4_prealloc_space *pa, *tmp;
6be2ded1d   Aneesh Kumar K.V   ext4: Don't allow...
4008

6ba495e92   Theodore Ts'o   ext4: Add configu...
4009
4010
  	mb_debug(1, "discard locality group preallocation
  ");
6be2ded1d   Aneesh Kumar K.V   ext4: Don't allow...
4011
4012
  
  	INIT_LIST_HEAD(&discard_list);
6be2ded1d   Aneesh Kumar K.V   ext4: Don't allow...
4013
4014
4015
4016
4017
4018
4019
4020
4021
4022
4023
4024
4025
4026
4027
4028
4029
4030
4031
  
  	spin_lock(&lg->lg_prealloc_lock);
  	list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
  						pa_inode_list) {
  		spin_lock(&pa->pa_lock);
  		if (atomic_read(&pa->pa_count)) {
  			/*
  			 * This is the pa that we just used
  			 * for block allocation. So don't
  			 * free that
  			 */
  			spin_unlock(&pa->pa_lock);
  			continue;
  		}
  		if (pa->pa_deleted) {
  			spin_unlock(&pa->pa_lock);
  			continue;
  		}
  		/* only lg prealloc space */
cc0fb9ad7   Aneesh Kumar K.V   ext4: Rename pa_l...
4032
  		BUG_ON(pa->pa_type != MB_GROUP_PA);
6be2ded1d   Aneesh Kumar K.V   ext4: Don't allow...
4033
4034
4035
4036
4037
4038
4039
4040
4041
4042
4043
4044
4045
4046
4047
4048
4049
4050
4051
4052
4053
4054
4055
4056
4057
  
  		/* seems this one can be freed ... */
  		pa->pa_deleted = 1;
  		spin_unlock(&pa->pa_lock);
  
  		list_del_rcu(&pa->pa_inode_list);
  		list_add(&pa->u.pa_tmp_list, &discard_list);
  
  		total_entries--;
  		if (total_entries <= 5) {
  			/*
  			 * we want to keep only 5 entries
  			 * allowing it to grow to 8. This
  			 * mak sure we don't call discard
  			 * soon for this list.
  			 */
  			break;
  		}
  	}
  	spin_unlock(&lg->lg_prealloc_lock);
  
  	list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) {
  
  		ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
  		if (ext4_mb_load_buddy(sb, group, &e4b)) {
12062dddd   Eric Sandeen   ext4: move __func...
4058
4059
  			ext4_error(sb, "Error loading buddy information for %u",
  					group);
6be2ded1d   Aneesh Kumar K.V   ext4: Don't allow...
4060
4061
4062
4063
  			continue;
  		}
  		ext4_lock_group(sb, group);
  		list_del(&pa->pa_group_list);
3e1e5f501   Eric Sandeen   ext4: don't use e...
4064
  		ext4_mb_release_group_pa(&e4b, pa);
6be2ded1d   Aneesh Kumar K.V   ext4: Don't allow...
4065
  		ext4_unlock_group(sb, group);
e39e07fdf   Jing Zhang   ext4: rename ext4...
4066
  		ext4_mb_unload_buddy(&e4b);
6be2ded1d   Aneesh Kumar K.V   ext4: Don't allow...
4067
4068
4069
  		list_del(&pa->u.pa_tmp_list);
  		call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
  	}
6be2ded1d   Aneesh Kumar K.V   ext4: Don't allow...
4070
4071
4072
4073
4074
4075
4076
4077
4078
4079
4080
4081
4082
4083
4084
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094
4095
4096
4097
  }
  
  /*
   * ext4_mb_add_n_trim() -- file a locality-group PA into its size bucket
   * @ac:	allocation context; ac->ac_pa is the PA to insert and ac->ac_lg
   *		is the locality group whose bucket lists are updated
   *
   * We have incremented pa_count. So the PA cannot be freed at this
   * point. Also we hold lg_mutex. So no parallel allocation is
   * possible from this lg. That means pa_free cannot be updated.
   *
   * A parallel ext4_mb_discard_group_preallocations is possible,
   * which can cause the lg_prealloc_list to be updated.  The bucket is
   * therefore walked and modified with lg->lg_prealloc_lock held, the
   * same lock ext4_mb_discard_lg_preallocations() takes while unlinking
   * entries; rcu_read_lock() alone does not exclude other writers.
   *
   * The bucket index is the position of the highest set bit of pa_free,
   * capped at PREALLOC_TB_SIZE - 1.  The PA is linked in front of the
   * first live entry that has more free space than it, or at the tail
   * of the bucket when there is no such entry.  If the bucket then holds
   * more than 8 live entries (counting the new one) it is trimmed, after
   * the lock has been dropped.
   */
  static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
  {
  	int order, added = 0, lg_prealloc_count = 1;
  	struct super_block *sb = ac->ac_sb;
  	struct ext4_locality_group *lg = ac->ac_lg;
  	struct ext4_prealloc_space *tmp_pa, *pa = ac->ac_pa;
  
  	order = fls(pa->pa_free) - 1;
  	if (order > PREALLOC_TB_SIZE - 1)
  		/* The max size of hash table is PREALLOC_TB_SIZE */
  		order = PREALLOC_TB_SIZE - 1;
  
  	/* Add the prealloc space to lg; writers serialise on this lock */
  	spin_lock(&lg->lg_prealloc_lock);
  	list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order],
  						pa_inode_list) {
  		spin_lock(&tmp_pa->pa_lock);
  		if (tmp_pa->pa_deleted) {
  			/* already being torn down: neither an anchor nor counted */
  			spin_unlock(&tmp_pa->pa_lock);
  			continue;
  		}
  		if (!added && pa->pa_free < tmp_pa->pa_free) {
  			/* Add to the tail of the previous entry */
  			list_add_tail_rcu(&pa->pa_inode_list,
  						&tmp_pa->pa_inode_list);
  			added = 1;
  			/*
  			 * keep walking: we want to count the total
  			 * number of entries in the list
  			 */
  		}
  		spin_unlock(&tmp_pa->pa_lock);
  		lg_prealloc_count++;
  	}
  	if (!added)
  		list_add_tail_rcu(&pa->pa_inode_list,
  					&lg->lg_prealloc_list[order]);
  	spin_unlock(&lg->lg_prealloc_lock);
  
  	/* Now trim the list to be not more than 8 elements */
  	if (lg_prealloc_count > 8)
  		ext4_mb_discard_lg_preallocations(sb, lg,
  						order, lg_prealloc_count);
  }
c9de560de   Alex Tomas   ext4: Add multi b...
4127
4128
4129
4130
4131
  /*
   * release all resource we used in allocation
   */
  static int ext4_mb_release_context(struct ext4_allocation_context *ac)
  {
53accfa9f   Theodore Ts'o   ext4: teach mball...
4132
  	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
6be2ded1d   Aneesh Kumar K.V   ext4: Don't allow...
4133
4134
  	struct ext4_prealloc_space *pa = ac->ac_pa;
  	if (pa) {
cc0fb9ad7   Aneesh Kumar K.V   ext4: Rename pa_l...
4135
  		if (pa->pa_type == MB_GROUP_PA) {
c9de560de   Alex Tomas   ext4: Add multi b...
4136
  			/* see comment in ext4_mb_use_group_pa() */
6be2ded1d   Aneesh Kumar K.V   ext4: Don't allow...
4137
  			spin_lock(&pa->pa_lock);
53accfa9f   Theodore Ts'o   ext4: teach mball...
4138
4139
  			pa->pa_pstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
  			pa->pa_lstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
6be2ded1d   Aneesh Kumar K.V   ext4: Don't allow...
4140
4141
4142
  			pa->pa_free -= ac->ac_b_ex.fe_len;
  			pa->pa_len -= ac->ac_b_ex.fe_len;
  			spin_unlock(&pa->pa_lock);
c9de560de   Alex Tomas   ext4: Add multi b...
4143
  		}
c9de560de   Alex Tomas   ext4: Add multi b...
4144
  	}
ba4439165   Aneesh Kumar K.V   ext4: Fix lockdep...
4145
4146
4147
4148
4149
  	if (pa) {
  		/*
  		 * We want to add the pa to the right bucket.
  		 * Remove it from the list and while adding
  		 * make sure the list to which we are adding
44183d423   Amir Goldstein   ext4: remove allo...
4150
  		 * doesn't grow big.
ba4439165   Aneesh Kumar K.V   ext4: Fix lockdep...
4151
  		 */
cc0fb9ad7   Aneesh Kumar K.V   ext4: Rename pa_l...
4152
  		if ((pa->pa_type == MB_GROUP_PA) && likely(pa->pa_free)) {
ba4439165   Aneesh Kumar K.V   ext4: Fix lockdep...
4153
4154
4155
4156
4157
4158
4159
  			spin_lock(pa->pa_obj_lock);
  			list_del_rcu(&pa->pa_inode_list);
  			spin_unlock(pa->pa_obj_lock);
  			ext4_mb_add_n_trim(ac);
  		}
  		ext4_mb_put_pa(ac, ac->ac_sb, pa);
  	}
c9de560de   Alex Tomas   ext4: Add multi b...
4160
4161
4162
4163
4164
4165
4166
4167
4168
4169
4170
4171
  	if (ac->ac_bitmap_page)
  		page_cache_release(ac->ac_bitmap_page);
  	if (ac->ac_buddy_page)
  		page_cache_release(ac->ac_buddy_page);
  	if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
  		mutex_unlock(&ac->ac_lg->lg_mutex);
  	ext4_mb_collect_stats(ac);
  	return 0;
  }
  
  static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
  {
8df9675f8   Theodore Ts'o   ext4: Avoid races...
4172
  	ext4_group_t i, ngroups = ext4_get_groups_count(sb);
c9de560de   Alex Tomas   ext4: Add multi b...
4173
4174
  	int ret;
  	int freed = 0;
9bffad1ed   Theodore Ts'o   ext4: convert ins...
4175
  	trace_ext4_mb_discard_preallocations(sb, needed);
8df9675f8   Theodore Ts'o   ext4: Avoid races...
4176
  	for (i = 0; i < ngroups && needed > 0; i++) {
c9de560de   Alex Tomas   ext4: Add multi b...
4177
4178
4179
4180
4181
4182
4183
4184
4185
4186
4187
4188
4189
4190
  		ret = ext4_mb_discard_group_preallocations(sb, i, needed);
  		freed += ret;
  		needed -= ret;
  	}
  
  	return freed;
  }
  
  /*
   * ext4_mb_new_blocks() -- main entry point into mballoc
   * @handle:	journal handle, passed on when the on-disk space is marked used
   * @ar:	allocation request; ar->len and ar->flags are updated in place
   * @errp:	receives 0 or a negative errno
   *
   * Tries to satisfy the request from an existing preallocation first
   * and falls back to the regular allocator otherwise.  Unless the inode
   * is in the EXT4_STATE_DELALLOC_RESERVED state, free clusters and quota
   * are claimed up front, shrinking ar->len until the claim succeeds.
   *
   * Returns the first allocated block with ar->len set to the length of
   * the extent actually obtained, or 0 with *errp set and ar->len == 0.
   */
  ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
  				struct ext4_allocation_request *ar, int *errp)
  {
  	struct super_block *sb = ar->inode->i_sb;
  	struct ext4_sb_info *sbi = EXT4_SB(sb);
  	struct ext4_allocation_context *ac = NULL;
  	ext4_fsblk_t first_blk = 0;
  	unsigned int quota_charged = 0;
  	unsigned int reserv_clstrs = 0;
  	int reclaimed;
  
  	trace_ext4_request_blocks(ar);
  
  	/* Allow to use superuser reservation for quota file */
  	if (IS_NOQUOTA(ar->inode))
  		ar->flags |= EXT4_MB_USE_ROOT_BLOCKS;
  
  	if (!ext4_test_inode_state(ar->inode, EXT4_STATE_DELALLOC_RESERVED)) {
  		/*
  		 * Without delayed allocation we need to verify
  		 * there is enough free blocks to do block allocation
  		 * and verify allocation doesn't exceed the quota limits.
  		 */
  		while (ar->len &&
  		       ext4_claim_free_clusters(sbi, ar->len, ar->flags)) {
  			/* let others to free the space */
  			yield();
  			ar->len >>= 1;
  		}
  		if (!ar->len) {
  			*errp = -ENOSPC;
  			return 0;
  		}
  		reserv_clstrs = ar->len;
  
  		if (ar->flags & EXT4_MB_USE_ROOT_BLOCKS) {
  			dquot_alloc_block_nofail(ar->inode,
  						 EXT4_C2B(sbi, ar->len));
  		} else {
  			/* back off one unit at a time until quota accepts it */
  			while (ar->len &&
  			       dquot_alloc_block(ar->inode,
  						 EXT4_C2B(sbi, ar->len))) {
  				ar->flags |= EXT4_MB_HINT_NOPREALLOC;
  				ar->len--;
  			}
  		}
  		quota_charged = ar->len;
  		if (ar->len == 0) {
  			*errp = -EDQUOT;
  			goto out;
  		}
  	} else {
  		/*
  		 * For delayed allocation, we could skip the ENOSPC and
  		 * EDQUOT check, as blocks and quotas have been already
  		 * reserved when data being copied into pagecache.
  		 */
  		ar->flags |= EXT4_MB_DELALLOC_RESERVED;
  	}
  
  	ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
  	if (!ac) {
  		ar->len = 0;
  		*errp = -ENOMEM;
  		goto out;
  	}
  
  	*errp = ext4_mb_initialize_context(ac, ar);
  	if (*errp) {
  		ar->len = 0;
  		goto out;
  	}
  
  	ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
  	if (!ext4_mb_use_preallocated(ac)) {
  		ac->ac_op = EXT4_MB_HISTORY_ALLOC;
  		ext4_mb_normalize_request(ac, ar);
  repeat:
  		/* allocate space in core */
  		*errp = ext4_mb_regular_allocator(ac);
  		if (*errp)
  			goto errout;
  
  		/*
  		 * as we've just preallocated more space than
  		 * user requested originally, we store allocated
  		 * space in a special descriptor
  		 */
  		if (ac->ac_status == AC_STATUS_FOUND &&
  		    ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len)
  			ext4_mb_new_preallocation(ac);
  	}
  
  	if (likely(ac->ac_status == AC_STATUS_FOUND)) {
  		*errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs);
  		if (*errp == -EAGAIN) {
  			/*
  			 * drop the reference that we took
  			 * in ext4_mb_use_best_found
  			 */
  			ext4_mb_release_context(ac);
  			ac->ac_b_ex.fe_group = 0;
  			ac->ac_b_ex.fe_start = 0;
  			ac->ac_b_ex.fe_len = 0;
  			ac->ac_status = AC_STATUS_CONTINUE;
  			goto repeat;
  		} else if (*errp) {
  errout:
  			/* also reached directly when the allocator itself fails */
  			ext4_discard_allocated_blocks(ac);
  		} else {
  			first_blk = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
  			ar->len = ac->ac_b_ex.fe_len;
  		}
  	} else {
  		/* nothing found: reclaim preallocations and retry if that helped */
  		reclaimed = ext4_mb_discard_preallocations(sb,
  							   ac->ac_o_ex.fe_len);
  		if (reclaimed)
  			goto repeat;
  		*errp = -ENOSPC;
  	}
  
  	if (*errp) {
  		ac->ac_b_ex.fe_len = 0;
  		ar->len = 0;
  		ext4_mb_show_ac(ac);
  	}
  	ext4_mb_release_context(ac);
  
  out:
  	if (ac)
  		kmem_cache_free(ext4_ac_cachep, ac);
  
  	/* give back the quota charged beyond what was finally allocated */
  	if (quota_charged && ar->len < quota_charged)
  		dquot_free_block(ar->inode,
  				 EXT4_C2B(sbi, quota_charged - ar->len));
  
  	/* release all the reserved blocks if non delalloc */
  	if (!ar->len &&
  	    !ext4_test_inode_state(ar->inode, EXT4_STATE_DELALLOC_RESERVED))
  		percpu_counter_sub(&sbi->s_dirtyclusters_counter,
  				   reserv_clstrs);
  
  	trace_ext4_allocate_blocks(ar, (unsigned long long)first_blk);
  
  	return first_blk;
  }
c9de560de   Alex Tomas   ext4: Add multi b...
4329

c894058d6   Aneesh Kumar K.V   ext4: Use an rbtr...
4330
4331
4332
4333
4334
4335
4336
4337
4338
4339
  /*
   * can_merge() -- may @entry2 be folded onto the end of @entry1?
   *
   * Two free data extents are mergeable only when they were freed by the
   * same transaction, belong to the same group, and @entry2 starts at the
   * cluster right after the last cluster of @entry1.
   *
   * Returns 1 if all three conditions hold, 0 otherwise.
   */
  static int can_merge(struct ext4_free_data *entry1,
  			struct ext4_free_data *entry2)
  {
  	if (entry1->t_tid != entry2->t_tid)
  		return 0;
  	if (entry1->group != entry2->group)
  		return 0;
  
  	return (entry1->start_cluster + entry1->count) ==
  		entry2->start_cluster;
  }
4ddfef7b4   Eric Sandeen   ext4: reduce mbal...
4344
4345
  /*
   * ext4_mb_free_metadata() -- queue a freed extent until its transaction commits
   * @handle:	journal handle; must be valid, the extent is put on the
   *		private list of handle->h_transaction
   * @e4b:	loaded buddy of the group the extent belongs to
   * @new_entry:	freshly allocated descriptor of the freed cluster range
   *
   * Inserts @new_entry into the group's rb-tree of to-be-freed extents
   * (keyed by start cluster), merges it with the neighbouring extents
   * when can_merge() allows it, and links it on the transaction's private
   * list.  ext4_free_blocks() calls this with the group lock held.
   *
   * If the range overlaps an extent that is already queued, an error is
   * reported and @new_entry is freed here, because nothing else would
   * ever reference it again.
   *
   * Always returns 0.
   */
  static noinline_for_stack int
  ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
  		      struct ext4_free_data *new_entry)
  {
  	ext4_group_t group = e4b->bd_group;
  	ext4_grpblk_t cluster;
  	struct ext4_free_data *entry;
  	struct ext4_group_info *db = e4b->bd_info;
  	struct super_block *sb = e4b->bd_sb;
  	struct ext4_sb_info *sbi = EXT4_SB(sb);
  	struct rb_node **n = &db->bb_free_root.rb_node, *node;
  	struct rb_node *parent = NULL, *new_node;
  
  	BUG_ON(!ext4_handle_valid(handle));
  	BUG_ON(e4b->bd_bitmap_page == NULL);
  	BUG_ON(e4b->bd_buddy_page == NULL);
  
  	new_node = &new_entry->node;
  	cluster = new_entry->start_cluster;
  
  	if (!*n) {
  		/*
  		 * first free block extent. We need to
  		 * protect buddy cache from being freed,
  		 * otherwise we'll refresh it from
  		 * on-disk bitmap and lose not-yet-available
  		 * blocks
  		 */
  		page_cache_get(e4b->bd_buddy_page);
  		page_cache_get(e4b->bd_bitmap_page);
  	}
  
  	/* find the insertion point; any overlap means a double free */
  	while (*n) {
  		parent = *n;
  		entry = rb_entry(parent, struct ext4_free_data, node);
  		if (cluster < entry->start_cluster)
  			n = &(*n)->rb_left;
  		else if (cluster >= (entry->start_cluster + entry->count))
  			n = &(*n)->rb_right;
  		else {
  			ext4_grp_locked_error(sb, group, 0,
  				ext4_group_first_block_no(sb, group) +
  				EXT4_C2B(sbi, cluster),
  				"Block already on to-be-freed list");
  			/* never linked anywhere, so release it here */
  			kmem_cache_free(ext4_free_ext_cachep, new_entry);
  			return 0;
  		}
  	}
  
  	rb_link_node(new_node, parent, n);
  	rb_insert_color(new_node, &db->bb_free_root);
  
  	/* Now try to see the extent can be merged to left and right */
  	node = rb_prev(new_node);
  	if (node) {
  		entry = rb_entry(node, struct ext4_free_data, node);
  		if (can_merge(entry, new_entry)) {
  			/* absorb the left neighbour into new_entry */
  			new_entry->start_cluster = entry->start_cluster;
  			new_entry->count += entry->count;
  			rb_erase(node, &(db->bb_free_root));
  			spin_lock(&sbi->s_md_lock);
  			list_del(&entry->list);
  			spin_unlock(&sbi->s_md_lock);
  			kmem_cache_free(ext4_free_ext_cachep, entry);
  		}
  	}
  
  	node = rb_next(new_node);
  	if (node) {
  		entry = rb_entry(node, struct ext4_free_data, node);
  		if (can_merge(new_entry, entry)) {
  			/* absorb the right neighbour into new_entry */
  			new_entry->count += entry->count;
  			rb_erase(node, &(db->bb_free_root));
  			spin_lock(&sbi->s_md_lock);
  			list_del(&entry->list);
  			spin_unlock(&sbi->s_md_lock);
  			kmem_cache_free(ext4_free_ext_cachep, entry);
  		}
  	}
  
  	/* Add the extent to transaction's private list */
  	spin_lock(&sbi->s_md_lock);
  	list_add(&new_entry->list, &handle->h_transaction->t_private_list);
  	spin_unlock(&sbi->s_md_lock);
  
  	return 0;
  }
443387113   Theodore Ts'o   ext4: fold ext4_f...
4423
4424
4425
4426
4427
4428
  /**
   * ext4_free_blocks() -- Free given blocks and update quota
   * @handle:		handle for this transaction
   * @inode:		inode
   * @block:		start physical block to free
   * @count:		number of blocks to count
5def13602   Yongqiang Yang   ext4: correct com...
4429
   * @flags:		flags used by ext4_free_blocks
c9de560de   Alex Tomas   ext4: Add multi b...
4430
   */
443387113   Theodore Ts'o   ext4: fold ext4_f...
4431
  void ext4_free_blocks(handle_t *handle, struct inode *inode,
e6362609b   Theodore Ts'o   ext4: call ext4_f...
4432
4433
  		      struct buffer_head *bh, ext4_fsblk_t block,
  		      unsigned long count, int flags)
c9de560de   Alex Tomas   ext4: Add multi b...
4434
  {
26346ff68   Aneesh Kumar K.V   ext4: Don't panic...
4435
  	struct buffer_head *bitmap_bh = NULL;
c9de560de   Alex Tomas   ext4: Add multi b...
4436
  	struct super_block *sb = inode->i_sb;
c9de560de   Alex Tomas   ext4: Add multi b...
4437
  	struct ext4_group_desc *gdp;
443387113   Theodore Ts'o   ext4: fold ext4_f...
4438
  	unsigned long freed = 0;
498e5f241   Theodore Ts'o   ext4: Change unsi...
4439
  	unsigned int overflow;
c9de560de   Alex Tomas   ext4: Add multi b...
4440
4441
4442
4443
4444
  	ext4_grpblk_t bit;
  	struct buffer_head *gd_bh;
  	ext4_group_t block_group;
  	struct ext4_sb_info *sbi;
  	struct ext4_buddy e4b;
84130193e   Theodore Ts'o   ext4: teach ext4_...
4445
  	unsigned int count_clusters;
c9de560de   Alex Tomas   ext4: Add multi b...
4446
4447
  	int err = 0;
  	int ret;
e6362609b   Theodore Ts'o   ext4: call ext4_f...
4448
4449
4450
4451
4452
4453
  	if (bh) {
  		if (block)
  			BUG_ON(block != bh->b_blocknr);
  		else
  			block = bh->b_blocknr;
  	}
c9de560de   Alex Tomas   ext4: Add multi b...
4454

c9de560de   Alex Tomas   ext4: Add multi b...
4455
  	sbi = EXT4_SB(sb);
1f2acb601   Theodore Ts'o   ext4: Add block v...
4456
4457
  	if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
  	    !ext4_data_block_valid(sbi, block, count)) {
12062dddd   Eric Sandeen   ext4: move __func...
4458
  		ext4_error(sb, "Freeing blocks not in datazone - "
1f2acb601   Theodore Ts'o   ext4: Add block v...
4459
  			   "block = %llu, count = %lu", block, count);
c9de560de   Alex Tomas   ext4: Add multi b...
4460
4461
  		goto error_return;
  	}
0610b6e99   Theodore Ts'o   ext4: Fix 64-bit ...
4462
4463
  	ext4_debug("freeing block %llu
  ", block);
e6362609b   Theodore Ts'o   ext4: call ext4_f...
4464
4465
4466
4467
4468
4469
4470
4471
4472
4473
4474
4475
  	trace_ext4_free_blocks(inode, block, count, flags);
  
  	if (flags & EXT4_FREE_BLOCKS_FORGET) {
  		struct buffer_head *tbh = bh;
  		int i;
  
  		BUG_ON(bh && (count > 1));
  
  		for (i = 0; i < count; i++) {
  			if (!bh)
  				tbh = sb_find_get_block(inode->i_sb,
  							block + i);
877836905   Namhyung Kim   ext4: Check retur...
4476
4477
  			if (unlikely(!tbh))
  				continue;
60e6679e2   Theodore Ts'o   ext4: Drop whites...
4478
  			ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
e6362609b   Theodore Ts'o   ext4: call ext4_f...
4479
4480
4481
  				    inode, tbh, block + i);
  		}
  	}
60e6679e2   Theodore Ts'o   ext4: Drop whites...
4482
  	/*
e6362609b   Theodore Ts'o   ext4: call ext4_f...
4483
4484
4485
4486
4487
4488
4489
4490
  	 * We need to make sure we don't reuse the freed block until
  	 * after the transaction is committed, which we can do by
  	 * treating the block as metadata, below.  We make an
  	 * exception if the inode is to be written in writeback mode
  	 * since writeback mode has weak data consistency guarantees.
  	 */
  	if (!ext4_should_writeback_data(inode))
  		flags |= EXT4_FREE_BLOCKS_METADATA;
c9de560de   Alex Tomas   ext4: Add multi b...
4491

84130193e   Theodore Ts'o   ext4: teach ext4_...
4492
4493
4494
4495
4496
4497
4498
4499
4500
4501
4502
4503
4504
4505
4506
4507
4508
4509
4510
4511
4512
4513
4514
4515
4516
4517
4518
4519
4520
4521
4522
  	/*
  	 * If the extent to be freed does not begin on a cluster
  	 * boundary, we need to deal with partial clusters at the
  	 * beginning and end of the extent.  Normally we will free
  	 * blocks at the beginning or the end unless we are explicitly
  	 * requested to avoid doing so.
  	 */
  	overflow = block & (sbi->s_cluster_ratio - 1);
  	if (overflow) {
  		if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) {
  			overflow = sbi->s_cluster_ratio - overflow;
  			block += overflow;
  			if (count > overflow)
  				count -= overflow;
  			else
  				return;
  		} else {
  			block -= overflow;
  			count += overflow;
  		}
  	}
  	overflow = count & (sbi->s_cluster_ratio - 1);
  	if (overflow) {
  		if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) {
  			if (count > overflow)
  				count -= overflow;
  			else
  				return;
  		} else
  			count += sbi->s_cluster_ratio - overflow;
  	}
c9de560de   Alex Tomas   ext4: Add multi b...
4523
4524
4525
4526
4527
4528
4529
4530
  do_more:
  	overflow = 0;
  	ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
  
  	/*
  	 * Check to see if we are freeing blocks across a group
  	 * boundary.
  	 */
84130193e   Theodore Ts'o   ext4: teach ext4_...
4531
4532
4533
  	if (EXT4_C2B(sbi, bit) + count > EXT4_BLOCKS_PER_GROUP(sb)) {
  		overflow = EXT4_C2B(sbi, bit) + count -
  			EXT4_BLOCKS_PER_GROUP(sb);
c9de560de   Alex Tomas   ext4: Add multi b...
4534
4535
  		count -= overflow;
  	}
84130193e   Theodore Ts'o   ext4: teach ext4_...
4536
  	count_clusters = EXT4_B2C(sbi, count);
574ca174c   Theodore Ts'o   ext4: Rename read...
4537
  	bitmap_bh = ext4_read_block_bitmap(sb, block_group);
ce89f46cb   Aneesh Kumar K.V   ext4: Improve err...
4538
4539
  	if (!bitmap_bh) {
  		err = -EIO;
c9de560de   Alex Tomas   ext4: Add multi b...
4540
  		goto error_return;
ce89f46cb   Aneesh Kumar K.V   ext4: Improve err...
4541
  	}
c9de560de   Alex Tomas   ext4: Add multi b...
4542
  	gdp = ext4_get_group_desc(sb, block_group, &gd_bh);
ce89f46cb   Aneesh Kumar K.V   ext4: Improve err...
4543
4544
  	if (!gdp) {
  		err = -EIO;
c9de560de   Alex Tomas   ext4: Add multi b...
4545
  		goto error_return;
ce89f46cb   Aneesh Kumar K.V   ext4: Improve err...
4546
  	}
c9de560de   Alex Tomas   ext4: Add multi b...
4547
4548
4549
4550
  
  	if (in_range(ext4_block_bitmap(sb, gdp), block, count) ||
  	    in_range(ext4_inode_bitmap(sb, gdp), block, count) ||
  	    in_range(block, ext4_inode_table(sb, gdp),
84130193e   Theodore Ts'o   ext4: teach ext4_...
4551
  		     EXT4_SB(sb)->s_itb_per_group) ||
c9de560de   Alex Tomas   ext4: Add multi b...
4552
  	    in_range(block + count - 1, ext4_inode_table(sb, gdp),
84130193e   Theodore Ts'o   ext4: teach ext4_...
4553
  		     EXT4_SB(sb)->s_itb_per_group)) {
c9de560de   Alex Tomas   ext4: Add multi b...
4554

12062dddd   Eric Sandeen   ext4: move __func...
4555
  		ext4_error(sb, "Freeing blocks in system zone - "
0610b6e99   Theodore Ts'o   ext4: Fix 64-bit ...
4556
  			   "Block = %llu, count = %lu", block, count);
519deca04   Aneesh Kumar K.V   ext4: Retry block...
4557
4558
  		/* err = 0. ext4_std_error should be a no op */
  		goto error_return;
c9de560de   Alex Tomas   ext4: Add multi b...
4559
4560
4561
4562
4563
4564
4565
4566
4567
4568
4569
4570
4571
4572
4573
4574
  	}
  
  	BUFFER_TRACE(bitmap_bh, "getting write access");
  	err = ext4_journal_get_write_access(handle, bitmap_bh);
  	if (err)
  		goto error_return;
  
  	/*
  	 * We are about to modify some metadata.  Call the journal APIs
  	 * to unshare ->b_data if a currently-committing transaction is
  	 * using it
  	 */
  	BUFFER_TRACE(gd_bh, "get_write_access");
  	err = ext4_journal_get_write_access(handle, gd_bh);
  	if (err)
  		goto error_return;
c9de560de   Alex Tomas   ext4: Add multi b...
4575
4576
4577
  #ifdef AGGRESSIVE_CHECK
  	{
  		int i;
84130193e   Theodore Ts'o   ext4: teach ext4_...
4578
  		for (i = 0; i < count_clusters; i++)
c9de560de   Alex Tomas   ext4: Add multi b...
4579
4580
4581
  			BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
  	}
  #endif
84130193e   Theodore Ts'o   ext4: teach ext4_...
4582
  	trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters);
c9de560de   Alex Tomas   ext4: Add multi b...
4583

920313a72   Aneesh Kumar K.V   ext4: Use EXT4_GR...
4584
4585
4586
  	err = ext4_mb_load_buddy(sb, block_group, &e4b);
  	if (err)
  		goto error_return;
e6362609b   Theodore Ts'o   ext4: call ext4_f...
4587
4588
  
  	if ((flags & EXT4_FREE_BLOCKS_METADATA) && ext4_handle_valid(handle)) {
7a2fcbf7f   Aneesh Kumar K.V   ext4: don't use b...
4589
4590
4591
4592
4593
  		struct ext4_free_data *new_entry;
  		/*
  		 * blocks being freed are metadata. these blocks shouldn't
  		 * be used until this transaction is committed
  		 */
b72143ab3   Theodore Ts'o   ext4: Add error c...
4594
4595
4596
4597
4598
  		new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
  		if (!new_entry) {
  			err = -ENOMEM;
  			goto error_return;
  		}
84130193e   Theodore Ts'o   ext4: teach ext4_...
4599
  		new_entry->start_cluster = bit;
7a2fcbf7f   Aneesh Kumar K.V   ext4: don't use b...
4600
  		new_entry->group  = block_group;
84130193e   Theodore Ts'o   ext4: teach ext4_...
4601
  		new_entry->count = count_clusters;
7a2fcbf7f   Aneesh Kumar K.V   ext4: don't use b...
4602
  		new_entry->t_tid = handle->h_transaction->t_tid;
955ce5f5b   Aneesh Kumar K.V   ext4: Convert ext...
4603

7a2fcbf7f   Aneesh Kumar K.V   ext4: don't use b...
4604
  		ext4_lock_group(sb, block_group);
84130193e   Theodore Ts'o   ext4: teach ext4_...
4605
  		mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
7a2fcbf7f   Aneesh Kumar K.V   ext4: don't use b...
4606
  		ext4_mb_free_metadata(handle, &e4b, new_entry);
c9de560de   Alex Tomas   ext4: Add multi b...
4607
  	} else {
7a2fcbf7f   Aneesh Kumar K.V   ext4: don't use b...
4608
4609
4610
4611
  		/* need to update group_info->bb_free and bitmap
  		 * with group lock held. generate_buddy look at
  		 * them with group lock_held
  		 */
955ce5f5b   Aneesh Kumar K.V   ext4: Convert ext...
4612
  		ext4_lock_group(sb, block_group);
84130193e   Theodore Ts'o   ext4: teach ext4_...
4613
4614
  		mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
  		mb_free_blocks(inode, &e4b, bit, count_clusters);
c9de560de   Alex Tomas   ext4: Add multi b...
4615
  	}
021b65bb1   Theodore Ts'o   ext4: Rename ext4...
4616
4617
  	ret = ext4_free_group_clusters(sb, gdp) + count_clusters;
  	ext4_free_group_clusters_set(sb, gdp, ret);
c9de560de   Alex Tomas   ext4: Add multi b...
4618
  	gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
955ce5f5b   Aneesh Kumar K.V   ext4: Convert ext...
4619
  	ext4_unlock_group(sb, block_group);
570426518   Theodore Ts'o   ext4: convert s_{...
4620
  	percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters);
c9de560de   Alex Tomas   ext4: Add multi b...
4621

772cb7c83   Jose R. Santos   ext4: New inode a...
4622
4623
  	if (sbi->s_log_groups_per_flex) {
  		ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
24aaa8ef4   Theodore Ts'o   ext4: convert the...
4624
4625
  		atomic_add(count_clusters,
  			   &sbi->s_flex_groups[flex_group].free_clusters);
772cb7c83   Jose R. Santos   ext4: New inode a...
4626
  	}
e39e07fdf   Jing Zhang   ext4: rename ext4...
4627
  	ext4_mb_unload_buddy(&e4b);
c9de560de   Alex Tomas   ext4: Add multi b...
4628

443387113   Theodore Ts'o   ext4: fold ext4_f...
4629
  	freed += count;
c9de560de   Alex Tomas   ext4: Add multi b...
4630

7b415bf60   Aditya Kali   ext4: Fix bigallo...
4631
4632
  	if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
  		dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
7a2fcbf7f   Aneesh Kumar K.V   ext4: don't use b...
4633
4634
4635
  	/* We dirtied the bitmap block */
  	BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
  	err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
c9de560de   Alex Tomas   ext4: Add multi b...
4636
4637
  	/* And the group descriptor block */
  	BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
0390131ba   Frank Mayhar   ext4: Allow ext4 ...
4638
  	ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
c9de560de   Alex Tomas   ext4: Add multi b...
4639
4640
4641
4642
4643
4644
4645
4646
4647
  	if (!err)
  		err = ret;
  
  	if (overflow && !err) {
  		block += count;
  		count = overflow;
  		put_bh(bitmap_bh);
  		goto do_more;
  	}
a0375156c   Theodore Ts'o   ext4: Clean up s_...
4648
  	ext4_mark_super_dirty(sb);
c9de560de   Alex Tomas   ext4: Add multi b...
4649
4650
4651
4652
4653
  error_return:
  	brelse(bitmap_bh);
  	ext4_std_error(sb, err);
  	return;
  }
7360d1731   Lukas Czerner   ext4: Add batched...
4654
4655
  
  /**
0529155e8   Yongqiang Yang   ext4: rename ext4...
4656
   * ext4_group_add_blocks() -- Add given blocks to an existing group
2846e8200   Amir Goldstein   ext4: move ext4_a...
4657
4658
4659
4660
4661
   * @handle:			handle to this transaction
   * @sb:				super block
   * @block:			start physcial block to add to the block group
   * @count:			number of blocks to free
   *
e73a347b7   Amir Goldstein   ext4: implement e...
4662
   * This marks the blocks as free in the bitmap and buddy.
2846e8200   Amir Goldstein   ext4: move ext4_a...
4663
   */
cc7365dfe   Yongqiang Yang   ext4: let ext4_gr...
4664
  int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
2846e8200   Amir Goldstein   ext4: move ext4_a...
4665
4666
4667
4668
4669
4670
4671
4672
4673
  			 ext4_fsblk_t block, unsigned long count)
  {
  	struct buffer_head *bitmap_bh = NULL;
  	struct buffer_head *gd_bh;
  	ext4_group_t block_group;
  	ext4_grpblk_t bit;
  	unsigned int i;
  	struct ext4_group_desc *desc;
  	struct ext4_sb_info *sbi = EXT4_SB(sb);
e73a347b7   Amir Goldstein   ext4: implement e...
4674
  	struct ext4_buddy e4b;
2846e8200   Amir Goldstein   ext4: move ext4_a...
4675
4676
  	int err = 0, ret, blk_free_count;
  	ext4_grpblk_t blocks_freed;
2846e8200   Amir Goldstein   ext4: move ext4_a...
4677
4678
4679
  
  	ext4_debug("Adding block(s) %llu-%llu
  ", block, block + count - 1);
4740b830e   Yongqiang Yang   ext4: let ext4_gr...
4680
4681
  	if (count == 0)
  		return 0;
2846e8200   Amir Goldstein   ext4: move ext4_a...
4682
  	ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
2846e8200   Amir Goldstein   ext4: move ext4_a...
4683
4684
4685
4686
  	/*
  	 * Check to see if we are freeing blocks across a group
  	 * boundary.
  	 */
cc7365dfe   Yongqiang Yang   ext4: let ext4_gr...
4687
4688
4689
4690
4691
  	if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) {
  		ext4_warning(sb, "too much blocks added to group %u
  ",
  			     block_group);
  		err = -EINVAL;
2846e8200   Amir Goldstein   ext4: move ext4_a...
4692
  		goto error_return;
cc7365dfe   Yongqiang Yang   ext4: let ext4_gr...
4693
  	}
2cd05cc39   Theodore Ts'o   ext4: remove unne...
4694

2846e8200   Amir Goldstein   ext4: move ext4_a...
4695
  	bitmap_bh = ext4_read_block_bitmap(sb, block_group);
cc7365dfe   Yongqiang Yang   ext4: let ext4_gr...
4696
4697
  	if (!bitmap_bh) {
  		err = -EIO;
2846e8200   Amir Goldstein   ext4: move ext4_a...
4698
  		goto error_return;
cc7365dfe   Yongqiang Yang   ext4: let ext4_gr...
4699
  	}
2846e8200   Amir Goldstein   ext4: move ext4_a...
4700
  	desc = ext4_get_group_desc(sb, block_group, &gd_bh);
cc7365dfe   Yongqiang Yang   ext4: let ext4_gr...
4701
4702
  	if (!desc) {
  		err = -EIO;
2846e8200   Amir Goldstein   ext4: move ext4_a...
4703
  		goto error_return;
cc7365dfe   Yongqiang Yang   ext4: let ext4_gr...
4704
  	}
2846e8200   Amir Goldstein   ext4: move ext4_a...
4705
4706
4707
4708
4709
4710
4711
4712
4713
  
  	if (in_range(ext4_block_bitmap(sb, desc), block, count) ||
  	    in_range(ext4_inode_bitmap(sb, desc), block, count) ||
  	    in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
  	    in_range(block + count - 1, ext4_inode_table(sb, desc),
  		     sbi->s_itb_per_group)) {
  		ext4_error(sb, "Adding blocks in system zones - "
  			   "Block = %llu, count = %lu",
  			   block, count);
cc7365dfe   Yongqiang Yang   ext4: let ext4_gr...
4714
  		err = -EINVAL;
2846e8200   Amir Goldstein   ext4: move ext4_a...
4715
4716
  		goto error_return;
  	}
2cd05cc39   Theodore Ts'o   ext4: remove unne...
4717
4718
  	BUFFER_TRACE(bitmap_bh, "getting write access");
  	err = ext4_journal_get_write_access(handle, bitmap_bh);
2846e8200   Amir Goldstein   ext4: move ext4_a...
4719
4720
4721
4722
4723
4724
4725
4726
4727
4728
4729
4730
  	if (err)
  		goto error_return;
  
  	/*
  	 * We are about to modify some metadata.  Call the journal APIs
  	 * to unshare ->b_data if a currently-committing transaction is
  	 * using it
  	 */
  	BUFFER_TRACE(gd_bh, "get_write_access");
  	err = ext4_journal_get_write_access(handle, gd_bh);
  	if (err)
  		goto error_return;
e73a347b7   Amir Goldstein   ext4: implement e...
4731

2846e8200   Amir Goldstein   ext4: move ext4_a...
4732
4733
  	for (i = 0, blocks_freed = 0; i < count; i++) {
  		BUFFER_TRACE(bitmap_bh, "clear bit");
e73a347b7   Amir Goldstein   ext4: implement e...
4734
  		if (!mb_test_bit(bit + i, bitmap_bh->b_data)) {
2846e8200   Amir Goldstein   ext4: move ext4_a...
4735
4736
4737
4738
4739
4740
4741
  			ext4_error(sb, "bit already cleared for block %llu",
  				   (ext4_fsblk_t)(block + i));
  			BUFFER_TRACE(bitmap_bh, "bit already cleared");
  		} else {
  			blocks_freed++;
  		}
  	}
e73a347b7   Amir Goldstein   ext4: implement e...
4742
4743
4744
4745
4746
4747
4748
4749
4750
4751
  
  	err = ext4_mb_load_buddy(sb, block_group, &e4b);
  	if (err)
  		goto error_return;
  
  	/*
  	 * need to update group_info->bb_free and bitmap
  	 * with group lock held. generate_buddy look at
  	 * them with group lock_held
  	 */
2846e8200   Amir Goldstein   ext4: move ext4_a...
4752
  	ext4_lock_group(sb, block_group);
e73a347b7   Amir Goldstein   ext4: implement e...
4753
4754
  	mb_clear_bits(bitmap_bh->b_data, bit, count);
  	mb_free_blocks(NULL, &e4b, bit, count);
021b65bb1   Theodore Ts'o   ext4: Rename ext4...
4755
4756
  	blk_free_count = blocks_freed + ext4_free_group_clusters(sb, desc);
  	ext4_free_group_clusters_set(sb, desc, blk_free_count);
2846e8200   Amir Goldstein   ext4: move ext4_a...
4757
4758
  	desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
  	ext4_unlock_group(sb, block_group);
570426518   Theodore Ts'o   ext4: convert s_{...
4759
4760
  	percpu_counter_add(&sbi->s_freeclusters_counter,
  			   EXT4_B2C(sbi, blocks_freed));
2846e8200   Amir Goldstein   ext4: move ext4_a...
4761
4762
4763
  
  	if (sbi->s_log_groups_per_flex) {
  		ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
24aaa8ef4   Theodore Ts'o   ext4: convert the...
4764
4765
  		atomic_add(EXT4_B2C(sbi, blocks_freed),
  			   &sbi->s_flex_groups[flex_group].free_clusters);
2846e8200   Amir Goldstein   ext4: move ext4_a...
4766
  	}
e73a347b7   Amir Goldstein   ext4: implement e...
4767
4768
  
  	ext4_mb_unload_buddy(&e4b);
2846e8200   Amir Goldstein   ext4: move ext4_a...
4769
4770
4771
4772
4773
4774
4775
4776
4777
4778
4779
4780
4781
4782
  
  	/* We dirtied the bitmap block */
  	BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
  	err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
  
  	/* And the group descriptor block */
  	BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
  	ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
  	if (!err)
  		err = ret;
  
  error_return:
  	brelse(bitmap_bh);
  	ext4_std_error(sb, err);
cc7365dfe   Yongqiang Yang   ext4: let ext4_gr...
4783
  	return err;
2846e8200   Amir Goldstein   ext4: move ext4_a...
4784
4785
4786
  }
  
  /**
7360d1731   Lukas Czerner   ext4: Add batched...
4787
4788
4789
4790
4791
4792
4793
4794
4795
4796
4797
   * ext4_trim_extent -- function to TRIM one single free extent in the group
   * @sb:		super block for the file system
   * @start:	starting block of the free extent in the alloc. group
   * @count:	number of blocks to TRIM
   * @group:	alloc. group we are working with
   * @e4b:	ext4 buddy for the group
   *
   * Trim "count" blocks starting at "start" in the "group". To assure that no
   * one will allocate those blocks, mark it as used in buddy bitmap. This must
   * be called with under the group lock.
   */
d9f34504e   Theodore Ts'o   ext4: ignore erro...
4798
4799
  static void ext4_trim_extent(struct super_block *sb, int start, int count,
  			     ext4_group_t group, struct ext4_buddy *e4b)
7360d1731   Lukas Czerner   ext4: Add batched...
4800
4801
  {
  	struct ext4_free_extent ex;
7360d1731   Lukas Czerner   ext4: Add batched...
4802

b3d4c2b10   Tao Ma   ext4: Add new ext...
4803
  	trace_ext4_trim_extent(sb, group, start, count);
7360d1731   Lukas Czerner   ext4: Add batched...
4804
4805
4806
4807
4808
4809
4810
4811
4812
4813
4814
4815
  	assert_spin_locked(ext4_group_lock_ptr(sb, group));
  
  	ex.fe_start = start;
  	ex.fe_group = group;
  	ex.fe_len = count;
  
  	/*
  	 * Mark blocks used, so no one can reuse them while
  	 * being trimmed.
  	 */
  	mb_mark_used(e4b, &ex);
  	ext4_unlock_group(sb, group);
d9f34504e   Theodore Ts'o   ext4: ignore erro...
4816
  	ext4_issue_discard(sb, group, start, count);
7360d1731   Lukas Czerner   ext4: Add batched...
4817
4818
  	ext4_lock_group(sb, group);
  	mb_free_blocks(NULL, e4b, start, ex.fe_len);
7360d1731   Lukas Czerner   ext4: Add batched...
4819
4820
4821
4822
4823
  }
  
  /**
   * ext4_trim_all_free -- function to trim all free space in alloc. group
   * @sb:			super block for file system
22612283f   Tao Ma   ext4: Change the ...
4824
   * @group:		group to be trimmed
7360d1731   Lukas Czerner   ext4: Add batched...
4825
4826
4827
4828
4829
4830
4831
4832
4833
4834
4835
4836
4837
4838
   * @start:		first group block to examine
   * @max:		last group block to examine
   * @minblocks:		minimum extent block count
   *
   * ext4_trim_all_free walks through group's buddy bitmap searching for free
   * extents. When the free block is found, ext4_trim_extent is called to TRIM
   * the extent.
   *
   *
   * ext4_trim_all_free walks through group's block bitmap searching for free
   * extents. When the free extent is found, mark it as used in group buddy
   * bitmap. Then issue a TRIM command on this extent and free the extent in
   * the group buddy bitmap. This is done until whole group is scanned.
   */
0b75a8401   Lukas Czerner   ext4: mark file-l...
4839
  static ext4_grpblk_t
789440866   Lukas Czerner   ext4: only load b...
4840
4841
4842
  ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
  		   ext4_grpblk_t start, ext4_grpblk_t max,
  		   ext4_grpblk_t minblocks)
7360d1731   Lukas Czerner   ext4: Add batched...
4843
4844
  {
  	void *bitmap;
169ddc3ec   Tao Ma   ext4: speed up gr...
4845
  	ext4_grpblk_t next, count = 0, free_count = 0;
789440866   Lukas Czerner   ext4: only load b...
4846
4847
  	struct ext4_buddy e4b;
  	int ret;
7360d1731   Lukas Czerner   ext4: Add batched...
4848

b3d4c2b10   Tao Ma   ext4: Add new ext...
4849
  	trace_ext4_trim_all_free(sb, group, start, max);
789440866   Lukas Czerner   ext4: only load b...
4850
4851
4852
4853
4854
4855
  	ret = ext4_mb_load_buddy(sb, group, &e4b);
  	if (ret) {
  		ext4_error(sb, "Error in loading buddy "
  				"information for %u", group);
  		return ret;
  	}
789440866   Lukas Czerner   ext4: only load b...
4856
  	bitmap = e4b.bd_bitmap;
28739eea9   Lukas Czerner   ext4: protect bb_...
4857
4858
  
  	ext4_lock_group(sb, group);
3d56b8d2c   Tao Ma   ext4: Speed up FI...
4859
4860
4861
  	if (EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) &&
  	    minblocks >= atomic_read(&EXT4_SB(sb)->s_last_trim_minblks))
  		goto out;
789440866   Lukas Czerner   ext4: only load b...
4862
4863
  	start = (e4b.bd_info->bb_first_free > start) ?
  		e4b.bd_info->bb_first_free : start;
7360d1731   Lukas Czerner   ext4: Add batched...
4864
4865
4866
4867
4868
4869
4870
4871
  
  	while (start < max) {
  		start = mb_find_next_zero_bit(bitmap, max, start);
  		if (start >= max)
  			break;
  		next = mb_find_next_bit(bitmap, max, start);
  
  		if ((next - start) >= minblocks) {
d9f34504e   Theodore Ts'o   ext4: ignore erro...
4872
  			ext4_trim_extent(sb, start,
789440866   Lukas Czerner   ext4: only load b...
4873
  					 next - start, group, &e4b);
7360d1731   Lukas Czerner   ext4: Add batched...
4874
4875
  			count += next - start;
  		}
169ddc3ec   Tao Ma   ext4: speed up gr...
4876
  		free_count += next - start;
7360d1731   Lukas Czerner   ext4: Add batched...
4877
4878
4879
4880
4881
4882
4883
4884
4885
4886
4887
4888
  		start = next + 1;
  
  		if (fatal_signal_pending(current)) {
  			count = -ERESTARTSYS;
  			break;
  		}
  
  		if (need_resched()) {
  			ext4_unlock_group(sb, group);
  			cond_resched();
  			ext4_lock_group(sb, group);
  		}
169ddc3ec   Tao Ma   ext4: speed up gr...
4889
  		if ((e4b.bd_info->bb_free - free_count) < minblocks)
7360d1731   Lukas Czerner   ext4: Add batched...
4890
4891
  			break;
  	}
3d56b8d2c   Tao Ma   ext4: Speed up FI...
4892
4893
4894
4895
  
  	if (!ret)
  		EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info);
  out:
7360d1731   Lukas Czerner   ext4: Add batched...
4896
  	ext4_unlock_group(sb, group);
789440866   Lukas Czerner   ext4: only load b...
4897
  	ext4_mb_unload_buddy(&e4b);
7360d1731   Lukas Czerner   ext4: Add batched...
4898
4899
4900
4901
  
  	ext4_debug("trimmed %d blocks in the group %d
  ",
  		count, group);
7360d1731   Lukas Czerner   ext4: Add batched...
4902
4903
4904
4905
4906
4907
4908
4909
4910
4911
4912
4913
4914
4915
4916
4917
4918
  	return count;
  }
  
  /**
   * ext4_trim_fs() -- trim ioctl handle function
   * @sb:			superblock for filesystem
   * @range:		fstrim_range structure
   *
   * start:	First Byte to trim
   * len:		number of Bytes to trim from start
   * minlen:	minimum extent length in Bytes
   * ext4_trim_fs goes through all allocation groups containing Bytes from
   * start to start+len. For each such a group ext4_trim_all_free function
   * is invoked to trim all free space.
   */
  int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
  {
789440866   Lukas Czerner   ext4: only load b...
4919
  	struct ext4_group_info *grp;
7360d1731   Lukas Czerner   ext4: Add batched...
4920
4921
  	ext4_group_t first_group, last_group;
  	ext4_group_t group, ngroups = ext4_get_groups_count(sb);
7137d7a48   Theodore Ts'o   ext4: convert ins...
4922
  	ext4_grpblk_t cnt = 0, first_cluster, last_cluster;
789440866   Lukas Czerner   ext4: only load b...
4923
  	uint64_t start, len, minlen, trimmed = 0;
0f0a25bf5   Jan Kara   ext4: fix trimmin...
4924
4925
  	ext4_fsblk_t first_data_blk =
  			le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
7360d1731   Lukas Czerner   ext4: Add batched...
4926
4927
4928
4929
4930
  	int ret = 0;
  
  	start = range->start >> sb->s_blocksize_bits;
  	len = range->len >> sb->s_blocksize_bits;
  	minlen = range->minlen >> sb->s_blocksize_bits;
7360d1731   Lukas Czerner   ext4: Add batched...
4931

7137d7a48   Theodore Ts'o   ext4: convert ins...
4932
  	if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb)))
7360d1731   Lukas Czerner   ext4: Add batched...
4933
  		return -EINVAL;
22f104574   Tao Ma   ext4: fix trim le...
4934
4935
  	if (start + len <= first_data_blk)
  		goto out;
0f0a25bf5   Jan Kara   ext4: fix trimmin...
4936
4937
4938
4939
  	if (start < first_data_blk) {
  		len -= first_data_blk - start;
  		start = first_data_blk;
  	}
7360d1731   Lukas Czerner   ext4: Add batched...
4940
4941
4942
  
  	/* Determine first and last group to examine based on start and len */
  	ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
7137d7a48   Theodore Ts'o   ext4: convert ins...
4943
  				     &first_group, &first_cluster);
7360d1731   Lukas Czerner   ext4: Add batched...
4944
  	ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) (start + len),
7137d7a48   Theodore Ts'o   ext4: convert ins...
4945
  				     &last_group, &last_cluster);
7360d1731   Lukas Czerner   ext4: Add batched...
4946
  	last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group;
7137d7a48   Theodore Ts'o   ext4: convert ins...
4947
  	last_cluster = EXT4_CLUSTERS_PER_GROUP(sb);
7360d1731   Lukas Czerner   ext4: Add batched...
4948
4949
4950
4951
4952
  
  	if (first_group > last_group)
  		return -EINVAL;
  
  	for (group = first_group; group <= last_group; group++) {
789440866   Lukas Czerner   ext4: only load b...
4953
4954
4955
4956
4957
4958
  		grp = ext4_get_group_info(sb, group);
  		/* We only do this if the grp has never been initialized */
  		if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
  			ret = ext4_mb_init_group(sb, group);
  			if (ret)
  				break;
7360d1731   Lukas Czerner   ext4: Add batched...
4959
  		}
0ba085171   Tao Ma   ext4: fix a BUG i...
4960
4961
4962
4963
4964
4965
  		/*
  		 * For all the groups except the last one, last block will
  		 * always be EXT4_BLOCKS_PER_GROUP(sb), so we only need to
  		 * change it for the last group in which case start +
  		 * len < EXT4_BLOCKS_PER_GROUP(sb).
  		 */
7137d7a48   Theodore Ts'o   ext4: convert ins...
4966
4967
4968
  		if (first_cluster + len < EXT4_CLUSTERS_PER_GROUP(sb))
  			last_cluster = first_cluster + len;
  		len -= last_cluster - first_cluster;
7360d1731   Lukas Czerner   ext4: Add batched...
4969

789440866   Lukas Czerner   ext4: only load b...
4970
  		if (grp->bb_free >= minlen) {
7137d7a48   Theodore Ts'o   ext4: convert ins...
4971
4972
  			cnt = ext4_trim_all_free(sb, group, first_cluster,
  						last_cluster, minlen);
7360d1731   Lukas Czerner   ext4: Add batched...
4973
4974
  			if (cnt < 0) {
  				ret = cnt;
7360d1731   Lukas Czerner   ext4: Add batched...
4975
4976
4977
  				break;
  			}
  		}
7360d1731   Lukas Czerner   ext4: Add batched...
4978
  		trimmed += cnt;
7137d7a48   Theodore Ts'o   ext4: convert ins...
4979
  		first_cluster = 0;
7360d1731   Lukas Czerner   ext4: Add batched...
4980
4981
  	}
  	range->len = trimmed * sb->s_blocksize;
3d56b8d2c   Tao Ma   ext4: Speed up FI...
4982
4983
  	if (!ret)
  		atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen);
22f104574   Tao Ma   ext4: fix trim le...
4984
  out:
7360d1731   Lukas Czerner   ext4: Add batched...
4985
4986
  	return ret;
  }