Commit feaf222925cdfbc841a695fd30df8c6d0a694146

Authored by Linus Torvalds

Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 fixes from Ted Ts'o:
 "Ext4 bug fixes.

  We also reserved code points for encryption and read-only images (for
  which the implementation is mostly just the reserved code point for a
  read-only feature :-)"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: fix indirect punch hole corruption
  ext4: ignore journal checksum on remount; don't fail
  ext4: remove duplicate remount check for JOURNAL_CHECKSUM change
  ext4: fix mmap data corruption in nodelalloc mode when blocksize < pagesize
  ext4: support read-only images
  ext4: change to use setup_timer() instead of init_timer()
  ext4: reserve codepoints used by the ext4 encryption feature
  jbd2: complain about descriptor block checksum errors

Showing 1 of 5 changed files, inline diff (truncated): fs/ext4/ext4.h — changed lines are marked with -/+.

/*
 *  ext4.h
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  from
 *
 *  linux/include/linux/minix_fs.h
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#ifndef _EXT4_H
#define _EXT4_H

#include <linux/types.h>
#include <linux/blkdev.h>
#include <linux/magic.h>
#include <linux/jbd2.h>
#include <linux/quota.h>
#include <linux/rwsem.h>
#include <linux/rbtree.h>
#include <linux/seqlock.h>
#include <linux/mutex.h>
#include <linux/timer.h>
#include <linux/wait.h>
#include <linux/blockgroup_lock.h>
#include <linux/percpu_counter.h>
#include <linux/ratelimit.h>
#include <crypto/hash.h>
#include <linux/falloc.h>
#ifdef __KERNEL__
#include <linux/compat.h>
#endif

/*
 * The fourth extended filesystem constants/structures
 */

/*
 * Define EXT4FS_DEBUG to produce debug messages
 */
#undef EXT4FS_DEBUG

/*
 * Debug code
 */
#ifdef EXT4FS_DEBUG
#define ext4_debug(f, a...)                                             \
        do {                                                            \
                printk(KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:",        \
                        __FILE__, __LINE__, __func__);                  \
                printk(KERN_DEBUG f, ## a);                             \
        } while (0)
#else
#define ext4_debug(fmt, ...)    no_printk(fmt, ##__VA_ARGS__)
#endif

/*
 * Turn on EXT_DEBUG to get lots of info about extents operations.
 */
#define EXT_DEBUG__
#ifdef EXT_DEBUG
#define ext_debug(fmt, ...)     printk(fmt, ##__VA_ARGS__)
#else
#define ext_debug(fmt, ...)     no_printk(fmt, ##__VA_ARGS__)
#endif

#define EXT4_ERROR_INODE(inode, fmt, a...) \
        ext4_error_inode((inode), __func__, __LINE__, 0, (fmt), ## a)

#define EXT4_ERROR_INODE_BLOCK(inode, block, fmt, a...) \
        ext4_error_inode((inode), __func__, __LINE__, (block), (fmt), ## a)

#define EXT4_ERROR_FILE(file, block, fmt, a...) \
        ext4_error_file((file), __func__, __LINE__, (block), (fmt), ## a)

/* data type for block offset of block group */
typedef int ext4_grpblk_t;

/* data type for filesystem-wide blocks number */
typedef unsigned long long ext4_fsblk_t;

/* data type for file logical block number */
typedef __u32 ext4_lblk_t;

/* data type for block group number */
typedef unsigned int ext4_group_t;

/*
 * Flags used in mballoc's allocation_context flags field.
 *
 * Also used to show what's going on for debugging purposes when the
 * flag field is exported via the traceport interface
 */

/* prefer goal again. length */
#define EXT4_MB_HINT_MERGE              0x0001
/* blocks already reserved */
#define EXT4_MB_HINT_RESERVED           0x0002
/* metadata is being allocated */
#define EXT4_MB_HINT_METADATA           0x0004
/* first blocks in the file */
#define EXT4_MB_HINT_FIRST              0x0008
/* search for the best chunk */
#define EXT4_MB_HINT_BEST               0x0010
/* data is being allocated */
#define EXT4_MB_HINT_DATA               0x0020
/* don't preallocate (for tails) */
#define EXT4_MB_HINT_NOPREALLOC         0x0040
/* allocate for locality group */
#define EXT4_MB_HINT_GROUP_ALLOC        0x0080
/* allocate goal blocks or none */
#define EXT4_MB_HINT_GOAL_ONLY          0x0100
/* goal is meaningful */
#define EXT4_MB_HINT_TRY_GOAL           0x0200
/* blocks already pre-reserved by delayed allocation */
#define EXT4_MB_DELALLOC_RESERVED       0x0400
/* We are doing stream allocation */
#define EXT4_MB_STREAM_ALLOC            0x0800
/* Use reserved root blocks if needed */
#define EXT4_MB_USE_ROOT_BLOCKS         0x1000
/* Use blocks from reserved pool */
#define EXT4_MB_USE_RESERVED            0x2000

struct ext4_allocation_request {
        /* target inode for block we're allocating */
        struct inode *inode;
        /* how many blocks we want to allocate */
        unsigned int len;
        /* logical block in target inode */
        ext4_lblk_t logical;
        /* the closest logical allocated block to the left */
        ext4_lblk_t lleft;
        /* the closest logical allocated block to the right */
        ext4_lblk_t lright;
        /* phys. target (a hint) */
        ext4_fsblk_t goal;
        /* phys. block for the closest logical allocated block to the left */
        ext4_fsblk_t pleft;
        /* phys. block for the closest logical allocated block to the right */
        ext4_fsblk_t pright;
        /* flags. see above EXT4_MB_HINT_* */
        unsigned int flags;
};
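
Illustration (not part of the diff): a caller fills in whatever hints it has
and hands the request to mballoc; ext4_mb_new_blocks() in fs/ext4/mballoc.c
is the consumer. The locals below (handle, inode, lblk, goal, err) are
assumed for the sketch.

        /* Sketch: ask mballoc for 8 data blocks near a physical goal. */
        struct ext4_allocation_request ar = {
                .inode   = inode,
                .logical = lblk,        /* logical block being mapped */
                .len     = 8,
                .goal    = goal,        /* physical placement hint */
                .flags   = EXT4_MB_HINT_DATA | EXT4_MB_HINT_TRY_GOAL,
        };
        ext4_fsblk_t newblock = ext4_mb_new_blocks(handle, &ar, &err);
        /* on success, ar.len holds the count actually allocated */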

/*
 * Logical to physical block mapping, used by ext4_map_blocks()
 *
 * This structure is used to pass requests into ext4_map_blocks() as
 * well as to store the information returned by ext4_map_blocks(). It
 * takes less room on the stack than a struct buffer_head.
 */
#define EXT4_MAP_NEW            (1 << BH_New)
#define EXT4_MAP_MAPPED         (1 << BH_Mapped)
#define EXT4_MAP_UNWRITTEN      (1 << BH_Unwritten)
#define EXT4_MAP_BOUNDARY       (1 << BH_Boundary)
#define EXT4_MAP_FLAGS          (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\
                                 EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY)

struct ext4_map_blocks {
        ext4_fsblk_t m_pblk;
        ext4_lblk_t m_lblk;
        unsigned int m_len;
        unsigned int m_flags;
};
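
Illustration (not part of the diff): a typical mapping call through this
structure; handle, inode, lblk, and pblk are assumed locals, and
EXT4_GET_BLOCKS_CREATE is defined further down in this header.

        struct ext4_map_blocks map = {
                .m_lblk = lblk,         /* first logical block to map */
                .m_len  = 16,           /* number of blocks wanted */
        };
        int ret = ext4_map_blocks(handle, inode, &map,
                                  EXT4_GET_BLOCKS_CREATE);
        if (ret > 0 && (map.m_flags & EXT4_MAP_MAPPED))
                pblk = map.m_pblk;      /* physical start; ret blocks mapped */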

/*
 * Flags for ext4_io_end->flags
 */
#define EXT4_IO_END_UNWRITTEN   0x0001

/*
 * For converting unwritten extents on a work queue. 'handle' is used for
 * buffered writeback.
 */
typedef struct ext4_io_end {
        struct list_head        list;           /* per-file finished IO list */
        handle_t                *handle;        /* handle reserved for extent
                                                 * conversion */
        struct inode            *inode;         /* file being written to */
        struct bio              *bio;           /* Linked list of completed
                                                 * bios covering the extent */
        unsigned int            flag;           /* unwritten or not */
        loff_t                  offset;         /* offset in the file */
        ssize_t                 size;           /* size of the extent */
        atomic_t                count;          /* reference counter */
} ext4_io_end_t;

struct ext4_io_submit {
        int                     io_op;
        struct bio              *io_bio;
        ext4_io_end_t           *io_end;
        sector_t                io_next_block;
};

/*
 * Special inodes numbers
 */
#define EXT4_BAD_INO             1      /* Bad blocks inode */
#define EXT4_ROOT_INO            2      /* Root inode */
#define EXT4_USR_QUOTA_INO       3      /* User quota inode */
#define EXT4_GRP_QUOTA_INO       4      /* Group quota inode */
#define EXT4_BOOT_LOADER_INO     5      /* Boot loader inode */
#define EXT4_UNDEL_DIR_INO       6      /* Undelete directory inode */
#define EXT4_RESIZE_INO          7      /* Reserved group descriptors inode */
#define EXT4_JOURNAL_INO         8      /* Journal inode */

/* First non-reserved inode for old ext4 filesystems */
#define EXT4_GOOD_OLD_FIRST_INO 11

/*
 * Maximal count of links to a file
 */
#define EXT4_LINK_MAX           65000

/*
 * Macro-instructions used to manage several block sizes
 */
#define EXT4_MIN_BLOCK_SIZE             1024
#define EXT4_MAX_BLOCK_SIZE             65536
#define EXT4_MIN_BLOCK_LOG_SIZE         10
#define EXT4_MAX_BLOCK_LOG_SIZE         16
#ifdef __KERNEL__
# define EXT4_BLOCK_SIZE(s)             ((s)->s_blocksize)
#else
# define EXT4_BLOCK_SIZE(s)             (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size)
#endif
#define EXT4_ADDR_PER_BLOCK(s)          (EXT4_BLOCK_SIZE(s) / sizeof(__u32))
#define EXT4_CLUSTER_SIZE(s)            (EXT4_BLOCK_SIZE(s) << \
                                         EXT4_SB(s)->s_cluster_bits)
#ifdef __KERNEL__
# define EXT4_BLOCK_SIZE_BITS(s)        ((s)->s_blocksize_bits)
# define EXT4_CLUSTER_BITS(s)           (EXT4_SB(s)->s_cluster_bits)
#else
# define EXT4_BLOCK_SIZE_BITS(s)        ((s)->s_log_block_size + 10)
#endif
#ifdef __KERNEL__
#define EXT4_ADDR_PER_BLOCK_BITS(s)     (EXT4_SB(s)->s_addr_per_block_bits)
#define EXT4_INODE_SIZE(s)              (EXT4_SB(s)->s_inode_size)
#define EXT4_FIRST_INO(s)               (EXT4_SB(s)->s_first_ino)
#else
#define EXT4_INODE_SIZE(s)      (((s)->s_rev_level == EXT4_GOOD_OLD_REV) ? \
                                 EXT4_GOOD_OLD_INODE_SIZE : \
                                 (s)->s_inode_size)
#define EXT4_FIRST_INO(s)       (((s)->s_rev_level == EXT4_GOOD_OLD_REV) ? \
                                 EXT4_GOOD_OLD_FIRST_INO : \
                                 (s)->s_first_ino)
#endif
#define EXT4_BLOCK_ALIGN(size, blkbits)         ALIGN((size), (1 << (blkbits)))

/* Translate a block number to a cluster number */
#define EXT4_B2C(sbi, blk)      ((blk) >> (sbi)->s_cluster_bits)
/* Translate a cluster number to a block number */
#define EXT4_C2B(sbi, cluster)  ((cluster) << (sbi)->s_cluster_bits)
/* Translate # of blks to # of clusters */
#define EXT4_NUM_B2C(sbi, blks) (((blks) + (sbi)->s_cluster_ratio - 1) >> \
                                 (sbi)->s_cluster_bits)
/* Mask out the low bits to get the starting block of the cluster */
#define EXT4_PBLK_CMASK(s, pblk) ((pblk) &                              \
                                  ~((ext4_fsblk_t) (s)->s_cluster_ratio - 1))
#define EXT4_LBLK_CMASK(s, lblk) ((lblk) &                              \
                                  ~((ext4_lblk_t) (s)->s_cluster_ratio - 1))
/* Get the cluster offset */
#define EXT4_PBLK_COFF(s, pblk) ((pblk) &                               \
                                 ((ext4_fsblk_t) (s)->s_cluster_ratio - 1))
#define EXT4_LBLK_COFF(s, lblk) ((lblk) &                               \
                                 ((ext4_lblk_t) (s)->s_cluster_ratio - 1))

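Worked example (illustration, not from the commit): on a bigalloc filesystem
with s_cluster_bits == 4, i.e. s_cluster_ratio == 16 blocks per cluster, the
translation macros above evaluate to:

        EXT4_B2C(sbi, 35)       /* 35 >> 4        == 2: block 35 is in cluster 2 */
        EXT4_C2B(sbi, 2)        /* 2 << 4         == 32: cluster 2 starts at block 32 */
        EXT4_NUM_B2C(sbi, 17)   /* (17 + 15) >> 4 == 2: 17 blocks round up to 2 clusters */
        EXT4_PBLK_CMASK(s, 35)  /* 35 & ~15       == 32: first block of the cluster */
        EXT4_PBLK_COFF(s, 35)   /* 35 & 15        == 3: offset within the cluster */
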
/*
 * Structure of a blocks group descriptor
 */
struct ext4_group_desc
{
        __le32  bg_block_bitmap_lo;     /* Blocks bitmap block */
        __le32  bg_inode_bitmap_lo;     /* Inodes bitmap block */
        __le32  bg_inode_table_lo;      /* Inodes table block */
        __le16  bg_free_blocks_count_lo;/* Free blocks count */
        __le16  bg_free_inodes_count_lo;/* Free inodes count */
        __le16  bg_used_dirs_count_lo;  /* Directories count */
        __le16  bg_flags;               /* EXT4_BG_flags (INODE_UNINIT, etc) */
        __le32  bg_exclude_bitmap_lo;   /* Exclude bitmap for snapshots */
        __le16  bg_block_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bbitmap) LE */
        __le16  bg_inode_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+ibitmap) LE */
        __le16  bg_itable_unused_lo;    /* Unused inodes count */
        __le16  bg_checksum;            /* crc16(sb_uuid+group+desc) */
        __le32  bg_block_bitmap_hi;     /* Blocks bitmap block MSB */
        __le32  bg_inode_bitmap_hi;     /* Inodes bitmap block MSB */
        __le32  bg_inode_table_hi;      /* Inodes table block MSB */
        __le16  bg_free_blocks_count_hi;/* Free blocks count MSB */
        __le16  bg_free_inodes_count_hi;/* Free inodes count MSB */
        __le16  bg_used_dirs_count_hi;  /* Directories count MSB */
        __le16  bg_itable_unused_hi;    /* Unused inodes count MSB */
        __le32  bg_exclude_bitmap_hi;   /* Exclude bitmap block MSB */
        __le16  bg_block_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+bbitmap) BE */
        __le16  bg_inode_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+ibitmap) BE */
        __u32   bg_reserved;
};

#define EXT4_BG_INODE_BITMAP_CSUM_HI_END        \
        (offsetof(struct ext4_group_desc, bg_inode_bitmap_csum_hi) + \
         sizeof(__le16))
#define EXT4_BG_BLOCK_BITMAP_CSUM_HI_END        \
        (offsetof(struct ext4_group_desc, bg_block_bitmap_csum_hi) + \
         sizeof(__le16))

/*
 * Structure of a flex block group info
 */

struct flex_groups {
        atomic64_t      free_clusters;
        atomic_t        free_inodes;
        atomic_t        used_dirs;
};

#define EXT4_BG_INODE_UNINIT    0x0001 /* Inode table/bitmap not in use */
#define EXT4_BG_BLOCK_UNINIT    0x0002 /* Block bitmap not in use */
#define EXT4_BG_INODE_ZEROED    0x0004 /* On-disk itable initialized to zero */

/*
 * Macro-instructions used to manage group descriptors
 */
#define EXT4_MIN_DESC_SIZE              32
#define EXT4_MIN_DESC_SIZE_64BIT        64
#define EXT4_MAX_DESC_SIZE              EXT4_MIN_BLOCK_SIZE
#define EXT4_DESC_SIZE(s)               (EXT4_SB(s)->s_desc_size)
#ifdef __KERNEL__
# define EXT4_BLOCKS_PER_GROUP(s)       (EXT4_SB(s)->s_blocks_per_group)
# define EXT4_CLUSTERS_PER_GROUP(s)     (EXT4_SB(s)->s_clusters_per_group)
# define EXT4_DESC_PER_BLOCK(s)         (EXT4_SB(s)->s_desc_per_block)
# define EXT4_INODES_PER_GROUP(s)       (EXT4_SB(s)->s_inodes_per_group)
# define EXT4_DESC_PER_BLOCK_BITS(s)    (EXT4_SB(s)->s_desc_per_block_bits)
#else
# define EXT4_BLOCKS_PER_GROUP(s)       ((s)->s_blocks_per_group)
# define EXT4_DESC_PER_BLOCK(s)         (EXT4_BLOCK_SIZE(s) / EXT4_DESC_SIZE(s))
# define EXT4_INODES_PER_GROUP(s)       ((s)->s_inodes_per_group)
#endif

/*
 * Constants relative to the data blocks
 */
#define EXT4_NDIR_BLOCKS                12
#define EXT4_IND_BLOCK                  EXT4_NDIR_BLOCKS
#define EXT4_DIND_BLOCK                 (EXT4_IND_BLOCK + 1)
#define EXT4_TIND_BLOCK                 (EXT4_DIND_BLOCK + 1)
#define EXT4_N_BLOCKS                   (EXT4_TIND_BLOCK + 1)

/*
 * Inode flags
 */
#define EXT4_SECRM_FL                   0x00000001 /* Secure deletion */
#define EXT4_UNRM_FL                    0x00000002 /* Undelete */
#define EXT4_COMPR_FL                   0x00000004 /* Compress file */
#define EXT4_SYNC_FL                    0x00000008 /* Synchronous updates */
#define EXT4_IMMUTABLE_FL               0x00000010 /* Immutable file */
#define EXT4_APPEND_FL                  0x00000020 /* writes to file may only append */
#define EXT4_NODUMP_FL                  0x00000040 /* do not dump file */
#define EXT4_NOATIME_FL                 0x00000080 /* do not update atime */
/* Reserved for compression usage... */
#define EXT4_DIRTY_FL                   0x00000100
#define EXT4_COMPRBLK_FL                0x00000200 /* One or more compressed clusters */
#define EXT4_NOCOMPR_FL                 0x00000400 /* Don't compress */
-#define EXT4_ECOMPR_FL                  0x00000800 /* Compression error */
+/* nb: was previously EXT2_ECOMPR_FL */
+#define EXT4_ENCRYPT_FL                 0x00000800 /* encrypted file */
/* End compression flags --- maybe not all used */
#define EXT4_INDEX_FL                   0x00001000 /* hash-indexed directory */
#define EXT4_IMAGIC_FL                  0x00002000 /* AFS directory */
#define EXT4_JOURNAL_DATA_FL            0x00004000 /* file data should be journaled */
#define EXT4_NOTAIL_FL                  0x00008000 /* file tail should not be merged */
#define EXT4_DIRSYNC_FL                 0x00010000 /* dirsync behaviour (directories only) */
#define EXT4_TOPDIR_FL                  0x00020000 /* Top of directory hierarchies*/
#define EXT4_HUGE_FILE_FL               0x00040000 /* Set to each huge file */
#define EXT4_EXTENTS_FL                 0x00080000 /* Inode uses extents */
#define EXT4_EA_INODE_FL                0x00200000 /* Inode used for large EA */
#define EXT4_EOFBLOCKS_FL               0x00400000 /* Blocks allocated beyond EOF */
#define EXT4_INLINE_DATA_FL             0x10000000 /* Inode has inline data. */
#define EXT4_RESERVED_FL                0x80000000 /* reserved for ext4 lib */

#define EXT4_FL_USER_VISIBLE            0x004BDFFF /* User visible flags */
#define EXT4_FL_USER_MODIFIABLE         0x004380FF /* User modifiable flags */

/* Flags that should be inherited by new inodes from their parent. */
#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
                           EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
                           EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
                           EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL)

/* Flags that are appropriate for regular files (all but dir-specific ones). */
#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL))

/* Flags that are appropriate for non-directories/regular files. */
#define EXT4_OTHER_FLMASK (EXT4_NODUMP_FL | EXT4_NOATIME_FL)

/* Mask out flags that are inappropriate for the given type of inode. */
static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags)
{
        if (S_ISDIR(mode))
                return flags;
        else if (S_ISREG(mode))
                return flags & EXT4_REG_FLMASK;
        else
                return flags & EXT4_OTHER_FLMASK;
}
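
Worked example (illustration, not from the commit): the masking strips
type-inappropriate bits, e.g.

        ext4_mask_flags(S_IFREG, EXT4_NOATIME_FL | EXT4_DIRSYNC_FL)
                /* == EXT4_NOATIME_FL: DIRSYNC is directory-only */
        ext4_mask_flags(S_IFIFO, EXT4_NOATIME_FL | EXT4_SYNC_FL)
                /* == EXT4_NOATIME_FL: only NODUMP/NOATIME apply here */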

/*
 * Inode flags used for atomic set/get
 */
enum {
        EXT4_INODE_SECRM        = 0,    /* Secure deletion */
        EXT4_INODE_UNRM         = 1,    /* Undelete */
        EXT4_INODE_COMPR        = 2,    /* Compress file */
        EXT4_INODE_SYNC         = 3,    /* Synchronous updates */
        EXT4_INODE_IMMUTABLE    = 4,    /* Immutable file */
        EXT4_INODE_APPEND       = 5,    /* writes to file may only append */
        EXT4_INODE_NODUMP       = 6,    /* do not dump file */
        EXT4_INODE_NOATIME      = 7,    /* do not update atime */
        /* Reserved for compression usage... */
        EXT4_INODE_DIRTY        = 8,
        EXT4_INODE_COMPRBLK     = 9,    /* One or more compressed clusters */
        EXT4_INODE_NOCOMPR      = 10,   /* Don't compress */
-       EXT4_INODE_ECOMPR       = 11,   /* Compression error */
+       EXT4_INODE_ENCRYPT      = 11,   /* Compression error */
        /* End compression flags --- maybe not all used */
        EXT4_INODE_INDEX        = 12,   /* hash-indexed directory */
        EXT4_INODE_IMAGIC       = 13,   /* AFS directory */
        EXT4_INODE_JOURNAL_DATA = 14,   /* file data should be journaled */
        EXT4_INODE_NOTAIL       = 15,   /* file tail should not be merged */
        EXT4_INODE_DIRSYNC      = 16,   /* dirsync behaviour (directories only) */
        EXT4_INODE_TOPDIR       = 17,   /* Top of directory hierarchies*/
        EXT4_INODE_HUGE_FILE    = 18,   /* Set to each huge file */
        EXT4_INODE_EXTENTS      = 19,   /* Inode uses extents */
        EXT4_INODE_EA_INODE     = 21,   /* Inode used for large EA */
        EXT4_INODE_EOFBLOCKS    = 22,   /* Blocks allocated beyond EOF */
        EXT4_INODE_INLINE_DATA  = 28,   /* Data in inode. */
        EXT4_INODE_RESERVED     = 31,   /* reserved for ext4 lib */
};

/*
 * Since it's pretty easy to mix up bit numbers and hex values, we use a
 * build-time check to make sure that EXT4_XXX_FL is consistent with respect to
 * EXT4_INODE_XXX. If all is well, the macros will be dropped, so, it won't cost
 * any extra space in the compiled kernel image, otherwise, the build will fail.
 * It's important that these values are the same, since we are using
 * EXT4_INODE_XXX to test for flag values, but EXT4_XXX_FL must be consistent
 * with the values of FS_XXX_FL defined in include/linux/fs.h and the on-disk
 * values found in ext2, ext3 and ext4 filesystems, and of course the values
 * defined in e2fsprogs.
 *
 * It's not paranoia if the Murphy's Law really *is* out to get you.  :-)
 */
#define TEST_FLAG_VALUE(FLAG) (EXT4_##FLAG##_FL == (1 << EXT4_INODE_##FLAG))
#define CHECK_FLAG_VALUE(FLAG) BUILD_BUG_ON(!TEST_FLAG_VALUE(FLAG))

static inline void ext4_check_flag_values(void)
{
        CHECK_FLAG_VALUE(SECRM);
        CHECK_FLAG_VALUE(UNRM);
        CHECK_FLAG_VALUE(COMPR);
        CHECK_FLAG_VALUE(SYNC);
        CHECK_FLAG_VALUE(IMMUTABLE);
        CHECK_FLAG_VALUE(APPEND);
        CHECK_FLAG_VALUE(NODUMP);
        CHECK_FLAG_VALUE(NOATIME);
        CHECK_FLAG_VALUE(DIRTY);
        CHECK_FLAG_VALUE(COMPRBLK);
        CHECK_FLAG_VALUE(NOCOMPR);
-       CHECK_FLAG_VALUE(ECOMPR);
+       CHECK_FLAG_VALUE(ENCRYPT);
        CHECK_FLAG_VALUE(INDEX);
        CHECK_FLAG_VALUE(IMAGIC);
        CHECK_FLAG_VALUE(JOURNAL_DATA);
        CHECK_FLAG_VALUE(NOTAIL);
        CHECK_FLAG_VALUE(DIRSYNC);
        CHECK_FLAG_VALUE(TOPDIR);
        CHECK_FLAG_VALUE(HUGE_FILE);
        CHECK_FLAG_VALUE(EXTENTS);
        CHECK_FLAG_VALUE(EA_INODE);
        CHECK_FLAG_VALUE(EOFBLOCKS);
        CHECK_FLAG_VALUE(INLINE_DATA);
        CHECK_FLAG_VALUE(RESERVED);
}
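
To make the build-time check concrete for the codepoint this pull reserves,
CHECK_FLAG_VALUE(ENCRYPT) expands via TEST_FLAG_VALUE to:

        BUILD_BUG_ON(!(EXT4_ENCRYPT_FL == (1 << EXT4_INODE_ENCRYPT)));
        /* i.e. 0x00000800 == (1 << 11); if the _FL table and the bit-number
         * enum ever drift apart, the kernel fails to compile rather than
         * silently misreading on-disk flags. */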

/* Used to pass group descriptor data when online resize is done */
struct ext4_new_group_input {
        __u32 group;            /* Group number for this data */
        __u64 block_bitmap;     /* Absolute block number of block bitmap */
        __u64 inode_bitmap;     /* Absolute block number of inode bitmap */
        __u64 inode_table;      /* Absolute block number of inode table start */
        __u32 blocks_count;     /* Total number of blocks in this group */
        __u16 reserved_blocks;  /* Number of reserved blocks in this group */
        __u16 unused;
};

#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
struct compat_ext4_new_group_input {
        u32 group;
        compat_u64 block_bitmap;
        compat_u64 inode_bitmap;
        compat_u64 inode_table;
        u32 blocks_count;
        u16 reserved_blocks;
        u16 unused;
};
#endif

/* The struct ext4_new_group_input in kernel space, with free_blocks_count */
struct ext4_new_group_data {
        __u32 group;
        __u64 block_bitmap;
        __u64 inode_bitmap;
        __u64 inode_table;
        __u32 blocks_count;
        __u16 reserved_blocks;
        __u16 unused;
        __u32 free_blocks_count;
};

/* Indexes used to index group tables in ext4_new_group_data */
enum {
        BLOCK_BITMAP = 0,       /* block bitmap */
        INODE_BITMAP,           /* inode bitmap */
        INODE_TABLE,            /* inode tables */
        GROUP_TABLE_COUNT,
};

/*
 * Flags used by ext4_map_blocks()
 */
        /* Allocate any needed blocks and/or convert an unwritten
           extent to be an initialized ext4 */
#define EXT4_GET_BLOCKS_CREATE                  0x0001
        /* Request the creation of an unwritten extent */
#define EXT4_GET_BLOCKS_UNWRIT_EXT              0x0002
#define EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT       (EXT4_GET_BLOCKS_UNWRIT_EXT|\
                                                 EXT4_GET_BLOCKS_CREATE)
        /* Caller is from the delayed allocation writeout path
         * finally doing the actual allocation of delayed blocks */
#define EXT4_GET_BLOCKS_DELALLOC_RESERVE        0x0004
        /* caller is from the direct IO path, request to creation of an
           unwritten extents if not allocated, split the unwritten
           extent if blocks has been preallocated already*/
#define EXT4_GET_BLOCKS_PRE_IO                  0x0008
#define EXT4_GET_BLOCKS_CONVERT                 0x0010
#define EXT4_GET_BLOCKS_IO_CREATE_EXT           (EXT4_GET_BLOCKS_PRE_IO|\
                                                 EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT)
        /* Convert extent to initialized after IO complete */
#define EXT4_GET_BLOCKS_IO_CONVERT_EXT          (EXT4_GET_BLOCKS_CONVERT|\
                                                 EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT)
        /* Eventual metadata allocation (due to growing extent tree)
         * should not fail, so try to use reserved blocks for that.*/
#define EXT4_GET_BLOCKS_METADATA_NOFAIL         0x0020
        /* Don't normalize allocation size (used for fallocate) */
#define EXT4_GET_BLOCKS_NO_NORMALIZE            0x0040
        /* Request will not result in inode size update (user for fallocate) */
#define EXT4_GET_BLOCKS_KEEP_SIZE               0x0080
        /* Do not take i_data_sem locking in ext4_map_blocks */
#define EXT4_GET_BLOCKS_NO_LOCK                 0x0100
        /* Convert written extents to unwritten */
#define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN       0x0200

/*
 * The bit position of these flags must not overlap with any of the
 * EXT4_GET_BLOCKS_*.  They are used by ext4_find_extent(),
 * read_extent_tree_block(), ext4_split_extent_at(),
 * ext4_ext_insert_extent(), and ext4_ext_create_new_leaf().
 * EXT4_EX_NOCACHE is used to indicate that the we shouldn't be
 * caching the extents when reading from the extent tree while a
 * truncate or punch hole operation is in progress.
 */
#define EXT4_EX_NOCACHE                         0x40000000
#define EXT4_EX_FORCE_CACHE                     0x20000000

/*
 * Flags used by ext4_free_blocks
 */
#define EXT4_FREE_BLOCKS_METADATA               0x0001
#define EXT4_FREE_BLOCKS_FORGET                 0x0002
#define EXT4_FREE_BLOCKS_VALIDATED              0x0004
#define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE         0x0008
#define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER   0x0010
#define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER    0x0020

/*
 * ioctl commands
 */
#define EXT4_IOC_GETFLAGS               FS_IOC_GETFLAGS
#define EXT4_IOC_SETFLAGS               FS_IOC_SETFLAGS
#define EXT4_IOC_GETVERSION             _IOR('f', 3, long)
#define EXT4_IOC_SETVERSION             _IOW('f', 4, long)
#define EXT4_IOC_GETVERSION_OLD         FS_IOC_GETVERSION
#define EXT4_IOC_SETVERSION_OLD         FS_IOC_SETVERSION
#define EXT4_IOC_GETRSVSZ               _IOR('f', 5, long)
#define EXT4_IOC_SETRSVSZ               _IOW('f', 6, long)
#define EXT4_IOC_GROUP_EXTEND           _IOW('f', 7, unsigned long)
#define EXT4_IOC_GROUP_ADD              _IOW('f', 8, struct ext4_new_group_input)
#define EXT4_IOC_MIGRATE                _IO('f', 9)
 /* note ioctl 10 reserved for an early version of the FIEMAP ioctl */
 /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */
#define EXT4_IOC_ALLOC_DA_BLKS          _IO('f', 12)
#define EXT4_IOC_MOVE_EXT               _IOWR('f', 15, struct move_extent)
#define EXT4_IOC_RESIZE_FS              _IOW('f', 16, __u64)
#define EXT4_IOC_SWAP_BOOT              _IO('f', 17)
#define EXT4_IOC_PRECACHE_EXTENTS       _IO('f', 18)

#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
/*
 * ioctl commands in 32 bit emulation
 */
#define EXT4_IOC32_GETFLAGS             FS_IOC32_GETFLAGS
#define EXT4_IOC32_SETFLAGS             FS_IOC32_SETFLAGS
#define EXT4_IOC32_GETVERSION           _IOR('f', 3, int)
#define EXT4_IOC32_SETVERSION           _IOW('f', 4, int)
#define EXT4_IOC32_GETRSVSZ             _IOR('f', 5, int)
#define EXT4_IOC32_SETRSVSZ             _IOW('f', 6, int)
#define EXT4_IOC32_GROUP_EXTEND         _IOW('f', 7, unsigned int)
#define EXT4_IOC32_GROUP_ADD            _IOW('f', 8, struct compat_ext4_new_group_input)
#define EXT4_IOC32_GETVERSION_OLD       FS_IOC32_GETVERSION
#define EXT4_IOC32_SETVERSION_OLD       FS_IOC32_SETVERSION
#endif

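Illustration (not part of the diff): from userspace these are ordinary
ioctl(2) calls. A sketch of online resize, roughly what resize2fs does on a
mounted filesystem; the mount path and target size are assumed:

        #include <fcntl.h>
        #include <unistd.h>
        #include <sys/ioctl.h>

        int resize_fs(const char *mnt, __u64 new_blocks)
        {
                int fd = open(mnt, O_RDONLY);   /* any fd on the filesystem */
                int ret = (fd < 0) ? -1 :
                        ioctl(fd, EXT4_IOC_RESIZE_FS, &new_blocks);
                if (fd >= 0)
                        close(fd);
                return ret;     /* new_blocks is the desired total block count */
        }
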
/* Max physical block we can address w/o extents */
#define EXT4_MAX_BLOCK_FILE_PHYS        0xFFFFFFFF

/*
 * Structure of an inode on the disk
 */
struct ext4_inode {
        __le16  i_mode;         /* File mode */
        __le16  i_uid;          /* Low 16 bits of Owner Uid */
        __le32  i_size_lo;      /* Size in bytes */
        __le32  i_atime;        /* Access time */
        __le32  i_ctime;        /* Inode Change time */
        __le32  i_mtime;        /* Modification time */
        __le32  i_dtime;        /* Deletion Time */
        __le16  i_gid;          /* Low 16 bits of Group Id */
        __le16  i_links_count;  /* Links count */
        __le32  i_blocks_lo;    /* Blocks count */
        __le32  i_flags;        /* File flags */
        union {
                struct {
                        __le32  l_i_version;
                } linux1;
                struct {
                        __u32  h_i_translator;
                } hurd1;
                struct {
                        __u32  m_i_reserved1;
                } masix1;
        } osd1;                         /* OS dependent 1 */
        __le32  i_block[EXT4_N_BLOCKS];/* Pointers to blocks */
        __le32  i_generation;   /* File version (for NFS) */
        __le32  i_file_acl_lo;  /* File ACL */
        __le32  i_size_high;
        __le32  i_obso_faddr;   /* Obsoleted fragment address */
        union {
                struct {
                        __le16  l_i_blocks_high; /* were l_i_reserved1 */
                        __le16  l_i_file_acl_high;
                        __le16  l_i_uid_high;   /* these 2 fields */
                        __le16  l_i_gid_high;   /* were reserved2[0] */
                        __le16  l_i_checksum_lo;/* crc32c(uuid+inum+inode) LE */
                        __le16  l_i_reserved;
                } linux2;
                struct {
                        __le16  h_i_reserved1;  /* Obsoleted fragment number/size which are removed in ext4 */
                        __u16   h_i_mode_high;
                        __u16   h_i_uid_high;
                        __u16   h_i_gid_high;
                        __u32   h_i_author;
                } hurd2;
                struct {
                        __le16  h_i_reserved1;  /* Obsoleted fragment number/size which are removed in ext4 */
                        __le16  m_i_file_acl_high;
                        __u32   m_i_reserved2[2];
                } masix2;
        } osd2;                         /* OS dependent 2 */
        __le16  i_extra_isize;
        __le16  i_checksum_hi;  /* crc32c(uuid+inum+inode) BE */
        __le32  i_ctime_extra;  /* extra Change time      (nsec << 2 | epoch) */
        __le32  i_mtime_extra;  /* extra Modification time(nsec << 2 | epoch) */
        __le32  i_atime_extra;  /* extra Access time      (nsec << 2 | epoch) */
        __le32  i_crtime;       /* File Creation time */
        __le32  i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */
        __le32  i_version_hi;   /* high 32 bits for 64-bit version */
};

struct move_extent {
        __u32 reserved;         /* should be zero */
        __u32 donor_fd;         /* donor file descriptor */
        __u64 orig_start;       /* logical start offset in block for orig */
        __u64 donor_start;      /* logical start offset in block for donor */
        __u64 len;              /* block length to be moved */
        __u64 moved_len;        /* moved block length */
};

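Illustration (not part of the diff): struct move_extent is the argument block
for EXT4_IOC_MOVE_EXT above, the primitive a defragmenter such as e4defrag
builds on; orig_fd and donor_fd are assumed open descriptors.

        /* Sketch: exchange the first 128 blocks of orig with the donor. */
        struct move_extent me = {
                .donor_fd    = (__u32) donor_fd,
                .orig_start  = 0,       /* logical block offset in orig */
                .donor_start = 0,       /* logical block offset in donor */
                .len         = 128,     /* block count to move */
        };
        if (ioctl(orig_fd, EXT4_IOC_MOVE_EXT, &me) == 0)
                printf("moved %llu blocks\n",
                       (unsigned long long) me.moved_len);
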
#define EXT4_EPOCH_BITS 2
#define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1)
#define EXT4_NSEC_MASK  (~0UL << EXT4_EPOCH_BITS)

/*
 * Extended fields will fit into an inode if the filesystem was formatted
 * with large inodes (-I 256 or larger) and there are not currently any EAs
 * consuming all of the available space. For new inodes we always reserve
 * enough space for the kernel's known extended fields, but for inodes
 * created with an old kernel this might not have been the case. None of
 * the extended inode fields is critical for correct filesystem operation.
 * This macro checks if a certain field fits in the inode. Note that
 * inode-size = GOOD_OLD_INODE_SIZE + i_extra_isize
 */
#define EXT4_FITS_IN_INODE(ext4_inode, einode, field)   \
        ((offsetof(typeof(*ext4_inode), field) +        \
          sizeof((ext4_inode)->field))                  \
         <= (EXT4_GOOD_OLD_INODE_SIZE +                 \
             (einode)->i_extra_isize))                  \

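Illustration (not part of the diff): the macro guards every access to a
post-128-byte inode field; raw_inode (struct ext4_inode *) and ei
(struct ext4_inode_info *) are assumed locals.

        struct timespec crtime = { 0, 0 };
        /* Read the creation time only if i_extra_isize covers it. */
        if (EXT4_FITS_IN_INODE(raw_inode, ei, i_crtime))
                crtime.tv_sec = (signed)le32_to_cpu(raw_inode->i_crtime);
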
717 static inline __le32 ext4_encode_extra_time(struct timespec *time) 718 static inline __le32 ext4_encode_extra_time(struct timespec *time)
718 { 719 {
719 return cpu_to_le32((sizeof(time->tv_sec) > 4 ? 720 return cpu_to_le32((sizeof(time->tv_sec) > 4 ?
720 (time->tv_sec >> 32) & EXT4_EPOCH_MASK : 0) | 721 (time->tv_sec >> 32) & EXT4_EPOCH_MASK : 0) |
721 ((time->tv_nsec << EXT4_EPOCH_BITS) & EXT4_NSEC_MASK)); 722 ((time->tv_nsec << EXT4_EPOCH_BITS) & EXT4_NSEC_MASK));
722 } 723 }
723 724
724 static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra) 725 static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra)
725 { 726 {
726 if (sizeof(time->tv_sec) > 4) 727 if (sizeof(time->tv_sec) > 4)
727 time->tv_sec |= (__u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK) 728 time->tv_sec |= (__u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK)
728 << 32; 729 << 32;
729 time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> EXT4_EPOCH_BITS; 730 time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> EXT4_EPOCH_BITS;
730 } 731 }
731 732
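For readers decoding the arithmetic above: the low EXT4_EPOCH_BITS (2) bits of the extra field carry bits 32-33 of tv_sec, extending the epoch range beyond 2038, and the upper 30 bits carry tv_nsec. The following is a simplified userland model of that bit layout; it uses plain uint32_t values and deliberately omits the cpu_to_le32()/le32_to_cpu() endianness conversions done by the kernel helpers.

    /* Simplified userland model -- endianness handling omitted. */
    #include <stdint.h>
    #include <stdio.h>

    #define EPOCH_BITS 2
    #define EPOCH_MASK ((1u << EPOCH_BITS) - 1)
    #define NSEC_MASK  (~0u << EPOCH_BITS)

    static uint32_t encode_extra(int64_t sec, uint32_t nsec)
    {
        return (uint32_t)((sec >> 32) & EPOCH_MASK) |
               ((nsec << EPOCH_BITS) & NSEC_MASK);
    }

    static void decode_extra(uint32_t extra, int64_t *sec, uint32_t *nsec)
    {
        *sec |= (int64_t)(extra & EPOCH_MASK) << 32;   /* restore high bits */
        *nsec = (extra & NSEC_MASK) >> EPOCH_BITS;
    }

    int main(void)
    {
        int64_t sec = 0x100000001LL;     /* needs more than 32 bits */
        uint32_t extra = encode_extra(sec, 123456789);

        int64_t out_sec = (int32_t)sec;  /* low 32 bits, as stored on disk */
        uint32_t out_nsec;
        decode_extra(extra, &out_sec, &out_nsec);
        printf("%lld %u\n", (long long)out_sec, out_nsec);
        /* prints: 4294967297 123456789 */
        return 0;
    }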
732 #define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode) \ 733 #define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode) \
733 do { \ 734 do { \
734 (raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec); \ 735 (raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec); \
735 if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \ 736 if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \
736 (raw_inode)->xtime ## _extra = \ 737 (raw_inode)->xtime ## _extra = \
737 ext4_encode_extra_time(&(inode)->xtime); \ 738 ext4_encode_extra_time(&(inode)->xtime); \
738 } while (0) 739 } while (0)
739 740
740 #define EXT4_EINODE_SET_XTIME(xtime, einode, raw_inode) \ 741 #define EXT4_EINODE_SET_XTIME(xtime, einode, raw_inode) \
741 do { \ 742 do { \
742 if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \ 743 if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
743 (raw_inode)->xtime = cpu_to_le32((einode)->xtime.tv_sec); \ 744 (raw_inode)->xtime = cpu_to_le32((einode)->xtime.tv_sec); \
744 if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \ 745 if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
745 (raw_inode)->xtime ## _extra = \ 746 (raw_inode)->xtime ## _extra = \
746 ext4_encode_extra_time(&(einode)->xtime); \ 747 ext4_encode_extra_time(&(einode)->xtime); \
747 } while (0) 748 } while (0)
748 749
749 #define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode) \ 750 #define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode) \
750 do { \ 751 do { \
751 (inode)->xtime.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime); \ 752 (inode)->xtime.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime); \
752 if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \ 753 if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \
753 ext4_decode_extra_time(&(inode)->xtime, \ 754 ext4_decode_extra_time(&(inode)->xtime, \
754 raw_inode->xtime ## _extra); \ 755 raw_inode->xtime ## _extra); \
755 else \ 756 else \
756 (inode)->xtime.tv_nsec = 0; \ 757 (inode)->xtime.tv_nsec = 0; \
757 } while (0) 758 } while (0)
758 759
759 #define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode) \ 760 #define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode) \
760 do { \ 761 do { \
761 if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \ 762 if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
762 (einode)->xtime.tv_sec = \ 763 (einode)->xtime.tv_sec = \
763 (signed)le32_to_cpu((raw_inode)->xtime); \ 764 (signed)le32_to_cpu((raw_inode)->xtime); \
764 else \ 765 else \
765 (einode)->xtime.tv_sec = 0; \ 766 (einode)->xtime.tv_sec = 0; \
766 if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \ 767 if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
767 ext4_decode_extra_time(&(einode)->xtime, \ 768 ext4_decode_extra_time(&(einode)->xtime, \
768 raw_inode->xtime ## _extra); \ 769 raw_inode->xtime ## _extra); \
769 else \ 770 else \
770 (einode)->xtime.tv_nsec = 0; \ 771 (einode)->xtime.tv_nsec = 0; \
771 } while (0) 772 } while (0)
772 773
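The token pasting in these macros is easy to miss: the xtime argument has "_extra" glued onto it to reach the matching extended field. Expanding EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode) by hand gives roughly the following sketch of the preprocessor output:

    /* Approximate expansion -- a sketch, not literal preprocessor output. */
    do {
        (raw_inode)->i_ctime = cpu_to_le32((inode)->i_ctime.tv_sec);
        if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), i_ctime_extra))
            (raw_inode)->i_ctime_extra =
                ext4_encode_extra_time(&(inode)->i_ctime);
    } while (0);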
773 #define i_disk_version osd1.linux1.l_i_version 774 #define i_disk_version osd1.linux1.l_i_version
774 775
775 #if defined(__KERNEL__) || defined(__linux__) 776 #if defined(__KERNEL__) || defined(__linux__)
776 #define i_reserved1 osd1.linux1.l_i_reserved1 777 #define i_reserved1 osd1.linux1.l_i_reserved1
777 #define i_file_acl_high osd2.linux2.l_i_file_acl_high 778 #define i_file_acl_high osd2.linux2.l_i_file_acl_high
778 #define i_blocks_high osd2.linux2.l_i_blocks_high 779 #define i_blocks_high osd2.linux2.l_i_blocks_high
779 #define i_uid_low i_uid 780 #define i_uid_low i_uid
780 #define i_gid_low i_gid 781 #define i_gid_low i_gid
781 #define i_uid_high osd2.linux2.l_i_uid_high 782 #define i_uid_high osd2.linux2.l_i_uid_high
782 #define i_gid_high osd2.linux2.l_i_gid_high 783 #define i_gid_high osd2.linux2.l_i_gid_high
783 #define i_checksum_lo osd2.linux2.l_i_checksum_lo 784 #define i_checksum_lo osd2.linux2.l_i_checksum_lo
784 785
785 #elif defined(__GNU__) 786 #elif defined(__GNU__)
786 787
787 #define i_translator osd1.hurd1.h_i_translator 788 #define i_translator osd1.hurd1.h_i_translator
788 #define i_uid_high osd2.hurd2.h_i_uid_high 789 #define i_uid_high osd2.hurd2.h_i_uid_high
789 #define i_gid_high osd2.hurd2.h_i_gid_high 790 #define i_gid_high osd2.hurd2.h_i_gid_high
790 #define i_author osd2.hurd2.h_i_author 791 #define i_author osd2.hurd2.h_i_author
791 792
792 #elif defined(__masix__) 793 #elif defined(__masix__)
793 794
794 #define i_reserved1 osd1.masix1.m_i_reserved1 795 #define i_reserved1 osd1.masix1.m_i_reserved1
795 #define i_file_acl_high osd2.masix2.m_i_file_acl_high 796 #define i_file_acl_high osd2.masix2.m_i_file_acl_high
796 #define i_reserved2 osd2.masix2.m_i_reserved2 797 #define i_reserved2 osd2.masix2.m_i_reserved2
797 798
798 #endif /* defined(__KERNEL__) || defined(__linux__) */ 799 #endif /* defined(__KERNEL__) || defined(__linux__) */
799 800
800 #include "extents_status.h" 801 #include "extents_status.h"
801 802
802 /* 803 /*
803 * fourth extended file system inode data in memory 804 * fourth extended file system inode data in memory
804 */ 805 */
805 struct ext4_inode_info { 806 struct ext4_inode_info {
806 __le32 i_data[15]; /* unconverted */ 807 __le32 i_data[15]; /* unconverted */
807 __u32 i_dtime; 808 __u32 i_dtime;
808 ext4_fsblk_t i_file_acl; 809 ext4_fsblk_t i_file_acl;
809 810
810 /* 811 /*
811 * i_block_group is the number of the block group which contains 812 * i_block_group is the number of the block group which contains
812 * this file's inode. Constant across the lifetime of the inode, 813 * this file's inode. Constant across the lifetime of the inode,
813 * it is used for making block allocation decisions - we try to 814 * it is used for making block allocation decisions - we try to
814 * place a file's data blocks near its inode block, and new inodes 815 * place a file's data blocks near its inode block, and new inodes
815 * near to their parent directory's inode. 816 * near to their parent directory's inode.
816 */ 817 */
817 ext4_group_t i_block_group; 818 ext4_group_t i_block_group;
818 ext4_lblk_t i_dir_start_lookup; 819 ext4_lblk_t i_dir_start_lookup;
819 #if (BITS_PER_LONG < 64) 820 #if (BITS_PER_LONG < 64)
820 unsigned long i_state_flags; /* Dynamic state flags */ 821 unsigned long i_state_flags; /* Dynamic state flags */
821 #endif 822 #endif
822 unsigned long i_flags; 823 unsigned long i_flags;
823 824
824 /* 825 /*
825 * Extended attributes can be read independently of the main file 826 * Extended attributes can be read independently of the main file
826 * data. Taking i_mutex even when reading would cause contention 827 * data. Taking i_mutex even when reading would cause contention
827 * between readers of EAs and writers of regular file data, so 828 * between readers of EAs and writers of regular file data, so
828 * instead we synchronize on xattr_sem when reading or changing 829 * instead we synchronize on xattr_sem when reading or changing
829 * EAs. 830 * EAs.
830 */ 831 */
831 struct rw_semaphore xattr_sem; 832 struct rw_semaphore xattr_sem;
832 833
833 struct list_head i_orphan; /* unlinked but open inodes */ 834 struct list_head i_orphan; /* unlinked but open inodes */
834 835
835 /* 836 /*
836 * i_disksize keeps track of what the inode size is ON DISK, not 837 * i_disksize keeps track of what the inode size is ON DISK, not
837 * in memory. During truncate, i_size is set to the new size by 838 * in memory. During truncate, i_size is set to the new size by
838 * the VFS prior to calling ext4_truncate(), but the filesystem won't 839 * the VFS prior to calling ext4_truncate(), but the filesystem won't
839 * set i_disksize to 0 until the truncate is actually under way. 840 * set i_disksize to 0 until the truncate is actually under way.
840 * 841 *
841 * The intent is that i_disksize always represents the blocks which 842 * The intent is that i_disksize always represents the blocks which
842 * are used by this file. This allows recovery to restart truncate 843 * are used by this file. This allows recovery to restart truncate
843 * on orphans if we crash during truncate. We actually write i_disksize 844 * on orphans if we crash during truncate. We actually write i_disksize
844 * into the on-disk inode when writing inodes out, instead of i_size. 845 * into the on-disk inode when writing inodes out, instead of i_size.
845 * 846 *
846 * The only time when i_disksize and i_size may be different is when 847 * The only time when i_disksize and i_size may be different is when
847 * a truncate is in progress. The only things which change i_disksize 848 * a truncate is in progress. The only things which change i_disksize
848 * are ext4_get_block (growth) and ext4_truncate (shrinkth). 849 * are ext4_get_block (growth) and ext4_truncate (shrinkth).
849 */ 850 */
850 loff_t i_disksize; 851 loff_t i_disksize;
851 852
852 /* 853 /*
853 * i_data_sem is for serialising ext4_truncate() against 854 * i_data_sem is for serialising ext4_truncate() against
854 * ext4_getblock(). In the 2.4 ext2 design, great chunks of an inode's 855 * ext4_getblock(). In the 2.4 ext2 design, great chunks of an inode's
855 * data tree are chopped off during truncate. We can't do that in 856 * data tree are chopped off during truncate. We can't do that in
856 * ext4 because whenever we perform intermediate commits during 857 * ext4 because whenever we perform intermediate commits during
857 * truncate, the inode and all the metadata blocks *must* be in a 858 * truncate, the inode and all the metadata blocks *must* be in a
858 * consistent state which allows truncation of the orphans to restart 859 * consistent state which allows truncation of the orphans to restart
859 * during recovery. Hence we must fix the get_block-vs-truncate race 860 * during recovery. Hence we must fix the get_block-vs-truncate race
860 * by other means, so we have i_data_sem. 861 * by other means, so we have i_data_sem.
861 */ 862 */
862 struct rw_semaphore i_data_sem; 863 struct rw_semaphore i_data_sem;
863 struct inode vfs_inode; 864 struct inode vfs_inode;
864 struct jbd2_inode *jinode; 865 struct jbd2_inode *jinode;
865 866
866 spinlock_t i_raw_lock; /* protects updates to the raw inode */ 867 spinlock_t i_raw_lock; /* protects updates to the raw inode */
867 868
868 /* 869 /*
869 * File creation time. Its function is the same as that of 870 * File creation time. Its function is the same as that of
870 * struct timespec i_{a,c,m}time in the generic inode. 871 * struct timespec i_{a,c,m}time in the generic inode.
871 */ 872 */
872 struct timespec i_crtime; 873 struct timespec i_crtime;
873 874
874 /* mballoc */ 875 /* mballoc */
875 struct list_head i_prealloc_list; 876 struct list_head i_prealloc_list;
876 spinlock_t i_prealloc_lock; 877 spinlock_t i_prealloc_lock;
877 878
878 /* extents status tree */ 879 /* extents status tree */
879 struct ext4_es_tree i_es_tree; 880 struct ext4_es_tree i_es_tree;
880 rwlock_t i_es_lock; 881 rwlock_t i_es_lock;
881 struct list_head i_es_list; 882 struct list_head i_es_list;
882 unsigned int i_es_all_nr; /* protected by i_es_lock */ 883 unsigned int i_es_all_nr; /* protected by i_es_lock */
883 unsigned int i_es_shk_nr; /* protected by i_es_lock */ 884 unsigned int i_es_shk_nr; /* protected by i_es_lock */
884 ext4_lblk_t i_es_shrink_lblk; /* Offset where we start searching for 885 ext4_lblk_t i_es_shrink_lblk; /* Offset where we start searching for
885 extents to shrink. Protected by 886 extents to shrink. Protected by
886 i_es_lock */ 887 i_es_lock */
887 888
888 /* ialloc */ 889 /* ialloc */
889 ext4_group_t i_last_alloc_group; 890 ext4_group_t i_last_alloc_group;
890 891
891 /* allocation reservation info for delalloc */ 892 /* allocation reservation info for delalloc */
892 /* In case of bigalloc, these refer to clusters rather than blocks */ 893 /* In case of bigalloc, these refer to clusters rather than blocks */
893 unsigned int i_reserved_data_blocks; 894 unsigned int i_reserved_data_blocks;
894 unsigned int i_reserved_meta_blocks; 895 unsigned int i_reserved_meta_blocks;
895 unsigned int i_allocated_meta_blocks; 896 unsigned int i_allocated_meta_blocks;
896 ext4_lblk_t i_da_metadata_calc_last_lblock; 897 ext4_lblk_t i_da_metadata_calc_last_lblock;
897 int i_da_metadata_calc_len; 898 int i_da_metadata_calc_len;
898 899
899 /* on-disk additional length */ 900 /* on-disk additional length */
900 __u16 i_extra_isize; 901 __u16 i_extra_isize;
901 902
902 /* Offset and size of the inline data area. */ 903 /* Offset and size of the inline data area. */
903 u16 i_inline_off; 904 u16 i_inline_off;
904 u16 i_inline_size; 905 u16 i_inline_size;
905 906
906 #ifdef CONFIG_QUOTA 907 #ifdef CONFIG_QUOTA
907 /* quota space reservation, managed internally by quota code */ 908 /* quota space reservation, managed internally by quota code */
908 qsize_t i_reserved_quota; 909 qsize_t i_reserved_quota;
909 #endif 910 #endif
910 911
911 /* Lock protecting lists below */ 912 /* Lock protecting lists below */
912 spinlock_t i_completed_io_lock; 913 spinlock_t i_completed_io_lock;
913 /* 914 /*
914 * Completed IOs that need unwritten extents handling and have 915 * Completed IOs that need unwritten extents handling and have
915 * transaction reserved 916 * transaction reserved
916 */ 917 */
917 struct list_head i_rsv_conversion_list; 918 struct list_head i_rsv_conversion_list;
918 /* 919 /*
919 * Completed IOs that need unwritten extents handling and don't have 920 * Completed IOs that need unwritten extents handling and don't have
920 * transaction reserved 921 * transaction reserved
921 */ 922 */
922 atomic_t i_ioend_count; /* Number of outstanding io_end structs */ 923 atomic_t i_ioend_count; /* Number of outstanding io_end structs */
923 atomic_t i_unwritten; /* Nr. of inflight conversions pending */ 924 atomic_t i_unwritten; /* Nr. of inflight conversions pending */
924 struct work_struct i_rsv_conversion_work; 925 struct work_struct i_rsv_conversion_work;
925 926
926 spinlock_t i_block_reservation_lock; 927 spinlock_t i_block_reservation_lock;
927 928
928 /* 929 /*
929 * Transactions that contain inode's metadata needed to complete 930 * Transactions that contain inode's metadata needed to complete
930 * fsync and fdatasync, respectively. 931 * fsync and fdatasync, respectively.
931 */ 932 */
932 tid_t i_sync_tid; 933 tid_t i_sync_tid;
933 tid_t i_datasync_tid; 934 tid_t i_datasync_tid;
934 935
935 #ifdef CONFIG_QUOTA 936 #ifdef CONFIG_QUOTA
936 struct dquot *i_dquot[MAXQUOTAS]; 937 struct dquot *i_dquot[MAXQUOTAS];
937 #endif 938 #endif
938 939
939 /* Precomputed uuid+inum+igen checksum for seeding inode checksums */ 940 /* Precomputed uuid+inum+igen checksum for seeding inode checksums */
940 __u32 i_csum_seed; 941 __u32 i_csum_seed;
941 }; 942 };
942 943
943 /* 944 /*
944 * File system states 945 * File system states
945 */ 946 */
946 #define EXT4_VALID_FS 0x0001 /* Unmounted cleanly */ 947 #define EXT4_VALID_FS 0x0001 /* Unmounted cleanly */
947 #define EXT4_ERROR_FS 0x0002 /* Errors detected */ 948 #define EXT4_ERROR_FS 0x0002 /* Errors detected */
948 #define EXT4_ORPHAN_FS 0x0004 /* Orphans being recovered */ 949 #define EXT4_ORPHAN_FS 0x0004 /* Orphans being recovered */
949 950
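These s_state bits are stored in the on-disk superblock and mirrored into the in-memory s_mount_state at mount time. A hedged sketch of the kind of test a mount path could perform (the helper below is invented for illustration, not a function from this header):

    /* Illustrative sketch only. */
    static int fs_was_unmounted_cleanly(unsigned short mount_state)
    {
        return (mount_state & EXT4_VALID_FS) &&
               !(mount_state & EXT4_ERROR_FS);
    }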
950 /* 951 /*
951 * Misc. filesystem flags 952 * Misc. filesystem flags
952 */ 953 */
953 #define EXT2_FLAGS_SIGNED_HASH 0x0001 /* Signed dirhash in use */ 954 #define EXT2_FLAGS_SIGNED_HASH 0x0001 /* Signed dirhash in use */
954 #define EXT2_FLAGS_UNSIGNED_HASH 0x0002 /* Unsigned dirhash in use */ 955 #define EXT2_FLAGS_UNSIGNED_HASH 0x0002 /* Unsigned dirhash in use */
955 #define EXT2_FLAGS_TEST_FILESYS 0x0004 /* to test development code */ 956 #define EXT2_FLAGS_TEST_FILESYS 0x0004 /* to test development code */
956 957
957 /* 958 /*
958 * Mount flags set via mount options or defaults 959 * Mount flags set via mount options or defaults
959 */ 960 */
960 #define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */ 961 #define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */
961 #define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */ 962 #define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */
962 #define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */ 963 #define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */
963 #define EXT4_MOUNT_ERRORS_RO 0x00020 /* Remount fs ro on errors */ 964 #define EXT4_MOUNT_ERRORS_RO 0x00020 /* Remount fs ro on errors */
964 #define EXT4_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */ 965 #define EXT4_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */
965 #define EXT4_MOUNT_ERRORS_MASK 0x00070 966 #define EXT4_MOUNT_ERRORS_MASK 0x00070
966 #define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */ 967 #define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */
967 #define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal */ 968 #define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal */
968 #ifdef CONFIG_FS_DAX 969 #ifdef CONFIG_FS_DAX
969 #define EXT4_MOUNT_DAX 0x00200 /* Direct Access */ 970 #define EXT4_MOUNT_DAX 0x00200 /* Direct Access */
970 #else 971 #else
971 #define EXT4_MOUNT_DAX 0 972 #define EXT4_MOUNT_DAX 0
972 #endif 973 #endif
973 #define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */ 974 #define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */
974 #define EXT4_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */ 975 #define EXT4_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */
975 #define EXT4_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */ 976 #define EXT4_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */
976 #define EXT4_MOUNT_WRITEBACK_DATA 0x00C00 /* No data ordering */ 977 #define EXT4_MOUNT_WRITEBACK_DATA 0x00C00 /* No data ordering */
977 #define EXT4_MOUNT_UPDATE_JOURNAL 0x01000 /* Update the journal format */ 978 #define EXT4_MOUNT_UPDATE_JOURNAL 0x01000 /* Update the journal format */
978 #define EXT4_MOUNT_NO_UID32 0x02000 /* Disable 32-bit UIDs */ 979 #define EXT4_MOUNT_NO_UID32 0x02000 /* Disable 32-bit UIDs */
979 #define EXT4_MOUNT_XATTR_USER 0x04000 /* Extended user attributes */ 980 #define EXT4_MOUNT_XATTR_USER 0x04000 /* Extended user attributes */
980 #define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */ 981 #define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */
981 #define EXT4_MOUNT_NO_AUTO_DA_ALLOC 0x10000 /* No auto delalloc mapping */ 982 #define EXT4_MOUNT_NO_AUTO_DA_ALLOC 0x10000 /* No auto delalloc mapping */
982 #define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */ 983 #define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */
983 #define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */ 984 #define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */
984 #define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ 985 #define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */
985 #define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ 986 #define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */
986 #define EXT4_MOUNT_DIOREAD_NOLOCK 0x400000 /* Enable support for dio read nolocking */ 987 #define EXT4_MOUNT_DIOREAD_NOLOCK 0x400000 /* Enable support for dio read nolocking */
987 #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ 988 #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
988 #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ 989 #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
989 #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ 990 #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
990 #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ 991 #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
991 #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ 992 #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */
992 #define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */ 993 #define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */
993 #define EXT4_MOUNT_INIT_INODE_TABLE 0x80000000 /* Initialize uninitialized itables */ 994 #define EXT4_MOUNT_INIT_INODE_TABLE 0x80000000 /* Initialize uninitialized itables */
994 995
995 /* 996 /*
996 * Mount flags set automatically (i.e. not settable via a mount option), 997 * Mount flags set automatically (i.e. not settable via a mount option),
997 * based on a per-filesystem feature or property, or in special cases such 998 * based on a per-filesystem feature or property, or in special cases such
998 * as distinguishing an explicitly specified mount option from a default. 999 * as distinguishing an explicitly specified mount option from a default.
999 */ 1000 */
1000 #define EXT4_MOUNT2_EXPLICIT_DELALLOC 0x00000001 /* User explicitly 1001 #define EXT4_MOUNT2_EXPLICIT_DELALLOC 0x00000001 /* User explicitly
1001 specified delalloc */ 1002 specified delalloc */
1002 #define EXT4_MOUNT2_STD_GROUP_SIZE 0x00000002 /* We have standard group 1003 #define EXT4_MOUNT2_STD_GROUP_SIZE 0x00000002 /* We have standard group
1003 size of blocksize * 8 1004 size of blocksize * 8
1004 blocks */ 1005 blocks */
1005 #define EXT4_MOUNT2_HURD_COMPAT 0x00000004 /* Support HURD-castrated 1006 #define EXT4_MOUNT2_HURD_COMPAT 0x00000004 /* Support HURD-castrated
1006 file systems */ 1007 file systems */
1007 1008
1008 #define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \ 1009 #define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \
1009 ~EXT4_MOUNT_##opt 1010 ~EXT4_MOUNT_##opt
1010 #define set_opt(sb, opt) EXT4_SB(sb)->s_mount_opt |= \ 1011 #define set_opt(sb, opt) EXT4_SB(sb)->s_mount_opt |= \
1011 EXT4_MOUNT_##opt 1012 EXT4_MOUNT_##opt
1012 #define test_opt(sb, opt) (EXT4_SB(sb)->s_mount_opt & \ 1013 #define test_opt(sb, opt) (EXT4_SB(sb)->s_mount_opt & \
1013 EXT4_MOUNT_##opt) 1014 EXT4_MOUNT_##opt)
1014 1015
1015 #define clear_opt2(sb, opt) EXT4_SB(sb)->s_mount_opt2 &= \ 1016 #define clear_opt2(sb, opt) EXT4_SB(sb)->s_mount_opt2 &= \
1016 ~EXT4_MOUNT2_##opt 1017 ~EXT4_MOUNT2_##opt
1017 #define set_opt2(sb, opt) EXT4_SB(sb)->s_mount_opt2 |= \ 1018 #define set_opt2(sb, opt) EXT4_SB(sb)->s_mount_opt2 |= \
1018 EXT4_MOUNT2_##opt 1019 EXT4_MOUNT2_##opt
1019 #define test_opt2(sb, opt) (EXT4_SB(sb)->s_mount_opt2 & \ 1020 #define test_opt2(sb, opt) (EXT4_SB(sb)->s_mount_opt2 & \
1020 EXT4_MOUNT2_##opt) 1021 EXT4_MOUNT2_##opt)
1021 1022
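These helpers just mask s_mount_opt/s_mount_opt2 with the token-pasted flag name; test_opt(sb, DELALLOC), for instance, expands to (EXT4_SB(sb)->s_mount_opt & EXT4_MOUNT_DELALLOC). A small usage sketch (the surrounding function is invented for illustration):

    /* Illustrative sketch only. */
    static void example_option_toggle(struct super_block *sb)
    {
        set_opt(sb, DELALLOC);        /* s_mount_opt |= EXT4_MOUNT_DELALLOC */
        if (test_opt(sb, DELALLOC))
            clear_opt(sb, DELALLOC);  /* s_mount_opt &= ~EXT4_MOUNT_DELALLOC */
    }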
1022 #define ext4_test_and_set_bit __test_and_set_bit_le 1023 #define ext4_test_and_set_bit __test_and_set_bit_le
1023 #define ext4_set_bit __set_bit_le 1024 #define ext4_set_bit __set_bit_le
1024 #define ext4_set_bit_atomic ext2_set_bit_atomic 1025 #define ext4_set_bit_atomic ext2_set_bit_atomic
1025 #define ext4_test_and_clear_bit __test_and_clear_bit_le 1026 #define ext4_test_and_clear_bit __test_and_clear_bit_le
1026 #define ext4_clear_bit __clear_bit_le 1027 #define ext4_clear_bit __clear_bit_le
1027 #define ext4_clear_bit_atomic ext2_clear_bit_atomic 1028 #define ext4_clear_bit_atomic ext2_clear_bit_atomic
1028 #define ext4_test_bit test_bit_le 1029 #define ext4_test_bit test_bit_le
1029 #define ext4_find_next_zero_bit find_next_zero_bit_le 1030 #define ext4_find_next_zero_bit find_next_zero_bit_le
1030 #define ext4_find_next_bit find_next_bit_le 1031 #define ext4_find_next_bit find_next_bit_le
1031 1032
1032 extern void ext4_set_bits(void *bm, int cur, int len); 1033 extern void ext4_set_bits(void *bm, int cur, int len);
1033 1034
1034 /* 1035 /*
1035 * Maximal mount counts between two filesystem checks 1036 * Maximal mount counts between two filesystem checks
1036 */ 1037 */
1037 #define EXT4_DFL_MAX_MNT_COUNT 20 /* Allow 20 mounts */ 1038 #define EXT4_DFL_MAX_MNT_COUNT 20 /* Allow 20 mounts */
1038 #define EXT4_DFL_CHECKINTERVAL 0 /* Don't use interval check */ 1039 #define EXT4_DFL_CHECKINTERVAL 0 /* Don't use interval check */
1039 1040
1040 /* 1041 /*
1041 * Behaviour when detecting errors 1042 * Behaviour when detecting errors
1042 */ 1043 */
1043 #define EXT4_ERRORS_CONTINUE 1 /* Continue execution */ 1044 #define EXT4_ERRORS_CONTINUE 1 /* Continue execution */
1044 #define EXT4_ERRORS_RO 2 /* Remount fs read-only */ 1045 #define EXT4_ERRORS_RO 2 /* Remount fs read-only */
1045 #define EXT4_ERRORS_PANIC 3 /* Panic */ 1046 #define EXT4_ERRORS_PANIC 3 /* Panic */
1046 #define EXT4_ERRORS_DEFAULT EXT4_ERRORS_CONTINUE 1047 #define EXT4_ERRORS_DEFAULT EXT4_ERRORS_CONTINUE
1047 1048
1048 /* Metadata checksum algorithm codes */ 1049 /* Metadata checksum algorithm codes */
1049 #define EXT4_CRC32C_CHKSUM 1 1050 #define EXT4_CRC32C_CHKSUM 1
1050 1051
1052 /* Encryption algorithms */
1053 #define EXT4_ENCRYPTION_MODE_INVALID 0
1054 #define EXT4_ENCRYPTION_MODE_AES_256_XTS 1
1055 #define EXT4_ENCRYPTION_MODE_AES_256_GCM 2
1056 #define EXT4_ENCRYPTION_MODE_AES_256_CBC 3
1057
1051 /* 1058 /*
1052 * Structure of the super block 1059 * Structure of the super block
1053 */ 1060 */
1054 struct ext4_super_block { 1061 struct ext4_super_block {
1055 /*00*/ __le32 s_inodes_count; /* Inodes count */ 1062 /*00*/ __le32 s_inodes_count; /* Inodes count */
1056 __le32 s_blocks_count_lo; /* Blocks count */ 1063 __le32 s_blocks_count_lo; /* Blocks count */
1057 __le32 s_r_blocks_count_lo; /* Reserved blocks count */ 1064 __le32 s_r_blocks_count_lo; /* Reserved blocks count */
1058 __le32 s_free_blocks_count_lo; /* Free blocks count */ 1065 __le32 s_free_blocks_count_lo; /* Free blocks count */
1059 /*10*/ __le32 s_free_inodes_count; /* Free inodes count */ 1066 /*10*/ __le32 s_free_inodes_count; /* Free inodes count */
1060 __le32 s_first_data_block; /* First Data Block */ 1067 __le32 s_first_data_block; /* First Data Block */
1061 __le32 s_log_block_size; /* Block size */ 1068 __le32 s_log_block_size; /* Block size */
1062 __le32 s_log_cluster_size; /* Allocation cluster size */ 1069 __le32 s_log_cluster_size; /* Allocation cluster size */
1063 /*20*/ __le32 s_blocks_per_group; /* # Blocks per group */ 1070 /*20*/ __le32 s_blocks_per_group; /* # Blocks per group */
1064 __le32 s_clusters_per_group; /* # Clusters per group */ 1071 __le32 s_clusters_per_group; /* # Clusters per group */
1065 __le32 s_inodes_per_group; /* # Inodes per group */ 1072 __le32 s_inodes_per_group; /* # Inodes per group */
1066 __le32 s_mtime; /* Mount time */ 1073 __le32 s_mtime; /* Mount time */
1067 /*30*/ __le32 s_wtime; /* Write time */ 1074 /*30*/ __le32 s_wtime; /* Write time */
1068 __le16 s_mnt_count; /* Mount count */ 1075 __le16 s_mnt_count; /* Mount count */
1069 __le16 s_max_mnt_count; /* Maximal mount count */ 1076 __le16 s_max_mnt_count; /* Maximal mount count */
1070 __le16 s_magic; /* Magic signature */ 1077 __le16 s_magic; /* Magic signature */
1071 __le16 s_state; /* File system state */ 1078 __le16 s_state; /* File system state */
1072 __le16 s_errors; /* Behaviour when detecting errors */ 1079 __le16 s_errors; /* Behaviour when detecting errors */
1073 __le16 s_minor_rev_level; /* minor revision level */ 1080 __le16 s_minor_rev_level; /* minor revision level */
1074 /*40*/ __le32 s_lastcheck; /* time of last check */ 1081 /*40*/ __le32 s_lastcheck; /* time of last check */
1075 __le32 s_checkinterval; /* max. time between checks */ 1082 __le32 s_checkinterval; /* max. time between checks */
1076 __le32 s_creator_os; /* OS */ 1083 __le32 s_creator_os; /* OS */
1077 __le32 s_rev_level; /* Revision level */ 1084 __le32 s_rev_level; /* Revision level */
1078 /*50*/ __le16 s_def_resuid; /* Default uid for reserved blocks */ 1085 /*50*/ __le16 s_def_resuid; /* Default uid for reserved blocks */
1079 __le16 s_def_resgid; /* Default gid for reserved blocks */ 1086 __le16 s_def_resgid; /* Default gid for reserved blocks */
1080 /* 1087 /*
1081 * These fields are for EXT4_DYNAMIC_REV superblocks only. 1088 * These fields are for EXT4_DYNAMIC_REV superblocks only.
1082 * 1089 *
1083 * Note: the difference between the compatible feature set and 1090 * Note: the difference between the compatible feature set and
1084 * the incompatible feature set is that if there is a bit set 1091 * the incompatible feature set is that if there is a bit set
1085 * in the incompatible feature set that the kernel doesn't 1092 * in the incompatible feature set that the kernel doesn't
1086 * know about, it should refuse to mount the filesystem. 1093 * know about, it should refuse to mount the filesystem.
1087 * 1094 *
1088 * e2fsck's requirements are more strict; if it doesn't know 1095 * e2fsck's requirements are more strict; if it doesn't know
1089 * about a feature in either the compatible or incompatible 1096 * about a feature in either the compatible or incompatible
1090 * feature set, it must abort and not try to meddle with 1097 * feature set, it must abort and not try to meddle with
1091 * things it doesn't understand... 1098 * things it doesn't understand...
1092 */ 1099 */
1093 __le32 s_first_ino; /* First non-reserved inode */ 1100 __le32 s_first_ino; /* First non-reserved inode */
1094 __le16 s_inode_size; /* size of inode structure */ 1101 __le16 s_inode_size; /* size of inode structure */
1095 __le16 s_block_group_nr; /* block group # of this superblock */ 1102 __le16 s_block_group_nr; /* block group # of this superblock */
1096 __le32 s_feature_compat; /* compatible feature set */ 1103 __le32 s_feature_compat; /* compatible feature set */
1097 /*60*/ __le32 s_feature_incompat; /* incompatible feature set */ 1104 /*60*/ __le32 s_feature_incompat; /* incompatible feature set */
1098 __le32 s_feature_ro_compat; /* readonly-compatible feature set */ 1105 __le32 s_feature_ro_compat; /* readonly-compatible feature set */
1099 /*68*/ __u8 s_uuid[16]; /* 128-bit uuid for volume */ 1106 /*68*/ __u8 s_uuid[16]; /* 128-bit uuid for volume */
1100 /*78*/ char s_volume_name[16]; /* volume name */ 1107 /*78*/ char s_volume_name[16]; /* volume name */
1101 /*88*/ char s_last_mounted[64]; /* directory where last mounted */ 1108 /*88*/ char s_last_mounted[64]; /* directory where last mounted */
1102 /*C8*/ __le32 s_algorithm_usage_bitmap; /* For compression */ 1109 /*C8*/ __le32 s_algorithm_usage_bitmap; /* For compression */
1103 /* 1110 /*
1104 * Performance hints. Directory preallocation should only 1111 * Performance hints. Directory preallocation should only
1105 * happen if the EXT4_FEATURE_COMPAT_DIR_PREALLOC flag is on. 1112 * happen if the EXT4_FEATURE_COMPAT_DIR_PREALLOC flag is on.
1106 */ 1113 */
1107 __u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate */ 1114 __u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate */
1108 __u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */ 1115 __u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */
1109 __le16 s_reserved_gdt_blocks; /* Per group desc for online growth */ 1116 __le16 s_reserved_gdt_blocks; /* Per group desc for online growth */
1110 /* 1117 /*
1111 * Journaling support valid if EXT4_FEATURE_COMPAT_HAS_JOURNAL set. 1118 * Journaling support valid if EXT4_FEATURE_COMPAT_HAS_JOURNAL set.
1112 */ 1119 */
1113 /*D0*/ __u8 s_journal_uuid[16]; /* uuid of journal superblock */ 1120 /*D0*/ __u8 s_journal_uuid[16]; /* uuid of journal superblock */
1114 /*E0*/ __le32 s_journal_inum; /* inode number of journal file */ 1121 /*E0*/ __le32 s_journal_inum; /* inode number of journal file */
1115 __le32 s_journal_dev; /* device number of journal file */ 1122 __le32 s_journal_dev; /* device number of journal file */
1116 __le32 s_last_orphan; /* start of list of inodes to delete */ 1123 __le32 s_last_orphan; /* start of list of inodes to delete */
1117 __le32 s_hash_seed[4]; /* HTREE hash seed */ 1124 __le32 s_hash_seed[4]; /* HTREE hash seed */
1118 __u8 s_def_hash_version; /* Default hash version to use */ 1125 __u8 s_def_hash_version; /* Default hash version to use */
1119 __u8 s_jnl_backup_type; 1126 __u8 s_jnl_backup_type;
1120 __le16 s_desc_size; /* size of group descriptor */ 1127 __le16 s_desc_size; /* size of group descriptor */
1121 /*100*/ __le32 s_default_mount_opts; 1128 /*100*/ __le32 s_default_mount_opts;
1122 __le32 s_first_meta_bg; /* First metablock block group */ 1129 __le32 s_first_meta_bg; /* First metablock block group */
1123 __le32 s_mkfs_time; /* When the filesystem was created */ 1130 __le32 s_mkfs_time; /* When the filesystem was created */
1124 __le32 s_jnl_blocks[17]; /* Backup of the journal inode */ 1131 __le32 s_jnl_blocks[17]; /* Backup of the journal inode */
1125 /* 64bit support valid if EXT4_FEATURE_COMPAT_64BIT */ 1132 /* 64bit support valid if EXT4_FEATURE_COMPAT_64BIT */
1126 /*150*/ __le32 s_blocks_count_hi; /* Blocks count */ 1133 /*150*/ __le32 s_blocks_count_hi; /* Blocks count */
1127 __le32 s_r_blocks_count_hi; /* Reserved blocks count */ 1134 __le32 s_r_blocks_count_hi; /* Reserved blocks count */
1128 __le32 s_free_blocks_count_hi; /* Free blocks count */ 1135 __le32 s_free_blocks_count_hi; /* Free blocks count */
1129 __le16 s_min_extra_isize; /* All inodes have at least # bytes */ 1136 __le16 s_min_extra_isize; /* All inodes have at least # bytes */
1130 __le16 s_want_extra_isize; /* New inodes should reserve # bytes */ 1137 __le16 s_want_extra_isize; /* New inodes should reserve # bytes */
1131 __le32 s_flags; /* Miscellaneous flags */ 1138 __le32 s_flags; /* Miscellaneous flags */
1132 __le16 s_raid_stride; /* RAID stride */ 1139 __le16 s_raid_stride; /* RAID stride */
1133 __le16 s_mmp_update_interval; /* # seconds to wait in MMP checking */ 1140 __le16 s_mmp_update_interval; /* # seconds to wait in MMP checking */
1134 __le64 s_mmp_block; /* Block for multi-mount protection */ 1141 __le64 s_mmp_block; /* Block for multi-mount protection */
1135 __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/ 1142 __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/
1136 __u8 s_log_groups_per_flex; /* FLEX_BG group size */ 1143 __u8 s_log_groups_per_flex; /* FLEX_BG group size */
1137 __u8 s_checksum_type; /* metadata checksum algorithm used */ 1144 __u8 s_checksum_type; /* metadata checksum algorithm used */
1138 __le16 s_reserved_pad; 1145 __le16 s_reserved_pad;
1139 __le64 s_kbytes_written; /* nr of lifetime kilobytes written */ 1146 __le64 s_kbytes_written; /* nr of lifetime kilobytes written */
1140 __le32 s_snapshot_inum; /* Inode number of active snapshot */ 1147 __le32 s_snapshot_inum; /* Inode number of active snapshot */
1141 __le32 s_snapshot_id; /* sequential ID of active snapshot */ 1148 __le32 s_snapshot_id; /* sequential ID of active snapshot */
1142 __le64 s_snapshot_r_blocks_count; /* reserved blocks for active 1149 __le64 s_snapshot_r_blocks_count; /* reserved blocks for active
1143 snapshot's future use */ 1150 snapshot's future use */
1144 __le32 s_snapshot_list; /* inode number of the head of the 1151 __le32 s_snapshot_list; /* inode number of the head of the
1145 on-disk snapshot list */ 1152 on-disk snapshot list */
1146 #define EXT4_S_ERR_START offsetof(struct ext4_super_block, s_error_count) 1153 #define EXT4_S_ERR_START offsetof(struct ext4_super_block, s_error_count)
1147 __le32 s_error_count; /* number of fs errors */ 1154 __le32 s_error_count; /* number of fs errors */
1148 __le32 s_first_error_time; /* first time an error happened */ 1155 __le32 s_first_error_time; /* first time an error happened */
1149 __le32 s_first_error_ino; /* inode involved in first error */ 1156 __le32 s_first_error_ino; /* inode involved in first error */
1150 __le64 s_first_error_block; /* block involved of first error */ 1157 __le64 s_first_error_block; /* block involved of first error */
1151 __u8 s_first_error_func[32]; /* function where the error happened */ 1158 __u8 s_first_error_func[32]; /* function where the error happened */
1152 __le32 s_first_error_line; /* line number where error happened */ 1159 __le32 s_first_error_line; /* line number where error happened */
1153 __le32 s_last_error_time; /* most recent time of an error */ 1160 __le32 s_last_error_time; /* most recent time of an error */
1154 __le32 s_last_error_ino; /* inode involved in last error */ 1161 __le32 s_last_error_ino; /* inode involved in last error */
1155 __le32 s_last_error_line; /* line number where error happened */ 1162 __le32 s_last_error_line; /* line number where error happened */
1156 __le64 s_last_error_block; /* block involved of last error */ 1163 __le64 s_last_error_block; /* block involved of last error */
1157 __u8 s_last_error_func[32]; /* function where the error happened */ 1164 __u8 s_last_error_func[32]; /* function where the error happened */
1158 #define EXT4_S_ERR_END offsetof(struct ext4_super_block, s_mount_opts) 1165 #define EXT4_S_ERR_END offsetof(struct ext4_super_block, s_mount_opts)
1159 __u8 s_mount_opts[64]; 1166 __u8 s_mount_opts[64];
1160 __le32 s_usr_quota_inum; /* inode for tracking user quota */ 1167 __le32 s_usr_quota_inum; /* inode for tracking user quota */
1161 __le32 s_grp_quota_inum; /* inode for tracking group quota */ 1168 __le32 s_grp_quota_inum; /* inode for tracking group quota */
1162 __le32 s_overhead_clusters; /* overhead blocks/clusters in fs */ 1169 __le32 s_overhead_clusters; /* overhead blocks/clusters in fs */
1163 __le32 s_backup_bgs[2]; /* groups with sparse_super2 SBs */ 1170 __le32 s_backup_bgs[2]; /* groups with sparse_super2 SBs */
1164 __le32 s_reserved[106]; /* Padding to the end of the block */ 1171 __u8 s_encrypt_algos[4]; /* Encryption algorithms in use */
1172 __le32 s_reserved[105]; /* Padding to the end of the block */
1165 __le32 s_checksum; /* crc32c(superblock) */ 1173 __le32 s_checksum; /* crc32c(superblock) */
1166 }; 1174 };
1167 1175
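The compat/incompat rule spelled out in the comment inside the struct reduces to a mask test: any set bit in s_feature_incompat that the kernel does not recognize must abort the mount. A hedged sketch (the helper and the known-features mask are invented for illustration):

    /* Illustrative sketch only. */
    static int incompat_features_ok(const struct ext4_super_block *es,
                                    __u32 known_incompat_mask)
    {
        /* Refuse the mount if any unknown incompatible bit is set. */
        return (le32_to_cpu(es->s_feature_incompat) &
                ~known_incompat_mask) == 0;
    }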
1168 #define EXT4_S_ERR_LEN (EXT4_S_ERR_END - EXT4_S_ERR_START) 1176 #define EXT4_S_ERR_LEN (EXT4_S_ERR_END - EXT4_S_ERR_START)
1169 1177
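EXT4_S_ERR_START and EXT4_S_ERR_END bracket the error-reporting fields (s_error_count through s_last_error_func) so the whole span can be preserved as one blob when a superblock image is overwritten. A hedged sketch of such a copy (this particular helper is invented for illustration):

    /* Illustrative sketch only; dst and src are two superblock images. */
    static void preserve_error_info(struct ext4_super_block *dst,
                                    const struct ext4_super_block *src)
    {
        memcpy((char *)dst + EXT4_S_ERR_START,
               (const char *)src + EXT4_S_ERR_START,
               EXT4_S_ERR_LEN);
    }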
1170 #ifdef __KERNEL__ 1178 #ifdef __KERNEL__
1171 1179
1172 /* 1180 /*
1173 * run-time mount flags 1181 * run-time mount flags
1174 */ 1182 */
1175 #define EXT4_MF_MNTDIR_SAMPLED 0x0001 1183 #define EXT4_MF_MNTDIR_SAMPLED 0x0001
1176 #define EXT4_MF_FS_ABORTED 0x0002 /* Fatal error detected */ 1184 #define EXT4_MF_FS_ABORTED 0x0002 /* Fatal error detected */
1177 1185
1178 /* Number of quota types we support */ 1186 /* Number of quota types we support */
1179 #define EXT4_MAXQUOTAS 2 1187 #define EXT4_MAXQUOTAS 2
1180 1188
1181 /* 1189 /*
1182 * fourth extended-fs super-block data in memory 1190 * fourth extended-fs super-block data in memory
1183 */ 1191 */
1184 struct ext4_sb_info { 1192 struct ext4_sb_info {
1185 unsigned long s_desc_size; /* Size of a group descriptor in bytes */ 1193 unsigned long s_desc_size; /* Size of a group descriptor in bytes */
1186 unsigned long s_inodes_per_block;/* Number of inodes per block */ 1194 unsigned long s_inodes_per_block;/* Number of inodes per block */
1187 unsigned long s_blocks_per_group;/* Number of blocks in a group */ 1195 unsigned long s_blocks_per_group;/* Number of blocks in a group */
1188 unsigned long s_clusters_per_group; /* Number of clusters in a group */ 1196 unsigned long s_clusters_per_group; /* Number of clusters in a group */
1189 unsigned long s_inodes_per_group;/* Number of inodes in a group */ 1197 unsigned long s_inodes_per_group;/* Number of inodes in a group */
1190 unsigned long s_itb_per_group; /* Number of inode table blocks per group */ 1198 unsigned long s_itb_per_group; /* Number of inode table blocks per group */
1191 unsigned long s_gdb_count; /* Number of group descriptor blocks */ 1199 unsigned long s_gdb_count; /* Number of group descriptor blocks */
1192 unsigned long s_desc_per_block; /* Number of group descriptors per block */ 1200 unsigned long s_desc_per_block; /* Number of group descriptors per block */
1193 ext4_group_t s_groups_count; /* Number of groups in the fs */ 1201 ext4_group_t s_groups_count; /* Number of groups in the fs */
1194 ext4_group_t s_blockfile_groups;/* Groups acceptable for non-extent files */ 1202 ext4_group_t s_blockfile_groups;/* Groups acceptable for non-extent files */
1195 unsigned long s_overhead; /* # of fs overhead clusters */ 1203 unsigned long s_overhead; /* # of fs overhead clusters */
1196 unsigned int s_cluster_ratio; /* Number of blocks per cluster */ 1204 unsigned int s_cluster_ratio; /* Number of blocks per cluster */
1197 unsigned int s_cluster_bits; /* log2 of s_cluster_ratio */ 1205 unsigned int s_cluster_bits; /* log2 of s_cluster_ratio */
1198 loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ 1206 loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */
1199 struct buffer_head * s_sbh; /* Buffer containing the super block */ 1207 struct buffer_head * s_sbh; /* Buffer containing the super block */
1200 struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */ 1208 struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */
1201 struct buffer_head **s_group_desc; 1209 struct buffer_head **s_group_desc;
1202 unsigned int s_mount_opt; 1210 unsigned int s_mount_opt;
1203 unsigned int s_mount_opt2; 1211 unsigned int s_mount_opt2;
1204 unsigned int s_mount_flags; 1212 unsigned int s_mount_flags;
1205 unsigned int s_def_mount_opt; 1213 unsigned int s_def_mount_opt;
1206 ext4_fsblk_t s_sb_block; 1214 ext4_fsblk_t s_sb_block;
1207 atomic64_t s_resv_clusters; 1215 atomic64_t s_resv_clusters;
1208 kuid_t s_resuid; 1216 kuid_t s_resuid;
1209 kgid_t s_resgid; 1217 kgid_t s_resgid;
1210 unsigned short s_mount_state; 1218 unsigned short s_mount_state;
1211 unsigned short s_pad; 1219 unsigned short s_pad;
1212 int s_addr_per_block_bits; 1220 int s_addr_per_block_bits;
1213 int s_desc_per_block_bits; 1221 int s_desc_per_block_bits;
1214 int s_inode_size; 1222 int s_inode_size;
1215 int s_first_ino; 1223 int s_first_ino;
1216 unsigned int s_inode_readahead_blks; 1224 unsigned int s_inode_readahead_blks;
1217 unsigned int s_inode_goal; 1225 unsigned int s_inode_goal;
1218 spinlock_t s_next_gen_lock; 1226 spinlock_t s_next_gen_lock;
1219 u32 s_next_generation; 1227 u32 s_next_generation;
1220 u32 s_hash_seed[4]; 1228 u32 s_hash_seed[4];
1221 int s_def_hash_version; 1229 int s_def_hash_version;
1222 int s_hash_unsigned; /* 3 if hash should be unsigned, 0 if not */ 1230 int s_hash_unsigned; /* 3 if hash should be unsigned, 0 if not */
1223 struct percpu_counter s_freeclusters_counter; 1231 struct percpu_counter s_freeclusters_counter;
1224 struct percpu_counter s_freeinodes_counter; 1232 struct percpu_counter s_freeinodes_counter;
1225 struct percpu_counter s_dirs_counter; 1233 struct percpu_counter s_dirs_counter;
1226 struct percpu_counter s_dirtyclusters_counter; 1234 struct percpu_counter s_dirtyclusters_counter;
1227 struct blockgroup_lock *s_blockgroup_lock; 1235 struct blockgroup_lock *s_blockgroup_lock;
1228 struct proc_dir_entry *s_proc; 1236 struct proc_dir_entry *s_proc;
1229 struct kobject s_kobj; 1237 struct kobject s_kobj;
1230 struct completion s_kobj_unregister; 1238 struct completion s_kobj_unregister;
1231 struct super_block *s_sb; 1239 struct super_block *s_sb;
1232 1240
1233 /* Journaling */ 1241 /* Journaling */
1234 struct journal_s *s_journal; 1242 struct journal_s *s_journal;
1235 struct list_head s_orphan; 1243 struct list_head s_orphan;
1236 struct mutex s_orphan_lock; 1244 struct mutex s_orphan_lock;
1237 unsigned long s_resize_flags; /* Flags indicating if there 1245 unsigned long s_resize_flags; /* Flags indicating if there
1238 is a resizer */ 1246 is a resizer */
1239 unsigned long s_commit_interval; 1247 unsigned long s_commit_interval;
1240 u32 s_max_batch_time; 1248 u32 s_max_batch_time;
1241 u32 s_min_batch_time; 1249 u32 s_min_batch_time;
1242 struct block_device *journal_bdev; 1250 struct block_device *journal_bdev;
1243 #ifdef CONFIG_QUOTA 1251 #ifdef CONFIG_QUOTA
1244 char *s_qf_names[EXT4_MAXQUOTAS]; /* Names of quota files with journalled quota */ 1252 char *s_qf_names[EXT4_MAXQUOTAS]; /* Names of quota files with journalled quota */
1245 int s_jquota_fmt; /* Format of quota to use */ 1253 int s_jquota_fmt; /* Format of quota to use */
1246 #endif 1254 #endif
1247 unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */ 1255 unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
1248 struct rb_root system_blks; 1256 struct rb_root system_blks;
1249 1257
1250 #ifdef EXTENTS_STATS 1258 #ifdef EXTENTS_STATS
1251 /* ext4 extents stats */ 1259 /* ext4 extents stats */
1252 unsigned long s_ext_min; 1260 unsigned long s_ext_min;
1253 unsigned long s_ext_max; 1261 unsigned long s_ext_max;
1254 unsigned long s_depth_max; 1262 unsigned long s_depth_max;
1255 spinlock_t s_ext_stats_lock; 1263 spinlock_t s_ext_stats_lock;
1256 unsigned long s_ext_blocks; 1264 unsigned long s_ext_blocks;
1257 unsigned long s_ext_extents; 1265 unsigned long s_ext_extents;
1258 #endif 1266 #endif
1259 1267
1260 /* for buddy allocator */ 1268 /* for buddy allocator */
1261 struct ext4_group_info ***s_group_info; 1269 struct ext4_group_info ***s_group_info;
1262 struct inode *s_buddy_cache; 1270 struct inode *s_buddy_cache;
1263 spinlock_t s_md_lock; 1271 spinlock_t s_md_lock;
1264 unsigned short *s_mb_offsets; 1272 unsigned short *s_mb_offsets;
1265 unsigned int *s_mb_maxs; 1273 unsigned int *s_mb_maxs;
1266 unsigned int s_group_info_size; 1274 unsigned int s_group_info_size;
1267 1275
1268 /* tunables */ 1276 /* tunables */
1269 unsigned long s_stripe; 1277 unsigned long s_stripe;
1270 unsigned int s_mb_stream_request; 1278 unsigned int s_mb_stream_request;
1271 unsigned int s_mb_max_to_scan; 1279 unsigned int s_mb_max_to_scan;
1272 unsigned int s_mb_min_to_scan; 1280 unsigned int s_mb_min_to_scan;
1273 unsigned int s_mb_stats; 1281 unsigned int s_mb_stats;
1274 unsigned int s_mb_order2_reqs; 1282 unsigned int s_mb_order2_reqs;
1275 unsigned int s_mb_group_prealloc; 1283 unsigned int s_mb_group_prealloc;
1276 unsigned int s_max_dir_size_kb; 1284 unsigned int s_max_dir_size_kb;
1277 /* where last allocation was done - for stream allocation */ 1285 /* where last allocation was done - for stream allocation */
1278 unsigned long s_mb_last_group; 1286 unsigned long s_mb_last_group;
1279 unsigned long s_mb_last_start; 1287 unsigned long s_mb_last_start;
1280 1288
1281 /* stats for buddy allocator */ 1289 /* stats for buddy allocator */
1282 atomic_t s_bal_reqs; /* number of reqs with len > 1 */ 1290 atomic_t s_bal_reqs; /* number of reqs with len > 1 */
1283 atomic_t s_bal_success; /* we found long enough chunks */ 1291 atomic_t s_bal_success; /* we found long enough chunks */
1284 atomic_t s_bal_allocated; /* in blocks */ 1292 atomic_t s_bal_allocated; /* in blocks */
1285 atomic_t s_bal_ex_scanned; /* total extents scanned */ 1293 atomic_t s_bal_ex_scanned; /* total extents scanned */
1286 atomic_t s_bal_goals; /* goal hits */ 1294 atomic_t s_bal_goals; /* goal hits */
1287 atomic_t s_bal_breaks; /* too long searches */ 1295 atomic_t s_bal_breaks; /* too long searches */
1288 atomic_t s_bal_2orders; /* 2^order hits */ 1296 atomic_t s_bal_2orders; /* 2^order hits */
1289 spinlock_t s_bal_lock; 1297 spinlock_t s_bal_lock;
1290 unsigned long s_mb_buddies_generated; 1298 unsigned long s_mb_buddies_generated;
1291 unsigned long long s_mb_generation_time; 1299 unsigned long long s_mb_generation_time;
1292 atomic_t s_mb_lost_chunks; 1300 atomic_t s_mb_lost_chunks;
1293 atomic_t s_mb_preallocated; 1301 atomic_t s_mb_preallocated;
1294 atomic_t s_mb_discarded; 1302 atomic_t s_mb_discarded;
1295 atomic_t s_lock_busy; 1303 atomic_t s_lock_busy;
1296 1304
1297 /* locality groups */ 1305 /* locality groups */
1298 struct ext4_locality_group __percpu *s_locality_groups; 1306 struct ext4_locality_group __percpu *s_locality_groups;
1299 1307
1300 /* for write statistics */ 1308 /* for write statistics */
1301 unsigned long s_sectors_written_start; 1309 unsigned long s_sectors_written_start;
1302 u64 s_kbytes_written; 1310 u64 s_kbytes_written;
1303 1311
1304 /* the size of zero-out chunk */ 1312 /* the size of zero-out chunk */
1305 unsigned int s_extent_max_zeroout_kb; 1313 unsigned int s_extent_max_zeroout_kb;
1306 1314
1307 unsigned int s_log_groups_per_flex; 1315 unsigned int s_log_groups_per_flex;
1308 struct flex_groups *s_flex_groups; 1316 struct flex_groups *s_flex_groups;
1309 ext4_group_t s_flex_groups_allocated; 1317 ext4_group_t s_flex_groups_allocated;
1310 1318
1311 /* workqueue for reserved extent conversions (buffered io) */ 1319 /* workqueue for reserved extent conversions (buffered io) */
1312 struct workqueue_struct *rsv_conversion_wq; 1320 struct workqueue_struct *rsv_conversion_wq;
1313 1321
1314 /* timer for periodic error stats printing */ 1322 /* timer for periodic error stats printing */
1315 struct timer_list s_err_report; 1323 struct timer_list s_err_report;
1316 1324
1317 /* Lazy inode table initialization info */ 1325 /* Lazy inode table initialization info */
1318 struct ext4_li_request *s_li_request; 1326 struct ext4_li_request *s_li_request;
1319 /* Wait multiplier for lazy initialization thread */ 1327 /* Wait multiplier for lazy initialization thread */
1320 unsigned int s_li_wait_mult; 1328 unsigned int s_li_wait_mult;
1321 1329
1322 /* Kernel thread for multiple mount protection */ 1330 /* Kernel thread for multiple mount protection */
1323 struct task_struct *s_mmp_tsk; 1331 struct task_struct *s_mmp_tsk;
1324 1332
1325 /* record the last minlen when FITRIM is called. */ 1333 /* record the last minlen when FITRIM is called. */
1326 atomic_t s_last_trim_minblks; 1334 atomic_t s_last_trim_minblks;
1327 1335
1328 /* Reference to checksum algorithm driver via cryptoapi */ 1336 /* Reference to checksum algorithm driver via cryptoapi */
1329 struct crypto_shash *s_chksum_driver; 1337 struct crypto_shash *s_chksum_driver;
1330 1338
1331 /* Precomputed FS UUID checksum for seeding other checksums */ 1339 /* Precomputed FS UUID checksum for seeding other checksums */
1332 __u32 s_csum_seed; 1340 __u32 s_csum_seed;
1333 1341
1334 /* Reclaim extents from extent status tree */ 1342 /* Reclaim extents from extent status tree */
1335 struct shrinker s_es_shrinker; 1343 struct shrinker s_es_shrinker;
1336 struct list_head s_es_list; /* List of inodes with reclaimable extents */ 1344 struct list_head s_es_list; /* List of inodes with reclaimable extents */
1337 long s_es_nr_inode; 1345 long s_es_nr_inode;
1338 struct ext4_es_stats s_es_stats; 1346 struct ext4_es_stats s_es_stats;
1339 struct mb_cache *s_mb_cache; 1347 struct mb_cache *s_mb_cache;
1340 spinlock_t s_es_lock ____cacheline_aligned_in_smp; 1348 spinlock_t s_es_lock ____cacheline_aligned_in_smp;
1341 1349
1342 /* Ratelimit ext4 messages. */ 1350 /* Ratelimit ext4 messages. */
1343 struct ratelimit_state s_err_ratelimit_state; 1351 struct ratelimit_state s_err_ratelimit_state;
1344 struct ratelimit_state s_warning_ratelimit_state; 1352 struct ratelimit_state s_warning_ratelimit_state;
1345 struct ratelimit_state s_msg_ratelimit_state; 1353 struct ratelimit_state s_msg_ratelimit_state;
1346 }; 1354 };
1347 1355
1348 static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) 1356 static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
1349 { 1357 {
1350 return sb->s_fs_info; 1358 return sb->s_fs_info;
1351 } 1359 }
1352 static inline struct ext4_inode_info *EXT4_I(struct inode *inode) 1360 static inline struct ext4_inode_info *EXT4_I(struct inode *inode)
1353 { 1361 {
1354 return container_of(inode, struct ext4_inode_info, vfs_inode); 1362 return container_of(inode, struct ext4_inode_info, vfs_inode);
1355 } 1363 }
1356 1364
1357 static inline struct timespec ext4_current_time(struct inode *inode) 1365 static inline struct timespec ext4_current_time(struct inode *inode)
1358 { 1366 {
1359 return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ? 1367 return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ?
1360 current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; 1368 current_fs_time(inode->i_sb) : CURRENT_TIME_SEC;
1361 } 1369 }
1362 1370
1363 static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) 1371 static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
1364 { 1372 {
1365 return ino == EXT4_ROOT_INO || 1373 return ino == EXT4_ROOT_INO ||
1366 ino == EXT4_USR_QUOTA_INO || 1374 ino == EXT4_USR_QUOTA_INO ||
1367 ino == EXT4_GRP_QUOTA_INO || 1375 ino == EXT4_GRP_QUOTA_INO ||
1368 ino == EXT4_BOOT_LOADER_INO || 1376 ino == EXT4_BOOT_LOADER_INO ||
1369 ino == EXT4_JOURNAL_INO || 1377 ino == EXT4_JOURNAL_INO ||
1370 ino == EXT4_RESIZE_INO || 1378 ino == EXT4_RESIZE_INO ||
1371 (ino >= EXT4_FIRST_INO(sb) && 1379 (ino >= EXT4_FIRST_INO(sb) &&
1372 ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)); 1380 ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
1373 } 1381 }
1374 1382
1375 static inline void ext4_set_io_unwritten_flag(struct inode *inode, 1383 static inline void ext4_set_io_unwritten_flag(struct inode *inode,
1376 struct ext4_io_end *io_end) 1384 struct ext4_io_end *io_end)
1377 { 1385 {
1378 if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { 1386 if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
1379 io_end->flag |= EXT4_IO_END_UNWRITTEN; 1387 io_end->flag |= EXT4_IO_END_UNWRITTEN;
1380 atomic_inc(&EXT4_I(inode)->i_unwritten); 1388 atomic_inc(&EXT4_I(inode)->i_unwritten);
1381 } 1389 }
1382 } 1390 }
1383 1391
1384 static inline ext4_io_end_t *ext4_inode_aio(struct inode *inode) 1392 static inline ext4_io_end_t *ext4_inode_aio(struct inode *inode)
1385 { 1393 {
1386 return inode->i_private; 1394 return inode->i_private;
1387 } 1395 }
1388 1396
1389 static inline void ext4_inode_aio_set(struct inode *inode, ext4_io_end_t *io) 1397 static inline void ext4_inode_aio_set(struct inode *inode, ext4_io_end_t *io)
1390 { 1398 {
1391 inode->i_private = io; 1399 inode->i_private = io;
1392 } 1400 }
1393 1401
1394 /* 1402 /*
1395 * Inode dynamic state flags 1403 * Inode dynamic state flags
1396 */ 1404 */
1397 enum { 1405 enum {
1398 EXT4_STATE_JDATA, /* journaled data exists */ 1406 EXT4_STATE_JDATA, /* journaled data exists */
1399 EXT4_STATE_NEW, /* inode is newly created */ 1407 EXT4_STATE_NEW, /* inode is newly created */
1400 EXT4_STATE_XATTR, /* has in-inode xattrs */ 1408 EXT4_STATE_XATTR, /* has in-inode xattrs */
1401 EXT4_STATE_NO_EXPAND, /* No space for expansion */ 1409 EXT4_STATE_NO_EXPAND, /* No space for expansion */
1402 EXT4_STATE_DA_ALLOC_CLOSE, /* Alloc DA blks on close */ 1410 EXT4_STATE_DA_ALLOC_CLOSE, /* Alloc DA blks on close */
1403 EXT4_STATE_EXT_MIGRATE, /* Inode is migrating */ 1411 EXT4_STATE_EXT_MIGRATE, /* Inode is migrating */
1404 EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done */ 1412 EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done */
1405 EXT4_STATE_NEWENTRY, /* File just added to dir */ 1413 EXT4_STATE_NEWENTRY, /* File just added to dir */
1406 EXT4_STATE_DIOREAD_LOCK, /* Disable support for dio read 1414 EXT4_STATE_DIOREAD_LOCK, /* Disable support for dio read
1407 nolocking */ 1415 nolocking */
1408 EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */ 1416 EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */
1409 EXT4_STATE_ORDERED_MODE, /* data=ordered mode */ 1417 EXT4_STATE_ORDERED_MODE, /* data=ordered mode */
1410 EXT4_STATE_EXT_PRECACHED, /* extents have been precached */ 1418 EXT4_STATE_EXT_PRECACHED, /* extents have been precached */
1411 }; 1419 };
1412 1420
1413 #define EXT4_INODE_BIT_FNS(name, field, offset) \ 1421 #define EXT4_INODE_BIT_FNS(name, field, offset) \
1414 static inline int ext4_test_inode_##name(struct inode *inode, int bit) \ 1422 static inline int ext4_test_inode_##name(struct inode *inode, int bit) \
1415 { \ 1423 { \
1416 return test_bit(bit + (offset), &EXT4_I(inode)->i_##field); \ 1424 return test_bit(bit + (offset), &EXT4_I(inode)->i_##field); \
1417 } \ 1425 } \
1418 static inline void ext4_set_inode_##name(struct inode *inode, int bit) \ 1426 static inline void ext4_set_inode_##name(struct inode *inode, int bit) \
1419 { \ 1427 { \
1420 set_bit(bit + (offset), &EXT4_I(inode)->i_##field); \ 1428 set_bit(bit + (offset), &EXT4_I(inode)->i_##field); \
1421 } \ 1429 } \
1422 static inline void ext4_clear_inode_##name(struct inode *inode, int bit) \ 1430 static inline void ext4_clear_inode_##name(struct inode *inode, int bit) \
1423 { \ 1431 { \
1424 clear_bit(bit + (offset), &EXT4_I(inode)->i_##field); \ 1432 clear_bit(bit + (offset), &EXT4_I(inode)->i_##field); \
1425 } 1433 }
1426 1434
1427 /* Add these declarations here only so that these functions can be 1435 /* Add these declarations here only so that these functions can be
1428 * found by name. Otherwise, they are very hard to locate. */ 1436 * found by name. Otherwise, they are very hard to locate. */
1429 static inline int ext4_test_inode_flag(struct inode *inode, int bit); 1437 static inline int ext4_test_inode_flag(struct inode *inode, int bit);
1430 static inline void ext4_set_inode_flag(struct inode *inode, int bit); 1438 static inline void ext4_set_inode_flag(struct inode *inode, int bit);
1431 static inline void ext4_clear_inode_flag(struct inode *inode, int bit); 1439 static inline void ext4_clear_inode_flag(struct inode *inode, int bit);
1432 EXT4_INODE_BIT_FNS(flag, flags, 0) 1440 EXT4_INODE_BIT_FNS(flag, flags, 0)
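A quick usage sketch of what the invocation above generates (hypothetical call sites; EXT4_INODE_EXTENTS and EXT4_INODE_INDEX are flag enum values defined elsewhere in this header):

	/* Sketch: EXT4_INODE_BIT_FNS(flag, flags, 0) stamped out
	 * ext4_{test,set,clear}_inode_flag(), which are atomic bit ops
	 * on EXT4_I(inode)->i_flags at offset 0. */
	ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS);	/* inode uses extents */
	if (ext4_test_inode_flag(inode, EXT4_INODE_INDEX))
		ext4_clear_inode_flag(inode, EXT4_INODE_INDEX);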
1433 1441
1434 /* Add these declarations here only so that these functions can be 1442 /* Add these declarations here only so that these functions can be
1435 * found by name. Otherwise, they are very hard to locate. */ 1443 * found by name. Otherwise, they are very hard to locate. */
1436 static inline int ext4_test_inode_state(struct inode *inode, int bit); 1444 static inline int ext4_test_inode_state(struct inode *inode, int bit);
1437 static inline void ext4_set_inode_state(struct inode *inode, int bit); 1445 static inline void ext4_set_inode_state(struct inode *inode, int bit);
1438 static inline void ext4_clear_inode_state(struct inode *inode, int bit); 1446 static inline void ext4_clear_inode_state(struct inode *inode, int bit);
1439 #if (BITS_PER_LONG < 64) 1447 #if (BITS_PER_LONG < 64)
1440 EXT4_INODE_BIT_FNS(state, state_flags, 0) 1448 EXT4_INODE_BIT_FNS(state, state_flags, 0)
1441 1449
1442 static inline void ext4_clear_state_flags(struct ext4_inode_info *ei) 1450 static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
1443 { 1451 {
1444 (ei)->i_state_flags = 0; 1452 (ei)->i_state_flags = 0;
1445 } 1453 }
1446 #else 1454 #else
1447 EXT4_INODE_BIT_FNS(state, flags, 32) 1455 EXT4_INODE_BIT_FNS(state, flags, 32)
1448 1456
1449 static inline void ext4_clear_state_flags(struct ext4_inode_info *ei) 1457 static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
1450 { 1458 {
1451 /* We depend on the fact that callers will set i_flags */ 1459 /* We depend on the fact that callers will set i_flags */
1452 } 1460 }
1453 #endif 1461 #endif
1454 #else 1462 #else
1455 /* Assume that user mode programs are passing in an ext4fs superblock, not 1463 /* Assume that user mode programs are passing in an ext4fs superblock, not
1456 * a kernel struct super_block. This will allow us to call the feature-test 1464 * a kernel struct super_block. This will allow us to call the feature-test
1457 * macros from user land. */ 1465 * macros from user land. */
1458 #define EXT4_SB(sb) (sb) 1466 #define EXT4_SB(sb) (sb)
1459 #endif 1467 #endif
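One detail worth spelling out for the 64-bit branch above: when BITS_PER_LONG is 64, the dynamic state bits share a single unsigned long with the persistent flags, offset by 32. That is why ext4_clear_state_flags() can be a no-op there: callers write i_flags wholesale, which clears the state half along with it. Illustration (assumes a 64-bit build):

	/* On 64-bit kernels these two calls touch the same word: */
	ext4_set_inode_state(inode, EXT4_STATE_NEW);
	/* ...is equivalent to: */
	set_bit(EXT4_STATE_NEW + 32, &EXT4_I(inode)->i_flags);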
1460 1468
1461 #define NEXT_ORPHAN(inode) EXT4_I(inode)->i_dtime 1469 #define NEXT_ORPHAN(inode) EXT4_I(inode)->i_dtime
1462 1470
1463 /* 1471 /*
1464 * Codes for operating systems 1472 * Codes for operating systems
1465 */ 1473 */
1466 #define EXT4_OS_LINUX 0 1474 #define EXT4_OS_LINUX 0
1467 #define EXT4_OS_HURD 1 1475 #define EXT4_OS_HURD 1
1468 #define EXT4_OS_MASIX 2 1476 #define EXT4_OS_MASIX 2
1469 #define EXT4_OS_FREEBSD 3 1477 #define EXT4_OS_FREEBSD 3
1470 #define EXT4_OS_LITES 4 1478 #define EXT4_OS_LITES 4
1471 1479
1472 /* 1480 /*
1473 * Revision levels 1481 * Revision levels
1474 */ 1482 */
1475 #define EXT4_GOOD_OLD_REV 0 /* The good old (original) format */ 1483 #define EXT4_GOOD_OLD_REV 0 /* The good old (original) format */
1476 #define EXT4_DYNAMIC_REV 1 /* V2 format w/ dynamic inode sizes */ 1484 #define EXT4_DYNAMIC_REV 1 /* V2 format w/ dynamic inode sizes */
1477 1485
1478 #define EXT4_CURRENT_REV EXT4_GOOD_OLD_REV 1486 #define EXT4_CURRENT_REV EXT4_GOOD_OLD_REV
1479 #define EXT4_MAX_SUPP_REV EXT4_DYNAMIC_REV 1487 #define EXT4_MAX_SUPP_REV EXT4_DYNAMIC_REV
1480 1488
1481 #define EXT4_GOOD_OLD_INODE_SIZE 128 1489 #define EXT4_GOOD_OLD_INODE_SIZE 128
1482 1490
1483 /* 1491 /*
1484 * Feature set definitions 1492 * Feature set definitions
1485 */ 1493 */
1486 1494
1487 #define EXT4_HAS_COMPAT_FEATURE(sb,mask) \ 1495 #define EXT4_HAS_COMPAT_FEATURE(sb,mask) \
1488 ((EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask)) != 0) 1496 ((EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask)) != 0)
1489 #define EXT4_HAS_RO_COMPAT_FEATURE(sb,mask) \ 1497 #define EXT4_HAS_RO_COMPAT_FEATURE(sb,mask) \
1490 ((EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask)) != 0) 1498 ((EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask)) != 0)
1491 #define EXT4_HAS_INCOMPAT_FEATURE(sb,mask) \ 1499 #define EXT4_HAS_INCOMPAT_FEATURE(sb,mask) \
1492 ((EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask)) != 0) 1500 ((EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask)) != 0)
1493 #define EXT4_SET_COMPAT_FEATURE(sb,mask) \ 1501 #define EXT4_SET_COMPAT_FEATURE(sb,mask) \
1494 EXT4_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask) 1502 EXT4_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask)
1495 #define EXT4_SET_RO_COMPAT_FEATURE(sb,mask) \ 1503 #define EXT4_SET_RO_COMPAT_FEATURE(sb,mask) \
1496 EXT4_SB(sb)->s_es->s_feature_ro_compat |= cpu_to_le32(mask) 1504 EXT4_SB(sb)->s_es->s_feature_ro_compat |= cpu_to_le32(mask)
1497 #define EXT4_SET_INCOMPAT_FEATURE(sb,mask) \ 1505 #define EXT4_SET_INCOMPAT_FEATURE(sb,mask) \
1498 EXT4_SB(sb)->s_es->s_feature_incompat |= cpu_to_le32(mask) 1506 EXT4_SB(sb)->s_es->s_feature_incompat |= cpu_to_le32(mask)
1499 #define EXT4_CLEAR_COMPAT_FEATURE(sb,mask) \ 1507 #define EXT4_CLEAR_COMPAT_FEATURE(sb,mask) \
1500 EXT4_SB(sb)->s_es->s_feature_compat &= ~cpu_to_le32(mask) 1508 EXT4_SB(sb)->s_es->s_feature_compat &= ~cpu_to_le32(mask)
1501 #define EXT4_CLEAR_RO_COMPAT_FEATURE(sb,mask) \ 1509 #define EXT4_CLEAR_RO_COMPAT_FEATURE(sb,mask) \
1502 EXT4_SB(sb)->s_es->s_feature_ro_compat &= ~cpu_to_le32(mask) 1510 EXT4_SB(sb)->s_es->s_feature_ro_compat &= ~cpu_to_le32(mask)
1503 #define EXT4_CLEAR_INCOMPAT_FEATURE(sb,mask) \ 1511 #define EXT4_CLEAR_INCOMPAT_FEATURE(sb,mask) \
1504 EXT4_SB(sb)->s_es->s_feature_incompat &= ~cpu_to_le32(mask) 1512 EXT4_SB(sb)->s_es->s_feature_incompat &= ~cpu_to_le32(mask)
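As a usage sketch, all feature tests and updates go through these macros rather than poking s_es fields directly, so the little-endian conversion via cpu_to_le32() is applied consistently. For example (illustrative snippet, not a specific call site):

	if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
		printk(KERN_INFO "EXT4-fs: filesystem has a journal\n");
	EXT4_SET_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_LARGE_FILE);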
1505 1513
1506 #define EXT4_FEATURE_COMPAT_DIR_PREALLOC 0x0001 1514 #define EXT4_FEATURE_COMPAT_DIR_PREALLOC 0x0001
1507 #define EXT4_FEATURE_COMPAT_IMAGIC_INODES 0x0002 1515 #define EXT4_FEATURE_COMPAT_IMAGIC_INODES 0x0002
1508 #define EXT4_FEATURE_COMPAT_HAS_JOURNAL 0x0004 1516 #define EXT4_FEATURE_COMPAT_HAS_JOURNAL 0x0004
1509 #define EXT4_FEATURE_COMPAT_EXT_ATTR 0x0008 1517 #define EXT4_FEATURE_COMPAT_EXT_ATTR 0x0008
1510 #define EXT4_FEATURE_COMPAT_RESIZE_INODE 0x0010 1518 #define EXT4_FEATURE_COMPAT_RESIZE_INODE 0x0010
1511 #define EXT4_FEATURE_COMPAT_DIR_INDEX 0x0020 1519 #define EXT4_FEATURE_COMPAT_DIR_INDEX 0x0020
1512 #define EXT4_FEATURE_COMPAT_SPARSE_SUPER2 0x0200 1520 #define EXT4_FEATURE_COMPAT_SPARSE_SUPER2 0x0200
1513 1521
1514 #define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 1522 #define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001
1515 #define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 1523 #define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002
1516 #define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 1524 #define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004
1517 #define EXT4_FEATURE_RO_COMPAT_HUGE_FILE 0x0008 1525 #define EXT4_FEATURE_RO_COMPAT_HUGE_FILE 0x0008
1518 #define EXT4_FEATURE_RO_COMPAT_GDT_CSUM 0x0010 1526 #define EXT4_FEATURE_RO_COMPAT_GDT_CSUM 0x0010
1519 #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020 1527 #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020
1520 #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 1528 #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040
1521 #define EXT4_FEATURE_RO_COMPAT_QUOTA 0x0100 1529 #define EXT4_FEATURE_RO_COMPAT_QUOTA 0x0100
1522 #define EXT4_FEATURE_RO_COMPAT_BIGALLOC 0x0200 1530 #define EXT4_FEATURE_RO_COMPAT_BIGALLOC 0x0200
1523 /* 1531 /*
1524 * METADATA_CSUM also enables group descriptor checksums (GDT_CSUM). When 1532 * METADATA_CSUM also enables group descriptor checksums (GDT_CSUM). When
1525 * METADATA_CSUM is set, group descriptor checksums use the same algorithm as 1533 * METADATA_CSUM is set, group descriptor checksums use the same algorithm as
1526 * all other data structures' checksums. However, the METADATA_CSUM and 1534 * all other data structures' checksums. However, the METADATA_CSUM and
1527 * GDT_CSUM bits are mutually exclusive. 1535 * GDT_CSUM bits are mutually exclusive.
1528 */ 1536 */
1529 #define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM 0x0400 1537 #define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM 0x0400
1538 #define EXT4_FEATURE_RO_COMPAT_READONLY 0x1000
1530 1539
1531 #define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 1540 #define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001
1532 #define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002 1541 #define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002
1533 #define EXT4_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */ 1542 #define EXT4_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */
1534 #define EXT4_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ 1543 #define EXT4_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */
1535 #define EXT4_FEATURE_INCOMPAT_META_BG 0x0010 1544 #define EXT4_FEATURE_INCOMPAT_META_BG 0x0010
1536 #define EXT4_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */ 1545 #define EXT4_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */
1537 #define EXT4_FEATURE_INCOMPAT_64BIT 0x0080 1546 #define EXT4_FEATURE_INCOMPAT_64BIT 0x0080
1538 #define EXT4_FEATURE_INCOMPAT_MMP 0x0100 1547 #define EXT4_FEATURE_INCOMPAT_MMP 0x0100
1539 #define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200 1548 #define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200
1540 #define EXT4_FEATURE_INCOMPAT_EA_INODE 0x0400 /* EA in inode */ 1549 #define EXT4_FEATURE_INCOMPAT_EA_INODE 0x0400 /* EA in inode */
1541 #define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 /* data in dirent */ 1550 #define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 /* data in dirent */
1542 #define EXT4_FEATURE_INCOMPAT_BG_USE_META_CSUM 0x2000 /* use crc32c for bg */ 1551 #define EXT4_FEATURE_INCOMPAT_BG_USE_META_CSUM 0x2000 /* use crc32c for bg */
1543 #define EXT4_FEATURE_INCOMPAT_LARGEDIR 0x4000 /* >2GB or 3-lvl htree */ 1552 #define EXT4_FEATURE_INCOMPAT_LARGEDIR 0x4000 /* >2GB or 3-lvl htree */
1544 #define EXT4_FEATURE_INCOMPAT_INLINE_DATA 0x8000 /* data in inode */ 1553 #define EXT4_FEATURE_INCOMPAT_INLINE_DATA 0x8000 /* data in inode */
1554 #define EXT4_FEATURE_INCOMPAT_ENCRYPT 0x10000
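The two codepoints new in this diff, EXT4_FEATURE_RO_COMPAT_READONLY (0x1000) and EXT4_FEATURE_INCOMPAT_ENCRYPT (0x10000), are reservations; encryption only claims its number here. For READONLY, a natural enforcement is a mount-time clamp; a minimal sketch, assuming it lives in the ext4_fill_super()/remount paths:

	/* Sketch: an image marked READONLY must never be mounted writable. */
	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_READONLY))
		sb->s_flags |= MS_RDONLY;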
1545 1555
1546 #define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR 1556 #define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR
1547 #define EXT2_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ 1557 #define EXT2_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \
1548 EXT4_FEATURE_INCOMPAT_META_BG) 1558 EXT4_FEATURE_INCOMPAT_META_BG)
1549 #define EXT2_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ 1559 #define EXT2_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
1550 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ 1560 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
1551 EXT4_FEATURE_RO_COMPAT_BTREE_DIR) 1561 EXT4_FEATURE_RO_COMPAT_BTREE_DIR)
1552 1562
1553 #define EXT3_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR 1563 #define EXT3_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR
1554 #define EXT3_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ 1564 #define EXT3_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \
1555 EXT4_FEATURE_INCOMPAT_RECOVER| \ 1565 EXT4_FEATURE_INCOMPAT_RECOVER| \
1556 EXT4_FEATURE_INCOMPAT_META_BG) 1566 EXT4_FEATURE_INCOMPAT_META_BG)
1557 #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ 1567 #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
1558 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ 1568 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
1559 EXT4_FEATURE_RO_COMPAT_BTREE_DIR) 1569 EXT4_FEATURE_RO_COMPAT_BTREE_DIR)
1560 1570
1561 #define EXT4_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR 1571 #define EXT4_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR
1562 #define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ 1572 #define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \
1563 EXT4_FEATURE_INCOMPAT_RECOVER| \ 1573 EXT4_FEATURE_INCOMPAT_RECOVER| \
1564 EXT4_FEATURE_INCOMPAT_META_BG| \ 1574 EXT4_FEATURE_INCOMPAT_META_BG| \
1565 EXT4_FEATURE_INCOMPAT_EXTENTS| \ 1575 EXT4_FEATURE_INCOMPAT_EXTENTS| \
1566 EXT4_FEATURE_INCOMPAT_64BIT| \ 1576 EXT4_FEATURE_INCOMPAT_64BIT| \
1567 EXT4_FEATURE_INCOMPAT_FLEX_BG| \ 1577 EXT4_FEATURE_INCOMPAT_FLEX_BG| \
1568 EXT4_FEATURE_INCOMPAT_MMP | \ 1578 EXT4_FEATURE_INCOMPAT_MMP | \
1569 EXT4_FEATURE_INCOMPAT_INLINE_DATA) 1579 EXT4_FEATURE_INCOMPAT_INLINE_DATA)
1570 #define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ 1580 #define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
1571 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ 1581 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
1572 EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \ 1582 EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
1573 EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \ 1583 EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \
1574 EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \ 1584 EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
1575 EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\ 1585 EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\
1576 EXT4_FEATURE_RO_COMPAT_HUGE_FILE |\ 1586 EXT4_FEATURE_RO_COMPAT_HUGE_FILE |\
1577 EXT4_FEATURE_RO_COMPAT_BIGALLOC |\ 1587 EXT4_FEATURE_RO_COMPAT_BIGALLOC |\
1578 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM|\ 1588 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM|\
1579 EXT4_FEATURE_RO_COMPAT_QUOTA) 1589 EXT4_FEATURE_RO_COMPAT_QUOTA)
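These *_SUPP masks are what the mount-time gate compares against: any incompat bit outside EXT4_FEATURE_INCOMPAT_SUPP makes the filesystem unmountable, while an unknown ro_compat bit only forbids read-write mounts. A condensed sketch of that logic (modeled loosely on the driver's ext4_feature_set_ok(); simplified, logging omitted):

	static int ext4_features_ok_sketch(struct super_block *sb, int readonly)
	{
		if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP))
			return 0;	/* unknown incompat: cannot mount */
		if (readonly)
			return 1;
		if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP))
			return 0;	/* unknown ro_compat: ro mount only */
		return 1;
	}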
1580 1590
1581 /* 1591 /*
1582 * Default values for user and/or group using reserved blocks 1592 * Default values for user and/or group using reserved blocks
1583 */ 1593 */
1584 #define EXT4_DEF_RESUID 0 1594 #define EXT4_DEF_RESUID 0
1585 #define EXT4_DEF_RESGID 0 1595 #define EXT4_DEF_RESGID 0
1586 1596
1587 #define EXT4_DEF_INODE_READAHEAD_BLKS 32 1597 #define EXT4_DEF_INODE_READAHEAD_BLKS 32
1588 1598
1589 /* 1599 /*
1590 * Default mount options 1600 * Default mount options
1591 */ 1601 */
1592 #define EXT4_DEFM_DEBUG 0x0001 1602 #define EXT4_DEFM_DEBUG 0x0001
1593 #define EXT4_DEFM_BSDGROUPS 0x0002 1603 #define EXT4_DEFM_BSDGROUPS 0x0002
1594 #define EXT4_DEFM_XATTR_USER 0x0004 1604 #define EXT4_DEFM_XATTR_USER 0x0004
1595 #define EXT4_DEFM_ACL 0x0008 1605 #define EXT4_DEFM_ACL 0x0008
1596 #define EXT4_DEFM_UID16 0x0010 1606 #define EXT4_DEFM_UID16 0x0010
1597 #define EXT4_DEFM_JMODE 0x0060 1607 #define EXT4_DEFM_JMODE 0x0060
1598 #define EXT4_DEFM_JMODE_DATA 0x0020 1608 #define EXT4_DEFM_JMODE_DATA 0x0020
1599 #define EXT4_DEFM_JMODE_ORDERED 0x0040 1609 #define EXT4_DEFM_JMODE_ORDERED 0x0040
1600 #define EXT4_DEFM_JMODE_WBACK 0x0060 1610 #define EXT4_DEFM_JMODE_WBACK 0x0060
1601 #define EXT4_DEFM_NOBARRIER 0x0100 1611 #define EXT4_DEFM_NOBARRIER 0x0100
1602 #define EXT4_DEFM_BLOCK_VALIDITY 0x0200 1612 #define EXT4_DEFM_BLOCK_VALIDITY 0x0200
1603 #define EXT4_DEFM_DISCARD 0x0400 1613 #define EXT4_DEFM_DISCARD 0x0400
1604 #define EXT4_DEFM_NODELALLOC 0x0800 1614 #define EXT4_DEFM_NODELALLOC 0x0800
1605 1615
1606 /* 1616 /*
1607 * Default journal batch times 1617 * Default journal batch times
1608 */ 1618 */
1609 #define EXT4_DEF_MIN_BATCH_TIME 0 1619 #define EXT4_DEF_MIN_BATCH_TIME 0
1610 #define EXT4_DEF_MAX_BATCH_TIME 15000 /* 15ms */ 1620 #define EXT4_DEF_MAX_BATCH_TIME 15000 /* 15ms */
1611 1621
1612 /* 1622 /*
1613 * Minimum number of groups in a flexgroup before we separate out 1623 * Minimum number of groups in a flexgroup before we separate out
1614 * directories into the first block group of a flexgroup 1624 * directories into the first block group of a flexgroup
1615 */ 1625 */
1616 #define EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME 4 1626 #define EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME 4
1617 1627
1618 /* 1628 /*
1619 * Structure of a directory entry 1629 * Structure of a directory entry
1620 */ 1630 */
1621 #define EXT4_NAME_LEN 255 1631 #define EXT4_NAME_LEN 255
1622 1632
1623 struct ext4_dir_entry { 1633 struct ext4_dir_entry {
1624 __le32 inode; /* Inode number */ 1634 __le32 inode; /* Inode number */
1625 __le16 rec_len; /* Directory entry length */ 1635 __le16 rec_len; /* Directory entry length */
1626 __le16 name_len; /* Name length */ 1636 __le16 name_len; /* Name length */
1627 char name[EXT4_NAME_LEN]; /* File name */ 1637 char name[EXT4_NAME_LEN]; /* File name */
1628 }; 1638 };
1629 1639
1630 /* 1640 /*
1631 * The new version of the directory entry. Since EXT4 structures are 1641 * The new version of the directory entry. Since EXT4 structures are
1632 * stored in Intel (little-endian) byte order, and the name_len field could never be 1642 * stored in Intel (little-endian) byte order, and the name_len field could never be
1633 * bigger than 255 chars, it's safe to reclaim the extra byte for the 1643 * bigger than 255 chars, it's safe to reclaim the extra byte for the
1634 * file_type field. 1644 * file_type field.
1635 */ 1645 */
1636 struct ext4_dir_entry_2 { 1646 struct ext4_dir_entry_2 {
1637 __le32 inode; /* Inode number */ 1647 __le32 inode; /* Inode number */
1638 __le16 rec_len; /* Directory entry length */ 1648 __le16 rec_len; /* Directory entry length */
1639 __u8 name_len; /* Name length */ 1649 __u8 name_len; /* Name length */
1640 __u8 file_type; 1650 __u8 file_type;
1641 char name[EXT4_NAME_LEN]; /* File name */ 1651 char name[EXT4_NAME_LEN]; /* File name */
1642 }; 1652 };
1643 1653
1644 /* 1654 /*
1645 * This is a bogus directory entry at the end of each leaf block that 1655 * This is a bogus directory entry at the end of each leaf block that
1646 * records checksums. 1656 * records checksums.
1647 */ 1657 */
1648 struct ext4_dir_entry_tail { 1658 struct ext4_dir_entry_tail {
1649 __le32 det_reserved_zero1; /* Pretend to be unused */ 1659 __le32 det_reserved_zero1; /* Pretend to be unused */
1650 __le16 det_rec_len; /* 12 */ 1660 __le16 det_rec_len; /* 12 */
1651 __u8 det_reserved_zero2; /* Zero name length */ 1661 __u8 det_reserved_zero2; /* Zero name length */
1652 __u8 det_reserved_ft; /* 0xDE, fake file type */ 1662 __u8 det_reserved_ft; /* 0xDE, fake file type */
1653 __le32 det_checksum; /* crc32c(uuid+inum+dirblock) */ 1663 __le32 det_checksum; /* crc32c(uuid+inum+dirblock) */
1654 }; 1664 };
1655 1665
1656 #define EXT4_DIRENT_TAIL(block, blocksize) \ 1666 #define EXT4_DIRENT_TAIL(block, blocksize) \
1657 ((struct ext4_dir_entry_tail *)(((void *)(block)) + \ 1667 ((struct ext4_dir_entry_tail *)(((void *)(block)) + \
1658 ((blocksize) - \ 1668 ((blocksize) - \
1659 sizeof(struct ext4_dir_entry_tail)))) 1669 sizeof(struct ext4_dir_entry_tail))))
1660 1670
1661 /* 1671 /*
1662 * Ext4 directory file types. Only the low 3 bits are used. The 1672 * Ext4 directory file types. Only the low 3 bits are used. The
1663 * other bits are reserved for now. 1673 * other bits are reserved for now.
1664 */ 1674 */
1665 #define EXT4_FT_UNKNOWN 0 1675 #define EXT4_FT_UNKNOWN 0
1666 #define EXT4_FT_REG_FILE 1 1676 #define EXT4_FT_REG_FILE 1
1667 #define EXT4_FT_DIR 2 1677 #define EXT4_FT_DIR 2
1668 #define EXT4_FT_CHRDEV 3 1678 #define EXT4_FT_CHRDEV 3
1669 #define EXT4_FT_BLKDEV 4 1679 #define EXT4_FT_BLKDEV 4
1670 #define EXT4_FT_FIFO 5 1680 #define EXT4_FT_FIFO 5
1671 #define EXT4_FT_SOCK 6 1681 #define EXT4_FT_SOCK 6
1672 #define EXT4_FT_SYMLINK 7 1682 #define EXT4_FT_SYMLINK 7
1673 1683
1674 #define EXT4_FT_MAX 8 1684 #define EXT4_FT_MAX 8
1675 1685
1676 #define EXT4_FT_DIR_CSUM 0xDE 1686 #define EXT4_FT_DIR_CSUM 0xDE
1677 1687
1678 /* 1688 /*
1679 * EXT4_DIR_PAD defines the directory entry boundaries 1689 * EXT4_DIR_PAD defines the directory entry boundaries
1680 * 1690 *
1681 * NOTE: It must be a multiple of 4 1691 * NOTE: It must be a multiple of 4
1682 */ 1692 */
1683 #define EXT4_DIR_PAD 4 1693 #define EXT4_DIR_PAD 4
1684 #define EXT4_DIR_ROUND (EXT4_DIR_PAD - 1) 1694 #define EXT4_DIR_ROUND (EXT4_DIR_PAD - 1)
1685 #define EXT4_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT4_DIR_ROUND) & \ 1695 #define EXT4_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT4_DIR_ROUND) & \
1686 ~EXT4_DIR_ROUND) 1696 ~EXT4_DIR_ROUND)
1687 #define EXT4_MAX_REC_LEN ((1<<16)-1) 1697 #define EXT4_MAX_REC_LEN ((1<<16)-1)
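Worked examples of the record-length rounding: the constant 8 is the fixed dirent header (inode, rec_len, name_len, file_type) and EXT4_DIR_ROUND pads the total up to a 4-byte boundary:

	EXT4_DIR_REC_LEN(1);	/* (1 + 8 + 3) & ~3 = 12,  e.g. "."  */
	EXT4_DIR_REC_LEN(2);	/* (2 + 8 + 3) & ~3 = 12,  e.g. ".." */
	EXT4_DIR_REC_LEN(5);	/* (5 + 8 + 3) & ~3 = 16             */
	EXT4_DIR_REC_LEN(255);	/* (255 + 8 + 3) & ~3 = 264 (max)   */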
1688 1698
1689 /* 1699 /*
1690 * If we ever get support for fs block sizes > page_size, we'll need 1700 * If we ever get support for fs block sizes > page_size, we'll need
1691 * to remove the #if statements in the next two functions... 1701 * to remove the #if statements in the next two functions...
1692 */ 1702 */
1693 static inline unsigned int 1703 static inline unsigned int
1694 ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize) 1704 ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize)
1695 { 1705 {
1696 unsigned len = le16_to_cpu(dlen); 1706 unsigned len = le16_to_cpu(dlen);
1697 1707
1698 #if (PAGE_CACHE_SIZE >= 65536) 1708 #if (PAGE_CACHE_SIZE >= 65536)
1699 if (len == EXT4_MAX_REC_LEN || len == 0) 1709 if (len == EXT4_MAX_REC_LEN || len == 0)
1700 return blocksize; 1710 return blocksize;
1701 return (len & 65532) | ((len & 3) << 16); 1711 return (len & 65532) | ((len & 3) << 16);
1702 #else 1712 #else
1703 return len; 1713 return len;
1704 #endif 1714 #endif
1705 } 1715 }
1706 1716
1707 static inline __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize) 1717 static inline __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize)
1708 { 1718 {
1709 if ((len > blocksize) || (blocksize > (1 << 18)) || (len & 3)) 1719 if ((len > blocksize) || (blocksize > (1 << 18)) || (len & 3))
1710 BUG(); 1720 BUG();
1711 #if (PAGE_CACHE_SIZE >= 65536) 1721 #if (PAGE_CACHE_SIZE >= 65536)
1712 if (len < 65536) 1722 if (len < 65536)
1713 return cpu_to_le16(len); 1723 return cpu_to_le16(len);
1714 if (len == blocksize) { 1724 if (len == blocksize) {
1715 if (blocksize == 65536) 1725 if (blocksize == 65536)
1716 return cpu_to_le16(EXT4_MAX_REC_LEN); 1726 return cpu_to_le16(EXT4_MAX_REC_LEN);
1717 else 1727 else
1718 return cpu_to_le16(0); 1728 return cpu_to_le16(0);
1719 } 1729 }
1720 return cpu_to_le16((len & 65532) | ((len >> 16) & 3)); 1730 return cpu_to_le16((len & 65532) | ((len >> 16) & 3));
1721 #else 1731 #else
1722 return cpu_to_le16(len); 1732 return cpu_to_le16(len);
1723 #endif 1733 #endif
1724 } 1734 }
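To make the large-page encoding above concrete: rec_len is always a multiple of 4, so its two low bits are free to carry bits 16-17 of lengths that overflow the 16-bit on-disk field, and a record covering a whole block is special-cased. For example (assumes PAGE_CACHE_SIZE >= 65536 and a 128KiB block size):

	__le16 disk = ext4_rec_len_to_disk(65548, 131072);
	/* stored as (65548 & 65532) | ((65548 >> 16) & 3) = 12 | 1 = 13 */
	unsigned len = ext4_rec_len_from_disk(disk, 131072);
	/* decoded as (13 & 65532) | ((13 & 3) << 16) = 12 | 65536 = 65548 */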
1725 1735
1726 /* 1736 /*
1727 * Hash Tree Directory indexing 1737 * Hash Tree Directory indexing
1728 * (c) Daniel Phillips, 2001 1738 * (c) Daniel Phillips, 2001
1729 */ 1739 */
1730 1740
1731 #define is_dx(dir) (EXT4_HAS_COMPAT_FEATURE(dir->i_sb, \ 1741 #define is_dx(dir) (EXT4_HAS_COMPAT_FEATURE(dir->i_sb, \
1732 EXT4_FEATURE_COMPAT_DIR_INDEX) && \ 1742 EXT4_FEATURE_COMPAT_DIR_INDEX) && \
1733 ext4_test_inode_flag((dir), EXT4_INODE_INDEX)) 1743 ext4_test_inode_flag((dir), EXT4_INODE_INDEX))
1734 #define EXT4_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT4_LINK_MAX) 1744 #define EXT4_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT4_LINK_MAX)
1735 #define EXT4_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1) 1745 #define EXT4_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1)
1736 1746
1737 /* Legal values for the dx_root hash_version field: */ 1747 /* Legal values for the dx_root hash_version field: */
1738 1748
1739 #define DX_HASH_LEGACY 0 1749 #define DX_HASH_LEGACY 0
1740 #define DX_HASH_HALF_MD4 1 1750 #define DX_HASH_HALF_MD4 1
1741 #define DX_HASH_TEA 2 1751 #define DX_HASH_TEA 2
1742 #define DX_HASH_LEGACY_UNSIGNED 3 1752 #define DX_HASH_LEGACY_UNSIGNED 3
1743 #define DX_HASH_HALF_MD4_UNSIGNED 4 1753 #define DX_HASH_HALF_MD4_UNSIGNED 4
1744 #define DX_HASH_TEA_UNSIGNED 5 1754 #define DX_HASH_TEA_UNSIGNED 5
1745 1755
1746 static inline u32 ext4_chksum(struct ext4_sb_info *sbi, u32 crc, 1756 static inline u32 ext4_chksum(struct ext4_sb_info *sbi, u32 crc,
1747 const void *address, unsigned int length) 1757 const void *address, unsigned int length)
1748 { 1758 {
1749 struct { 1759 struct {
1750 struct shash_desc shash; 1760 struct shash_desc shash;
1751 char ctx[4]; 1761 char ctx[4];
1752 } desc; 1762 } desc;
1753 int err; 1763 int err;
1754 1764
1755 BUG_ON(crypto_shash_descsize(sbi->s_chksum_driver)!=sizeof(desc.ctx)); 1765 BUG_ON(crypto_shash_descsize(sbi->s_chksum_driver)!=sizeof(desc.ctx));
1756 1766
1757 desc.shash.tfm = sbi->s_chksum_driver; 1767 desc.shash.tfm = sbi->s_chksum_driver;
1758 desc.shash.flags = 0; 1768 desc.shash.flags = 0;
1759 *(u32 *)desc.ctx = crc; 1769 *(u32 *)desc.ctx = crc;
1760 1770
1761 err = crypto_shash_update(&desc.shash, address, length); 1771 err = crypto_shash_update(&desc.shash, address, length);
1762 BUG_ON(err); 1772 BUG_ON(err);
1763 1773
1764 return *(u32 *)desc.ctx; 1774 return *(u32 *)desc.ctx;
1765 } 1775 }
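Because the running crc is passed back in through desc.ctx, ext4_chksum() calls chain naturally over discontiguous buffers. This mirrors how the driver seeds its metadata checksums from the filesystem UUID before folding in the object being protected (illustrative snippet; es, bh, sbi and sb are assumed to be in scope):

	/* Fold in the fs UUID first, then the metadata block itself. */
	__u32 csum = ext4_chksum(sbi, ~0, es->s_uuid, sizeof(es->s_uuid));
	csum = ext4_chksum(sbi, csum, bh->b_data, sb->s_blocksize);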
1766 1776
1767 #ifdef __KERNEL__ 1777 #ifdef __KERNEL__
1768 1778
1769 /* hash info structure used by the directory hash */ 1779 /* hash info structure used by the directory hash */
1770 struct dx_hash_info 1780 struct dx_hash_info
1771 { 1781 {
1772 u32 hash; 1782 u32 hash;
1773 u32 minor_hash; 1783 u32 minor_hash;
1774 int hash_version; 1784 int hash_version;
1775 u32 *seed; 1785 u32 *seed;
1776 }; 1786 };
1777 1787
1778 1788
1779 /* 32 and 64 bit signed EOF for dx directories */ 1789 /* 32 and 64 bit signed EOF for dx directories */
1780 #define EXT4_HTREE_EOF_32BIT ((1UL << (32 - 1)) - 1) 1790 #define EXT4_HTREE_EOF_32BIT ((1UL << (32 - 1)) - 1)
1781 #define EXT4_HTREE_EOF_64BIT ((1ULL << (64 - 1)) - 1) 1791 #define EXT4_HTREE_EOF_64BIT ((1ULL << (64 - 1)) - 1)
1782 1792
1783 1793
1784 /* 1794 /*
1785 * Control parameters used by ext4_htree_next_block 1795 * Control parameters used by ext4_htree_next_block
1786 */ 1796 */
1787 #define HASH_NB_ALWAYS 1 1797 #define HASH_NB_ALWAYS 1
1788 1798
1789 1799
1790 /* 1800 /*
1791 * Describe an inode's exact location on disk and in memory 1801 * Describe an inode's exact location on disk and in memory
1792 */ 1802 */
1793 struct ext4_iloc 1803 struct ext4_iloc
1794 { 1804 {
1795 struct buffer_head *bh; 1805 struct buffer_head *bh;
1796 unsigned long offset; 1806 unsigned long offset;
1797 ext4_group_t block_group; 1807 ext4_group_t block_group;
1798 }; 1808 };
1799 1809
1800 static inline struct ext4_inode *ext4_raw_inode(struct ext4_iloc *iloc) 1810 static inline struct ext4_inode *ext4_raw_inode(struct ext4_iloc *iloc)
1801 { 1811 {
1802 return (struct ext4_inode *) (iloc->bh->b_data + iloc->offset); 1812 return (struct ext4_inode *) (iloc->bh->b_data + iloc->offset);
1803 } 1813 }
1804 1814
1805 /* 1815 /*
1806 * This structure is stuffed into the struct file's private_data field 1816 * This structure is stuffed into the struct file's private_data field
1807 * for directories. It is where we put information so that we can do 1817 * for directories. It is where we put information so that we can do
1808 * readdir operations in hash tree order. 1818 * readdir operations in hash tree order.
1809 */ 1819 */
1810 struct dir_private_info { 1820 struct dir_private_info {
1811 struct rb_root root; 1821 struct rb_root root;
1812 struct rb_node *curr_node; 1822 struct rb_node *curr_node;
1813 struct fname *extra_fname; 1823 struct fname *extra_fname;
1814 loff_t last_pos; 1824 loff_t last_pos;
1815 __u32 curr_hash; 1825 __u32 curr_hash;
1816 __u32 curr_minor_hash; 1826 __u32 curr_minor_hash;
1817 __u32 next_hash; 1827 __u32 next_hash;
1818 }; 1828 };
1819 1829
1820 /* calculate the first block number of the group */ 1830 /* calculate the first block number of the group */
1821 static inline ext4_fsblk_t 1831 static inline ext4_fsblk_t
1822 ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no) 1832 ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no)
1823 { 1833 {
1824 return group_no * (ext4_fsblk_t)EXT4_BLOCKS_PER_GROUP(sb) + 1834 return group_no * (ext4_fsblk_t)EXT4_BLOCKS_PER_GROUP(sb) +
1825 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); 1835 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
1826 } 1836 }
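Worked example: with 4KiB blocks there are 32768 blocks per group and s_first_data_block is 0, so group 5 starts at block 5 * 32768 = 163840; on a 1KiB-block filesystem s_first_data_block is 1, so the same group starts at 163841. The ext4_fsblk_t cast keeps the multiplication from overflowing 32 bits on large filesystems.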
1827 1837
1828 /* 1838 /*
1829 * Special error return code only used by dx_probe() and its callers. 1839 * Special error return code only used by dx_probe() and its callers.
1830 */ 1840 */
1831 #define ERR_BAD_DX_DIR (-(MAX_ERRNO - 1)) 1841 #define ERR_BAD_DX_DIR (-(MAX_ERRNO - 1))
1832 1842
1833 /* 1843 /*
1834 * Timeout and state flag for lazy initialization inode thread. 1844 * Timeout and state flag for lazy initialization inode thread.
1835 */ 1845 */
1836 #define EXT4_DEF_LI_WAIT_MULT 10 1846 #define EXT4_DEF_LI_WAIT_MULT 10
1837 #define EXT4_DEF_LI_MAX_START_DELAY 5 1847 #define EXT4_DEF_LI_MAX_START_DELAY 5
1838 #define EXT4_LAZYINIT_QUIT 0x0001 1848 #define EXT4_LAZYINIT_QUIT 0x0001
1839 #define EXT4_LAZYINIT_RUNNING 0x0002 1849 #define EXT4_LAZYINIT_RUNNING 0x0002
1840 1850
1841 /* 1851 /*
1842 * Lazy inode table initialization info 1852 * Lazy inode table initialization info
1843 */ 1853 */
1844 struct ext4_lazy_init { 1854 struct ext4_lazy_init {
1845 unsigned long li_state; 1855 unsigned long li_state;
1846 struct list_head li_request_list; 1856 struct list_head li_request_list;
1847 struct mutex li_list_mtx; 1857 struct mutex li_list_mtx;
1848 }; 1858 };
1849 1859
1850 struct ext4_li_request { 1860 struct ext4_li_request {
1851 struct super_block *lr_super; 1861 struct super_block *lr_super;
1852 struct ext4_sb_info *lr_sbi; 1862 struct ext4_sb_info *lr_sbi;
1853 ext4_group_t lr_next_group; 1863 ext4_group_t lr_next_group;
1854 struct list_head lr_request; 1864 struct list_head lr_request;
1855 unsigned long lr_next_sched; 1865 unsigned long lr_next_sched;
1856 unsigned long lr_timeout; 1866 unsigned long lr_timeout;
1857 }; 1867 };
1858 1868
1859 struct ext4_features { 1869 struct ext4_features {
1860 struct kobject f_kobj; 1870 struct kobject f_kobj;
1861 struct completion f_kobj_unregister; 1871 struct completion f_kobj_unregister;
1862 }; 1872 };
1863 1873
1864 /* 1874 /*
1865 * This structure will be used for multiple mount protection. It will be 1875 * This structure will be used for multiple mount protection. It will be
1866 * written into the block number saved in the s_mmp_block field in the 1876 * written into the block number saved in the s_mmp_block field in the
1867 * superblock. Programs that check MMP should assume that if 1877 * superblock. Programs that check MMP should assume that if
1868 * SEQ_FSCK (or any unknown code above SEQ_MAX) is present then it is NOT safe 1878 * SEQ_FSCK (or any unknown code above SEQ_MAX) is present then it is NOT safe
1869 * to use the filesystem, regardless of how old the timestamp is. 1879 * to use the filesystem, regardless of how old the timestamp is.
1870 */ 1880 */
1871 #define EXT4_MMP_MAGIC 0x004D4D50U /* ASCII for MMP */ 1881 #define EXT4_MMP_MAGIC 0x004D4D50U /* ASCII for MMP */
1872 #define EXT4_MMP_SEQ_CLEAN 0xFF4D4D50U /* mmp_seq value for clean unmount */ 1882 #define EXT4_MMP_SEQ_CLEAN 0xFF4D4D50U /* mmp_seq value for clean unmount */
1873 #define EXT4_MMP_SEQ_FSCK 0xE24D4D50U /* mmp_seq value when being fscked */ 1883 #define EXT4_MMP_SEQ_FSCK 0xE24D4D50U /* mmp_seq value when being fscked */
1874 #define EXT4_MMP_SEQ_MAX 0xE24D4D4FU /* maximum valid mmp_seq value */ 1884 #define EXT4_MMP_SEQ_MAX 0xE24D4D4FU /* maximum valid mmp_seq value */
1875 1885
1876 struct mmp_struct { 1886 struct mmp_struct {
1877 __le32 mmp_magic; /* Magic number for MMP */ 1887 __le32 mmp_magic; /* Magic number for MMP */
1878 __le32 mmp_seq; /* Sequence no. updated periodically */ 1888 __le32 mmp_seq; /* Sequence no. updated periodically */
1879 1889
1880 /* 1890 /*
1881 * mmp_time, mmp_nodename & mmp_bdevname are only used for information 1891 * mmp_time, mmp_nodename & mmp_bdevname are only used for information
1882 * purposes and do not affect the correctness of the algorithm 1892 * purposes and do not affect the correctness of the algorithm
1883 */ 1893 */
1884 __le64 mmp_time; /* Time last updated */ 1894 __le64 mmp_time; /* Time last updated */
1885 char mmp_nodename[64]; /* Node which last updated MMP block */ 1895 char mmp_nodename[64]; /* Node which last updated MMP block */
1886 char mmp_bdevname[32]; /* Bdev which last updated MMP block */ 1896 char mmp_bdevname[32]; /* Bdev which last updated MMP block */
1887 1897
1888 /* 1898 /*
1889 * mmp_check_interval is used to verify if the MMP block has been 1899 * mmp_check_interval is used to verify if the MMP block has been
1890 * updated on the block device. The value is updated based on the 1900 * updated on the block device. The value is updated based on the
1891 * maximum time to write the MMP block during an update cycle. 1901 * maximum time to write the MMP block during an update cycle.
1892 */ 1902 */
1893 __le16 mmp_check_interval; 1903 __le16 mmp_check_interval;
1894 1904
1895 __le16 mmp_pad1; 1905 __le16 mmp_pad1;
1896 __le32 mmp_pad2[226]; 1906 __le32 mmp_pad2[226];
1897 __le32 mmp_checksum; /* crc32c(uuid+mmp_block) */ 1907 __le32 mmp_checksum; /* crc32c(uuid+mmp_block) */
1898 }; 1908 };
1899 1909
1900 /* arguments passed to the mmp thread */ 1910 /* arguments passed to the mmp thread */
1901 struct mmpd_data { 1911 struct mmpd_data {
1902 struct buffer_head *bh; /* bh from initial read_mmp_block() */ 1912 struct buffer_head *bh; /* bh from initial read_mmp_block() */
1903 struct super_block *sb; /* super block of the fs */ 1913 struct super_block *sb; /* super block of the fs */
1904 }; 1914 };
1905 1915
1906 /* 1916 /*
1907 * Check interval multiplier 1917 * Check interval multiplier
1908 * The MMP block is written every update interval and initially checked every 1918 * The MMP block is written every update interval and initially checked every
1909 * update interval x the multiplier (the value is then adapted based on the 1919 * update interval x the multiplier (the value is then adapted based on the
1910 * write latency). The reason is that writes can be delayed under load and we 1920 * write latency). The reason is that writes can be delayed under load and we
1911 * don't want readers to incorrectly assume that the filesystem is no longer 1921 * don't want readers to incorrectly assume that the filesystem is no longer
1912 * in use. 1922 * in use.
1913 */ 1923 */
1914 #define EXT4_MMP_CHECK_MULT 2UL 1924 #define EXT4_MMP_CHECK_MULT 2UL
1915 1925
1916 /* 1926 /*
1917 * Minimum interval for MMP checking in seconds. 1927 * Minimum interval for MMP checking in seconds.
1918 */ 1928 */
1919 #define EXT4_MMP_MIN_CHECK_INTERVAL 5UL 1929 #define EXT4_MMP_MIN_CHECK_INTERVAL 5UL
1920 1930
1921 /* 1931 /*
1922 * Maximum interval for MMP checking in seconds. 1932 * Maximum interval for MMP checking in seconds.
1923 */ 1933 */
1924 #define EXT4_MMP_MAX_CHECK_INTERVAL 300UL 1934 #define EXT4_MMP_MAX_CHECK_INTERVAL 300UL
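Tying the MMP constants together: a mounted node periodically rewrites mmp_seq, and a prospective mounter must watch the block for at least EXT4_MMP_CHECK_MULT check intervals before concluding the filesystem is idle. A condensed sketch of that open-time handshake (simplified from the kmmpd/multi-mount-protect logic; reread_mmp_seq() is a hypothetical helper that re-reads s_mmp_block and returns the current mmp_seq):

	static int mmp_fs_busy_sketch(struct super_block *sb,
				      struct buffer_head *bh)
	{
		struct mmp_struct *mmp = (struct mmp_struct *)bh->b_data;
		u32 seq = le32_to_cpu(mmp->mmp_seq);
		unsigned int wait_secs = EXT4_MMP_CHECK_MULT *
					 le16_to_cpu(mmp->mmp_check_interval);

		if (seq == EXT4_MMP_SEQ_CLEAN)
			return 0;		/* last user unmounted cleanly */
		if (seq > EXT4_MMP_SEQ_MAX)
			return 1;		/* fsck/unknown owner: never safe */
		msleep(wait_secs * 1000);	/* wait one full check window */
		return reread_mmp_seq(sb, bh) != seq;	/* seq moved => in use */
	}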
1925 1935
1926 /* 1936 /*
1927 * Function prototypes 1937 * Function prototypes
1928 */ 1938 */
1929 1939
1930 /* 1940 /*
1931 * Ok, these declarations are also in <linux/kernel.h> but none of the 1941 * Ok, these declarations are also in <linux/kernel.h> but none of the
1932 * ext4 source programs needs to include it so they are duplicated here. 1942 * ext4 source programs needs to include it so they are duplicated here.
1933 */ 1943 */
1934 # define NORET_TYPE /**/ 1944 # define NORET_TYPE /**/
1935 # define ATTRIB_NORET __attribute__((noreturn)) 1945 # define ATTRIB_NORET __attribute__((noreturn))
1936 # define NORET_AND noreturn, 1946 # define NORET_AND noreturn,
1937 1947
1938 /* bitmap.c */ 1948 /* bitmap.c */
1939 extern unsigned int ext4_count_free(char *bitmap, unsigned numchars); 1949 extern unsigned int ext4_count_free(char *bitmap, unsigned numchars);
1940 void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group, 1950 void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
1941 struct ext4_group_desc *gdp, 1951 struct ext4_group_desc *gdp,
1942 struct buffer_head *bh, int sz); 1952 struct buffer_head *bh, int sz);
1943 int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, 1953 int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
1944 struct ext4_group_desc *gdp, 1954 struct ext4_group_desc *gdp,
1945 struct buffer_head *bh, int sz); 1955 struct buffer_head *bh, int sz);
1946 void ext4_block_bitmap_csum_set(struct super_block *sb, ext4_group_t group, 1956 void ext4_block_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
1947 struct ext4_group_desc *gdp, 1957 struct ext4_group_desc *gdp,
1948 struct buffer_head *bh); 1958 struct buffer_head *bh);
1949 int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, 1959 int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
1950 struct ext4_group_desc *gdp, 1960 struct ext4_group_desc *gdp,
1951 struct buffer_head *bh); 1961 struct buffer_head *bh);
1952 1962
1953 /* balloc.c */ 1963 /* balloc.c */
1954 extern void ext4_get_group_no_and_offset(struct super_block *sb, 1964 extern void ext4_get_group_no_and_offset(struct super_block *sb,
1955 ext4_fsblk_t blocknr, 1965 ext4_fsblk_t blocknr,
1956 ext4_group_t *blockgrpp, 1966 ext4_group_t *blockgrpp,
1957 ext4_grpblk_t *offsetp); 1967 ext4_grpblk_t *offsetp);
1958 extern ext4_group_t ext4_get_group_number(struct super_block *sb, 1968 extern ext4_group_t ext4_get_group_number(struct super_block *sb,
1959 ext4_fsblk_t block); 1969 ext4_fsblk_t block);
1960 1970
1961 extern unsigned int ext4_block_group(struct super_block *sb, 1971 extern unsigned int ext4_block_group(struct super_block *sb,
1962 ext4_fsblk_t blocknr); 1972 ext4_fsblk_t blocknr);
1963 extern ext4_grpblk_t ext4_block_group_offset(struct super_block *sb, 1973 extern ext4_grpblk_t ext4_block_group_offset(struct super_block *sb,
1964 ext4_fsblk_t blocknr); 1974 ext4_fsblk_t blocknr);
1965 extern int ext4_bg_has_super(struct super_block *sb, ext4_group_t group); 1975 extern int ext4_bg_has_super(struct super_block *sb, ext4_group_t group);
1966 extern unsigned long ext4_bg_num_gdb(struct super_block *sb, 1976 extern unsigned long ext4_bg_num_gdb(struct super_block *sb,
1967 ext4_group_t group); 1977 ext4_group_t group);
1968 extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, 1978 extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
1969 ext4_fsblk_t goal, 1979 ext4_fsblk_t goal,
1970 unsigned int flags, 1980 unsigned int flags,
1971 unsigned long *count, 1981 unsigned long *count,
1972 int *errp); 1982 int *errp);
1973 extern int ext4_claim_free_clusters(struct ext4_sb_info *sbi, 1983 extern int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
1974 s64 nclusters, unsigned int flags); 1984 s64 nclusters, unsigned int flags);
1975 extern ext4_fsblk_t ext4_count_free_clusters(struct super_block *); 1985 extern ext4_fsblk_t ext4_count_free_clusters(struct super_block *);
1976 extern void ext4_check_blocks_bitmap(struct super_block *); 1986 extern void ext4_check_blocks_bitmap(struct super_block *);
1977 extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, 1987 extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
1978 ext4_group_t block_group, 1988 ext4_group_t block_group,
1979 struct buffer_head ** bh); 1989 struct buffer_head ** bh);
1980 extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); 1990 extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
1981 1991
1982 extern struct buffer_head *ext4_read_block_bitmap_nowait(struct super_block *sb, 1992 extern struct buffer_head *ext4_read_block_bitmap_nowait(struct super_block *sb,
1983 ext4_group_t block_group); 1993 ext4_group_t block_group);
1984 extern int ext4_wait_block_bitmap(struct super_block *sb, 1994 extern int ext4_wait_block_bitmap(struct super_block *sb,
1985 ext4_group_t block_group, 1995 ext4_group_t block_group,
1986 struct buffer_head *bh); 1996 struct buffer_head *bh);
1987 extern struct buffer_head *ext4_read_block_bitmap(struct super_block *sb, 1997 extern struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
1988 ext4_group_t block_group); 1998 ext4_group_t block_group);
1989 extern unsigned ext4_free_clusters_after_init(struct super_block *sb, 1999 extern unsigned ext4_free_clusters_after_init(struct super_block *sb,
1990 ext4_group_t block_group, 2000 ext4_group_t block_group,
1991 struct ext4_group_desc *gdp); 2001 struct ext4_group_desc *gdp);
1992 ext4_fsblk_t ext4_inode_to_goal_block(struct inode *); 2002 ext4_fsblk_t ext4_inode_to_goal_block(struct inode *);
1993 2003
1994 /* dir.c */ 2004 /* dir.c */
1995 extern int __ext4_check_dir_entry(const char *, unsigned int, struct inode *, 2005 extern int __ext4_check_dir_entry(const char *, unsigned int, struct inode *,
1996 struct file *, 2006 struct file *,
1997 struct ext4_dir_entry_2 *, 2007 struct ext4_dir_entry_2 *,
1998 struct buffer_head *, char *, int, 2008 struct buffer_head *, char *, int,
1999 unsigned int); 2009 unsigned int);
2000 #define ext4_check_dir_entry(dir, filp, de, bh, buf, size, offset) \ 2010 #define ext4_check_dir_entry(dir, filp, de, bh, buf, size, offset) \
2001 unlikely(__ext4_check_dir_entry(__func__, __LINE__, (dir), (filp), \ 2011 unlikely(__ext4_check_dir_entry(__func__, __LINE__, (dir), (filp), \
2002 (de), (bh), (buf), (size), (offset))) 2012 (de), (bh), (buf), (size), (offset)))
2003 extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, 2013 extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
2004 __u32 minor_hash, 2014 __u32 minor_hash,
2005 struct ext4_dir_entry_2 *dirent); 2015 struct ext4_dir_entry_2 *dirent);
2006 extern void ext4_htree_free_dir_info(struct dir_private_info *p); 2016 extern void ext4_htree_free_dir_info(struct dir_private_info *p);
2007 extern int ext4_find_dest_de(struct inode *dir, struct inode *inode, 2017 extern int ext4_find_dest_de(struct inode *dir, struct inode *inode,
2008 struct buffer_head *bh, 2018 struct buffer_head *bh,
2009 void *buf, int buf_size, 2019 void *buf, int buf_size,
2010 const char *name, int namelen, 2020 const char *name, int namelen,
2011 struct ext4_dir_entry_2 **dest_de); 2021 struct ext4_dir_entry_2 **dest_de);
2012 void ext4_insert_dentry(struct inode *inode, 2022 void ext4_insert_dentry(struct inode *inode,
2013 struct ext4_dir_entry_2 *de, 2023 struct ext4_dir_entry_2 *de,
2014 int buf_size, 2024 int buf_size,
2015 const char *name, int namelen); 2025 const char *name, int namelen);
2016 static inline void ext4_update_dx_flag(struct inode *inode) 2026 static inline void ext4_update_dx_flag(struct inode *inode)
2017 { 2027 {
2018 if (!EXT4_HAS_COMPAT_FEATURE(inode->i_sb, 2028 if (!EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
2019 EXT4_FEATURE_COMPAT_DIR_INDEX)) 2029 EXT4_FEATURE_COMPAT_DIR_INDEX))
2020 ext4_clear_inode_flag(inode, EXT4_INODE_INDEX); 2030 ext4_clear_inode_flag(inode, EXT4_INODE_INDEX);
2021 } 2031 }
2022 static unsigned char ext4_filetype_table[] = { 2032 static unsigned char ext4_filetype_table[] = {
2023 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK 2033 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
2024 }; 2034 };
2025 2035
2026 static inline unsigned char get_dtype(struct super_block *sb, int filetype) 2036 static inline unsigned char get_dtype(struct super_block *sb, int filetype)
2027 { 2037 {
2028 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FILETYPE) || 2038 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FILETYPE) ||
2029 (filetype >= EXT4_FT_MAX)) 2039 (filetype >= EXT4_FT_MAX))
2030 return DT_UNKNOWN; 2040 return DT_UNKNOWN;
2031 2041
2032 return ext4_filetype_table[filetype]; 2042 return ext4_filetype_table[filetype];
2033 } 2043 }
2034 extern int ext4_check_all_de(struct inode *dir, struct buffer_head *bh, 2044 extern int ext4_check_all_de(struct inode *dir, struct buffer_head *bh,
2035 void *buf, int buf_size); 2045 void *buf, int buf_size);
2036 2046
2037 /* fsync.c */ 2047 /* fsync.c */
2038 extern int ext4_sync_file(struct file *, loff_t, loff_t, int); 2048 extern int ext4_sync_file(struct file *, loff_t, loff_t, int);
2039 2049
2040 /* hash.c */ 2050 /* hash.c */
2041 extern int ext4fs_dirhash(const char *name, int len, struct 2051 extern int ext4fs_dirhash(const char *name, int len, struct
2042 dx_hash_info *hinfo); 2052 dx_hash_info *hinfo);
2043 2053
2044 /* ialloc.c */ 2054 /* ialloc.c */
2045 extern struct inode *__ext4_new_inode(handle_t *, struct inode *, umode_t, 2055 extern struct inode *__ext4_new_inode(handle_t *, struct inode *, umode_t,
2046 const struct qstr *qstr, __u32 goal, 2056 const struct qstr *qstr, __u32 goal,
2047 uid_t *owner, int handle_type, 2057 uid_t *owner, int handle_type,
2048 unsigned int line_no, int nblocks); 2058 unsigned int line_no, int nblocks);
2049 2059
2050 #define ext4_new_inode(handle, dir, mode, qstr, goal, owner) \ 2060 #define ext4_new_inode(handle, dir, mode, qstr, goal, owner) \
2051 __ext4_new_inode((handle), (dir), (mode), (qstr), (goal), (owner), \ 2061 __ext4_new_inode((handle), (dir), (mode), (qstr), (goal), (owner), \
2052 0, 0, 0) 2062 0, 0, 0)
2053 #define ext4_new_inode_start_handle(dir, mode, qstr, goal, owner, \ 2063 #define ext4_new_inode_start_handle(dir, mode, qstr, goal, owner, \
2054 type, nblocks) \ 2064 type, nblocks) \
2055 __ext4_new_inode(NULL, (dir), (mode), (qstr), (goal), (owner), \ 2065 __ext4_new_inode(NULL, (dir), (mode), (qstr), (goal), (owner), \
2056 (type), __LINE__, (nblocks)) 2066 (type), __LINE__, (nblocks))
2057 2067
2058 2068
2059 extern void ext4_free_inode(handle_t *, struct inode *); 2069 extern void ext4_free_inode(handle_t *, struct inode *);
2060 extern struct inode * ext4_orphan_get(struct super_block *, unsigned long); 2070 extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
2061 extern unsigned long ext4_count_free_inodes(struct super_block *); 2071 extern unsigned long ext4_count_free_inodes(struct super_block *);
2062 extern unsigned long ext4_count_dirs(struct super_block *); 2072 extern unsigned long ext4_count_dirs(struct super_block *);
2063 extern void ext4_check_inodes_bitmap(struct super_block *); 2073 extern void ext4_check_inodes_bitmap(struct super_block *);
2064 extern void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap); 2074 extern void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
2065 extern int ext4_init_inode_table(struct super_block *sb, 2075 extern int ext4_init_inode_table(struct super_block *sb,
2066 ext4_group_t group, int barrier); 2076 ext4_group_t group, int barrier);
2067 extern void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate); 2077 extern void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate);
2068 2078
2069 /* mballoc.c */ 2079 /* mballoc.c */
2070 extern long ext4_mb_stats; 2080 extern long ext4_mb_stats;
2071 extern long ext4_mb_max_to_scan; 2081 extern long ext4_mb_max_to_scan;
2072 extern int ext4_mb_init(struct super_block *); 2082 extern int ext4_mb_init(struct super_block *);
2073 extern int ext4_mb_release(struct super_block *); 2083 extern int ext4_mb_release(struct super_block *);
2074 extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *, 2084 extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
2075 struct ext4_allocation_request *, int *); 2085 struct ext4_allocation_request *, int *);
2076 extern int ext4_mb_reserve_blocks(struct super_block *, int); 2086 extern int ext4_mb_reserve_blocks(struct super_block *, int);
2077 extern void ext4_discard_preallocations(struct inode *); 2087 extern void ext4_discard_preallocations(struct inode *);
2078 extern int __init ext4_init_mballoc(void); 2088 extern int __init ext4_init_mballoc(void);
2079 extern void ext4_exit_mballoc(void); 2089 extern void ext4_exit_mballoc(void);
2080 extern void ext4_free_blocks(handle_t *handle, struct inode *inode, 2090 extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
2081 struct buffer_head *bh, ext4_fsblk_t block, 2091 struct buffer_head *bh, ext4_fsblk_t block,
2082 unsigned long count, int flags); 2092 unsigned long count, int flags);
2083 extern int ext4_mb_alloc_groupinfo(struct super_block *sb, 2093 extern int ext4_mb_alloc_groupinfo(struct super_block *sb,
2084 ext4_group_t ngroups); 2094 ext4_group_t ngroups);
2085 extern int ext4_mb_add_groupinfo(struct super_block *sb, 2095 extern int ext4_mb_add_groupinfo(struct super_block *sb,
2086 ext4_group_t i, struct ext4_group_desc *desc); 2096 ext4_group_t i, struct ext4_group_desc *desc);
2087 extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, 2097 extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
2088 ext4_fsblk_t block, unsigned long count); 2098 ext4_fsblk_t block, unsigned long count);
2089 extern int ext4_trim_fs(struct super_block *, struct fstrim_range *); 2099 extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
2090 2100
2091 /* inode.c */ 2101 /* inode.c */
2092 struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int); 2102 struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int);
2093 struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int); 2103 struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int);
2094 int ext4_get_block_write(struct inode *inode, sector_t iblock, 2104 int ext4_get_block_write(struct inode *inode, sector_t iblock,
2095 struct buffer_head *bh_result, int create); 2105 struct buffer_head *bh_result, int create);
2096 int ext4_get_block(struct inode *inode, sector_t iblock, 2106 int ext4_get_block(struct inode *inode, sector_t iblock,
2097 struct buffer_head *bh_result, int create); 2107 struct buffer_head *bh_result, int create);
2098 int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, 2108 int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
2099 struct buffer_head *bh, int create); 2109 struct buffer_head *bh, int create);
2100 int ext4_walk_page_buffers(handle_t *handle, 2110 int ext4_walk_page_buffers(handle_t *handle,
2101 struct buffer_head *head, 2111 struct buffer_head *head,
2102 unsigned from, 2112 unsigned from,
2103 unsigned to, 2113 unsigned to,
2104 int *partial, 2114 int *partial,
2105 int (*fn)(handle_t *handle, 2115 int (*fn)(handle_t *handle,
2106 struct buffer_head *bh)); 2116 struct buffer_head *bh));
2107 int do_journal_get_write_access(handle_t *handle, 2117 int do_journal_get_write_access(handle_t *handle,
2108 struct buffer_head *bh); 2118 struct buffer_head *bh);
2109 #define FALL_BACK_TO_NONDELALLOC 1 2119 #define FALL_BACK_TO_NONDELALLOC 1
2110 #define CONVERT_INLINE_DATA 2 2120 #define CONVERT_INLINE_DATA 2
2111 2121
2112 extern struct inode *ext4_iget(struct super_block *, unsigned long); 2122 extern struct inode *ext4_iget(struct super_block *, unsigned long);
2113 extern struct inode *ext4_iget_normal(struct super_block *, unsigned long); 2123 extern struct inode *ext4_iget_normal(struct super_block *, unsigned long);
2114 extern int ext4_write_inode(struct inode *, struct writeback_control *); 2124 extern int ext4_write_inode(struct inode *, struct writeback_control *);
2115 extern int ext4_setattr(struct dentry *, struct iattr *); 2125 extern int ext4_setattr(struct dentry *, struct iattr *);
2116 extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, 2126 extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
2117 struct kstat *stat); 2127 struct kstat *stat);
2118 extern void ext4_evict_inode(struct inode *); 2128 extern void ext4_evict_inode(struct inode *);
2119 extern void ext4_clear_inode(struct inode *); 2129 extern void ext4_clear_inode(struct inode *);
2120 extern int ext4_sync_inode(handle_t *, struct inode *); 2130 extern int ext4_sync_inode(handle_t *, struct inode *);
2121 extern void ext4_dirty_inode(struct inode *, int); 2131 extern void ext4_dirty_inode(struct inode *, int);
2122 extern int ext4_change_inode_journal_flag(struct inode *, int); 2132 extern int ext4_change_inode_journal_flag(struct inode *, int);
2123 extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); 2133 extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
2124 extern int ext4_inode_attach_jinode(struct inode *inode); 2134 extern int ext4_inode_attach_jinode(struct inode *inode);
2125 extern int ext4_can_truncate(struct inode *inode); 2135 extern int ext4_can_truncate(struct inode *inode);
2126 extern void ext4_truncate(struct inode *); 2136 extern void ext4_truncate(struct inode *);
2127 extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length); 2137 extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length);
2128 extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks); 2138 extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks);
2129 extern void ext4_set_inode_flags(struct inode *); 2139 extern void ext4_set_inode_flags(struct inode *);
2130 extern void ext4_get_inode_flags(struct ext4_inode_info *); 2140 extern void ext4_get_inode_flags(struct ext4_inode_info *);
2131 extern int ext4_alloc_da_blocks(struct inode *inode); 2141 extern int ext4_alloc_da_blocks(struct inode *inode);
2132 extern void ext4_set_aops(struct inode *inode); 2142 extern void ext4_set_aops(struct inode *inode);
extern int ext4_writepage_trans_blocks(struct inode *);
extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
			     loff_t lstart, loff_t lend);
extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
extern qsize_t *ext4_get_reserved_space(struct inode *inode);
extern void ext4_da_update_reserve_space(struct inode *inode,
					int used, int quota_claim);

/* indirect.c */
extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
				struct ext4_map_blocks *map, int flags);
extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
				struct iov_iter *iter, loff_t offset);
extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock);
extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks);
extern void ext4_ind_truncate(handle_t *, struct inode *inode);
extern int ext4_ind_remove_space(handle_t *handle, struct inode *inode,
				 ext4_lblk_t start, ext4_lblk_t end);

/* ioctl.c */
extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long);

/* migrate.c */
extern int ext4_ext_migrate(struct inode *);
extern int ext4_ind_migrate(struct inode *inode);

/* namei.c */
extern int ext4_dirent_csum_verify(struct inode *inode,
				   struct ext4_dir_entry *dirent);
extern int ext4_orphan_add(handle_t *, struct inode *);
extern int ext4_orphan_del(handle_t *, struct inode *);
extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
				__u32 start_minor_hash, __u32 *next_hash);
extern int search_dir(struct buffer_head *bh,
		      char *search_buf,
		      int buf_size,
		      struct inode *dir,
		      const struct qstr *d_name,
		      unsigned int offset,
		      struct ext4_dir_entry_2 **res_dir);
extern int ext4_generic_delete_entry(handle_t *handle,
				     struct inode *dir,
				     struct ext4_dir_entry_2 *de_del,
				     struct buffer_head *bh,
				     void *entry_buf,
				     int buf_size,
				     int csum_size);

/* resize.c */
extern int ext4_group_add(struct super_block *sb,
			  struct ext4_new_group_data *input);
extern int ext4_group_extend(struct super_block *sb,
			     struct ext4_super_block *es,
			     ext4_fsblk_t n_blocks_count);
extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count);

/* super.c */
extern int ext4_calculate_overhead(struct super_block *sb);
extern void ext4_superblock_csum_set(struct super_block *sb);
extern void *ext4_kvmalloc(size_t size, gfp_t flags);
extern void *ext4_kvzalloc(size_t size, gfp_t flags);
extern int ext4_alloc_flex_bg_array(struct super_block *sb,
				    ext4_group_t ngroup);
extern const char *ext4_decode_error(struct super_block *sb, int errno,
				     char nbuf[16]);

extern __printf(4, 5)
void __ext4_error(struct super_block *, const char *, unsigned int,
		  const char *, ...);
extern __printf(5, 6)
void __ext4_error_inode(struct inode *, const char *, unsigned int, ext4_fsblk_t,
			const char *, ...);
extern __printf(5, 6)
void __ext4_error_file(struct file *, const char *, unsigned int, ext4_fsblk_t,
		       const char *, ...);
extern void __ext4_std_error(struct super_block *, const char *,
			     unsigned int, int);
extern __printf(4, 5)
void __ext4_abort(struct super_block *, const char *, unsigned int,
		  const char *, ...);
extern __printf(4, 5)
void __ext4_warning(struct super_block *, const char *, unsigned int,
		    const char *, ...);
extern __printf(3, 4)
void __ext4_msg(struct super_block *, const char *, const char *, ...);
extern void __dump_mmp_msg(struct super_block *, struct mmp_struct *mmp,
			   const char *, unsigned int, const char *);
extern __printf(7, 8)
void __ext4_grp_locked_error(const char *, unsigned int,
			     struct super_block *, ext4_group_t,
			     unsigned long, ext4_fsblk_t,
			     const char *, ...);

#ifdef CONFIG_PRINTK

#define ext4_error_inode(inode, func, line, block, fmt, ...) \
	__ext4_error_inode(inode, func, line, block, fmt, ##__VA_ARGS__)
#define ext4_error_file(file, func, line, block, fmt, ...) \
	__ext4_error_file(file, func, line, block, fmt, ##__VA_ARGS__)
#define ext4_error(sb, fmt, ...) \
	__ext4_error(sb, __func__, __LINE__, fmt, ##__VA_ARGS__)
#define ext4_abort(sb, fmt, ...) \
	__ext4_abort(sb, __func__, __LINE__, fmt, ##__VA_ARGS__)
#define ext4_warning(sb, fmt, ...) \
	__ext4_warning(sb, __func__, __LINE__, fmt, ##__VA_ARGS__)
#define ext4_msg(sb, level, fmt, ...) \
	__ext4_msg(sb, level, fmt, ##__VA_ARGS__)
#define dump_mmp_msg(sb, mmp, msg) \
	__dump_mmp_msg(sb, mmp, __func__, __LINE__, msg)
#define ext4_grp_locked_error(sb, grp, ino, block, fmt, ...) \
	__ext4_grp_locked_error(__func__, __LINE__, sb, grp, ino, block, \
				fmt, ##__VA_ARGS__)

#else

#define ext4_error_inode(inode, func, line, block, fmt, ...) \
do { \
	no_printk(fmt, ##__VA_ARGS__); \
	__ext4_error_inode(inode, "", 0, block, " "); \
} while (0)
#define ext4_error_file(file, func, line, block, fmt, ...) \
do { \
	no_printk(fmt, ##__VA_ARGS__); \
	__ext4_error_file(file, "", 0, block, " "); \
} while (0)
#define ext4_error(sb, fmt, ...) \
do { \
	no_printk(fmt, ##__VA_ARGS__); \
	__ext4_error(sb, "", 0, " "); \
} while (0)
#define ext4_abort(sb, fmt, ...) \
do { \
	no_printk(fmt, ##__VA_ARGS__); \
	__ext4_abort(sb, "", 0, " "); \
} while (0)
#define ext4_warning(sb, fmt, ...) \
do { \
	no_printk(fmt, ##__VA_ARGS__); \
	__ext4_warning(sb, "", 0, " "); \
} while (0)
#define ext4_msg(sb, level, fmt, ...) \
do { \
	no_printk(fmt, ##__VA_ARGS__); \
	__ext4_msg(sb, "", " "); \
} while (0)
#define dump_mmp_msg(sb, mmp, msg) \
	__dump_mmp_msg(sb, mmp, "", 0, "")
#define ext4_grp_locked_error(sb, grp, ino, block, fmt, ...) \
do { \
	no_printk(fmt, ##__VA_ARGS__); \
	__ext4_grp_locked_error("", 0, sb, grp, ino, block, " "); \
} while (0)

#endif

extern void ext4_update_dynamic_rev(struct super_block *sb);
extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb,
				      __u32 compat);
extern int ext4_update_rocompat_feature(handle_t *handle,
					struct super_block *sb, __u32 rocompat);
extern int ext4_update_incompat_feature(handle_t *handle,
					struct super_block *sb, __u32 incompat);
extern ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
				      struct ext4_group_desc *bg);
extern ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
				      struct ext4_group_desc *bg);
extern ext4_fsblk_t ext4_inode_table(struct super_block *sb,
				     struct ext4_group_desc *bg);
extern __u32 ext4_free_group_clusters(struct super_block *sb,
				      struct ext4_group_desc *bg);
extern __u32 ext4_free_inodes_count(struct super_block *sb,
				    struct ext4_group_desc *bg);
extern __u32 ext4_used_dirs_count(struct super_block *sb,
				  struct ext4_group_desc *bg);
extern __u32 ext4_itable_unused_count(struct super_block *sb,
				      struct ext4_group_desc *bg);
extern void ext4_block_bitmap_set(struct super_block *sb,
				  struct ext4_group_desc *bg, ext4_fsblk_t blk);
extern void ext4_inode_bitmap_set(struct super_block *sb,
				  struct ext4_group_desc *bg, ext4_fsblk_t blk);
extern void ext4_inode_table_set(struct super_block *sb,
				 struct ext4_group_desc *bg, ext4_fsblk_t blk);
extern void ext4_free_group_clusters_set(struct super_block *sb,
					 struct ext4_group_desc *bg,
					 __u32 count);
extern void ext4_free_inodes_set(struct super_block *sb,
				 struct ext4_group_desc *bg, __u32 count);
extern void ext4_used_dirs_set(struct super_block *sb,
			       struct ext4_group_desc *bg, __u32 count);
extern void ext4_itable_unused_set(struct super_block *sb,
				   struct ext4_group_desc *bg, __u32 count);
extern int ext4_group_desc_csum_verify(struct super_block *sb, __u32 group,
				       struct ext4_group_desc *gdp);
extern void ext4_group_desc_csum_set(struct super_block *sb, __u32 group,
				     struct ext4_group_desc *gdp);
extern int ext4_register_li_request(struct super_block *sb,
				    ext4_group_t first_not_zeroed);

static inline int ext4_has_group_desc_csum(struct super_block *sb)
{
	return EXT4_HAS_RO_COMPAT_FEATURE(sb,
					  EXT4_FEATURE_RO_COMPAT_GDT_CSUM) ||
	       (EXT4_SB(sb)->s_chksum_driver != NULL);
}

static inline int ext4_has_metadata_csum(struct super_block *sb)
{
	WARN_ON_ONCE(EXT4_HAS_RO_COMPAT_FEATURE(sb,
			EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
		     !EXT4_SB(sb)->s_chksum_driver);

	return (EXT4_SB(sb)->s_chksum_driver != NULL);
}

static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
{
	return ((ext4_fsblk_t)le32_to_cpu(es->s_blocks_count_hi) << 32) |
		le32_to_cpu(es->s_blocks_count_lo);
}

static inline ext4_fsblk_t ext4_r_blocks_count(struct ext4_super_block *es)
{
	return ((ext4_fsblk_t)le32_to_cpu(es->s_r_blocks_count_hi) << 32) |
		le32_to_cpu(es->s_r_blocks_count_lo);
}

static inline ext4_fsblk_t ext4_free_blocks_count(struct ext4_super_block *es)
{
	return ((ext4_fsblk_t)le32_to_cpu(es->s_free_blocks_count_hi) << 32) |
		le32_to_cpu(es->s_free_blocks_count_lo);
}

static inline void ext4_blocks_count_set(struct ext4_super_block *es,
					 ext4_fsblk_t blk)
{
	es->s_blocks_count_lo = cpu_to_le32((u32)blk);
	es->s_blocks_count_hi = cpu_to_le32(blk >> 32);
}

static inline void ext4_free_blocks_count_set(struct ext4_super_block *es,
					      ext4_fsblk_t blk)
{
	es->s_free_blocks_count_lo = cpu_to_le32((u32)blk);
	es->s_free_blocks_count_hi = cpu_to_le32(blk >> 32);
}

static inline void ext4_r_blocks_count_set(struct ext4_super_block *es,
					   ext4_fsblk_t blk)
{
	es->s_r_blocks_count_lo = cpu_to_le32((u32)blk);
	es->s_r_blocks_count_hi = cpu_to_le32(blk >> 32);
}
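
/*
 * A worked example of the hi/lo split above (illustrative numbers
 * only): with 4 KiB blocks, a 16 TiB filesystem holds 2^32 blocks,
 * just past what s_blocks_count_lo alone can represent, so
 * ext4_blocks_count_set() stores s_blocks_count_lo == 0 and
 * s_blocks_count_hi == 1, and ext4_blocks_count() reassembles
 * ((ext4_fsblk_t)1 << 32) | 0 == 4294967296.
 */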

static inline loff_t ext4_isize(struct ext4_inode *raw_inode)
{
	if (S_ISREG(le16_to_cpu(raw_inode->i_mode)))
		return ((loff_t)le32_to_cpu(raw_inode->i_size_high) << 32) |
			le32_to_cpu(raw_inode->i_size_lo);
	else
		return (loff_t) le32_to_cpu(raw_inode->i_size_lo);
}

static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size)
{
	raw_inode->i_size_lo = cpu_to_le32(i_size);
	raw_inode->i_size_high = cpu_to_le32(i_size >> 32);
}

static inline
struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
					    ext4_group_t group)
{
	struct ext4_group_info ***grp_info;
	long indexv, indexh;
	BUG_ON(group >= EXT4_SB(sb)->s_groups_count);
	grp_info = EXT4_SB(sb)->s_group_info;
	indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb));
	indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1);
	return grp_info[indexv][indexh];
}
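
/*
 * Illustration of the two-level lookup above, assuming 4 KiB blocks
 * and 32-byte group descriptors (EXT4_DESC_PER_BLOCK(sb) == 128,
 * EXT4_DESC_PER_BLOCK_BITS(sb) == 7): for group 300, indexv ==
 * 300 >> 7 == 2 and indexh == 300 & 127 == 44, so the lookup lands
 * on entry 44 of the third first-level chunk (2 * 128 + 44 == 300).
 */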

/*
 * Reading s_groups_count requires using smp_rmb() afterwards.  See
 * the locking protocol documented in the comments of ext4_group_add()
 * in resize.c
 */
static inline ext4_group_t ext4_get_groups_count(struct super_block *sb)
{
	ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;

	smp_rmb();
	return ngroups;
}

static inline ext4_group_t ext4_flex_group(struct ext4_sb_info *sbi,
					   ext4_group_t block_group)
{
	return block_group >> sbi->s_log_groups_per_flex;
}

static inline unsigned int ext4_flex_bg_size(struct ext4_sb_info *sbi)
{
	return 1 << sbi->s_log_groups_per_flex;
}
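
/*
 * Illustration: with flex_bg and an s_log_groups_per_flex of 4 (the
 * value commonly chosen at mkfs time, giving 16 block groups per flex
 * group), ext4_flex_group() maps block groups 0-15 to flex group 0
 * and groups 16-31 to flex group 1, while ext4_flex_bg_size()
 * returns 1 << 4 == 16.
 */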

#define ext4_std_error(sb, errno)				\
do {								\
	if ((errno))						\
		__ext4_std_error((sb), __func__, __LINE__, (errno));	\
} while (0)

#ifdef CONFIG_SMP
/* Each CPU can accumulate percpu_counter_batch clusters in its local
 * counter, so we need to make sure we have more free clusters than
 * percpu_counter_batch * nr_cpu_ids. Also add a window of 4 times.
 */
#define EXT4_FREECLUSTERS_WATERMARK (4 * (percpu_counter_batch * nr_cpu_ids))
#else
#define EXT4_FREECLUSTERS_WATERMARK 0
#endif
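
/*
 * Illustrative numbers for the watermark above: percpu_counter_batch
 * is sized at runtime, but on a box where it works out to 32 with
 * nr_cpu_ids == 8, EXT4_FREECLUSTERS_WATERMARK == 4 * 32 * 8 == 1024
 * clusters; once the free-cluster count drops below that, the
 * per-CPU slop could hide an out-of-space condition, so an exact
 * count must be taken instead.
 */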

/* Update i_disksize. Requires i_mutex to avoid races with truncate */
static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
{
	WARN_ON_ONCE(S_ISREG(inode->i_mode) &&
		     !mutex_is_locked(&inode->i_mutex));
	down_write(&EXT4_I(inode)->i_data_sem);
	if (newsize > EXT4_I(inode)->i_disksize)
		EXT4_I(inode)->i_disksize = newsize;
	up_write(&EXT4_I(inode)->i_data_sem);
}

/* Update i_size, i_disksize. Requires i_mutex to avoid races with truncate */
static inline int ext4_update_inode_size(struct inode *inode, loff_t newsize)
{
	int changed = 0;

	if (newsize > inode->i_size) {
		i_size_write(inode, newsize);
		changed = 1;
	}
	if (newsize > EXT4_I(inode)->i_disksize) {
		ext4_update_i_disksize(inode, newsize);
		changed |= 2;
	}
	return changed;
}
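
/*
 * The return value of ext4_update_inode_size() is a bitmask: bit 0
 * means i_size grew, bit 1 means i_disksize grew.  An illustrative
 * (not verbatim) caller pattern is:
 *
 *	if (ext4_update_inode_size(inode, pos + copied))
 *		ext4_mark_inode_dirty(handle, inode);
 *
 * i.e. only dirty the inode when one of the sizes actually changed.
 */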

struct ext4_group_info {
	unsigned long   bb_state;
	struct rb_root  bb_free_root;
	ext4_grpblk_t	bb_first_free;	/* first free block */
	ext4_grpblk_t	bb_free;	/* total free blocks */
	ext4_grpblk_t	bb_fragments;	/* nr of freespace fragments */
	ext4_grpblk_t	bb_largest_free_order;/* order of largest frag in BG */
	struct          list_head bb_prealloc_list;
#ifdef DOUBLE_CHECK
	void            *bb_bitmap;
#endif
	struct rw_semaphore alloc_sem;
	ext4_grpblk_t	bb_counters[];	/* Nr of free power-of-two-block
					 * regions, index is order.
					 * bb_counters[3] = 5 means
					 * 5 free 8-block regions. */
};

#define EXT4_GROUP_INFO_NEED_INIT_BIT		0
#define EXT4_GROUP_INFO_WAS_TRIMMED_BIT		1
#define EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT	2
#define EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT	3

#define EXT4_MB_GRP_NEED_INIT(grp)	\
	(test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
#define EXT4_MB_GRP_BBITMAP_CORRUPT(grp)	\
	(test_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &((grp)->bb_state)))
#define EXT4_MB_GRP_IBITMAP_CORRUPT(grp)	\
	(test_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &((grp)->bb_state)))

#define EXT4_MB_GRP_WAS_TRIMMED(grp)	\
	(test_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))
#define EXT4_MB_GRP_SET_TRIMMED(grp)	\
	(set_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))
#define EXT4_MB_GRP_CLEAR_TRIMMED(grp)	\
	(clear_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))

#define EXT4_MAX_CONTENTION		8
#define EXT4_CONTENTION_THRESHOLD	2

static inline spinlock_t *ext4_group_lock_ptr(struct super_block *sb,
					      ext4_group_t group)
{
	return bgl_lock_ptr(EXT4_SB(sb)->s_blockgroup_lock, group);
}

/*
 * Returns true if the filesystem is busy enough that attempts to
 * access the block group locks have run into contention.
 */
static inline int ext4_fs_is_busy(struct ext4_sb_info *sbi)
{
	return (atomic_read(&sbi->s_lock_busy) > EXT4_CONTENTION_THRESHOLD);
}

static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
{
	spinlock_t *lock = ext4_group_lock_ptr(sb, group);
	if (spin_trylock(lock))
		/*
		 * We're able to grab the lock right away, so drop the
		 * lock contention counter.
		 */
		atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, -1, 0);
	else {
		/*
		 * The lock is busy, so bump the contention counter,
		 * and then wait on the spin lock.
		 */
		atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, 1,
				  EXT4_MAX_CONTENTION);
		spin_lock(lock);
	}
}

static inline void ext4_unlock_group(struct super_block *sb,
				     ext4_group_t group)
{
	spin_unlock(ext4_group_lock_ptr(sb, group));
}
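
/*
 * Sketch of the contention heuristic used above: s_lock_busy drifts
 * up (capped at EXT4_MAX_CONTENTION == 8) each time a group lock is
 * found busy, and back down toward 0 on each uncontended acquisition.
 * Once it sits above EXT4_CONTENTION_THRESHOLD == 2, ext4_fs_is_busy()
 * reports the filesystem as busy, so callers can prefer cheaper, less
 * lock-hungry strategies.
 */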

/*
 * Block validity checking
 */
#define ext4_check_indirect_blockref(inode, bh)				\
	ext4_check_blockref(__func__, __LINE__, inode,			\
			    (__le32 *)(bh)->b_data,			\
			    EXT4_ADDR_PER_BLOCK((inode)->i_sb))

#define ext4_ind_check_inode(inode)					\
	ext4_check_blockref(__func__, __LINE__, inode,			\
			    EXT4_I(inode)->i_data,			\
			    EXT4_NDIR_BLOCKS)

/*
 * Inodes and files operations
 */

/* dir.c */
extern const struct file_operations ext4_dir_operations;

/* file.c */
extern const struct inode_operations ext4_file_inode_operations;
extern const struct file_operations ext4_file_operations;
extern const struct file_operations ext4_dax_file_operations;
extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin);

/* inline.c */
extern int ext4_get_max_inline_size(struct inode *inode);
extern int ext4_find_inline_data_nolock(struct inode *inode);
extern int ext4_init_inline_data(handle_t *handle, struct inode *inode,
				 unsigned int len);
extern int ext4_destroy_inline_data(handle_t *handle, struct inode *inode);

extern int ext4_readpage_inline(struct inode *inode, struct page *page);
extern int ext4_try_to_write_inline_data(struct address_space *mapping,
					 struct inode *inode,
					 loff_t pos, unsigned len,
					 unsigned flags,
					 struct page **pagep);
extern int ext4_write_inline_data_end(struct inode *inode,
				      loff_t pos, unsigned len,
				      unsigned copied,
				      struct page *page);
extern struct buffer_head *
ext4_journalled_write_inline_data(struct inode *inode,
				  unsigned len,
				  struct page *page);
extern int ext4_da_write_inline_data_begin(struct address_space *mapping,
					   struct inode *inode,
					   loff_t pos, unsigned len,
					   unsigned flags,
					   struct page **pagep,
					   void **fsdata);
extern int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
					 unsigned len, unsigned copied,
					 struct page *page);
extern int ext4_try_add_inline_entry(handle_t *handle, struct dentry *dentry,
				     struct inode *inode);
extern int ext4_try_create_inline_dir(handle_t *handle,
				      struct inode *parent,
				      struct inode *inode);
extern int ext4_read_inline_dir(struct file *filp,
				struct dir_context *ctx,
				int *has_inline_data);
extern int htree_inlinedir_to_tree(struct file *dir_file,
				   struct inode *dir, ext4_lblk_t block,
				   struct dx_hash_info *hinfo,
				   __u32 start_hash, __u32 start_minor_hash,
				   int *has_inline_data);
extern struct buffer_head *ext4_find_inline_entry(struct inode *dir,
					const struct qstr *d_name,
					struct ext4_dir_entry_2 **res_dir,
					int *has_inline_data);
extern int ext4_delete_inline_entry(handle_t *handle,
				    struct inode *dir,
				    struct ext4_dir_entry_2 *de_del,
				    struct buffer_head *bh,
				    int *has_inline_data);
extern int empty_inline_dir(struct inode *dir, int *has_inline_data);
extern struct buffer_head *ext4_get_first_inline_block(struct inode *inode,
					struct ext4_dir_entry_2 **parent_de,
					int *retval);
extern int ext4_inline_data_fiemap(struct inode *inode,
				   struct fiemap_extent_info *fieinfo,
				   int *has_inline, __u64 start, __u64 len);
extern int ext4_try_to_evict_inline_data(handle_t *handle,
					 struct inode *inode,
					 int needed);
extern void ext4_inline_data_truncate(struct inode *inode, int *has_inline);

extern int ext4_convert_inline_data(struct inode *inode);

static inline int ext4_has_inline_data(struct inode *inode)
{
	return ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA) &&
	       EXT4_I(inode)->i_inline_off;
}

/* namei.c */
extern const struct inode_operations ext4_dir_inode_operations;
extern const struct inode_operations ext4_special_inode_operations;
extern struct dentry *ext4_get_parent(struct dentry *child);
extern struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode,
				 struct ext4_dir_entry_2 *de,
				 int blocksize, int csum_size,
				 unsigned int parent_ino, int dotdot_real_len);
extern void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
				   unsigned int blocksize);
extern int ext4_handle_dirty_dirent_node(handle_t *handle,
					 struct inode *inode,
					 struct buffer_head *bh);
#define S_SHIFT 12
static unsigned char ext4_type_by_mode[S_IFMT >> S_SHIFT] = {
	[S_IFREG >> S_SHIFT]	= EXT4_FT_REG_FILE,
	[S_IFDIR >> S_SHIFT]	= EXT4_FT_DIR,
	[S_IFCHR >> S_SHIFT]	= EXT4_FT_CHRDEV,
	[S_IFBLK >> S_SHIFT]	= EXT4_FT_BLKDEV,
	[S_IFIFO >> S_SHIFT]	= EXT4_FT_FIFO,
	[S_IFSOCK >> S_SHIFT]	= EXT4_FT_SOCK,
	[S_IFLNK >> S_SHIFT]	= EXT4_FT_SYMLINK,
};

static inline void ext4_set_de_type(struct super_block *sb,
				    struct ext4_dir_entry_2 *de,
				    umode_t mode) {
	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FILETYPE))
		de->file_type = ext4_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
}
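
/*
 * Illustration of the mode-to-dirent-type mapping: S_IFDIR is 0040000,
 * so (S_IFDIR & S_IFMT) >> S_SHIFT == 4 and ext4_type_by_mode[4] ==
 * EXT4_FT_DIR.  The table has S_IFMT >> S_SHIFT == 15 slots, large
 * enough for every valid file type (the highest used index,
 * S_IFSOCK >> S_SHIFT, is 12); unlisted slots stay zero-initialized,
 * which is EXT4_FT_UNKNOWN.
 */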
2691 2701
2692 2702
2693 /* symlink.c */ 2703 /* symlink.c */
2694 extern const struct inode_operations ext4_symlink_inode_operations; 2704 extern const struct inode_operations ext4_symlink_inode_operations;
2695 extern const struct inode_operations ext4_fast_symlink_inode_operations; 2705 extern const struct inode_operations ext4_fast_symlink_inode_operations;
2696 2706
2697 /* block_validity */ 2707 /* block_validity */
2698 extern void ext4_release_system_zone(struct super_block *sb); 2708 extern void ext4_release_system_zone(struct super_block *sb);
2699 extern int ext4_setup_system_zone(struct super_block *sb); 2709 extern int ext4_setup_system_zone(struct super_block *sb);
2700 extern int __init ext4_init_system_zone(void); 2710 extern int __init ext4_init_system_zone(void);
2701 extern void ext4_exit_system_zone(void); 2711 extern void ext4_exit_system_zone(void);
2702 extern int ext4_data_block_valid(struct ext4_sb_info *sbi, 2712 extern int ext4_data_block_valid(struct ext4_sb_info *sbi,
2703 ext4_fsblk_t start_blk, 2713 ext4_fsblk_t start_blk,
2704 unsigned int count); 2714 unsigned int count);
2705 extern int ext4_check_blockref(const char *, unsigned int, 2715 extern int ext4_check_blockref(const char *, unsigned int,
2706 struct inode *, __le32 *, unsigned int); 2716 struct inode *, __le32 *, unsigned int);
2707 2717
2708 /* extents.c */ 2718 /* extents.c */
2709 struct ext4_ext_path; 2719 struct ext4_ext_path;
2710 struct ext4_extent; 2720 struct ext4_extent;
2711 2721
2712 /* 2722 /*
2713 * Maximum number of logical blocks in a file; ext4_extent's ee_block is 2723 * Maximum number of logical blocks in a file; ext4_extent's ee_block is
2714 * __le32. 2724 * __le32.
2715 */ 2725 */
2716 #define EXT_MAX_BLOCKS 0xffffffff 2726 #define EXT_MAX_BLOCKS 0xffffffff
2717 2727
2718 extern int ext4_ext_tree_init(handle_t *handle, struct inode *); 2728 extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
2719 extern int ext4_ext_writepage_trans_blocks(struct inode *, int); 2729 extern int ext4_ext_writepage_trans_blocks(struct inode *, int);
2720 extern int ext4_ext_index_trans_blocks(struct inode *inode, int extents); 2730 extern int ext4_ext_index_trans_blocks(struct inode *inode, int extents);
2721 extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, 2731 extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
2722 struct ext4_map_blocks *map, int flags); 2732 struct ext4_map_blocks *map, int flags);
2723 extern void ext4_ext_truncate(handle_t *, struct inode *); 2733 extern void ext4_ext_truncate(handle_t *, struct inode *);
2724 extern int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, 2734 extern int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
2725 ext4_lblk_t end); 2735 ext4_lblk_t end);
2726 extern void ext4_ext_init(struct super_block *); 2736 extern void ext4_ext_init(struct super_block *);
2727 extern void ext4_ext_release(struct super_block *); 2737 extern void ext4_ext_release(struct super_block *);
2728 extern long ext4_fallocate(struct file *file, int mode, loff_t offset, 2738 extern long ext4_fallocate(struct file *file, int mode, loff_t offset,
2729 loff_t len); 2739 loff_t len);
2730 extern int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode, 2740 extern int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode,
2731 loff_t offset, ssize_t len); 2741 loff_t offset, ssize_t len);
2732 extern int ext4_map_blocks(handle_t *handle, struct inode *inode, 2742 extern int ext4_map_blocks(handle_t *handle, struct inode *inode,
2733 struct ext4_map_blocks *map, int flags); 2743 struct ext4_map_blocks *map, int flags);
2734 extern int ext4_ext_calc_metadata_amount(struct inode *inode, 2744 extern int ext4_ext_calc_metadata_amount(struct inode *inode,
2735 ext4_lblk_t lblocks); 2745 ext4_lblk_t lblocks);
2736 extern int ext4_extent_tree_init(handle_t *, struct inode *); 2746 extern int ext4_extent_tree_init(handle_t *, struct inode *);
2737 extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode, 2747 extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
2738 int num, 2748 int num,
2739 struct ext4_ext_path *path); 2749 struct ext4_ext_path *path);
2740 extern int ext4_can_extents_be_merged(struct inode *inode, 2750 extern int ext4_can_extents_be_merged(struct inode *inode,
2741 struct ext4_extent *ex1, 2751 struct ext4_extent *ex1,
2742 struct ext4_extent *ex2); 2752 struct ext4_extent *ex2);
2743 extern int ext4_ext_insert_extent(handle_t *, struct inode *, 2753 extern int ext4_ext_insert_extent(handle_t *, struct inode *,
2744 struct ext4_ext_path **, 2754 struct ext4_ext_path **,
2745 struct ext4_extent *, int); 2755 struct ext4_extent *, int);
2746 extern struct ext4_ext_path *ext4_find_extent(struct inode *, ext4_lblk_t, 2756 extern struct ext4_ext_path *ext4_find_extent(struct inode *, ext4_lblk_t,
2747 struct ext4_ext_path **, 2757 struct ext4_ext_path **,
2748 int flags); 2758 int flags);
2749 extern void ext4_ext_drop_refs(struct ext4_ext_path *); 2759 extern void ext4_ext_drop_refs(struct ext4_ext_path *);
2750 extern int ext4_ext_check_inode(struct inode *inode); 2760 extern int ext4_ext_check_inode(struct inode *inode);
2751 extern int ext4_find_delalloc_range(struct inode *inode, 2761 extern int ext4_find_delalloc_range(struct inode *inode,
2752 ext4_lblk_t lblk_start, 2762 ext4_lblk_t lblk_start,
2753 ext4_lblk_t lblk_end); 2763 ext4_lblk_t lblk_end);
2754 extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk); 2764 extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk);
2755 extern ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_ext_path *path); 2765 extern ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_ext_path *path);
2756 extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 2766 extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2757 __u64 start, __u64 len); 2767 __u64 start, __u64 len);
2758 extern int ext4_ext_precache(struct inode *inode); 2768 extern int ext4_ext_precache(struct inode *inode);
2759 extern int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len); 2769 extern int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len);
2760 extern int ext4_swap_extents(handle_t *handle, struct inode *inode1, 2770 extern int ext4_swap_extents(handle_t *handle, struct inode *inode1,
2761 struct inode *inode2, ext4_lblk_t lblk1, 2771 struct inode *inode2, ext4_lblk_t lblk1,
2762 ext4_lblk_t lblk2, ext4_lblk_t count, 2772 ext4_lblk_t lblk2, ext4_lblk_t count,
2763 int mark_unwritten,int *err); 2773 int mark_unwritten,int *err);
2764 2774
2765 /* move_extent.c */ 2775 /* move_extent.c */
2766 extern void ext4_double_down_write_data_sem(struct inode *first, 2776 extern void ext4_double_down_write_data_sem(struct inode *first,
2767 struct inode *second); 2777 struct inode *second);
2768 extern void ext4_double_up_write_data_sem(struct inode *orig_inode, 2778 extern void ext4_double_up_write_data_sem(struct inode *orig_inode,
2769 struct inode *donor_inode); 2779 struct inode *donor_inode);
2770 extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, 2780 extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
2771 __u64 start_orig, __u64 start_donor, 2781 __u64 start_orig, __u64 start_donor,
2772 __u64 len, __u64 *moved_len); 2782 __u64 len, __u64 *moved_len);
2773 2783
2774 /* page-io.c */ 2784 /* page-io.c */
2775 extern int __init ext4_init_pageio(void); 2785 extern int __init ext4_init_pageio(void);
2776 extern void ext4_exit_pageio(void); 2786 extern void ext4_exit_pageio(void);
2777 extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags); 2787 extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
2778 extern ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end); 2788 extern ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end);
2779 extern int ext4_put_io_end(ext4_io_end_t *io_end); 2789 extern int ext4_put_io_end(ext4_io_end_t *io_end);
2780 extern void ext4_put_io_end_defer(ext4_io_end_t *io_end); 2790 extern void ext4_put_io_end_defer(ext4_io_end_t *io_end);
2781 extern void ext4_io_submit_init(struct ext4_io_submit *io, 2791 extern void ext4_io_submit_init(struct ext4_io_submit *io,
2782 struct writeback_control *wbc); 2792 struct writeback_control *wbc);
2783 extern void ext4_end_io_rsv_work(struct work_struct *work); 2793 extern void ext4_end_io_rsv_work(struct work_struct *work);
2784 extern void ext4_io_submit(struct ext4_io_submit *io); 2794 extern void ext4_io_submit(struct ext4_io_submit *io);
2785 extern int ext4_bio_write_page(struct ext4_io_submit *io, 2795 extern int ext4_bio_write_page(struct ext4_io_submit *io,
2786 struct page *page, 2796 struct page *page,
2787 int len, 2797 int len,
2788 struct writeback_control *wbc, 2798 struct writeback_control *wbc,
2789 bool keep_towrite); 2799 bool keep_towrite);
2790 2800
2791 /* mmp.c */ 2801 /* mmp.c */
2792 extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t); 2802 extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);
2793 2803
2794 /* 2804 /*
2795 * Add new method to test whether block and inode bitmaps are properly 2805 * Add new method to test whether block and inode bitmaps are properly
2796 * initialized. With uninit_bg reading the block from disk is not enough 2806 * initialized. With uninit_bg reading the block from disk is not enough
2797 * to mark the bitmap uptodate. We need to also zero-out the bitmap 2807 * to mark the bitmap uptodate. We need to also zero-out the bitmap
2798 */ 2808 */
2799 #define BH_BITMAP_UPTODATE BH_JBDPrivateStart 2809 #define BH_BITMAP_UPTODATE BH_JBDPrivateStart
2800 2810
2801 static inline int bitmap_uptodate(struct buffer_head *bh) 2811 static inline int bitmap_uptodate(struct buffer_head *bh)
2802 { 2812 {
2803 return (buffer_uptodate(bh) && 2813 return (buffer_uptodate(bh) &&
2804 test_bit(BH_BITMAP_UPTODATE, &(bh)->b_state)); 2814 test_bit(BH_BITMAP_UPTODATE, &(bh)->b_state));
2805 } 2815 }
2806 static inline void set_bitmap_uptodate(struct buffer_head *bh) 2816 static inline void set_bitmap_uptodate(struct buffer_head *bh)
2807 { 2817 {
2808 set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state); 2818 set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state);
2809 } 2819 }
2810 2820
2811 /* 2821 /*
2812 * Disable DIO read nolock optimization, so new dioreaders will be forced 2822 * Disable DIO read nolock optimization, so new dioreaders will be forced
2813 * to grab i_mutex 2823 * to grab i_mutex
2814 */ 2824 */
2815 static inline void ext4_inode_block_unlocked_dio(struct inode *inode) 2825 static inline void ext4_inode_block_unlocked_dio(struct inode *inode)
2816 { 2826 {
2817 ext4_set_inode_state(inode, EXT4_STATE_DIOREAD_LOCK); 2827 ext4_set_inode_state(inode, EXT4_STATE_DIOREAD_LOCK);
2818 smp_mb(); 2828 smp_mb();
2819 } 2829 }
2820 static inline void ext4_inode_resume_unlocked_dio(struct inode *inode) 2830 static inline void ext4_inode_resume_unlocked_dio(struct inode *inode)
2821 { 2831 {
2822 smp_mb(); 2832 smp_mb();
2823 ext4_clear_inode_state(inode, EXT4_STATE_DIOREAD_LOCK); 2833 ext4_clear_inode_state(inode, EXT4_STATE_DIOREAD_LOCK);
2824 } 2834 }
2825 2835
2826 #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) 2836 #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
2827 2837
2828 /* For ioend & aio unwritten conversion wait queues */ 2838 /* For ioend & aio unwritten conversion wait queues */
2829 #define EXT4_WQ_HASH_SZ 37 2839 #define EXT4_WQ_HASH_SZ 37
2830 #define ext4_ioend_wq(v) (&ext4__ioend_wq[((unsigned long)(v)) %\ 2840 #define ext4_ioend_wq(v) (&ext4__ioend_wq[((unsigned long)(v)) %\
2831 EXT4_WQ_HASH_SZ]) 2841 EXT4_WQ_HASH_SZ])
2832 #define ext4_aio_mutex(v) (&ext4__aio_mutex[((unsigned long)(v)) %\ 2842 #define ext4_aio_mutex(v) (&ext4__aio_mutex[((unsigned long)(v)) %\
2833 EXT4_WQ_HASH_SZ]) 2843 EXT4_WQ_HASH_SZ])
2834 extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ]; 2844 extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
2835 extern struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ]; 2845 extern struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
2836 2846
2837 #define EXT4_RESIZING 0 2847 #define EXT4_RESIZING 0
2838 extern int ext4_resize_begin(struct super_block *sb); 2848 extern int ext4_resize_begin(struct super_block *sb);
2839 extern void ext4_resize_end(struct super_block *sb); 2849 extern void ext4_resize_end(struct super_block *sb);
2840 2850
2841 #endif /* __KERNEL__ */ 2851 #endif /* __KERNEL__ */
2842 2852
2843 #endif /* _EXT4_H */ 2853 #endif /* _EXT4_H */
2844 2854
1 /* 1 /*
2 * linux/fs/ext4/indirect.c 2 * linux/fs/ext4/indirect.c
3 * 3 *
4 * from 4 * from
5 * 5 *
6 * linux/fs/ext4/inode.c 6 * linux/fs/ext4/inode.c
7 * 7 *
8 * Copyright (C) 1992, 1993, 1994, 1995 8 * Copyright (C) 1992, 1993, 1994, 1995
9 * Remy Card (card@masi.ibp.fr) 9 * Remy Card (card@masi.ibp.fr)
10 * Laboratoire MASI - Institut Blaise Pascal 10 * Laboratoire MASI - Institut Blaise Pascal
11 * Universite Pierre et Marie Curie (Paris VI) 11 * Universite Pierre et Marie Curie (Paris VI)
12 * 12 *
13 * from 13 * from
14 * 14 *
15 * linux/fs/minix/inode.c 15 * linux/fs/minix/inode.c
16 * 16 *
17 * Copyright (C) 1991, 1992 Linus Torvalds 17 * Copyright (C) 1991, 1992 Linus Torvalds
18 * 18 *
19 * Goal-directed block allocation by Stephen Tweedie 19 * Goal-directed block allocation by Stephen Tweedie
20 * (sct@redhat.com), 1993, 1998 20 * (sct@redhat.com), 1993, 1998
21 */ 21 */
22 22
23 #include <linux/aio.h> 23 #include <linux/aio.h>
24 #include "ext4_jbd2.h" 24 #include "ext4_jbd2.h"
#include "truncate.h"

#include <trace/events/ext4.h>

typedef struct {
        __le32  *p;
        __le32  key;
        struct buffer_head *bh;
} Indirect;

static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v)
{
        p->key = *(p->p = v);
        p->bh = bh;
}

/**
 * ext4_block_to_path - parse the block number into an array of offsets
 * @inode: inode in question (we are only interested in its superblock)
 * @i_block: block number to be parsed
 * @offsets: array to store the offsets in
 * @boundary: set this non-zero if the referred-to block is likely to be
 *      followed (on disk) by an indirect block.
 *
 * To store the locations of a file's data, ext4 uses a data structure
 * common to UNIX filesystems - a tree of pointers anchored in the inode,
 * with data blocks at the leaves and indirect blocks in the intermediate
 * nodes. This function translates the block number into a path in that
 * tree - the return value is the path length and @offsets[n] is the
 * offset of the pointer to the (n+1)th node in the nth one. If @i_block
 * is out of range (negative or too large), a warning is printed and zero
 * is returned.
 *
 * Note: the function doesn't find the node addresses, so no IO is needed.
 * All we need to know is the capacity of indirect blocks (taken from
 * inode->i_sb).
 */

/*
 * Portability note: the last comparison (check that we fit into triple
 * indirect block) is spelled differently, because otherwise on an
 * architecture with 32-bit longs and 8Kb pages we might get into trouble
 * if our filesystem had 8Kb blocks. We might use long long, but that would
 * kill us on x86. Oh, well, at least the sign propagation does not matter -
 * i_block would have to be negative in the very beginning, so we would not
 * get there at all.
 */

static int ext4_block_to_path(struct inode *inode,
                              ext4_lblk_t i_block,
                              ext4_lblk_t offsets[4], int *boundary)
{
        int ptrs = EXT4_ADDR_PER_BLOCK(inode->i_sb);
        int ptrs_bits = EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb);
        const long direct_blocks = EXT4_NDIR_BLOCKS,
                indirect_blocks = ptrs,
                double_blocks = (1 << (ptrs_bits * 2));
        int n = 0;
        int final = 0;

        if (i_block < direct_blocks) {
                offsets[n++] = i_block;
                final = direct_blocks;
        } else if ((i_block -= direct_blocks) < indirect_blocks) {
                offsets[n++] = EXT4_IND_BLOCK;
                offsets[n++] = i_block;
                final = ptrs;
        } else if ((i_block -= indirect_blocks) < double_blocks) {
                offsets[n++] = EXT4_DIND_BLOCK;
                offsets[n++] = i_block >> ptrs_bits;
                offsets[n++] = i_block & (ptrs - 1);
                final = ptrs;
        } else if (((i_block -= double_blocks) >> (ptrs_bits * 2)) < ptrs) {
                offsets[n++] = EXT4_TIND_BLOCK;
                offsets[n++] = i_block >> (ptrs_bits * 2);
                offsets[n++] = (i_block >> ptrs_bits) & (ptrs - 1);
                offsets[n++] = i_block & (ptrs - 1);
                final = ptrs;
        } else {
                ext4_warning(inode->i_sb, "block %lu > max in inode %lu",
                             i_block + direct_blocks +
                             indirect_blocks + double_blocks, inode->i_ino);
        }
        if (boundary)
                *boundary = final - 1 - (i_block & (ptrs - 1));
        return n;
}
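
/*
 * Worked example (illustrative, not part of the original file): assume
 * a 4KiB block size, so ptrs == 1024, ptrs_bits == 10, and
 * EXT4_NDIR_BLOCKS == 12.  For i_block == 5000:
 *
 *      5000 - 12   == 4988     (past the direct blocks)
 *      4988 - 1024 == 3964     (past the single-indirect range)
 *      3964 < 1024 * 1024      (inside the double-indirect range)
 *
 * so the function fills in offsets[] = { EXT4_DIND_BLOCK,
 * 3964 >> 10 == 3, 3964 & 1023 == 892 }, returns n == 3, and sets
 * *boundary = 1024 - 1 - 892 == 131: another 131 data pointers fit in
 * this indirect block before the next indirect block begins on disk.
 */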

/**
 * ext4_get_branch - read the chain of indirect blocks leading to data
 * @inode: inode in question
 * @depth: depth of the chain (1 - direct pointer, etc.)
 * @offsets: offsets of pointers in inode/indirect blocks
 * @chain: place to store the result
 * @err: here we store the error value
 *
 * The function fills the array of triples <key, p, bh> and returns %NULL
 * if everything went OK or the pointer to the last filled triple
 * (the incomplete one) otherwise. Upon return, chain[i].key contains
 * the number of the (i+1)-th block in the chain (as it is stored in memory,
 * i.e. little-endian 32-bit), chain[i].p contains the address of that
 * number (it points into struct inode for i==0 and into bh->b_data
 * for i>0) and chain[i].bh points to the buffer_head of the i-th indirect
 * block for i>0 and NULL for i==0. In other words, it holds the block
 * numbers of the chain, the addresses they were taken from (and where we
 * can verify that the chain did not change) and the buffer_heads hosting
 * these numbers.
 *
 * The function stops when it stumbles upon a zero pointer (absent block)
 *      (pointer to last triple returned, *@err == 0)
 * or when it gets an IO error reading an indirect block
 *      (ditto, *@err == -EIO)
 * or when it reads all @depth-1 indirect blocks successfully and finds
 * the whole chain, all the way to the data (returns %NULL, *@err == 0).
 *
 * Needs to be called with
 * down_read(&EXT4_I(inode)->i_data_sem)
 */
static Indirect *ext4_get_branch(struct inode *inode, int depth,
                                 ext4_lblk_t *offsets,
                                 Indirect chain[4], int *err)
{
        struct super_block *sb = inode->i_sb;
        Indirect *p = chain;
        struct buffer_head *bh;
        int ret = -EIO;

        *err = 0;
        /* i_data is not going away, no lock needed */
        add_chain(chain, NULL, EXT4_I(inode)->i_data + *offsets);
        if (!p->key)
                goto no_block;
        while (--depth) {
                bh = sb_getblk(sb, le32_to_cpu(p->key));
                if (unlikely(!bh)) {
                        ret = -ENOMEM;
                        goto failure;
                }

                if (!bh_uptodate_or_lock(bh)) {
                        if (bh_submit_read(bh) < 0) {
                                put_bh(bh);
                                goto failure;
                        }
                        /* validate block references */
                        if (ext4_check_indirect_blockref(inode, bh)) {
                                put_bh(bh);
                                goto failure;
                        }
                }

                add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets);
                /* Reader: end */
                if (!p->key)
                        goto no_block;
        }
        return NULL;

failure:
        *err = ret;
no_block:
        return p;
}
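
/*
 * Illustration (an exposition aid, not part of the original file):
 * continuing the i_block == 5000 example above, a fully successful
 * ext4_get_branch() at depth == 3 leaves the chain looking like
 *
 *      chain[0].p  -> &EXT4_I(inode)->i_data[EXT4_DIND_BLOCK], bh == NULL
 *      chain[1].p  -> slot 3 of the dindirect block's b_data
 *      chain[1].bh -> buffer_head of the dindirect block
 *      chain[2].p  -> slot 892 of the indirect block's b_data
 *      chain[2].bh -> buffer_head of the indirect block
 *
 * with chain[2].key holding the (little-endian) number of the data
 * block itself, and the function returning NULL with *err == 0.
 */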

/**
 * ext4_find_near - find a place for allocation with sufficient locality
 * @inode: owner
 * @ind: descriptor of indirect block.
 *
 * This function returns the preferred place for block allocation.
 * It is used when the heuristic for sequential allocation fails.
 * Rules are:
 *      + if there is a block to the left of our position - allocate near it.
 *      + if the pointer will live in an indirect block - allocate near
 *        that block.
 *      + if the pointer will live in the inode - allocate in the same
 *        cylinder group.
 *
 * In the latter case we colour the starting block by the caller's PID to
 * prevent it from clashing with concurrent allocations for a different inode
 * in the same block group. The PID is used here so that functionally related
 * files will be close-by on-disk.
 *
 * Caller must make sure that @ind is valid and will stay that way.
 */
static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
{
        struct ext4_inode_info *ei = EXT4_I(inode);
        __le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data;
        __le32 *p;

        /* Try to find previous block */
        for (p = ind->p - 1; p >= start; p--) {
                if (*p)
                        return le32_to_cpu(*p);
        }

        /* No such thing, so let's try location of indirect block */
        if (ind->bh)
                return ind->bh->b_blocknr;

        /*
         * It is going to be referred to from the inode itself? OK, just put it
         * into the same cylinder group then.
         */
        return ext4_inode_to_goal_block(inode);
}
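
/*
 * Example (illustrative): when appending to a file whose last allocated
 * data block is N, ind->p points just past the slot holding N, so the
 * backward scan above finds N and the allocator is asked for blocks
 * near N - which is what keeps sequentially written files mostly
 * contiguous on disk.
 */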

/**
 * ext4_find_goal - find a preferred place for allocation.
 * @inode: owner
 * @block: block we want
 * @partial: pointer to the last triple within a chain
 *
 * Normally this function finds the preferred place for block allocation
 * and returns it.
 * Because this is only used for non-extent files, we limit the block nr
 * to 32 bits.
 */
static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block,
                                   Indirect *partial)
{
        ext4_fsblk_t goal;

        /*
         * XXX need to get goal block from mballoc's data structures
         */

        goal = ext4_find_near(inode, partial);
        goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;
        return goal;
}

/**
 * ext4_blks_to_allocate - Look up the block map and count the number
 *      of direct blocks that need to be allocated for the given branch.
 *
 * @branch: chain of indirect blocks
 * @k: number of blocks needed for indirect blocks
 * @blks: number of data blocks to be mapped.
 * @blocks_to_boundary: the offset in the indirect block
 *
 * return the total number of blocks to be allocated, including the
 * direct and indirect blocks.
 */
static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks,
                                 int blocks_to_boundary)
{
        unsigned int count = 0;

        /*
         * Simple case: the [t,d]indirect block(s) have not been allocated
         * yet, so it's clear that the blocks on that path have not been
         * allocated either.
         */
        if (k > 0) {
                /* right now we don't handle cross boundary allocation */
                if (blks < blocks_to_boundary + 1)
                        count += blks;
                else
                        count += blocks_to_boundary + 1;
                return count;
        }

        count++;
        while (count < blks && count <= blocks_to_boundary &&
               le32_to_cpu(*(branch[0].p + count)) == 0) {
                count++;
        }
        return count;
}
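
/*
 * Worked example (illustrative, 1024 pointers per indirect block):
 *
 * If an indirect block itself is missing (k > 0), nothing beyond it can
 * exist, so the count is simply min(blks, blocks_to_boundary + 1);
 * e.g. blks == 8 with blocks_to_boundary == 131 yields 8.
 *
 * If the final indirect block already exists (k == 0), the loop extends
 * the allocation through the zero slots that follow branch[0].p: with
 * blks == 8, if the next four slots are zero and the fifth is already
 * in use, the function returns 5.
 */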

/**
 * ext4_alloc_branch - allocate and set up a chain of blocks.
 * @handle: handle for this transaction
 * @ar: the allocation request (owner inode, goal, number of direct
 *      blocks to allocate, flags)
 * @indirect_blks: number of indirect blocks to allocate
 * @offsets: offsets (in the blocks) to store the pointers to next.
 * @branch: place to store the chain in.
 *
 * This function allocates blocks, zeroes out all but the last one,
 * links them into a chain and (if we are synchronous) writes them to disk.
 * In other words, it prepares a branch that can be spliced onto the
 * inode. It stores the information about that chain in branch[], in
 * the same format as ext4_get_branch() would do. We are calling it after
 * we had read the existing part of the chain and partial points to the last
 * triple of that (the one with zero ->key). Upon exit we have the same
 * picture as after a successful ext4_get_block(), except that in one
 * place the chain is disconnected - *branch->p is still zero (we did not
 * set the last link), but branch->key contains the number that should
 * be placed into *branch->p to fill that gap.
 *
 * If allocation fails we free all blocks we've allocated (and forget
 * their buffer_heads) and return the error value from the failed
 * ext4_alloc_block() (normally -ENOSPC). Otherwise we set the chain
 * as described above and return 0.
 */
static int ext4_alloc_branch(handle_t *handle,
                             struct ext4_allocation_request *ar,
                             int indirect_blks, ext4_lblk_t *offsets,
                             Indirect *branch)
{
        struct buffer_head *bh;
        ext4_fsblk_t b, new_blocks[4];
        __le32 *p;
        int i, j, err, len = 1;

        for (i = 0; i <= indirect_blks; i++) {
                if (i == indirect_blks) {
                        new_blocks[i] = ext4_mb_new_blocks(handle, ar, &err);
                } else
                        ar->goal = new_blocks[i] = ext4_new_meta_blocks(handle,
                                        ar->inode, ar->goal,
                                        ar->flags & EXT4_MB_DELALLOC_RESERVED,
                                        NULL, &err);
                if (err) {
                        i--;
                        goto failed;
                }
                branch[i].key = cpu_to_le32(new_blocks[i]);
                if (i == 0)
                        continue;

                bh = branch[i].bh = sb_getblk(ar->inode->i_sb, new_blocks[i-1]);
                if (unlikely(!bh)) {
                        err = -ENOMEM;
                        goto failed;
                }
                lock_buffer(bh);
                BUFFER_TRACE(bh, "call get_create_access");
                err = ext4_journal_get_create_access(handle, bh);
                if (err) {
                        unlock_buffer(bh);
                        goto failed;
                }

                memset(bh->b_data, 0, bh->b_size);
                p = branch[i].p = (__le32 *) bh->b_data + offsets[i];
                b = new_blocks[i];

                if (i == indirect_blks)
                        len = ar->len;
                for (j = 0; j < len; j++)
                        *p++ = cpu_to_le32(b++);

                BUFFER_TRACE(bh, "marking uptodate");
                set_buffer_uptodate(bh);
                unlock_buffer(bh);

                BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
                err = ext4_handle_dirty_metadata(handle, ar->inode, bh);
                if (err)
                        goto failed;
        }
        return 0;
failed:
        for (; i >= 0; i--) {
                /*
                 * We want to ext4_forget() only freshly allocated indirect
                 * blocks. The buffer for new_blocks[i-1] is at branch[i].bh,
                 * and the buffer at branch[0].bh is the indirect block /
                 * inode that already existed before ext4_alloc_branch()
                 * was called.
                 */
                if (i > 0 && i != indirect_blks && branch[i].bh)
                        ext4_forget(handle, 1, ar->inode, branch[i].bh,
                                    branch[i].bh->b_blocknr);
                ext4_free_blocks(handle, ar->inode, NULL, new_blocks[i],
                                 (i == indirect_blks) ? ar->len : 1, 0);
        }
        return err;
}
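
/*
 * Layout sketch (exposition only, not part of the original file): with
 * indirect_blks == 2 the loop above allocates
 *
 *      new_blocks[0]   a dindirect block       (metadata)
 *      new_blocks[1]   an indirect block       (metadata)
 *      new_blocks[2]   the first of ar->len data blocks
 *
 * branch[1].bh is the buffer for new_blocks[0]; its slot offsets[1]
 * points at new_blocks[1].  branch[2].bh is the buffer for
 * new_blocks[1]; its slots starting at offsets[2] are filled with the
 * ar->len data block numbers.  branch[0].key is set, but *branch[0].p
 * is deliberately left zero - ext4_splice_branch() writes that last
 * link once the whole branch is ready.
 */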

/**
 * ext4_splice_branch - splice the allocated branch onto the inode.
 * @handle: handle for this transaction
 * @ar: the allocation request (owner inode, logical block, length)
 * @where: location of the missing link
 * @num: number of indirect blocks we are adding
 *
 * This function fills the missing link and does all housekeeping needed in
 * the inode (->i_blocks, etc.). In case of success we end up with the full
 * chain to the new block and return 0.
 */
static int ext4_splice_branch(handle_t *handle,
                              struct ext4_allocation_request *ar,
                              Indirect *where, int num)
{
        int i;
        int err = 0;
        ext4_fsblk_t current_block;

        /*
         * If we're splicing into a [td]indirect block (as opposed to the
         * inode) then we need to get write access to the [td]indirect block
         * before the splice.
         */
        if (where->bh) {
                BUFFER_TRACE(where->bh, "get_write_access");
                err = ext4_journal_get_write_access(handle, where->bh);
                if (err)
                        goto err_out;
        }
        /* That's it */

        *where->p = where->key;

        /*
         * Update the host buffer_head or inode to point to the
         * just-allocated direct blocks.
         */
        if (num == 0 && ar->len > 1) {
                current_block = le32_to_cpu(where->key) + 1;
                for (i = 1; i < ar->len; i++)
                        *(where->p + i) = cpu_to_le32(current_block++);
        }

        /* We are done with atomic stuff, now do the rest of housekeeping */
        /* had we spliced it onto indirect block? */
        if (where->bh) {
                /*
                 * If we spliced it onto an indirect block, we haven't
                 * altered the inode. Note however that if it is being spliced
                 * onto an indirect block at the very end of the file (the
                 * file is growing) then we *will* alter the inode to reflect
                 * the new i_size. But that is not done here - it is done in
                 * generic_commit_write->__mark_inode_dirty->ext4_dirty_inode.
                 */
                jbd_debug(5, "splicing indirect only\n");
                BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata");
                err = ext4_handle_dirty_metadata(handle, ar->inode, where->bh);
                if (err)
                        goto err_out;
        } else {
                /*
                 * OK, we spliced it into the inode itself on a direct block.
                 */
                ext4_mark_inode_dirty(handle, ar->inode);
                jbd_debug(5, "splicing direct\n");
        }
        return err;

err_out:
        for (i = 1; i <= num; i++) {
                /*
                 * branch[i].bh is newly allocated, so there is no
                 * need to revoke the block, which is why we don't
                 * need to set EXT4_FREE_BLOCKS_METADATA.
                 */
                ext4_free_blocks(handle, ar->inode, where[i].bh, 0, 1,
                                 EXT4_FREE_BLOCKS_FORGET);
        }
        ext4_free_blocks(handle, ar->inode, NULL, le32_to_cpu(where[num].key),
                         ar->len, 0);

        return err;
}

/*
 * The ext4_ind_map_blocks() function handles non-extent inodes
 * (i.e., using the traditional indirect/double-indirect i_blocks
 * scheme) for ext4_map_blocks().
 *
 * Allocation strategy is simple: if we have to allocate something, we will
 * have to go the whole way to the leaf. So let's do it before attaching
 * anything to the tree, set linkage between the newborn blocks, write them
 * if sync is required, recheck the path, free and repeat if the check fails,
 * otherwise set the last missing link (that will protect us from any
 * truncate-generated removals - all blocks on the path are immune now) and
 * possibly force the write on the parent block.
 * That has a nice additional property: no special recovery from failed
 * allocations is needed - we simply release blocks and do not touch anything
 * reachable from the inode.
 *
 * `handle' can be NULL if create == 0.
 *
 * return > 0, # of blocks mapped or allocated.
 * return = 0, if plain lookup failed.
 * return < 0, error case.
 *
 * The ext4_ind_map_blocks() function should be called with
 * down_write(&EXT4_I(inode)->i_data_sem) if allocating filesystem
 * blocks (i.e., flags has EXT4_GET_BLOCKS_CREATE set) or
 * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system
 * blocks.
 */
int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
                        struct ext4_map_blocks *map,
                        int flags)
{
        struct ext4_allocation_request ar;
        int err = -EIO;
        ext4_lblk_t offsets[4];
        Indirect chain[4];
        Indirect *partial;
        int indirect_blks;
        int blocks_to_boundary = 0;
        int depth;
        int count = 0;
        ext4_fsblk_t first_block = 0;

        trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
        J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)));
        J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
        depth = ext4_block_to_path(inode, map->m_lblk, offsets,
                                   &blocks_to_boundary);

        if (depth == 0)
                goto out;

        partial = ext4_get_branch(inode, depth, offsets, chain, &err);

        /* Simplest case - block found, no allocation needed */
        if (!partial) {
                first_block = le32_to_cpu(chain[depth - 1].key);
                count++;
                /* map more blocks */
                while (count < map->m_len && count <= blocks_to_boundary) {
                        ext4_fsblk_t blk;

                        blk = le32_to_cpu(*(chain[depth-1].p + count));

                        if (blk == first_block + count)
                                count++;
                        else
                                break;
                }
                goto got_it;
        }

        /* Next simple case - plain lookup or failed read of indirect block */
        if ((flags & EXT4_GET_BLOCKS_CREATE) == 0 || err == -EIO)
                goto cleanup;

        /*
         * Okay, we need to do block allocation.
         */
        if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
                                       EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
                EXT4_ERROR_INODE(inode, "Can't allocate blocks for "
                                 "non-extent mapped inodes with bigalloc");
                return -ENOSPC;
        }

        /* Set up for the direct block allocation */
        memset(&ar, 0, sizeof(ar));
        ar.inode = inode;
        ar.logical = map->m_lblk;
        if (S_ISREG(inode->i_mode))
                ar.flags = EXT4_MB_HINT_DATA;
        if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
                ar.flags |= EXT4_MB_DELALLOC_RESERVED;

        ar.goal = ext4_find_goal(inode, map->m_lblk, partial);

        /* the number of blocks needed to allocate for [d,t]indirect blocks */
        indirect_blks = (chain + depth) - partial - 1;

        /*
         * Next look up the indirect map to count the total number of
         * direct blocks to allocate for this branch.
         */
        ar.len = ext4_blks_to_allocate(partial, indirect_blks,
                                       map->m_len, blocks_to_boundary);

        /*
         * Block out ext4_truncate while we alter the tree
         */
        err = ext4_alloc_branch(handle, &ar, indirect_blks,
                                offsets + (partial - chain), partial);

        /*
         * The ext4_splice_branch call will free and forget any buffers
         * on the new chain if there is a failure, but that risks using
         * up transaction credits, especially for bitmaps where the
         * credits cannot be returned. Can we handle this somehow? We
         * may need to return -EAGAIN upwards in the worst case. --sct
         */
        if (!err)
                err = ext4_splice_branch(handle, &ar, partial, indirect_blks);
        if (err)
                goto cleanup;

        map->m_flags |= EXT4_MAP_NEW;

        ext4_update_inode_fsync_trans(handle, inode, 1);
        count = ar.len;
got_it:
        map->m_flags |= EXT4_MAP_MAPPED;
        map->m_pblk = le32_to_cpu(chain[depth-1].key);
        map->m_len = count;
        if (count > blocks_to_boundary)
                map->m_flags |= EXT4_MAP_BOUNDARY;
        err = count;
        /* Clean up and exit */
        partial = chain + depth - 1;    /* the whole chain */
cleanup:
        while (partial > chain) {
                BUFFER_TRACE(partial->bh, "call brelse");
                brelse(partial->bh);
                partial--;
        }
out:
        trace_ext4_ind_map_blocks_exit(inode, flags, map, err);
        return err;
}
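
/*
 * Usage sketch (illustrative only - real callers go through
 * ext4_map_blocks(); "inode" and "lblk" are assumed to be in scope):
 */
#if 0
        struct ext4_map_blocks map = {
                .m_lblk = lblk,         /* logical block to look up */
                .m_len = 1,
        };
        int ret;

        /* read-side lookup: no allocation, so a NULL handle is legal */
        down_read(&EXT4_I(inode)->i_data_sem);
        ret = ext4_ind_map_blocks(NULL, inode, &map, 0);
        up_read(&EXT4_I(inode)->i_data_sem);

        if (ret > 0)
                pr_info("lblk %u -> pblk %llu\n", lblk,
                        (unsigned long long)map.m_pblk);
        else if (ret == 0)
                pr_info("lblk %u is a hole\n", lblk);
#endif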

/*
 * O_DIRECT for ext3 (or indirect map) based files
 *
 * If the O_DIRECT write will extend the file then add this inode to the
 * orphan list. So recovery will truncate it back to the original size
 * if the machine crashes during the write.
 *
 * If the O_DIRECT write is instantiating holes inside i_size and the machine
 * crashes then stale disk data _may_ be exposed inside the file. But current
 * VFS code falls back into buffered path in that case so we are safe.
 */
ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
                           struct iov_iter *iter, loff_t offset)
{
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
        struct ext4_inode_info *ei = EXT4_I(inode);
        handle_t *handle;
        ssize_t ret;
        int orphan = 0;
        size_t count = iov_iter_count(iter);
        int retries = 0;

        if (rw == WRITE) {
                loff_t final_size = offset + count;

                if (final_size > inode->i_size) {
                        /* Credits for sb + inode write */
                        handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
                        if (IS_ERR(handle)) {
                                ret = PTR_ERR(handle);
                                goto out;
                        }
                        ret = ext4_orphan_add(handle, inode);
                        if (ret) {
                                ext4_journal_stop(handle);
                                goto out;
                        }
                        orphan = 1;
                        ei->i_disksize = inode->i_size;
                        ext4_journal_stop(handle);
                }
        }

retry:
        if (rw == READ && ext4_should_dioread_nolock(inode)) {
                /*
                 * Nolock dioread optimization may be dynamically disabled
                 * via ext4_inode_block_unlocked_dio(). Check inode's state
                 * while holding extra i_dio_count ref.
                 */
                atomic_inc(&inode->i_dio_count);
                smp_mb();
                if (unlikely(ext4_test_inode_state(inode,
                                                   EXT4_STATE_DIOREAD_LOCK))) {
                        inode_dio_done(inode);
                        goto locked;
                }
                if (IS_DAX(inode))
                        ret = dax_do_io(rw, iocb, inode, iter, offset,
                                        ext4_get_block, NULL, 0);
                else
                        ret = __blockdev_direct_IO(rw, iocb, inode,
                                        inode->i_sb->s_bdev, iter, offset,
                                        ext4_get_block, NULL, NULL, 0);
                inode_dio_done(inode);
        } else {
locked:
                if (IS_DAX(inode))
                        ret = dax_do_io(rw, iocb, inode, iter, offset,
                                        ext4_get_block, NULL, DIO_LOCKING);
                else
                        ret = blockdev_direct_IO(rw, iocb, inode, iter,
                                                 offset, ext4_get_block);

                if (unlikely((rw & WRITE) && ret < 0)) {
                        loff_t isize = i_size_read(inode);
                        loff_t end = offset + count;

                        if (end > isize)
                                ext4_truncate_failed_write(inode);
                }
        }
        if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
                goto retry;

        if (orphan) {
                int err;

                /* Credits for sb + inode write */
                handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
                if (IS_ERR(handle)) {
                        /* This is really bad luck. We've written the data
                         * but cannot extend i_size. Bail out and pretend
                         * the write failed... */
                        ret = PTR_ERR(handle);
                        if (inode->i_nlink)
                                ext4_orphan_del(NULL, inode);

                        goto out;
                }
                if (inode->i_nlink)
                        ext4_orphan_del(handle, inode);
                if (ret > 0) {
                        loff_t end = offset + ret;
                        if (end > inode->i_size) {
                                ei->i_disksize = end;
                                i_size_write(inode, end);
                                /*
                                 * We're going to return a positive `ret'
                                 * here due to non-zero-length I/O, so there's
                                 * no way of reporting error returns from
                                 * ext4_mark_inode_dirty() to userspace. So
                                 * ignore it.
                                 */
                                ext4_mark_inode_dirty(handle, inode);
                        }
                }
                err = ext4_journal_stop(handle);
                if (ret == 0)
                        ret = err;
        }
out:
        return ret;
}

/*
 * Calculate the number of metadata blocks that need to be reserved in
 * order to allocate a new block at @lblock for a non-extent file.
 */
int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock)
{
        struct ext4_inode_info *ei = EXT4_I(inode);
        sector_t dind_mask = ~((sector_t)EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1);
        int blk_bits;

        if (lblock < EXT4_NDIR_BLOCKS)
                return 0;

        lblock -= EXT4_NDIR_BLOCKS;

        if (ei->i_da_metadata_calc_len &&
            (lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) {
                ei->i_da_metadata_calc_len++;
                return 0;
        }
        ei->i_da_metadata_calc_last_lblock = lblock & dind_mask;
        ei->i_da_metadata_calc_len = 1;
        blk_bits = order_base_2(lblock);
        return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1;
}
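
/*
 * Worked example (illustrative, 4KiB blocks, so 1024 addresses per
 * block and EXT4_ADDR_PER_BLOCK_BITS == 10):
 *
 *      lblock == 8     -> a direct block, returns 0
 *      lblock == 500   -> 500 - 12 == 488, order_base_2(488) == 9,
 *                         9/10 + 1 == 1 (one indirect block)
 *      lblock == 5000  -> 5000 - 12 == 4988, order_base_2(4988) == 13,
 *                         13/10 + 1 == 2 (indirect + dindirect)
 *
 * Further blocks that fall into the same 1024-block chunk take the
 * i_da_metadata_calc_len fast path above and return 0.
 */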

/*
 * Calculate the number of indirect blocks touched by mapping @nrblocks
 * logically contiguous blocks.
 */
int ext4_ind_trans_blocks(struct inode *inode, int nrblocks)
{
        /*
         * With N contiguous data blocks, we need at most
         * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks,
         * 2 dindirect blocks, and 1 tindirect block
         */
        return DIV_ROUND_UP(nrblocks, EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4;
}
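
/*
 * E.g. (illustrative): with 4KiB blocks (1024 addresses per indirect
 * block), mapping nrblocks == 2048 contiguous blocks touches at most
 * DIV_ROUND_UP(2048, 1024) + 4 == 6 indirect blocks.
 */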

/*
 * Truncate transactions can be complex and absolutely huge. So we need to
 * be able to restart the transaction at a convenient checkpoint to make
 * sure we don't overflow the journal.
 *
 * Try to extend this transaction for the purposes of truncation. If
 * extend fails, we need to propagate the failure up and restart the
 * transaction in the top-level truncate loop. --sct
 *
 * Returns 0 if we managed to create more room. If we can't create more
 * room, and the transaction must be restarted we return 1.
 */
static int try_to_extend_transaction(handle_t *handle, struct inode *inode)
{
        if (!ext4_handle_valid(handle))
                return 0;
        if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1))
                return 0;
        if (!ext4_journal_extend(handle, ext4_blocks_for_truncate(inode)))
                return 0;
        return 1;
}

/*
 * Probably it should be a library function... search for first non-zero word
 * or memcmp with zero_page, whatever is better for particular architecture.
 * Linus?
 */
static inline int all_zeroes(__le32 *p, __le32 *q)
{
        while (p < q)
                if (*p++)
                        return 0;
        return 1;
}

/**
 * ext4_find_shared - find the indirect blocks for partial truncation.
 * @inode: inode in question
 * @depth: depth of the affected branch
 * @offsets: offsets of pointers in that branch (see ext4_block_to_path)
 * @chain: place to store the pointers to partial indirect blocks
 * @top: place to store the (detached) top of the branch
 *
 * This is a helper function used by ext4_truncate().
 *
 * When we do truncate() we may have to clean the ends of several
 * indirect blocks but leave the blocks themselves alive. A block is
 * partially truncated if some data below the new i_size is referred
 * from it (and it is on the path to the first completely truncated
 * data block, indeed). We have to free the top of that path along
 * with everything to the right of the path. Since no allocation
 * past the truncation point is possible until ext4_truncate()
 * finishes, we may safely do the latter, but the top of the branch may
 * require special attention - pageout below the truncation point
 * might try to populate it.
 *
 * We atomically detach the top of the branch from the tree, store the
 * block number of its root in *@top, pointers to buffer_heads of
 * partially truncated blocks - in @chain[].bh and pointers to
 * their last elements that should not be removed - in
 * @chain[].p. The return value is the pointer to the last filled element
 * of @chain.
 *
 * The work left to the caller is the actual freeing of subtrees:
 *      a) free the subtree starting from *@top
 *      b) free the subtrees whose roots are stored in
 *              (@chain[i].p+1 .. end of @chain[i].bh->b_data)
 *      c) free the subtrees growing from the inode past the @chain[0].
 *              (no partially truncated stuff there).
 */
static Indirect *ext4_find_shared(struct inode *inode, int depth,
                                  ext4_lblk_t offsets[4], Indirect chain[4],
                                  __le32 *top)
{
        Indirect *partial, *p;
        int k, err;

        *top = 0;
        /* Make k index the deepest non-null offset + 1 */
        for (k = depth; k > 1 && !offsets[k-1]; k--)
                ;
        partial = ext4_get_branch(inode, k, offsets, chain, &err);
        /* Writer: pointers */
        if (!partial)
                partial = chain + k-1;
        /*
         * If the branch acquired continuation since we've looked at it -
         * fine, it should all survive and (new) top doesn't belong to us.
         */
        if (!partial->key && *partial->p)
                /* Writer: end */
                goto no_top;
        for (p = partial; (p > chain) && all_zeroes((__le32 *) p->bh->b_data, p->p); p--)
                ;
        /*
         * OK, we've found the last block that must survive. The rest of our
         * branch should be detached before unlocking. However, if that rest
         * of branch is all ours and does not grow immediately from the inode
         * it's easier to cheat and just decrement partial->p.
         */
        if (p == chain + k - 1 && p > chain) {
                p->p--;
        } else {
                *top = *p->p;
                /* Nope, don't do this in ext4. Must leave the tree intact */
#if 0
                *p->p = 0;
#endif
        }
        /* Writer: end */

        while (partial > p) {
                brelse(partial->bh);
                partial--;
        }
no_top:
        return partial;
}

/*
 * Zero a number of block pointers in either an inode or an indirect block.
 * If we restart the transaction we must again get write access to the
 * indirect block for further modification.
 *
 * We release `count' blocks on disk, but (last - first) may be greater
 * than `count' because there can be holes in there.
 *
 * Return 0 on success, 1 on invalid block range
 * and < 0 on fatal error.
 */
static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
                             struct buffer_head *bh,
                             ext4_fsblk_t block_to_free,
                             unsigned long count, __le32 *first,
                             __le32 *last)
{
        __le32 *p;
        int flags = EXT4_FREE_BLOCKS_VALIDATED;
        int err;

        if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
                flags |= EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_METADATA;
        else if (ext4_should_journal_data(inode))
                flags |= EXT4_FREE_BLOCKS_FORGET;

        if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free,
                                   count)) {
                EXT4_ERROR_INODE(inode, "attempt to clear invalid "
                                 "blocks %llu len %lu",
                                 (unsigned long long) block_to_free, count);
                return 1;
        }

        if (try_to_extend_transaction(handle, inode)) {
                if (bh) {
                        BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
                        err = ext4_handle_dirty_metadata(handle, inode, bh);
                        if (unlikely(err))
                                goto out_err;
                }
                err = ext4_mark_inode_dirty(handle, inode);
                if (unlikely(err))
                        goto out_err;
                err = ext4_truncate_restart_trans(handle, inode,
                                        ext4_blocks_for_truncate(inode));
                if (unlikely(err))
                        goto out_err;
                if (bh) {
                        BUFFER_TRACE(bh, "retaking write access");
                        err = ext4_journal_get_write_access(handle, bh);
                        if (unlikely(err))
                                goto out_err;
                }
        }

        for (p = first; p < last; p++)
                *p = 0;

        ext4_free_blocks(handle, inode, NULL, block_to_free, count, flags);
        return 0;
out_err:
        ext4_std_error(inode->i_sb, err);
        return err;
}
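
/*
 * Editorial note (added commentary, not part of the original file): the
 * restart sequence above follows the contract of
 * ext4_truncate_restart_trans() -- everything modified under the current
 * handle (the indirect buffer and the inode) is dirtied first, because
 * the restart commits the running transaction; afterwards write access
 * to @bh must be retaken, since the buffer is now modified under a
 * fresh handle.
 */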

/**
 * ext4_free_data - free a list of data blocks
 * @handle: handle for this transaction
 * @inode: inode we are dealing with
 * @this_bh: indirect buffer_head which contains *@first and *@last
 * @first: array of block numbers
 * @last: points immediately past the end of array
 *
 * We are freeing all blocks referred from that array (numbers are stored as
 * little-endian 32-bit) and updating @inode->i_blocks appropriately.
 *
 * We accumulate contiguous runs of blocks to free.  Conveniently, if these
 * blocks are contiguous then releasing them at one time will only affect one
 * or two bitmap blocks (+ group descriptor(s) and superblock) and we won't
 * actually use a lot of journal space.
 *
 * @this_bh will be %NULL if @first and @last point into the inode's direct
 * block pointers.
 */
static void ext4_free_data(handle_t *handle, struct inode *inode,
                           struct buffer_head *this_bh,
                           __le32 *first, __le32 *last)
{
        ext4_fsblk_t block_to_free = 0;         /* Starting block # of a run */
        unsigned long count = 0;                /* Number of blocks in the run */
        __le32 *block_to_free_p = NULL;         /* Pointer into inode/ind
                                                   corresponding to
                                                   block_to_free */
        ext4_fsblk_t nr;                        /* Current block # */
        __le32 *p;                              /* Pointer into inode/ind
                                                   for current block */
        int err = 0;

        if (this_bh) {                          /* For indirect block */
                BUFFER_TRACE(this_bh, "get_write_access");
                err = ext4_journal_get_write_access(handle, this_bh);
                /* Important: if we can't update the indirect pointers
                 * to the blocks, we can't free them. */
                if (err)
                        return;
        }

        for (p = first; p < last; p++) {
                nr = le32_to_cpu(*p);
                if (nr) {
                        /* accumulate blocks to free if they're contiguous */
                        if (count == 0) {
                                block_to_free = nr;
                                block_to_free_p = p;
                                count = 1;
                        } else if (nr == block_to_free + count) {
                                count++;
                        } else {
                                err = ext4_clear_blocks(handle, inode, this_bh,
                                                        block_to_free, count,
                                                        block_to_free_p, p);
                                if (err)
                                        break;
                                block_to_free = nr;
                                block_to_free_p = p;
                                count = 1;
                        }
                }
        }

        if (!err && count > 0)
                err = ext4_clear_blocks(handle, inode, this_bh, block_to_free,
                                        count, block_to_free_p, p);
        if (err < 0)
                /* fatal error */
                return;

        if (this_bh) {
                BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata");

                /*
                 * The buffer head should have an attached journal head at this
                 * point. However, if the data is corrupted and an indirect
                 * block pointed to itself, it would have been detached when
                 * the block was cleared. Check for this instead of OOPSing.
                 */
                if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh))
                        ext4_handle_dirty_metadata(handle, inode, this_bh);
                else
                        EXT4_ERROR_INODE(inode,
                                         "circular indirect block detected at "
                                         "block %llu",
                                         (unsigned long long) this_bh->b_blocknr);
        }
}
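
/*
 * Worked example (added commentary, not part of the original file): with
 * a pointer array of { 100, 101, 102, 0, 200, 201 }, the loop above ends
 * up issuing two calls:
 *
 *      ext4_clear_blocks(..., block_to_free = 100, count = 3, ...)
 *      ext4_clear_blocks(..., block_to_free = 200, count = 2, ...)
 *
 * The hole (0) is skipped; the run {100..102} is flushed when the
 * non-contiguous 200 is seen, and the trailing run {200, 201} is flushed
 * by the "count > 0" call after the loop. The first call zeroes pointers
 * up to (but not including) the 200 entry, which is exactly why
 * (last - first) may exceed count when holes are present.
 */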

/**
 * ext4_free_branches - free an array of branches
 * @handle: JBD handle for this transaction
 * @inode: inode we are dealing with
 * @parent_bh: the buffer_head which contains *@first and *@last
 * @first: array of block numbers
 * @last: pointer immediately past the end of array
 * @depth: depth of the branches to free
 *
 * We are freeing all blocks referred from these branches (numbers are
 * stored as little-endian 32-bit) and updating @inode->i_blocks
 * appropriately.
 */
static void ext4_free_branches(handle_t *handle, struct inode *inode,
                               struct buffer_head *parent_bh,
                               __le32 *first, __le32 *last, int depth)
{
        ext4_fsblk_t nr;
        __le32 *p;

        if (ext4_handle_is_aborted(handle))
                return;

        if (depth--) {
                struct buffer_head *bh;
                int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
                p = last;
                while (--p >= first) {
                        nr = le32_to_cpu(*p);
                        if (!nr)
                                continue;               /* A hole */

                        if (!ext4_data_block_valid(EXT4_SB(inode->i_sb),
                                                   nr, 1)) {
                                EXT4_ERROR_INODE(inode,
                                                 "invalid indirect mapped "
                                                 "block %lu (level %d)",
                                                 (unsigned long) nr, depth);
                                break;
                        }

                        /* Go read the buffer for the next level down */
                        bh = sb_bread(inode->i_sb, nr);

                        /*
                         * A read failure? Report error and clear slot
                         * (should be rare).
                         */
                        if (!bh) {
                                EXT4_ERROR_INODE_BLOCK(inode, nr,
                                                       "Read failure");
                                continue;
                        }

                        /* This zaps the entire block.  Bottom up. */
                        BUFFER_TRACE(bh, "free child branches");
                        ext4_free_branches(handle, inode, bh,
                                           (__le32 *) bh->b_data,
                                           (__le32 *) bh->b_data + addr_per_block,
                                           depth);
                        brelse(bh);

                        /*
                         * Everything below this pointer has been
                         * released.  Now let this top-of-subtree go.
                         *
                         * We want the freeing of this indirect block to be
                         * atomic in the journal with the updating of the
                         * bitmap block which owns it.  So make some room in
                         * the journal.
                         *
                         * We zero the parent pointer *after* freeing its
                         * pointee in the bitmaps, so if extend_transaction()
                         * for some reason fails to put the bitmap changes and
                         * the release into the same transaction, recovery
                         * will merely complain about releasing a free block,
                         * rather than leaking blocks.
                         */
                        if (ext4_handle_is_aborted(handle))
                                return;
                        if (try_to_extend_transaction(handle, inode)) {
                                ext4_mark_inode_dirty(handle, inode);
                                ext4_truncate_restart_trans(handle, inode,
                                            ext4_blocks_for_truncate(inode));
                        }

                        /*
                         * The forget flag here is critical because if
                         * we are journaling (and not doing data
                         * journaling), we have to make sure a revoke
                         * record is written to prevent the journal
                         * replay from overwriting the (former)
                         * indirect block if it gets reallocated as a
                         * data block.  This must happen in the same
                         * transaction where the data blocks are
                         * actually freed.
                         */
                        ext4_free_blocks(handle, inode, NULL, nr, 1,
                                         EXT4_FREE_BLOCKS_METADATA|
                                         EXT4_FREE_BLOCKS_FORGET);

                        if (parent_bh) {
                                /*
                                 * The block which we have just freed is
                                 * pointed to by an indirect block: journal it
                                 */
                                BUFFER_TRACE(parent_bh, "get_write_access");
                                if (!ext4_journal_get_write_access(handle,
                                                                   parent_bh)){
                                        *p = 0;
                                        BUFFER_TRACE(parent_bh,
                                                "call ext4_handle_dirty_metadata");
                                        ext4_handle_dirty_metadata(handle,
                                                                   inode,
                                                                   parent_bh);
                                }
                        }
                }
        } else {
                /* We have reached the bottom of the tree. */
                BUFFER_TRACE(parent_bh, "free data blocks");
                ext4_free_data(handle, inode, parent_bh, first, last);
        }
}

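/*
 * Editorial note (added commentary, not part of the original file): the
 * recursion in ext4_free_branches() is safe on the kernel stack because
 * @depth never exceeds 3 -- the deepest branch an indirect-mapped ext4
 * inode can have is the triply indirect tree, and each recursive call
 * decrements depth until the data level is reached.
 */
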
void ext4_ind_truncate(handle_t *handle, struct inode *inode)
{
        struct ext4_inode_info *ei = EXT4_I(inode);
        __le32 *i_data = ei->i_data;
        int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
        ext4_lblk_t offsets[4];
        Indirect chain[4];
        Indirect *partial;
        __le32 nr = 0;
        int n = 0;
        ext4_lblk_t last_block, max_block;
        unsigned blocksize = inode->i_sb->s_blocksize;

        last_block = (inode->i_size + blocksize-1)
                                        >> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
        max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1)
                                        >> EXT4_BLOCK_SIZE_BITS(inode->i_sb);

        if (last_block != max_block) {
                n = ext4_block_to_path(inode, last_block, offsets, NULL);
                if (n == 0)
                        return;
        }

        ext4_es_remove_extent(inode, last_block, EXT_MAX_BLOCKS - last_block);

        /*
         * The orphan list entry will now protect us from any crash which
         * occurs before the truncate completes, so it is now safe to propagate
         * the new, shorter inode size (held for now in i_size) into the
         * on-disk inode. We do this via i_disksize, which is the value which
         * ext4 *really* writes onto the disk inode.
         */
        ei->i_disksize = inode->i_size;

        if (last_block == max_block) {
                /*
                 * It is unnecessary to free any data blocks if last_block is
                 * equal to the indirect block limit.
                 */
                return;
        } else if (n == 1) {            /* direct blocks */
                ext4_free_data(handle, inode, NULL, i_data+offsets[0],
                               i_data + EXT4_NDIR_BLOCKS);
                goto do_indirects;
        }

        partial = ext4_find_shared(inode, n, offsets, chain, &nr);
        /* Kill the top of shared branch (not detached) */
        if (nr) {
                if (partial == chain) {
                        /* Shared branch grows from the inode */
                        ext4_free_branches(handle, inode, NULL,
                                           &nr, &nr+1, (chain+n-1) - partial);
                        *partial->p = 0;
                        /*
                         * We mark the inode dirty prior to restart,
                         * and prior to stop. No need for it here.
                         */
                } else {
                        /* Shared branch grows from an indirect block */
                        BUFFER_TRACE(partial->bh, "get_write_access");
                        ext4_free_branches(handle, inode, partial->bh,
                                           partial->p,
                                           partial->p+1, (chain+n-1) - partial);
                }
        }
        /* Clear the ends of indirect blocks on the shared branch */
        while (partial > chain) {
                ext4_free_branches(handle, inode, partial->bh, partial->p + 1,
                                   (__le32*)partial->bh->b_data+addr_per_block,
                                   (chain+n-1) - partial);
                BUFFER_TRACE(partial->bh, "call brelse");
                brelse(partial->bh);
                partial--;
        }
do_indirects:
        /* Kill the remaining (whole) subtrees */
        switch (offsets[0]) {
        default:
                nr = i_data[EXT4_IND_BLOCK];
                if (nr) {
                        ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1);
                        i_data[EXT4_IND_BLOCK] = 0;
                }
        case EXT4_IND_BLOCK:
                nr = i_data[EXT4_DIND_BLOCK];
                if (nr) {
                        ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2);
                        i_data[EXT4_DIND_BLOCK] = 0;
                }
        case EXT4_DIND_BLOCK:
                nr = i_data[EXT4_TIND_BLOCK];
                if (nr) {
                        ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3);
                        i_data[EXT4_TIND_BLOCK] = 0;
                }
        case EXT4_TIND_BLOCK:
                ;
        }
}
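
/*
 * Illustrative sketch (editorial addition, not part of the original
 * file): a simplified, self-contained version of the depth computation
 * that ext4_block_to_path() performs for the offsets[] arrays used
 * above. example_ind_depth() and the apb value are assumptions for
 * exposition: apb models EXT4_ADDR_PER_BLOCK() for a 4 KiB block size,
 * where each indirect block holds 4096 / 4 = 1024 pointers.
 */
static int __maybe_unused example_ind_depth(ext4_lblk_t block)
{
        const ext4_lblk_t apb = 1024;   /* assumed EXT4_ADDR_PER_BLOCK() */

        if (block < EXT4_NDIR_BLOCKS)   /* 12 direct pointers in i_data */
                return 1;
        block -= EXT4_NDIR_BLOCKS;
        if (block < apb)                /* singly indirect */
                return 2;
        block -= apb;
        if (block < apb * apb)          /* doubly indirect */
                return 3;
        return 4;                       /* triply indirect */
}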

/**
 * ext4_ind_remove_space - remove space from the range
 * @handle: JBD handle for this transaction
 * @inode: inode we are dealing with
 * @start: First block to remove
 * @end: One block after the last block to remove (exclusive)
 *
 * Free the blocks in the defined range (end is exclusive endpoint of
 * range). This is used by ext4_punch_hole().
 */
int ext4_ind_remove_space(handle_t *handle, struct inode *inode,
                          ext4_lblk_t start, ext4_lblk_t end)
{
        struct ext4_inode_info *ei = EXT4_I(inode);
        __le32 *i_data = ei->i_data;
        int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
        ext4_lblk_t offsets[4], offsets2[4];
        Indirect chain[4], chain2[4];
        Indirect *partial, *partial2;
        ext4_lblk_t max_block;
        __le32 nr = 0, nr2 = 0;
        int n = 0, n2 = 0;
        unsigned blocksize = inode->i_sb->s_blocksize;

        max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1)
                                        >> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
        if (end >= max_block)
                end = max_block;
        if ((start >= end) || (start > max_block))
                return 0;

        n = ext4_block_to_path(inode, start, offsets, NULL);
        n2 = ext4_block_to_path(inode, end, offsets2, NULL);

        BUG_ON(n > n2);

        if ((n == 1) && (n == n2)) {
                /* We're punching only within direct block range */
                ext4_free_data(handle, inode, NULL, i_data + offsets[0],
                               i_data + offsets2[0]);
                return 0;
        } else if (n2 > n) {
                /*
                 * Start and end are at different levels, so we're going to
                 * free the partial block at the start of the range and the
                 * partial block at the end of the range. If there are some
                 * levels in between, the do_indirects label will take care
                 * of those.
                 */

                if (n == 1) {
                        /*
                         * Start is at the direct block level, free
                         * everything to the end of the level.
                         */
                        ext4_free_data(handle, inode, NULL, i_data + offsets[0],
                                       i_data + EXT4_NDIR_BLOCKS);
                        goto end_range;
                }


                partial = ext4_find_shared(inode, n, offsets, chain, &nr);
                if (nr) {
                        if (partial == chain) {
                                /* Shared branch grows from the inode */
                                ext4_free_branches(handle, inode, NULL,
                                           &nr, &nr+1, (chain+n-1) - partial);
                                *partial->p = 0;
                        } else {
                                /* Shared branch grows from an indirect block */
                                BUFFER_TRACE(partial->bh, "get_write_access");
                                ext4_free_branches(handle, inode, partial->bh,
                                        partial->p,
                                        partial->p+1, (chain+n-1) - partial);
                        }
                }

                /*
                 * Clear the ends of indirect blocks on the shared branch
                 * at the start of the range
                 */
                while (partial > chain) {
                        ext4_free_branches(handle, inode, partial->bh,
                                partial->p + 1,
                                (__le32 *)partial->bh->b_data+addr_per_block,
                                (chain+n-1) - partial);
                        BUFFER_TRACE(partial->bh, "call brelse");
                        brelse(partial->bh);
                        partial--;
                }

end_range:
                partial2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2);
                if (nr2) {
                        if (partial2 == chain2) {
                                /*
                                 * Remember, end is exclusive so here we're at
                                 * the start of the next level we're not going
                                 * to free. Everything was covered by the start
                                 * of the range.
                                 */
-                               return 0;
-                       } else {
-                               /* Shared branch grows from an indirect block */
-                               partial2--;
+                               goto do_indirects;
                        }
                } else {
                        /*
                         * ext4_find_shared returns Indirect structure which
                         * points to the last element which should not be
                         * removed by truncate. But this is end of the range
                         * in punch_hole so we need to point to the next element
                         */
                        partial2->p++;
                }

                /*
                 * Clear the ends of indirect blocks on the shared branch
                 * at the end of the range
                 */
                while (partial2 > chain2) {
                        ext4_free_branches(handle, inode, partial2->bh,
                                           (__le32 *)partial2->bh->b_data,
                                           partial2->p,
                                           (chain2+n2-1) - partial2);
                        BUFFER_TRACE(partial2->bh, "call brelse");
                        brelse(partial2->bh);
                        partial2--;
                }
                goto do_indirects;
        }

        /* Punch happened within the same level (n == n2) */
        partial = ext4_find_shared(inode, n, offsets, chain, &nr);
        partial2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2);
-       /*
-        * ext4_find_shared returns Indirect structure which
-        * points to the last element which should not be
-        * removed by truncate. But this is end of the range
-        * in punch_hole so we need to point to the next element
-        */
-       partial2->p++;
-       while ((partial > chain) || (partial2 > chain2)) {
-               /* We're at the same block, so we're almost finished */
-               if ((partial->bh && partial2->bh) &&
-                   (partial->bh->b_blocknr == partial2->bh->b_blocknr)) {
-                       if ((partial > chain) && (partial2 > chain2)) {
-                               ext4_free_branches(handle, inode, partial->bh,
-                                                  partial->p + 1,
-                                                  partial2->p,
-                                                  (chain+n-1) - partial);
-                               BUFFER_TRACE(partial->bh, "call brelse");
-                               brelse(partial->bh);
-                               BUFFER_TRACE(partial2->bh, "call brelse");
-                               brelse(partial2->bh);
-                       }
-                       return 0;
-               }
-               /*
-                * Clear the ends of indirect blocks on the shared branch
-                * at the start of the range
-                */
-               if (partial > chain) {
-                       ext4_free_branches(handle, inode, partial->bh,
-                                          partial->p + 1,
-                                          (__le32 *)partial->bh->b_data+addr_per_block,
-                                          (chain+n-1) - partial);
-                       BUFFER_TRACE(partial->bh, "call brelse");
-                       brelse(partial->bh);
-                       partial--;
-               }
-               /*
-                * Clear the ends of indirect blocks on the shared branch
-                * at the end of the range
-                */
-               if (partial2 > chain2) {
-                       ext4_free_branches(handle, inode, partial2->bh,
-                                          (__le32 *)partial2->bh->b_data,
-                                          partial2->p,
-                                          (chain2+n-1) - partial2);
-                       BUFFER_TRACE(partial2->bh, "call brelse");
-                       brelse(partial2->bh);
-                       partial2--;
-               }
-       }
+
+       /* Free top, but only if partial2 isn't its subtree. */
+       if (nr) {
+               int level = min(partial - chain, partial2 - chain2);
+               int i;
+               int subtree = 1;
+
+               for (i = 0; i <= level; i++) {
+                       if (offsets[i] != offsets2[i]) {
+                               subtree = 0;
+                               break;
+                       }
+               }
+
+               if (!subtree) {
+                       if (partial == chain) {
+                               /* Shared branch grows from the inode */
+                               ext4_free_branches(handle, inode, NULL,
+                                                  &nr, &nr+1,
+                                                  (chain+n-1) - partial);
+                               *partial->p = 0;
+                       } else {
+                               /* Shared branch grows from an indirect block */
+                               BUFFER_TRACE(partial->bh, "get_write_access");
+                               ext4_free_branches(handle, inode, partial->bh,
+                                                  partial->p,
+                                                  partial->p+1,
+                                                  (chain+n-1) - partial);
+                       }
+               }
+       }
+
+       if (!nr2) {
+               /*
+                * ext4_find_shared returns Indirect structure which
+                * points to the last element which should not be
+                * removed by truncate. But this is end of the range
+                * in punch_hole so we need to point to the next element
+                */
+               partial2->p++;
+       }
+
+       while (partial > chain || partial2 > chain2) {
+               int depth = (chain+n-1) - partial;
+               int depth2 = (chain2+n2-1) - partial2;
+
+               if (partial > chain && partial2 > chain2 &&
+                   partial->bh->b_blocknr == partial2->bh->b_blocknr) {
+                       /*
+                        * We've converged on the same block. Clear the range,
+                        * then we're done.
+                        */
+                       ext4_free_branches(handle, inode, partial->bh,
+                                          partial->p + 1,
+                                          partial2->p,
+                                          (chain+n-1) - partial);
+                       BUFFER_TRACE(partial->bh, "call brelse");
+                       brelse(partial->bh);
+                       BUFFER_TRACE(partial2->bh, "call brelse");
+                       brelse(partial2->bh);
+                       return 0;
+               }
+
+               /*
+                * The start and end partial branches may not be at the same
+                * level even though the punch happened within one level. So, we
+                * give them a chance to arrive at the same level, then walk
+                * them in step with each other until we converge on the same
+                * block.
+                */
+               if (partial > chain && depth <= depth2) {
+                       ext4_free_branches(handle, inode, partial->bh,
+                                          partial->p + 1,
+                                          (__le32 *)partial->bh->b_data+addr_per_block,
+                                          (chain+n-1) - partial);
+                       BUFFER_TRACE(partial->bh, "call brelse");
+                       brelse(partial->bh);
+                       partial--;
+               }
+               if (partial2 > chain2 && depth2 <= depth) {
+                       ext4_free_branches(handle, inode, partial2->bh,
+                                          (__le32 *)partial2->bh->b_data,
+                                          partial2->p,
+                                          (chain2+n2-1) - partial2);
+                       BUFFER_TRACE(partial2->bh, "call brelse");
+                       brelse(partial2->bh);
+                       partial2--;
+               }
+       }
+       return 0;

do_indirects:
        /* Kill the remaining (whole) subtrees */
        switch (offsets[0]) {
        default:
                if (++n >= n2)
                        return 0;
                nr = i_data[EXT4_IND_BLOCK];
                if (nr) {
                        ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1);
                        i_data[EXT4_IND_BLOCK] = 0;
                }
        case EXT4_IND_BLOCK:
                if (++n >= n2)
                        return 0;
                nr = i_data[EXT4_DIND_BLOCK];
                if (nr) {
                        ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2);
                        i_data[EXT4_DIND_BLOCK] = 0;
                }
        case EXT4_DIND_BLOCK:
                if (++n >= n2)
                        return 0;
                nr = i_data[EXT4_TIND_BLOCK];
                if (nr) {
                        ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3);
                        i_data[EXT4_TIND_BLOCK] = 0;
                }
        case EXT4_TIND_BLOCK:
                ;
        }
        return 0;
}
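
/*
 * Editorial note (added commentary, not part of the original file): the
 * rewritten convergence loop above is the substance of this hunk. The
 * old loop stepped both partial branches on every iteration regardless
 * of their depths, which went wrong when the two ends of the hole mapped
 * to branches of different depths; the new loop computes depth/depth2
 * each pass and steps a side only while it is at least as deep as the
 * other, so both walks reach the same level before the convergence check
 * on b_blocknr can fire.
 */
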
/*
 *  linux/fs/ext4/inode.c
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  from
 *
 *  linux/fs/minix/inode.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  64-bit file support on 64-bit platforms by Jakub Jelinek
 *      (jj@sunsite.ms.mff.cuni.cz)
 *
 *  Assorted race fixes, rewrite of ext4_get_block() by Al Viro, 2000
 */

#include <linux/fs.h>
#include <linux/time.h>
#include <linux/jbd2.h>
#include <linux/highuid.h>
#include <linux/pagemap.h>
#include <linux/quotaops.h>
#include <linux/string.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>
#include <linux/pagevec.h>
#include <linux/mpage.h>
#include <linux/namei.h>
#include <linux/uio.h>
#include <linux/bio.h>
#include <linux/workqueue.h>
#include <linux/kernel.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/ratelimit.h>
#include <linux/aio.h>
#include <linux/bitops.h>

#include "ext4_jbd2.h"
#include "xattr.h"
#include "acl.h"
#include "truncate.h"

#include <trace/events/ext4.h>

#define MPAGE_DA_EXTENT_TAIL 0x01

static __u32 ext4_inode_csum(struct inode *inode, struct ext4_inode *raw,
                             struct ext4_inode_info *ei)
{
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
        __u16 csum_lo;
        __u16 csum_hi = 0;
        __u32 csum;

        csum_lo = le16_to_cpu(raw->i_checksum_lo);
        raw->i_checksum_lo = 0;
        if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
            EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) {
                csum_hi = le16_to_cpu(raw->i_checksum_hi);
                raw->i_checksum_hi = 0;
        }

        csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)raw,
                           EXT4_INODE_SIZE(inode->i_sb));

        raw->i_checksum_lo = cpu_to_le16(csum_lo);
        if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
            EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi))
                raw->i_checksum_hi = cpu_to_le16(csum_hi);

        return csum;
}
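
/*
 * Editorial note (added commentary, not part of the original file): the
 * checksum covers the whole on-disk inode, so the stored checksum fields
 * themselves must be zeroed while ext4_chksum() runs and restored
 * afterwards -- that is the save/zero/restore dance above. The 32-bit
 * result is split into a 16-bit lo half that always fits in the good-old
 * 128-byte inode and a 16-bit hi half stored only when the larger inode
 * has room for i_checksum_hi.
 */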

static int ext4_inode_csum_verify(struct inode *inode, struct ext4_inode *raw,
                                  struct ext4_inode_info *ei)
{
        __u32 provided, calculated;

        if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
            cpu_to_le32(EXT4_OS_LINUX) ||
            !ext4_has_metadata_csum(inode->i_sb))
                return 1;

        provided = le16_to_cpu(raw->i_checksum_lo);
        calculated = ext4_inode_csum(inode, raw, ei);
        if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
            EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi))
                provided |= ((__u32)le16_to_cpu(raw->i_checksum_hi)) << 16;
        else
                calculated &= 0xFFFF;

        return provided == calculated;
}

static void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw,
                                struct ext4_inode_info *ei)
{
        __u32 csum;

        if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
            cpu_to_le32(EXT4_OS_LINUX) ||
            !ext4_has_metadata_csum(inode->i_sb))
                return;

        csum = ext4_inode_csum(inode, raw, ei);
        raw->i_checksum_lo = cpu_to_le16(csum & 0xFFFF);
        if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
            EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi))
                raw->i_checksum_hi = cpu_to_le16(csum >> 16);
}
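
/*
 * Worked example (added commentary, not part of the original file): if
 * ext4_inode_csum() returns 0xDEADBEEF, ext4_inode_csum_set() stores
 * lo = 0xBEEF and, when the inode is large enough, hi = 0xDEAD;
 * ext4_inode_csum_verify() then reassembles
 * provided = (0xDEAD << 16) | 0xBEEF. On a 128-byte inode there is no
 * i_checksum_hi, so only the low 16 bits are compared.
 */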

static inline int ext4_begin_ordered_truncate(struct inode *inode,
                                              loff_t new_size)
{
        trace_ext4_begin_ordered_truncate(inode, new_size);
        /*
         * If jinode is zero, then we never opened the file for
         * writing, so there's no need to call
         * jbd2_journal_begin_ordered_truncate() since there are no
         * outstanding writes we need to flush.
         */
        if (!EXT4_I(inode)->jinode)
                return 0;
        return jbd2_journal_begin_ordered_truncate(EXT4_JOURNAL(inode),
                                                   EXT4_I(inode)->jinode,
                                                   new_size);
}

static void ext4_invalidatepage(struct page *page, unsigned int offset,
                                unsigned int length);
static int __ext4_journalled_writepage(struct page *page, unsigned int len);
static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh);
static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
                                  int pextents);

/*
 * Test whether an inode is a fast symlink.
 */
static int ext4_inode_is_fast_symlink(struct inode *inode)
{
        int ea_blocks = EXT4_I(inode)->i_file_acl ?
                EXT4_CLUSTER_SIZE(inode->i_sb) >> 9 : 0;

        if (ext4_has_inline_data(inode))
                return 0;

        return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0);
}
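
/*
 * Editorial note (added commentary, not part of the original file): a
 * fast symlink keeps its target string directly in the inode's i_data
 * area instead of in a data block, so the only blocks it can own are
 * extended-attribute blocks. i_blocks counts 512-byte units, hence the
 * ">> 9" above to convert the cluster size in bytes into the same units
 * before subtracting.
 */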

/*
 * Restart the transaction associated with *handle.  This does a commit,
 * so before we call here everything must be consistently dirtied against
 * this transaction.
 */
int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode,
                                int nblocks)
{
        int ret;

        /*
         * Drop i_data_sem to avoid deadlock with ext4_map_blocks.  At this
         * moment, get_block can be called only for blocks inside i_size since
         * page cache has been already dropped and writes are blocked by
         * i_mutex.  So we can safely drop the i_data_sem here.
         */
        BUG_ON(EXT4_JOURNAL(inode) == NULL);
        jbd_debug(2, "restarting handle %p\n", handle);
        up_write(&EXT4_I(inode)->i_data_sem);
        ret = ext4_journal_restart(handle, nblocks);
        down_write(&EXT4_I(inode)->i_data_sem);
        ext4_discard_preallocations(inode);

        return ret;
}
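
/*
 * Editorial usage note (added commentary, not part of the original
 * file): callers follow the contract in the comment above -- see
 * ext4_clear_blocks() in fs/ext4/indirect.c earlier in this diff, which
 * dirties the indirect buffer and the inode, calls this function with
 * ext4_blocks_for_truncate() credits, and then retakes write access to
 * the buffer under the new handle.
 */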

/*
 * Called at the last iput() if i_nlink is zero.
 */
void ext4_evict_inode(struct inode *inode)
{
        handle_t *handle;
        int err;

        trace_ext4_evict_inode(inode);

        if (inode->i_nlink) {
                /*
                 * When journalling data, dirty buffers are tracked only in the
                 * journal. So although mm thinks everything is clean and
                 * ready for reaping, the inode might still have some pages to
                 * write in the running transaction or waiting to be
                 * checkpointed. Thus calling jbd2_journal_invalidatepage()
                 * (via truncate_inode_pages()) to discard these buffers can
                 * cause data loss. Also even if we did not discard these
                 * buffers, we would have no way to find them after the inode
                 * is reaped and thus the user could see stale data if they
                 * are read before the transaction is checkpointed. So be
                 * careful and force everything to disk here... We use
                 * ei->i_datasync_tid to store the newest transaction
                 * containing inode's data.
                 *
                 * Note that directories do not have this problem because they
                 * don't use page cache.
                 */
                if (ext4_should_journal_data(inode) &&
                    (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) &&
                    inode->i_ino != EXT4_JOURNAL_INO) {
                        journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
                        tid_t commit_tid = EXT4_I(inode)->i_datasync_tid;

                        jbd2_complete_transaction(journal, commit_tid);
                        filemap_write_and_wait(&inode->i_data);
                }
                truncate_inode_pages_final(&inode->i_data);

                WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count));
                goto no_delete;
        }

        if (is_bad_inode(inode))
                goto no_delete;
        dquot_initialize(inode);

        if (ext4_should_order_data(inode))
                ext4_begin_ordered_truncate(inode, 0);
        truncate_inode_pages_final(&inode->i_data);

        WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count));

        /*
         * Protect us against freezing - the iput() caller didn't have to
         * have any protection against it
         */
        sb_start_intwrite(inode->i_sb);
        handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE,
                                    ext4_blocks_for_truncate(inode)+3);
        if (IS_ERR(handle)) {
                ext4_std_error(inode->i_sb, PTR_ERR(handle));
                /*
                 * If we're going to skip the normal cleanup, we still need to
                 * make sure that the in-core orphan linked list is properly
                 * cleaned up.
                 */
                ext4_orphan_del(NULL, inode);
                sb_end_intwrite(inode->i_sb);
                goto no_delete;
        }

        if (IS_SYNC(inode))
                ext4_handle_sync(handle);
        inode->i_size = 0;
        err = ext4_mark_inode_dirty(handle, inode);
        if (err) {
                ext4_warning(inode->i_sb,
                             "couldn't mark inode dirty (err %d)", err);
                goto stop_handle;
        }
        if (inode->i_blocks)
                ext4_truncate(inode);

        /*
         * ext4_ext_truncate() doesn't reserve any slop when it
         * restarts journal transactions; therefore there may not be
         * enough credits left in the handle to remove the inode from
         * the orphan list and set the dtime field.
         */
        if (!ext4_handle_has_enough_credits(handle, 3)) {
                err = ext4_journal_extend(handle, 3);
                if (err > 0)
                        err = ext4_journal_restart(handle, 3);
                if (err != 0) {
                        ext4_warning(inode->i_sb,
                                     "couldn't extend journal (err %d)", err);
                stop_handle:
                        ext4_journal_stop(handle);
                        ext4_orphan_del(NULL, inode);
                        sb_end_intwrite(inode->i_sb);
                        goto no_delete;
                }
        }

        /*
         * Kill off the orphan record which ext4_truncate created.
         * AKPM: I think this can be inside the above `if'.
         * Note that ext4_orphan_del() has to be able to cope with the
         * deletion of a non-existent orphan - this is because we don't
         * know if ext4_truncate() actually created an orphan record.
         * (Well, we could do this if we need to, but heck - it works)
         */
        ext4_orphan_del(handle, inode);
        EXT4_I(inode)->i_dtime = get_seconds();

        /*
         * One subtle ordering requirement: if anything has gone wrong
         * (transaction abort, IO errors, whatever), then we can still
         * do these next steps (the fs will already have been marked as
         * having errors), but we can't free the inode if the mark_dirty
         * fails.
         */
        if (ext4_mark_inode_dirty(handle, inode))
                /* If that failed, just do the required in-core inode clear. */
                ext4_clear_inode(inode);
        else
                ext4_free_inode(handle, inode);
        ext4_journal_stop(handle);
        sb_end_intwrite(inode->i_sb);
        return;
no_delete:
        ext4_clear_inode(inode);        /* We must guarantee clearing of inode... */
}
316 316
317 #ifdef CONFIG_QUOTA 317 #ifdef CONFIG_QUOTA
318 qsize_t *ext4_get_reserved_space(struct inode *inode) 318 qsize_t *ext4_get_reserved_space(struct inode *inode)
319 { 319 {
320 return &EXT4_I(inode)->i_reserved_quota; 320 return &EXT4_I(inode)->i_reserved_quota;
321 } 321 }
322 #endif 322 #endif
323 323
324 /* 324 /*
325 * Called with i_data_sem down, which is important since we can call 325 * Called with i_data_sem down, which is important since we can call
326 * ext4_discard_preallocations() from here. 326 * ext4_discard_preallocations() from here.
327 */ 327 */
328 void ext4_da_update_reserve_space(struct inode *inode, 328 void ext4_da_update_reserve_space(struct inode *inode,
329 int used, int quota_claim) 329 int used, int quota_claim)
330 { 330 {
331 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 331 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
332 struct ext4_inode_info *ei = EXT4_I(inode); 332 struct ext4_inode_info *ei = EXT4_I(inode);
333 333
334 spin_lock(&ei->i_block_reservation_lock); 334 spin_lock(&ei->i_block_reservation_lock);
335 trace_ext4_da_update_reserve_space(inode, used, quota_claim); 335 trace_ext4_da_update_reserve_space(inode, used, quota_claim);
336 if (unlikely(used > ei->i_reserved_data_blocks)) { 336 if (unlikely(used > ei->i_reserved_data_blocks)) {
337 ext4_warning(inode->i_sb, "%s: ino %lu, used %d " 337 ext4_warning(inode->i_sb, "%s: ino %lu, used %d "
338 "with only %d reserved data blocks", 338 "with only %d reserved data blocks",
339 __func__, inode->i_ino, used, 339 __func__, inode->i_ino, used,
340 ei->i_reserved_data_blocks); 340 ei->i_reserved_data_blocks);
341 WARN_ON(1); 341 WARN_ON(1);
342 used = ei->i_reserved_data_blocks; 342 used = ei->i_reserved_data_blocks;
343 } 343 }
344 344
345 /* Update per-inode reservations */ 345 /* Update per-inode reservations */
346 ei->i_reserved_data_blocks -= used; 346 ei->i_reserved_data_blocks -= used;
347 percpu_counter_sub(&sbi->s_dirtyclusters_counter, used); 347 percpu_counter_sub(&sbi->s_dirtyclusters_counter, used);
348 348
349 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 349 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
350 350
351 /* Update quota subsystem for data blocks */ 351 /* Update quota subsystem for data blocks */
352 if (quota_claim) 352 if (quota_claim)
353 dquot_claim_block(inode, EXT4_C2B(sbi, used)); 353 dquot_claim_block(inode, EXT4_C2B(sbi, used));
354 else { 354 else {
355 /* 355 /*
356 * We did fallocate with an offset that is already delayed 356 * We did fallocate with an offset that is already delayed
357 * allocated. So on delayed allocated writeback we should 357 * allocated. So on delayed allocated writeback we should
358 * not re-claim the quota for fallocated blocks. 358 * not re-claim the quota for fallocated blocks.
359 */ 359 */
360 dquot_release_reservation_block(inode, EXT4_C2B(sbi, used)); 360 dquot_release_reservation_block(inode, EXT4_C2B(sbi, used));
361 } 361 }
362 362
363 /* 363 /*
364 * If we have done all the pending block allocations and if 364 * If we have done all the pending block allocations and if
365 * there aren't any writers on the inode, we can discard the 365 * there aren't any writers on the inode, we can discard the
366 * inode's preallocations. 366 * inode's preallocations.
367 */ 367 */
368 if ((ei->i_reserved_data_blocks == 0) && 368 if ((ei->i_reserved_data_blocks == 0) &&
369 (atomic_read(&inode->i_writecount) == 0)) 369 (atomic_read(&inode->i_writecount) == 0))
370 ext4_discard_preallocations(inode); 370 ext4_discard_preallocations(inode);
371 } 371 }
372 372
373 static int __check_block_validity(struct inode *inode, const char *func, 373 static int __check_block_validity(struct inode *inode, const char *func,
374 unsigned int line, 374 unsigned int line,
375 struct ext4_map_blocks *map) 375 struct ext4_map_blocks *map)
376 { 376 {
377 if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk, 377 if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk,
378 map->m_len)) { 378 map->m_len)) {
379 ext4_error_inode(inode, func, line, map->m_pblk, 379 ext4_error_inode(inode, func, line, map->m_pblk,
380 "lblock %lu mapped to illegal pblock " 380 "lblock %lu mapped to illegal pblock "
381 "(length %d)", (unsigned long) map->m_lblk, 381 "(length %d)", (unsigned long) map->m_lblk,
382 map->m_len); 382 map->m_len);
383 return -EIO; 383 return -EIO;
384 } 384 }
385 return 0; 385 return 0;
386 } 386 }
387 387
388 #define check_block_validity(inode, map) \ 388 #define check_block_validity(inode, map) \
389 __check_block_validity((inode), __func__, __LINE__, (map)) 389 __check_block_validity((inode), __func__, __LINE__, (map))
390 390
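/*
 * Illustrative note (editor's sketch, not part of the kernel source):
 * a call such as
 *
 *	err = check_block_validity(inode, &map);
 *
 * expands to
 *
 *	err = __check_block_validity(inode, __func__, __LINE__, &map);
 *
 * so the ext4_error_inode() report names the real caller and its line
 * rather than this helper.
 */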
391 #ifdef ES_AGGRESSIVE_TEST 391 #ifdef ES_AGGRESSIVE_TEST
392 static void ext4_map_blocks_es_recheck(handle_t *handle, 392 static void ext4_map_blocks_es_recheck(handle_t *handle,
393 struct inode *inode, 393 struct inode *inode,
394 struct ext4_map_blocks *es_map, 394 struct ext4_map_blocks *es_map,
395 struct ext4_map_blocks *map, 395 struct ext4_map_blocks *map,
396 int flags) 396 int flags)
397 { 397 {
398 int retval; 398 int retval;
399 399
400 map->m_flags = 0; 400 map->m_flags = 0;
401 /* 401 /*
402 * There is a race window in which the two results may differ, 402 * There is a race window in which the two results may differ,
403 * e.g. xfstests #223 when dioread_nolock is enabled. The reason 403 * e.g. xfstests #223 when dioread_nolock is enabled. The reason
404 * is that we look up a block mapping in the extent status tree 404 * is that we look up a block mapping in the extent status tree
405 * without taking i_data_sem, so in the meantime the unwritten 405 * without taking i_data_sem, so in the meantime the unwritten
406 * extent could be converted. 406 * extent could be converted.
407 */ 407 */
408 if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) 408 if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
409 down_read(&EXT4_I(inode)->i_data_sem); 409 down_read(&EXT4_I(inode)->i_data_sem);
410 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { 410 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
411 retval = ext4_ext_map_blocks(handle, inode, map, flags & 411 retval = ext4_ext_map_blocks(handle, inode, map, flags &
412 EXT4_GET_BLOCKS_KEEP_SIZE); 412 EXT4_GET_BLOCKS_KEEP_SIZE);
413 } else { 413 } else {
414 retval = ext4_ind_map_blocks(handle, inode, map, flags & 414 retval = ext4_ind_map_blocks(handle, inode, map, flags &
415 EXT4_GET_BLOCKS_KEEP_SIZE); 415 EXT4_GET_BLOCKS_KEEP_SIZE);
416 } 416 }
417 if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) 417 if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
418 up_read((&EXT4_I(inode)->i_data_sem)); 418 up_read((&EXT4_I(inode)->i_data_sem));
419 419
420 /* 420 /*
421 * We don't check m_len because the extent will be collapsed in the 421 * We don't check m_len because the extent will be collapsed in the
422 * status tree, so the lengths might not be equal. 422 * status tree, so the lengths might not be equal.
423 */ 423 */
424 if (es_map->m_lblk != map->m_lblk || 424 if (es_map->m_lblk != map->m_lblk ||
425 es_map->m_flags != map->m_flags || 425 es_map->m_flags != map->m_flags ||
426 es_map->m_pblk != map->m_pblk) { 426 es_map->m_pblk != map->m_pblk) {
427 printk("ES cache assertion failed for inode: %lu " 427 printk("ES cache assertion failed for inode: %lu "
428 "es_cached ex [%d/%d/%llu/%x] != " 428 "es_cached ex [%d/%d/%llu/%x] != "
429 "found ex [%d/%d/%llu/%x] retval %d flags %x\n", 429 "found ex [%d/%d/%llu/%x] retval %d flags %x\n",
430 inode->i_ino, es_map->m_lblk, es_map->m_len, 430 inode->i_ino, es_map->m_lblk, es_map->m_len,
431 es_map->m_pblk, es_map->m_flags, map->m_lblk, 431 es_map->m_pblk, es_map->m_flags, map->m_lblk,
432 map->m_len, map->m_pblk, map->m_flags, 432 map->m_len, map->m_pblk, map->m_flags,
433 retval, flags); 433 retval, flags);
434 } 434 }
435 } 435 }
436 #endif /* ES_AGGRESSIVE_TEST */ 436 #endif /* ES_AGGRESSIVE_TEST */
437 437
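/*
 * Editor's note (assumption): ES_AGGRESSIVE_TEST is a compile-time
 * debugging switch. When it is defined, every ext4_map_blocks() lookup
 * that is satisfied from the extent status cache is re-checked against
 * the on-disk mapping by ext4_map_blocks_es_recheck() above, and any
 * disagreement is reported as an "ES cache assertion failed" message.
 */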
438 /* 438 /*
439 * The ext4_map_blocks() function tries to look up the requested blocks 439 * The ext4_map_blocks() function tries to look up the requested blocks
440 * and returns immediately if the blocks are already mapped. 440 * and returns immediately if the blocks are already mapped.
441 * 441 *
442 * Otherwise it takes the write lock of i_data_sem, allocates blocks, 442 * Otherwise it takes the write lock of i_data_sem, allocates blocks,
443 * stores the allocated blocks in the result buffer head and marks it 443 * stores the allocated blocks in the result buffer head and marks it
444 * mapped. 444 * mapped.
445 * 445 *
446 * If the file is extent-based, it calls ext4_ext_map_blocks(); 446 * If the file is extent-based, it calls ext4_ext_map_blocks();
447 * otherwise it calls ext4_ind_map_blocks() to handle indirect-mapped 447 * otherwise it calls ext4_ind_map_blocks() to handle indirect-mapped
448 * files. 448 * files.
449 * 449 *
450 * On success, it returns the number of blocks mapped or allocated. 450 * On success, it returns the number of blocks mapped or allocated.
451 * If create == 0 and the blocks are pre-allocated and unwritten, the 451 * If create == 0 and the blocks are pre-allocated and unwritten, the
452 * result buffer head is left unmapped. If create == 1, it makes sure 452 * result buffer head is left unmapped. If create == 1, it makes sure
453 * the buffer head is mapped. 453 * the buffer head is mapped.
454 * 454 *
455 * It returns 0 if a plain lookup failed (the blocks have not been 455 * It returns 0 if a plain lookup failed (the blocks have not been
456 * allocated); in that case the buffer head is left unmapped. 456 * allocated); in that case the buffer head is left unmapped.
457 * 457 *
458 * It returns a negative error code in case of allocation failure. 458 * It returns a negative error code in case of allocation failure.
459 */ 459 */
460 int ext4_map_blocks(handle_t *handle, struct inode *inode, 460 int ext4_map_blocks(handle_t *handle, struct inode *inode,
461 struct ext4_map_blocks *map, int flags) 461 struct ext4_map_blocks *map, int flags)
462 { 462 {
463 struct extent_status es; 463 struct extent_status es;
464 int retval; 464 int retval;
465 int ret = 0; 465 int ret = 0;
466 #ifdef ES_AGGRESSIVE_TEST 466 #ifdef ES_AGGRESSIVE_TEST
467 struct ext4_map_blocks orig_map; 467 struct ext4_map_blocks orig_map;
468 468
469 memcpy(&orig_map, map, sizeof(*map)); 469 memcpy(&orig_map, map, sizeof(*map));
470 #endif 470 #endif
471 471
472 map->m_flags = 0; 472 map->m_flags = 0;
473 ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u," 473 ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u,"
474 "logical block %lu\n", inode->i_ino, flags, map->m_len, 474 "logical block %lu\n", inode->i_ino, flags, map->m_len,
475 (unsigned long) map->m_lblk); 475 (unsigned long) map->m_lblk);
476 476
477 /* 477 /*
478 * ext4_map_blocks returns an int, and m_len is an unsigned int 478 * ext4_map_blocks returns an int, and m_len is an unsigned int
479 */ 479 */
480 if (unlikely(map->m_len > INT_MAX)) 480 if (unlikely(map->m_len > INT_MAX))
481 map->m_len = INT_MAX; 481 map->m_len = INT_MAX;
482 482
483 /* We can only handle block numbers less than EXT_MAX_BLOCKS */ 483 /* We can only handle block numbers less than EXT_MAX_BLOCKS */
484 if (unlikely(map->m_lblk >= EXT_MAX_BLOCKS)) 484 if (unlikely(map->m_lblk >= EXT_MAX_BLOCKS))
485 return -EIO; 485 return -EIO;
486 486
487 /* Lookup extent status tree firstly */ 487 /* Lookup extent status tree firstly */
488 if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) { 488 if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
489 if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) { 489 if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) {
490 map->m_pblk = ext4_es_pblock(&es) + 490 map->m_pblk = ext4_es_pblock(&es) +
491 map->m_lblk - es.es_lblk; 491 map->m_lblk - es.es_lblk;
492 map->m_flags |= ext4_es_is_written(&es) ? 492 map->m_flags |= ext4_es_is_written(&es) ?
493 EXT4_MAP_MAPPED : EXT4_MAP_UNWRITTEN; 493 EXT4_MAP_MAPPED : EXT4_MAP_UNWRITTEN;
494 retval = es.es_len - (map->m_lblk - es.es_lblk); 494 retval = es.es_len - (map->m_lblk - es.es_lblk);
495 if (retval > map->m_len) 495 if (retval > map->m_len)
496 retval = map->m_len; 496 retval = map->m_len;
497 map->m_len = retval; 497 map->m_len = retval;
498 } else if (ext4_es_is_delayed(&es) || ext4_es_is_hole(&es)) { 498 } else if (ext4_es_is_delayed(&es) || ext4_es_is_hole(&es)) {
499 retval = 0; 499 retval = 0;
500 } else { 500 } else {
501 BUG_ON(1); 501 BUG_ON(1);
502 } 502 }
503 #ifdef ES_AGGRESSIVE_TEST 503 #ifdef ES_AGGRESSIVE_TEST
504 ext4_map_blocks_es_recheck(handle, inode, map, 504 ext4_map_blocks_es_recheck(handle, inode, map,
505 &orig_map, flags); 505 &orig_map, flags);
506 #endif 506 #endif
507 goto found; 507 goto found;
508 } 508 }
509 509
510 /* 510 /*
511 * Try to see if we can get the block without requesting a new 511 * Try to see if we can get the block without requesting a new
512 * file system block. 512 * file system block.
513 */ 513 */
514 if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) 514 if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
515 down_read(&EXT4_I(inode)->i_data_sem); 515 down_read(&EXT4_I(inode)->i_data_sem);
516 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { 516 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
517 retval = ext4_ext_map_blocks(handle, inode, map, flags & 517 retval = ext4_ext_map_blocks(handle, inode, map, flags &
518 EXT4_GET_BLOCKS_KEEP_SIZE); 518 EXT4_GET_BLOCKS_KEEP_SIZE);
519 } else { 519 } else {
520 retval = ext4_ind_map_blocks(handle, inode, map, flags & 520 retval = ext4_ind_map_blocks(handle, inode, map, flags &
521 EXT4_GET_BLOCKS_KEEP_SIZE); 521 EXT4_GET_BLOCKS_KEEP_SIZE);
522 } 522 }
523 if (retval > 0) { 523 if (retval > 0) {
524 unsigned int status; 524 unsigned int status;
525 525
526 if (unlikely(retval != map->m_len)) { 526 if (unlikely(retval != map->m_len)) {
527 ext4_warning(inode->i_sb, 527 ext4_warning(inode->i_sb,
528 "ES len assertion failed for inode " 528 "ES len assertion failed for inode "
529 "%lu: retval %d != map->m_len %d", 529 "%lu: retval %d != map->m_len %d",
530 inode->i_ino, retval, map->m_len); 530 inode->i_ino, retval, map->m_len);
531 WARN_ON(1); 531 WARN_ON(1);
532 } 532 }
533 533
534 status = map->m_flags & EXT4_MAP_UNWRITTEN ? 534 status = map->m_flags & EXT4_MAP_UNWRITTEN ?
535 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; 535 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
536 if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) && 536 if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
537 ext4_find_delalloc_range(inode, map->m_lblk, 537 ext4_find_delalloc_range(inode, map->m_lblk,
538 map->m_lblk + map->m_len - 1)) 538 map->m_lblk + map->m_len - 1))
539 status |= EXTENT_STATUS_DELAYED; 539 status |= EXTENT_STATUS_DELAYED;
540 ret = ext4_es_insert_extent(inode, map->m_lblk, 540 ret = ext4_es_insert_extent(inode, map->m_lblk,
541 map->m_len, map->m_pblk, status); 541 map->m_len, map->m_pblk, status);
542 if (ret < 0) 542 if (ret < 0)
543 retval = ret; 543 retval = ret;
544 } 544 }
545 if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) 545 if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
546 up_read((&EXT4_I(inode)->i_data_sem)); 546 up_read((&EXT4_I(inode)->i_data_sem));
547 547
548 found: 548 found:
549 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { 549 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
550 ret = check_block_validity(inode, map); 550 ret = check_block_validity(inode, map);
551 if (ret != 0) 551 if (ret != 0)
552 return ret; 552 return ret;
553 } 553 }
554 554
555 /* If it is only a block(s) look up */ 555 /* If it is only a block(s) look up */
556 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) 556 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0)
557 return retval; 557 return retval;
558 558
559 /* 559 /*
560 * Return here if the blocks have already been allocated. 560 * Return here if the blocks have already been allocated.
561 * 561 *
562 * Note that if blocks have been preallocated, 562 * Note that if blocks have been preallocated,
563 * ext4_ext_get_block() returns with create == 0 563 * ext4_ext_get_block() returns with create == 0
564 * and the buffer head unmapped. 564 * and the buffer head unmapped.
565 */ 565 */
566 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) 566 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED)
567 /* 567 /*
568 * If we need to convert extent to unwritten 568 * If we need to convert extent to unwritten
569 * we continue and do the actual work in 569 * we continue and do the actual work in
570 * ext4_ext_map_blocks() 570 * ext4_ext_map_blocks()
571 */ 571 */
572 if (!(flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) 572 if (!(flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN))
573 return retval; 573 return retval;
574 574
575 /* 575 /*
576 * Here we clear m_flags because after allocating a new extent, 576 * Here we clear m_flags because after allocating a new extent,
577 * it will be set again. 577 * it will be set again.
578 */ 578 */
579 map->m_flags &= ~EXT4_MAP_FLAGS; 579 map->m_flags &= ~EXT4_MAP_FLAGS;
580 580
581 /* 581 /*
582 * Allocating new blocks and/or writing to an unwritten extent 582 * Allocating new blocks and/or writing to an unwritten extent
583 * will possibly result in updating i_data, so we take 583 * will possibly result in updating i_data, so we take
584 * the write lock of i_data_sem and call get_block() 584 * the write lock of i_data_sem and call get_block()
585 * with the create == 1 flag. 585 * with the create == 1 flag.
586 */ 586 */
587 down_write(&EXT4_I(inode)->i_data_sem); 587 down_write(&EXT4_I(inode)->i_data_sem);
588 588
589 /* 589 /*
590 * We need to re-check the extents flag here because migrate 590 * We need to re-check the extents flag here because migrate
591 * could have changed the inode type in between. 591 * could have changed the inode type in between.
592 */ 592 */
593 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { 593 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
594 retval = ext4_ext_map_blocks(handle, inode, map, flags); 594 retval = ext4_ext_map_blocks(handle, inode, map, flags);
595 } else { 595 } else {
596 retval = ext4_ind_map_blocks(handle, inode, map, flags); 596 retval = ext4_ind_map_blocks(handle, inode, map, flags);
597 597
598 if (retval > 0 && map->m_flags & EXT4_MAP_NEW) { 598 if (retval > 0 && map->m_flags & EXT4_MAP_NEW) {
599 /* 599 /*
600 * We allocated new blocks which will result in 600 * We allocated new blocks which will result in
601 * i_data's format changing. Force the migrate 601 * i_data's format changing. Force the migrate
602 * to fail by clearing migrate flags 602 * to fail by clearing migrate flags
603 */ 603 */
604 ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE); 604 ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
605 } 605 }
606 606
607 /* 607 /*
608 * Update reserved blocks/metadata blocks after successful 608 * Update reserved blocks/metadata blocks after successful
609 * block allocation which had been deferred till now. We don't 609 * block allocation which had been deferred till now. We don't
610 * support fallocate for non-extent files, so we can update 610 * support fallocate for non-extent files, so we can update
611 * the reserved space here. 611 * the reserved space here.
612 */ 612 */
613 if ((retval > 0) && 613 if ((retval > 0) &&
614 (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)) 614 (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE))
615 ext4_da_update_reserve_space(inode, retval, 1); 615 ext4_da_update_reserve_space(inode, retval, 1);
616 } 616 }
617 617
618 if (retval > 0) { 618 if (retval > 0) {
619 unsigned int status; 619 unsigned int status;
620 620
621 if (unlikely(retval != map->m_len)) { 621 if (unlikely(retval != map->m_len)) {
622 ext4_warning(inode->i_sb, 622 ext4_warning(inode->i_sb,
623 "ES len assertion failed for inode " 623 "ES len assertion failed for inode "
624 "%lu: retval %d != map->m_len %d", 624 "%lu: retval %d != map->m_len %d",
625 inode->i_ino, retval, map->m_len); 625 inode->i_ino, retval, map->m_len);
626 WARN_ON(1); 626 WARN_ON(1);
627 } 627 }
628 628
629 /* 629 /*
630 * If the extent has been zeroed out, we don't need to update 630 * If the extent has been zeroed out, we don't need to update
631 * the extent status tree. 631 * the extent status tree.
632 */ 632 */
633 if ((flags & EXT4_GET_BLOCKS_PRE_IO) && 633 if ((flags & EXT4_GET_BLOCKS_PRE_IO) &&
634 ext4_es_lookup_extent(inode, map->m_lblk, &es)) { 634 ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
635 if (ext4_es_is_written(&es)) 635 if (ext4_es_is_written(&es))
636 goto has_zeroout; 636 goto has_zeroout;
637 } 637 }
638 status = map->m_flags & EXT4_MAP_UNWRITTEN ? 638 status = map->m_flags & EXT4_MAP_UNWRITTEN ?
639 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; 639 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
640 if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) && 640 if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
641 ext4_find_delalloc_range(inode, map->m_lblk, 641 ext4_find_delalloc_range(inode, map->m_lblk,
642 map->m_lblk + map->m_len - 1)) 642 map->m_lblk + map->m_len - 1))
643 status |= EXTENT_STATUS_DELAYED; 643 status |= EXTENT_STATUS_DELAYED;
644 ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len, 644 ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
645 map->m_pblk, status); 645 map->m_pblk, status);
646 if (ret < 0) 646 if (ret < 0)
647 retval = ret; 647 retval = ret;
648 } 648 }
649 649
650 has_zeroout: 650 has_zeroout:
651 up_write((&EXT4_I(inode)->i_data_sem)); 651 up_write((&EXT4_I(inode)->i_data_sem));
652 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { 652 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
653 ret = check_block_validity(inode, map); 653 ret = check_block_validity(inode, map);
654 if (ret != 0) 654 if (ret != 0)
655 return ret; 655 return ret;
656 } 656 }
657 return retval; 657 return retval;
658 } 658 }
659 659
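/*
 * Illustrative sketch (editor's example, not kernel source): a minimal
 * lookup-only use of ext4_map_blocks().  With create == 0 no handle is
 * needed; a positive return is the number of blocks mapped, 0 means the
 * range is a hole/unallocated, and a negative value is an error.  The
 * function name and the -ENOENT convention here are hypothetical.
 */
static int example_lookup_pblock(struct inode *inode, ext4_lblk_t lblk,
				 ext4_fsblk_t *pblk)
{
	struct ext4_map_blocks map;
	int ret;

	map.m_lblk = lblk;
	map.m_len = 1;
	ret = ext4_map_blocks(NULL, inode, &map, 0);
	if (ret > 0 && (map.m_flags & EXT4_MAP_MAPPED)) {
		*pblk = map.m_pblk;	/* physical block number */
		return 0;
	}
	return ret < 0 ? ret : -ENOENT;
}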
660 static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate) 660 static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate)
661 { 661 {
662 struct inode *inode = bh->b_assoc_map->host; 662 struct inode *inode = bh->b_assoc_map->host;
663 /* XXX: breaks on 32-bit > 16GB. Is that even supported? */ 663 /* XXX: breaks on 32-bit > 16GB. Is that even supported? */
664 loff_t offset = (loff_t)(uintptr_t)bh->b_private << inode->i_blkbits; 664 loff_t offset = (loff_t)(uintptr_t)bh->b_private << inode->i_blkbits;
665 int err; 665 int err;
666 if (!uptodate) 666 if (!uptodate)
667 return; 667 return;
668 WARN_ON(!buffer_unwritten(bh)); 668 WARN_ON(!buffer_unwritten(bh));
669 err = ext4_convert_unwritten_extents(NULL, inode, offset, bh->b_size); 669 err = ext4_convert_unwritten_extents(NULL, inode, offset, bh->b_size);
670 } 670 }
671 671
672 /* Maximum number of blocks we map for direct IO at once. */ 672 /* Maximum number of blocks we map for direct IO at once. */
673 #define DIO_MAX_BLOCKS 4096 673 #define DIO_MAX_BLOCKS 4096
674 674
675 static int _ext4_get_block(struct inode *inode, sector_t iblock, 675 static int _ext4_get_block(struct inode *inode, sector_t iblock,
676 struct buffer_head *bh, int flags) 676 struct buffer_head *bh, int flags)
677 { 677 {
678 handle_t *handle = ext4_journal_current_handle(); 678 handle_t *handle = ext4_journal_current_handle();
679 struct ext4_map_blocks map; 679 struct ext4_map_blocks map;
680 int ret = 0, started = 0; 680 int ret = 0, started = 0;
681 int dio_credits; 681 int dio_credits;
682 682
683 if (ext4_has_inline_data(inode)) 683 if (ext4_has_inline_data(inode))
684 return -ERANGE; 684 return -ERANGE;
685 685
686 map.m_lblk = iblock; 686 map.m_lblk = iblock;
687 map.m_len = bh->b_size >> inode->i_blkbits; 687 map.m_len = bh->b_size >> inode->i_blkbits;
688 688
689 if (flags && !(flags & EXT4_GET_BLOCKS_NO_LOCK) && !handle) { 689 if (flags && !(flags & EXT4_GET_BLOCKS_NO_LOCK) && !handle) {
690 /* Direct IO write... */ 690 /* Direct IO write... */
691 if (map.m_len > DIO_MAX_BLOCKS) 691 if (map.m_len > DIO_MAX_BLOCKS)
692 map.m_len = DIO_MAX_BLOCKS; 692 map.m_len = DIO_MAX_BLOCKS;
693 dio_credits = ext4_chunk_trans_blocks(inode, map.m_len); 693 dio_credits = ext4_chunk_trans_blocks(inode, map.m_len);
694 handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, 694 handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
695 dio_credits); 695 dio_credits);
696 if (IS_ERR(handle)) { 696 if (IS_ERR(handle)) {
697 ret = PTR_ERR(handle); 697 ret = PTR_ERR(handle);
698 return ret; 698 return ret;
699 } 699 }
700 started = 1; 700 started = 1;
701 } 701 }
702 702
703 ret = ext4_map_blocks(handle, inode, &map, flags); 703 ret = ext4_map_blocks(handle, inode, &map, flags);
704 if (ret > 0) { 704 if (ret > 0) {
705 ext4_io_end_t *io_end = ext4_inode_aio(inode); 705 ext4_io_end_t *io_end = ext4_inode_aio(inode);
706 706
707 map_bh(bh, inode->i_sb, map.m_pblk); 707 map_bh(bh, inode->i_sb, map.m_pblk);
708 bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; 708 bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
709 if (IS_DAX(inode) && buffer_unwritten(bh) && !io_end) { 709 if (IS_DAX(inode) && buffer_unwritten(bh) && !io_end) {
710 bh->b_assoc_map = inode->i_mapping; 710 bh->b_assoc_map = inode->i_mapping;
711 bh->b_private = (void *)(unsigned long)iblock; 711 bh->b_private = (void *)(unsigned long)iblock;
712 bh->b_end_io = ext4_end_io_unwritten; 712 bh->b_end_io = ext4_end_io_unwritten;
713 } 713 }
714 if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN) 714 if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN)
715 set_buffer_defer_completion(bh); 715 set_buffer_defer_completion(bh);
716 bh->b_size = inode->i_sb->s_blocksize * map.m_len; 716 bh->b_size = inode->i_sb->s_blocksize * map.m_len;
717 ret = 0; 717 ret = 0;
718 } 718 }
719 if (started) 719 if (started)
720 ext4_journal_stop(handle); 720 ext4_journal_stop(handle);
721 return ret; 721 return ret;
722 } 722 }
723 723
724 int ext4_get_block(struct inode *inode, sector_t iblock, 724 int ext4_get_block(struct inode *inode, sector_t iblock,
725 struct buffer_head *bh, int create) 725 struct buffer_head *bh, int create)
726 { 726 {
727 return _ext4_get_block(inode, iblock, bh, 727 return _ext4_get_block(inode, iblock, bh,
728 create ? EXT4_GET_BLOCKS_CREATE : 0); 728 create ? EXT4_GET_BLOCKS_CREATE : 0);
729 } 729 }
730 730
731 /* 731 /*
732 * `handle' can be NULL if create is zero 732 * `handle' can be NULL if create is zero
733 */ 733 */
734 struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, 734 struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
735 ext4_lblk_t block, int create) 735 ext4_lblk_t block, int create)
736 { 736 {
737 struct ext4_map_blocks map; 737 struct ext4_map_blocks map;
738 struct buffer_head *bh; 738 struct buffer_head *bh;
739 int err; 739 int err;
740 740
741 J_ASSERT(handle != NULL || create == 0); 741 J_ASSERT(handle != NULL || create == 0);
742 742
743 map.m_lblk = block; 743 map.m_lblk = block;
744 map.m_len = 1; 744 map.m_len = 1;
745 err = ext4_map_blocks(handle, inode, &map, 745 err = ext4_map_blocks(handle, inode, &map,
746 create ? EXT4_GET_BLOCKS_CREATE : 0); 746 create ? EXT4_GET_BLOCKS_CREATE : 0);
747 747
748 if (err == 0) 748 if (err == 0)
749 return create ? ERR_PTR(-ENOSPC) : NULL; 749 return create ? ERR_PTR(-ENOSPC) : NULL;
750 if (err < 0) 750 if (err < 0)
751 return ERR_PTR(err); 751 return ERR_PTR(err);
752 752
753 bh = sb_getblk(inode->i_sb, map.m_pblk); 753 bh = sb_getblk(inode->i_sb, map.m_pblk);
754 if (unlikely(!bh)) 754 if (unlikely(!bh))
755 return ERR_PTR(-ENOMEM); 755 return ERR_PTR(-ENOMEM);
756 if (map.m_flags & EXT4_MAP_NEW) { 756 if (map.m_flags & EXT4_MAP_NEW) {
757 J_ASSERT(create != 0); 757 J_ASSERT(create != 0);
758 J_ASSERT(handle != NULL); 758 J_ASSERT(handle != NULL);
759 759
760 /* 760 /*
761 * Now that we do not always journal data, we should 761 * Now that we do not always journal data, we should
762 * keep in mind whether this should always journal the 762 * keep in mind whether this should always journal the
763 * new buffer as metadata. For now, regular file 763 * new buffer as metadata. For now, regular file
764 * writes use ext4_get_block instead, so it's not a 764 * writes use ext4_get_block instead, so it's not a
765 * problem. 765 * problem.
766 */ 766 */
767 lock_buffer(bh); 767 lock_buffer(bh);
768 BUFFER_TRACE(bh, "call get_create_access"); 768 BUFFER_TRACE(bh, "call get_create_access");
769 err = ext4_journal_get_create_access(handle, bh); 769 err = ext4_journal_get_create_access(handle, bh);
770 if (unlikely(err)) { 770 if (unlikely(err)) {
771 unlock_buffer(bh); 771 unlock_buffer(bh);
772 goto errout; 772 goto errout;
773 } 773 }
774 if (!buffer_uptodate(bh)) { 774 if (!buffer_uptodate(bh)) {
775 memset(bh->b_data, 0, inode->i_sb->s_blocksize); 775 memset(bh->b_data, 0, inode->i_sb->s_blocksize);
776 set_buffer_uptodate(bh); 776 set_buffer_uptodate(bh);
777 } 777 }
778 unlock_buffer(bh); 778 unlock_buffer(bh);
779 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); 779 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
780 err = ext4_handle_dirty_metadata(handle, inode, bh); 780 err = ext4_handle_dirty_metadata(handle, inode, bh);
781 if (unlikely(err)) 781 if (unlikely(err))
782 goto errout; 782 goto errout;
783 } else 783 } else
784 BUFFER_TRACE(bh, "not a new buffer"); 784 BUFFER_TRACE(bh, "not a new buffer");
785 return bh; 785 return bh;
786 errout: 786 errout:
787 brelse(bh); 787 brelse(bh);
788 return ERR_PTR(err); 788 return ERR_PTR(err);
789 } 789 }
790 790
791 struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, 791 struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
792 ext4_lblk_t block, int create) 792 ext4_lblk_t block, int create)
793 { 793 {
794 struct buffer_head *bh; 794 struct buffer_head *bh;
795 795
796 bh = ext4_getblk(handle, inode, block, create); 796 bh = ext4_getblk(handle, inode, block, create);
797 if (IS_ERR(bh)) 797 if (IS_ERR(bh))
798 return bh; 798 return bh;
799 if (!bh || buffer_uptodate(bh)) 799 if (!bh || buffer_uptodate(bh))
800 return bh; 800 return bh;
801 ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh); 801 ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh);
802 wait_on_buffer(bh); 802 wait_on_buffer(bh);
803 if (buffer_uptodate(bh)) 803 if (buffer_uptodate(bh))
804 return bh; 804 return bh;
805 put_bh(bh); 805 put_bh(bh);
806 return ERR_PTR(-EIO); 806 return ERR_PTR(-EIO);
807 } 807 }
808 808
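/*
 * Illustrative sketch (editor's example, not kernel source): callers of
 * ext4_bread() must handle three outcomes - an ERR_PTR() on failure, a
 * NULL when create == 0 and the block is a hole, or a valid, uptodate
 * buffer_head that must be released with brelse().
 */
static int example_read_block(struct inode *inode, ext4_lblk_t block)
{
	struct buffer_head *bh;

	bh = ext4_bread(NULL, inode, block, 0);
	if (IS_ERR(bh))
		return PTR_ERR(bh);	/* mapping or I/O error */
	if (!bh)
		return 0;		/* hole: nothing allocated here */
	/* ... use bh->b_data ... */
	brelse(bh);
	return 0;
}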
809 int ext4_walk_page_buffers(handle_t *handle, 809 int ext4_walk_page_buffers(handle_t *handle,
810 struct buffer_head *head, 810 struct buffer_head *head,
811 unsigned from, 811 unsigned from,
812 unsigned to, 812 unsigned to,
813 int *partial, 813 int *partial,
814 int (*fn)(handle_t *handle, 814 int (*fn)(handle_t *handle,
815 struct buffer_head *bh)) 815 struct buffer_head *bh))
816 { 816 {
817 struct buffer_head *bh; 817 struct buffer_head *bh;
818 unsigned block_start, block_end; 818 unsigned block_start, block_end;
819 unsigned blocksize = head->b_size; 819 unsigned blocksize = head->b_size;
820 int err, ret = 0; 820 int err, ret = 0;
821 struct buffer_head *next; 821 struct buffer_head *next;
822 822
823 for (bh = head, block_start = 0; 823 for (bh = head, block_start = 0;
824 ret == 0 && (bh != head || !block_start); 824 ret == 0 && (bh != head || !block_start);
825 block_start = block_end, bh = next) { 825 block_start = block_end, bh = next) {
826 next = bh->b_this_page; 826 next = bh->b_this_page;
827 block_end = block_start + blocksize; 827 block_end = block_start + blocksize;
828 if (block_end <= from || block_start >= to) { 828 if (block_end <= from || block_start >= to) {
829 if (partial && !buffer_uptodate(bh)) 829 if (partial && !buffer_uptodate(bh))
830 *partial = 1; 830 *partial = 1;
831 continue; 831 continue;
832 } 832 }
833 err = (*fn)(handle, bh); 833 err = (*fn)(handle, bh);
834 if (!ret) 834 if (!ret)
835 ret = err; 835 ret = err;
836 } 836 }
837 return ret; 837 return ret;
838 } 838 }
839 839
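/*
 * Illustrative sketch (editor's example): ext4_walk_page_buffers()
 * applies the callback to every buffer_head overlapping [from, to) and
 * stops at the first error.  Real callers pass helpers such as
 * do_journal_get_write_access (below) or write_end_fn; this no-op
 * callback is hypothetical, used e.g. as
 * ext4_walk_page_buffers(handle, page_buffers(page), from, to, NULL,
 * example_bh_noop).
 */
static int example_bh_noop(handle_t *handle, struct buffer_head *bh)
{
	return 0;	/* nothing to do, never fails */
}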
840 /* 840 /*
841 * To preserve ordering, it is essential that the hole instantiation and 841 * To preserve ordering, it is essential that the hole instantiation and
842 * the data write be encapsulated in a single transaction. We cannot 842 * the data write be encapsulated in a single transaction. We cannot
843 * close off a transaction and start a new one between the ext4_get_block() 843 * close off a transaction and start a new one between the ext4_get_block()
844 * and the commit_write(). So doing the jbd2_journal_start at the start of 844 * and the commit_write(). So doing the jbd2_journal_start at the start of
845 * prepare_write() is the right place. 845 * prepare_write() is the right place.
846 * 846 *
847 * Also, this function can nest inside ext4_writepage(). In that case, we 847 * Also, this function can nest inside ext4_writepage(). In that case, we
848 * *know* that ext4_writepage() has generated enough buffer credits to do the 848 * *know* that ext4_writepage() has generated enough buffer credits to do the
849 * whole page. So we won't block on the journal in that case, which is good, 849 * whole page. So we won't block on the journal in that case, which is good,
850 * because the caller may be PF_MEMALLOC. 850 * because the caller may be PF_MEMALLOC.
851 * 851 *
852 * By accident, ext4 can be reentered when a transaction is open via 852 * By accident, ext4 can be reentered when a transaction is open via
853 * quota file writes. If we were to commit the transaction while thus 853 * quota file writes. If we were to commit the transaction while thus
854 * reentered, there can be a deadlock - we would be holding a quota 854 * reentered, there can be a deadlock - we would be holding a quota
855 * lock, and the commit would never complete if another thread had a 855 * lock, and the commit would never complete if another thread had a
856 * transaction open and was blocking on the quota lock - a ranking 856 * transaction open and was blocking on the quota lock - a ranking
857 * violation. 857 * violation.
858 * 858 *
859 * So what we do is to rely on the fact that jbd2_journal_stop/journal_start 859 * So what we do is to rely on the fact that jbd2_journal_stop/journal_start
860 * will _not_ run commit under these circumstances because handle->h_ref 860 * will _not_ run commit under these circumstances because handle->h_ref
861 * is elevated. We'll still have enough credits for the tiny quotafile 861 * is elevated. We'll still have enough credits for the tiny quotafile
862 * write. 862 * write.
863 */ 863 */
864 int do_journal_get_write_access(handle_t *handle, 864 int do_journal_get_write_access(handle_t *handle,
865 struct buffer_head *bh) 865 struct buffer_head *bh)
866 { 866 {
867 int dirty = buffer_dirty(bh); 867 int dirty = buffer_dirty(bh);
868 int ret; 868 int ret;
869 869
870 if (!buffer_mapped(bh) || buffer_freed(bh)) 870 if (!buffer_mapped(bh) || buffer_freed(bh))
871 return 0; 871 return 0;
872 /* 872 /*
873 * __block_write_begin() could have dirtied some buffers. Clean 873 * __block_write_begin() could have dirtied some buffers. Clean
874 * the dirty bit as jbd2_journal_get_write_access() could complain 874 * the dirty bit as jbd2_journal_get_write_access() could complain
875 * otherwise about fs integrity issues. Setting of the dirty bit 875 * otherwise about fs integrity issues. Setting of the dirty bit
876 * by __block_write_begin() isn't a real problem here as we clear 876 * by __block_write_begin() isn't a real problem here as we clear
877 * the bit before releasing a page lock and thus writeback cannot 877 * the bit before releasing a page lock and thus writeback cannot
878 * ever write the buffer. 878 * ever write the buffer.
879 */ 879 */
880 if (dirty) 880 if (dirty)
881 clear_buffer_dirty(bh); 881 clear_buffer_dirty(bh);
882 BUFFER_TRACE(bh, "get write access"); 882 BUFFER_TRACE(bh, "get write access");
883 ret = ext4_journal_get_write_access(handle, bh); 883 ret = ext4_journal_get_write_access(handle, bh);
884 if (!ret && dirty) 884 if (!ret && dirty)
885 ret = ext4_handle_dirty_metadata(handle, NULL, bh); 885 ret = ext4_handle_dirty_metadata(handle, NULL, bh);
886 return ret; 886 return ret;
887 } 887 }
888 888
889 static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock, 889 static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock,
890 struct buffer_head *bh_result, int create); 890 struct buffer_head *bh_result, int create);
891 static int ext4_write_begin(struct file *file, struct address_space *mapping, 891 static int ext4_write_begin(struct file *file, struct address_space *mapping,
892 loff_t pos, unsigned len, unsigned flags, 892 loff_t pos, unsigned len, unsigned flags,
893 struct page **pagep, void **fsdata) 893 struct page **pagep, void **fsdata)
894 { 894 {
895 struct inode *inode = mapping->host; 895 struct inode *inode = mapping->host;
896 int ret, needed_blocks; 896 int ret, needed_blocks;
897 handle_t *handle; 897 handle_t *handle;
898 int retries = 0; 898 int retries = 0;
899 struct page *page; 899 struct page *page;
900 pgoff_t index; 900 pgoff_t index;
901 unsigned from, to; 901 unsigned from, to;
902 902
903 trace_ext4_write_begin(inode, pos, len, flags); 903 trace_ext4_write_begin(inode, pos, len, flags);
904 /* 904 /*
905 * Reserve one extra block for adding the inode to the orphan 905 * Reserve one extra block for adding the inode to the orphan
906 * list in case we allocate blocks but the write fails for some reason. 906 * list in case we allocate blocks but the write fails for some reason.
907 */ 907 */
908 needed_blocks = ext4_writepage_trans_blocks(inode) + 1; 908 needed_blocks = ext4_writepage_trans_blocks(inode) + 1;
909 index = pos >> PAGE_CACHE_SHIFT; 909 index = pos >> PAGE_CACHE_SHIFT;
910 from = pos & (PAGE_CACHE_SIZE - 1); 910 from = pos & (PAGE_CACHE_SIZE - 1);
911 to = from + len; 911 to = from + len;
912 912
913 if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) { 913 if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
914 ret = ext4_try_to_write_inline_data(mapping, inode, pos, len, 914 ret = ext4_try_to_write_inline_data(mapping, inode, pos, len,
915 flags, pagep); 915 flags, pagep);
916 if (ret < 0) 916 if (ret < 0)
917 return ret; 917 return ret;
918 if (ret == 1) 918 if (ret == 1)
919 return 0; 919 return 0;
920 } 920 }
921 921
922 /* 922 /*
923 * grab_cache_page_write_begin() can take a long time if the 923 * grab_cache_page_write_begin() can take a long time if the
924 * system is thrashing due to memory pressure, or if the page 924 * system is thrashing due to memory pressure, or if the page
925 * is being written back. So grab it first before we start 925 * is being written back. So grab it first before we start
926 * the transaction handle. This also allows us to allocate 926 * the transaction handle. This also allows us to allocate
927 * the page (if needed) without using GFP_NOFS. 927 * the page (if needed) without using GFP_NOFS.
928 */ 928 */
929 retry_grab: 929 retry_grab:
930 page = grab_cache_page_write_begin(mapping, index, flags); 930 page = grab_cache_page_write_begin(mapping, index, flags);
931 if (!page) 931 if (!page)
932 return -ENOMEM; 932 return -ENOMEM;
933 unlock_page(page); 933 unlock_page(page);
934 934
935 retry_journal: 935 retry_journal:
936 handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks); 936 handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks);
937 if (IS_ERR(handle)) { 937 if (IS_ERR(handle)) {
938 page_cache_release(page); 938 page_cache_release(page);
939 return PTR_ERR(handle); 939 return PTR_ERR(handle);
940 } 940 }
941 941
942 lock_page(page); 942 lock_page(page);
943 if (page->mapping != mapping) { 943 if (page->mapping != mapping) {
944 /* The page got truncated from under us */ 944 /* The page got truncated from under us */
945 unlock_page(page); 945 unlock_page(page);
946 page_cache_release(page); 946 page_cache_release(page);
947 ext4_journal_stop(handle); 947 ext4_journal_stop(handle);
948 goto retry_grab; 948 goto retry_grab;
949 } 949 }
950 /* In case writeback began while the page was unlocked */ 950 /* In case writeback began while the page was unlocked */
951 wait_for_stable_page(page); 951 wait_for_stable_page(page);
952 952
953 if (ext4_should_dioread_nolock(inode)) 953 if (ext4_should_dioread_nolock(inode))
954 ret = __block_write_begin(page, pos, len, ext4_get_block_write); 954 ret = __block_write_begin(page, pos, len, ext4_get_block_write);
955 else 955 else
956 ret = __block_write_begin(page, pos, len, ext4_get_block); 956 ret = __block_write_begin(page, pos, len, ext4_get_block);
957 957
958 if (!ret && ext4_should_journal_data(inode)) { 958 if (!ret && ext4_should_journal_data(inode)) {
959 ret = ext4_walk_page_buffers(handle, page_buffers(page), 959 ret = ext4_walk_page_buffers(handle, page_buffers(page),
960 from, to, NULL, 960 from, to, NULL,
961 do_journal_get_write_access); 961 do_journal_get_write_access);
962 } 962 }
963 963
964 if (ret) { 964 if (ret) {
965 unlock_page(page); 965 unlock_page(page);
966 /* 966 /*
967 * __block_write_begin may have instantiated a few blocks 967 * __block_write_begin may have instantiated a few blocks
968 * outside i_size. Trim these off again. Don't need 968 * outside i_size. Trim these off again. Don't need
969 * i_size_read because we hold i_mutex. 969 * i_size_read because we hold i_mutex.
970 * 970 *
971 * Add inode to orphan list in case we crash before 971 * Add inode to orphan list in case we crash before
972 * truncate finishes 972 * truncate finishes
973 */ 973 */
974 if (pos + len > inode->i_size && ext4_can_truncate(inode)) 974 if (pos + len > inode->i_size && ext4_can_truncate(inode))
975 ext4_orphan_add(handle, inode); 975 ext4_orphan_add(handle, inode);
976 976
977 ext4_journal_stop(handle); 977 ext4_journal_stop(handle);
978 if (pos + len > inode->i_size) { 978 if (pos + len > inode->i_size) {
979 ext4_truncate_failed_write(inode); 979 ext4_truncate_failed_write(inode);
980 /* 980 /*
981 * If truncate failed early the inode might 981 * If truncate failed early the inode might
982 * still be on the orphan list; we need to 982 * still be on the orphan list; we need to
983 * make sure the inode is removed from the 983 * make sure the inode is removed from the
984 * orphan list in that case. 984 * orphan list in that case.
985 */ 985 */
986 if (inode->i_nlink) 986 if (inode->i_nlink)
987 ext4_orphan_del(NULL, inode); 987 ext4_orphan_del(NULL, inode);
988 } 988 }
989 989
990 if (ret == -ENOSPC && 990 if (ret == -ENOSPC &&
991 ext4_should_retry_alloc(inode->i_sb, &retries)) 991 ext4_should_retry_alloc(inode->i_sb, &retries))
992 goto retry_journal; 992 goto retry_journal;
993 page_cache_release(page); 993 page_cache_release(page);
994 return ret; 994 return ret;
995 } 995 }
996 *pagep = page; 996 *pagep = page;
997 return ret; 997 return ret;
998 } 998 }
999 999
1000 /* For write_end() in data=journal mode */ 1000 /* For write_end() in data=journal mode */
1001 static int write_end_fn(handle_t *handle, struct buffer_head *bh) 1001 static int write_end_fn(handle_t *handle, struct buffer_head *bh)
1002 { 1002 {
1003 int ret; 1003 int ret;
1004 if (!buffer_mapped(bh) || buffer_freed(bh)) 1004 if (!buffer_mapped(bh) || buffer_freed(bh))
1005 return 0; 1005 return 0;
1006 set_buffer_uptodate(bh); 1006 set_buffer_uptodate(bh);
1007 ret = ext4_handle_dirty_metadata(handle, NULL, bh); 1007 ret = ext4_handle_dirty_metadata(handle, NULL, bh);
1008 clear_buffer_meta(bh); 1008 clear_buffer_meta(bh);
1009 clear_buffer_prio(bh); 1009 clear_buffer_prio(bh);
1010 return ret; 1010 return ret;
1011 } 1011 }
1012 1012
1013 /* 1013 /*
1014 * We need to pick up the new inode size which generic_commit_write gave us. 1014 * We need to pick up the new inode size which generic_commit_write gave us.
1015 * `file' can be NULL - e.g., when called from page_symlink(). 1015 * `file' can be NULL - e.g., when called from page_symlink().
1016 * 1016 *
1017 * ext4 never places buffers on inode->i_mapping->private_list; metadata 1017 * ext4 never places buffers on inode->i_mapping->private_list; metadata
1018 * buffers are managed internally. 1018 * buffers are managed internally.
1019 */ 1019 */
1020 static int ext4_write_end(struct file *file, 1020 static int ext4_write_end(struct file *file,
1021 struct address_space *mapping, 1021 struct address_space *mapping,
1022 loff_t pos, unsigned len, unsigned copied, 1022 loff_t pos, unsigned len, unsigned copied,
1023 struct page *page, void *fsdata) 1023 struct page *page, void *fsdata)
1024 { 1024 {
1025 handle_t *handle = ext4_journal_current_handle(); 1025 handle_t *handle = ext4_journal_current_handle();
1026 struct inode *inode = mapping->host; 1026 struct inode *inode = mapping->host;
1027 loff_t old_size = inode->i_size;
1027 int ret = 0, ret2; 1028 int ret = 0, ret2;
1028 int i_size_changed = 0; 1029 int i_size_changed = 0;
1029 1030
1030 trace_ext4_write_end(inode, pos, len, copied); 1031 trace_ext4_write_end(inode, pos, len, copied);
1031 if (ext4_test_inode_state(inode, EXT4_STATE_ORDERED_MODE)) { 1032 if (ext4_test_inode_state(inode, EXT4_STATE_ORDERED_MODE)) {
1032 ret = ext4_jbd2_file_inode(handle, inode); 1033 ret = ext4_jbd2_file_inode(handle, inode);
1033 if (ret) { 1034 if (ret) {
1034 unlock_page(page); 1035 unlock_page(page);
1035 page_cache_release(page); 1036 page_cache_release(page);
1036 goto errout; 1037 goto errout;
1037 } 1038 }
1038 } 1039 }
1039 1040
1040 if (ext4_has_inline_data(inode)) { 1041 if (ext4_has_inline_data(inode)) {
1041 ret = ext4_write_inline_data_end(inode, pos, len, 1042 ret = ext4_write_inline_data_end(inode, pos, len,
1042 copied, page); 1043 copied, page);
1043 if (ret < 0) 1044 if (ret < 0)
1044 goto errout; 1045 goto errout;
1045 copied = ret; 1046 copied = ret;
1046 } else 1047 } else
1047 copied = block_write_end(file, mapping, pos, 1048 copied = block_write_end(file, mapping, pos,
1048 len, copied, page, fsdata); 1049 len, copied, page, fsdata);
1049 /* 1050 /*
1050 * it's important to update i_size while still holding page lock: 1051 * it's important to update i_size while still holding page lock:
1051 * page writeout could otherwise come in and zero beyond i_size. 1052 * page writeout could otherwise come in and zero beyond i_size.
1052 */ 1053 */
1053 i_size_changed = ext4_update_inode_size(inode, pos + copied); 1054 i_size_changed = ext4_update_inode_size(inode, pos + copied);
1054 unlock_page(page); 1055 unlock_page(page);
1055 page_cache_release(page); 1056 page_cache_release(page);
1056 1057
1058 if (old_size < pos)
1059 pagecache_isize_extended(inode, old_size, pos);
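	/*
	 * Editor's note (assumption, based on the mmap-corruption fix
	 * this merge pulls in): when the write extended i_size,
	 * pagecache_isize_extended() write-protects the page straddling
	 * the old i_size so that a later mmap store faults into
	 * page_mkwrite() and the tail of the page is handled correctly
	 * when blocksize < pagesize.
	 */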
1057 /* 1060 /*
1058 * Don't mark the inode dirty under page lock. First, it unnecessarily 1061 * Don't mark the inode dirty under page lock. First, it unnecessarily
1059 * makes the holding time of page lock longer. Second, it forces lock 1062 * makes the holding time of page lock longer. Second, it forces lock
1060 * ordering of page lock and transaction start for journaling 1063 * ordering of page lock and transaction start for journaling
1061 * filesystems. 1064 * filesystems.
1062 */ 1065 */
1063 if (i_size_changed) 1066 if (i_size_changed)
1064 ext4_mark_inode_dirty(handle, inode); 1067 ext4_mark_inode_dirty(handle, inode);
1065 1068
1066 if (pos + len > inode->i_size && ext4_can_truncate(inode)) 1069 if (pos + len > inode->i_size && ext4_can_truncate(inode))
1067 /* If we have allocated more blocks and copied 1070 /* If we have allocated more blocks and copied
1068 * less, we will have blocks allocated outside 1071 * less, we will have blocks allocated outside
1069 * inode->i_size, so truncate them. 1072 * inode->i_size, so truncate them.
1070 */ 1073 */
1071 ext4_orphan_add(handle, inode); 1074 ext4_orphan_add(handle, inode);
1072 errout: 1075 errout:
1073 ret2 = ext4_journal_stop(handle); 1076 ret2 = ext4_journal_stop(handle);
1074 if (!ret) 1077 if (!ret)
1075 ret = ret2; 1078 ret = ret2;
1076 1079
1077 if (pos + len > inode->i_size) { 1080 if (pos + len > inode->i_size) {
1078 ext4_truncate_failed_write(inode); 1081 ext4_truncate_failed_write(inode);
1079 /* 1082 /*
1080 * If truncate failed early the inode might still be 1083 * If truncate failed early the inode might still be
1081 * on the orphan list; we need to make sure the inode 1084 * on the orphan list; we need to make sure the inode
1082 * is removed from the orphan list in that case. 1085 * is removed from the orphan list in that case.
1083 */ 1086 */
1084 if (inode->i_nlink) 1087 if (inode->i_nlink)
1085 ext4_orphan_del(NULL, inode); 1088 ext4_orphan_del(NULL, inode);
1086 } 1089 }
1087 1090
1088 return ret ? ret : copied; 1091 return ret ? ret : copied;
1089 } 1092 }
1090 1093
1091 static int ext4_journalled_write_end(struct file *file, 1094 static int ext4_journalled_write_end(struct file *file,
1092 struct address_space *mapping, 1095 struct address_space *mapping,
1093 loff_t pos, unsigned len, unsigned copied, 1096 loff_t pos, unsigned len, unsigned copied,
1094 struct page *page, void *fsdata) 1097 struct page *page, void *fsdata)
1095 { 1098 {
1096 handle_t *handle = ext4_journal_current_handle(); 1099 handle_t *handle = ext4_journal_current_handle();
1097 struct inode *inode = mapping->host; 1100 struct inode *inode = mapping->host;
1101 loff_t old_size = inode->i_size;
1098 int ret = 0, ret2; 1102 int ret = 0, ret2;
1099 int partial = 0; 1103 int partial = 0;
1100 unsigned from, to; 1104 unsigned from, to;
1101 int size_changed = 0; 1105 int size_changed = 0;
1102 1106
1103 trace_ext4_journalled_write_end(inode, pos, len, copied); 1107 trace_ext4_journalled_write_end(inode, pos, len, copied);
1104 from = pos & (PAGE_CACHE_SIZE - 1); 1108 from = pos & (PAGE_CACHE_SIZE - 1);
1105 to = from + len; 1109 to = from + len;
1106 1110
1107 BUG_ON(!ext4_handle_valid(handle)); 1111 BUG_ON(!ext4_handle_valid(handle));
1108 1112
1109 if (ext4_has_inline_data(inode)) 1113 if (ext4_has_inline_data(inode))
1110 copied = ext4_write_inline_data_end(inode, pos, len, 1114 copied = ext4_write_inline_data_end(inode, pos, len,
1111 copied, page); 1115 copied, page);
1112 else { 1116 else {
1113 if (copied < len) { 1117 if (copied < len) {
1114 if (!PageUptodate(page)) 1118 if (!PageUptodate(page))
1115 copied = 0; 1119 copied = 0;
1116 page_zero_new_buffers(page, from+copied, to); 1120 page_zero_new_buffers(page, from+copied, to);
1117 } 1121 }
1118 1122
1119 ret = ext4_walk_page_buffers(handle, page_buffers(page), from, 1123 ret = ext4_walk_page_buffers(handle, page_buffers(page), from,
1120 to, &partial, write_end_fn); 1124 to, &partial, write_end_fn);
1121 if (!partial) 1125 if (!partial)
1122 SetPageUptodate(page); 1126 SetPageUptodate(page);
1123 } 1127 }
1124 size_changed = ext4_update_inode_size(inode, pos + copied); 1128 size_changed = ext4_update_inode_size(inode, pos + copied);
1125 ext4_set_inode_state(inode, EXT4_STATE_JDATA); 1129 ext4_set_inode_state(inode, EXT4_STATE_JDATA);
1126 EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid; 1130 EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid;
1127 unlock_page(page); 1131 unlock_page(page);
1128 page_cache_release(page); 1132 page_cache_release(page);
1133
1134 if (old_size < pos)
1135 pagecache_isize_extended(inode, old_size, pos);
1129 1136
1130 if (size_changed) { 1137 if (size_changed) {
1131 ret2 = ext4_mark_inode_dirty(handle, inode); 1138 ret2 = ext4_mark_inode_dirty(handle, inode);
1132 if (!ret) 1139 if (!ret)
1133 ret = ret2; 1140 ret = ret2;
1134 } 1141 }
1135 1142
1136 if (pos + len > inode->i_size && ext4_can_truncate(inode)) 1143 if (pos + len > inode->i_size && ext4_can_truncate(inode))
1137 /* If we have allocated more blocks and copied 1144 /* If we have allocated more blocks and copied
1138 * less, we will have blocks allocated outside 1145 * less, we will have blocks allocated outside
1139 * inode->i_size, so truncate them. 1146 * inode->i_size, so truncate them.
1140 */ 1147 */
1141 ext4_orphan_add(handle, inode); 1148 ext4_orphan_add(handle, inode);
1142 1149
1143 ret2 = ext4_journal_stop(handle); 1150 ret2 = ext4_journal_stop(handle);
1144 if (!ret) 1151 if (!ret)
1145 ret = ret2; 1152 ret = ret2;
1146 if (pos + len > inode->i_size) { 1153 if (pos + len > inode->i_size) {
1147 ext4_truncate_failed_write(inode); 1154 ext4_truncate_failed_write(inode);
1148 /* 1155 /*
1149 * If truncate failed early the inode might still be 1156 * If truncate failed early the inode might still be
1150 * on the orphan list; we need to make sure the inode 1157 * on the orphan list; we need to make sure the inode
1151 * is removed from the orphan list in that case. 1158 * is removed from the orphan list in that case.
1152 */ 1159 */
1153 if (inode->i_nlink) 1160 if (inode->i_nlink)
1154 ext4_orphan_del(NULL, inode); 1161 ext4_orphan_del(NULL, inode);
1155 } 1162 }
1156 1163
1157 return ret ? ret : copied; 1164 return ret ? ret : copied;
1158 } 1165 }
1159 1166
1160 /* 1167 /*
1161 * Reserve a single cluster located at lblock 1168 * Reserve a single cluster located at lblock
1162 */ 1169 */
1163 static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) 1170 static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
1164 { 1171 {
1165 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1172 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1166 struct ext4_inode_info *ei = EXT4_I(inode); 1173 struct ext4_inode_info *ei = EXT4_I(inode);
1167 unsigned int md_needed; 1174 unsigned int md_needed;
1168 int ret; 1175 int ret;
1169 1176
1170 /* 1177 /*
1171 * We will charge metadata quota at writeout time; this saves 1178 * We will charge metadata quota at writeout time; this saves
1172 * us from metadata over-estimation, though we may go over by 1179 * us from metadata over-estimation, though we may go over by
1173 * a small amount in the end. Here we just reserve for data. 1180 * a small amount in the end. Here we just reserve for data.
1174 */ 1181 */
1175 ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1)); 1182 ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1));
1176 if (ret) 1183 if (ret)
1177 return ret; 1184 return ret;
1178 1185
1179 /* 1186 /*
1180 * Recalculate the number of metadata blocks to reserve 1187 * Recalculate the number of metadata blocks to reserve
1181 * in order to allocate nrblocks; 1188 * in order to allocate nrblocks;
1182 * the worst case is one extent per block. 1189 * the worst case is one extent per block.
1183 */ 1190 */
1184 spin_lock(&ei->i_block_reservation_lock); 1191 spin_lock(&ei->i_block_reservation_lock);
1185 /* 1192 /*
1186 * ext4_calc_metadata_amount() has side effects, which we have 1193 * ext4_calc_metadata_amount() has side effects, which we have
1187 * to be prepared to undo if we fail to claim space. 1194 * to be prepared to undo if we fail to claim space.
1188 */ 1195 */
1189 md_needed = 0; 1196 md_needed = 0;
1190 trace_ext4_da_reserve_space(inode, 0); 1197 trace_ext4_da_reserve_space(inode, 0);
1191 1198
1192 if (ext4_claim_free_clusters(sbi, 1, 0)) { 1199 if (ext4_claim_free_clusters(sbi, 1, 0)) {
1193 spin_unlock(&ei->i_block_reservation_lock); 1200 spin_unlock(&ei->i_block_reservation_lock);
1194 dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1)); 1201 dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
1195 return -ENOSPC; 1202 return -ENOSPC;
1196 } 1203 }
1197 ei->i_reserved_data_blocks++; 1204 ei->i_reserved_data_blocks++;
1198 spin_unlock(&ei->i_block_reservation_lock); 1205 spin_unlock(&ei->i_block_reservation_lock);
1199 1206
1200 return 0; /* success */ 1207 return 0; /* success */
1201 } 1208 }
1202 1209
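/*
 * Illustrative pairing (editor's sketch): a reservation taken by
 * ext4_da_reserve_space() is balanced later either by
 * ext4_da_update_reserve_space() once the blocks are really allocated,
 * or by ext4_da_release_space() if the delayed page is invalidated
 * first.  The caller-side error handling looks roughly like this; the
 * function name is hypothetical.
 */
static int example_da_reserve(struct inode *inode, ext4_lblk_t lblk)
{
	int ret = ext4_da_reserve_space(inode, lblk);

	if (ret)	/* -ENOSPC, or the quota reservation failed */
		return ret;
	/* the cluster is now counted in i_reserved_data_blocks */
	return 0;
}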
1203 static void ext4_da_release_space(struct inode *inode, int to_free) 1210 static void ext4_da_release_space(struct inode *inode, int to_free)
1204 { 1211 {
1205 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1212 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1206 struct ext4_inode_info *ei = EXT4_I(inode); 1213 struct ext4_inode_info *ei = EXT4_I(inode);
1207 1214
1208 if (!to_free) 1215 if (!to_free)
1209 return; /* Nothing to release, exit */ 1216 return; /* Nothing to release, exit */
1210 1217
1211 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1218 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
1212 1219
1213 trace_ext4_da_release_space(inode, to_free); 1220 trace_ext4_da_release_space(inode, to_free);
1214 if (unlikely(to_free > ei->i_reserved_data_blocks)) { 1221 if (unlikely(to_free > ei->i_reserved_data_blocks)) {
1215 /* 1222 /*
1216 * If there aren't enough reserved blocks, then the 1223 * If there aren't enough reserved blocks, then the
1217 * counter is messed up somewhere. Since this 1224 * counter is messed up somewhere. Since this
1218 * function is called from invalidatepage, it's 1225 * function is called from invalidatepage, it's
1219 * harmless to return without any action. 1226 * harmless to return without any action.
1220 */ 1227 */
1221 ext4_warning(inode->i_sb, "ext4_da_release_space: " 1228 ext4_warning(inode->i_sb, "ext4_da_release_space: "
1222 "ino %lu, to_free %d with only %d reserved " 1229 "ino %lu, to_free %d with only %d reserved "
1223 "data blocks", inode->i_ino, to_free, 1230 "data blocks", inode->i_ino, to_free,
1224 ei->i_reserved_data_blocks); 1231 ei->i_reserved_data_blocks);
1225 WARN_ON(1); 1232 WARN_ON(1);
1226 to_free = ei->i_reserved_data_blocks; 1233 to_free = ei->i_reserved_data_blocks;
1227 } 1234 }
1228 ei->i_reserved_data_blocks -= to_free; 1235 ei->i_reserved_data_blocks -= to_free;
1229 1236
1230 /* update fs dirty data blocks counter */ 1237 /* update fs dirty data blocks counter */
1231 percpu_counter_sub(&sbi->s_dirtyclusters_counter, to_free); 1238 percpu_counter_sub(&sbi->s_dirtyclusters_counter, to_free);
1232 1239
1233 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1240 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1234 1241
1235 dquot_release_reservation_block(inode, EXT4_C2B(sbi, to_free)); 1242 dquot_release_reservation_block(inode, EXT4_C2B(sbi, to_free));
1236 } 1243 }
1237 1244
static void ext4_da_page_release_reservation(struct page *page,
                                             unsigned int offset,
                                             unsigned int length)
{
        int to_release = 0;
        struct buffer_head *head, *bh;
        unsigned int curr_off = 0;
        struct inode *inode = page->mapping->host;
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
        unsigned int stop = offset + length;
        int num_clusters;
        ext4_fsblk_t lblk;

        BUG_ON(stop > PAGE_CACHE_SIZE || stop < length);

        head = page_buffers(page);
        bh = head;
        do {
                unsigned int next_off = curr_off + bh->b_size;

                if (next_off > stop)
                        break;

                if ((offset <= curr_off) && (buffer_delay(bh))) {
                        to_release++;
                        clear_buffer_delay(bh);
                }
                curr_off = next_off;
        } while ((bh = bh->b_this_page) != head);

        if (to_release) {
                lblk = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
                ext4_es_remove_extent(inode, lblk, to_release);
        }

        /*
         * If we have released all the blocks belonging to a cluster, then we
         * need to release the reserved space for that cluster.
         */
        num_clusters = EXT4_NUM_B2C(sbi, to_release);
        while (num_clusters > 0) {
                lblk = (page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits)) +
                        ((num_clusters - 1) << sbi->s_cluster_bits);
                if (sbi->s_cluster_ratio == 1 ||
                    !ext4_find_delalloc_cluster(inode, lblk))
                        ext4_da_release_space(inode, 1);

                num_clusters--;
        }
}
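
/*
 * Illustrative numbers (assuming 4K pages and a 1K block size): the
 * page carries four buffer_heads of b_size 1024, so invalidating the
 * whole page walks curr_off = 0, 1024, 2048, 3072 and can release up
 * to four delayed buffers.  With s_cluster_ratio == 1, EXT4_NUM_B2C()
 * then maps each released block to one cluster reservation.
 */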

/*
 * Delayed allocation stuff
 */

struct mpage_da_data {
        struct inode *inode;
        struct writeback_control *wbc;

        pgoff_t first_page;     /* The first page to write */
        pgoff_t next_page;      /* Current page to examine */
        pgoff_t last_page;      /* Last page to examine */
        /*
         * Extent to map - this can be after first_page because that can be
         * fully mapped.  We somewhat abuse m_flags to store whether the
         * extent is delalloc or unwritten.
         */
        struct ext4_map_blocks map;
        struct ext4_io_submit io_submit;        /* IO submission data */
};
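
/*
 * Rough lifecycle sketch (informal; inferred from the fields above and
 * the code below): the writepages code scans pages from first_page
 * towards last_page, advancing next_page as pages are locked.  Buffers
 * needing allocation accumulate in @map; once that extent is mapped,
 * the pages underlying it are submitted for IO and first_page catches
 * up to next_page.
 */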

static void mpage_release_unused_pages(struct mpage_da_data *mpd,
                                       bool invalidate)
{
        int nr_pages, i;
        pgoff_t index, end;
        struct pagevec pvec;
        struct inode *inode = mpd->inode;
        struct address_space *mapping = inode->i_mapping;

        /* This is necessary when next_page == 0. */
        if (mpd->first_page >= mpd->next_page)
                return;

        index = mpd->first_page;
        end = mpd->next_page - 1;
        if (invalidate) {
                ext4_lblk_t start, last;
                start = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
                last = end << (PAGE_CACHE_SHIFT - inode->i_blkbits);
                ext4_es_remove_extent(inode, start, last - start + 1);
        }

        pagevec_init(&pvec, 0);
        while (index <= end) {
                nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
                if (nr_pages == 0)
                        break;
                for (i = 0; i < nr_pages; i++) {
                        struct page *page = pvec.pages[i];
                        if (page->index > end)
                                break;
                        BUG_ON(!PageLocked(page));
                        BUG_ON(PageWriteback(page));
                        if (invalidate) {
                                block_invalidatepage(page, 0, PAGE_CACHE_SIZE);
                                ClearPageUptodate(page);
                        }
                        unlock_page(page);
                }
                index = pvec.pages[nr_pages - 1]->index + 1;
                pagevec_release(&pvec);
        }
}

static void ext4_print_free_blocks(struct inode *inode)
{
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
        struct super_block *sb = inode->i_sb;
        struct ext4_inode_info *ei = EXT4_I(inode);

        ext4_msg(sb, KERN_CRIT, "Total free blocks count %lld",
                 EXT4_C2B(EXT4_SB(inode->i_sb),
                          ext4_count_free_clusters(sb)));
        ext4_msg(sb, KERN_CRIT, "Free/Dirty block details");
        ext4_msg(sb, KERN_CRIT, "free_blocks=%lld",
                 (long long) EXT4_C2B(EXT4_SB(sb),
                        percpu_counter_sum(&sbi->s_freeclusters_counter)));
        ext4_msg(sb, KERN_CRIT, "dirty_blocks=%lld",
                 (long long) EXT4_C2B(EXT4_SB(sb),
                        percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
        ext4_msg(sb, KERN_CRIT, "Block reservation details");
        ext4_msg(sb, KERN_CRIT, "i_reserved_data_blocks=%u",
                 ei->i_reserved_data_blocks);
        return;
}

static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh)
{
        return (buffer_delay(bh) || buffer_unwritten(bh)) && buffer_dirty(bh);
}

/*
 * This function grabs code from the very beginning of ext4_map_blocks,
 * but assumes that the caller is coming from the delayed write path.
 * It looks up the requested blocks and sets the buffer delay bit under
 * the protection of i_data_sem.
 */
static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
                              struct ext4_map_blocks *map,
                              struct buffer_head *bh)
{
        struct extent_status es;
        int retval;
        sector_t invalid_block = ~((sector_t) 0xffff);
#ifdef ES_AGGRESSIVE_TEST
        struct ext4_map_blocks orig_map;

        memcpy(&orig_map, map, sizeof(*map));
#endif

        if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
                invalid_block = ~0;

        map->m_flags = 0;
        ext_debug("ext4_da_map_blocks(): inode %lu, max_blocks %u,"
                  "logical block %lu\n", inode->i_ino, map->m_len,
                  (unsigned long) map->m_lblk);

        /* Lookup extent status tree firstly */
        if (ext4_es_lookup_extent(inode, iblock, &es)) {
                if (ext4_es_is_hole(&es)) {
                        retval = 0;
                        down_read(&EXT4_I(inode)->i_data_sem);
                        goto add_delayed;
                }

                /*
                 * Delayed extent could be allocated by fallocate.
                 * So we need to check it.
                 */
                if (ext4_es_is_delayed(&es) && !ext4_es_is_unwritten(&es)) {
                        map_bh(bh, inode->i_sb, invalid_block);
                        set_buffer_new(bh);
                        set_buffer_delay(bh);
                        return 0;
                }

                map->m_pblk = ext4_es_pblock(&es) + iblock - es.es_lblk;
                retval = es.es_len - (iblock - es.es_lblk);
                if (retval > map->m_len)
                        retval = map->m_len;
                map->m_len = retval;
                if (ext4_es_is_written(&es))
                        map->m_flags |= EXT4_MAP_MAPPED;
                else if (ext4_es_is_unwritten(&es))
                        map->m_flags |= EXT4_MAP_UNWRITTEN;
                else
                        BUG_ON(1);

#ifdef ES_AGGRESSIVE_TEST
                ext4_map_blocks_es_recheck(NULL, inode, map, &orig_map, 0);
#endif
                return retval;
        }

        /*
         * Try to see if we can get the block without requesting a new
         * file system block.
         */
        down_read(&EXT4_I(inode)->i_data_sem);
        if (ext4_has_inline_data(inode))
                retval = 0;
        else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
                retval = ext4_ext_map_blocks(NULL, inode, map, 0);
        else
                retval = ext4_ind_map_blocks(NULL, inode, map, 0);

add_delayed:
        if (retval == 0) {
                int ret;
                /*
                 * XXX: __block_prepare_write() unmaps passed block,
                 * is it OK?
                 */
                /*
                 * If the block was allocated from a previously allocated
                 * cluster, then we don't need to reserve it again.  However
                 * we still need to reserve metadata for every block we're
                 * going to write.
                 */
                if (EXT4_SB(inode->i_sb)->s_cluster_ratio <= 1 ||
                    !ext4_find_delalloc_cluster(inode, map->m_lblk)) {
                        ret = ext4_da_reserve_space(inode, iblock);
                        if (ret) {
                                /* not enough space to reserve */
                                retval = ret;
                                goto out_unlock;
                        }
                }

                ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
                                            ~0, EXTENT_STATUS_DELAYED);
                if (ret) {
                        retval = ret;
                        goto out_unlock;
                }

                map_bh(bh, inode->i_sb, invalid_block);
                set_buffer_new(bh);
                set_buffer_delay(bh);
        } else if (retval > 0) {
                int ret;
                unsigned int status;

                if (unlikely(retval != map->m_len)) {
                        ext4_warning(inode->i_sb,
                                     "ES len assertion failed for inode "
                                     "%lu: retval %d != map->m_len %d",
                                     inode->i_ino, retval, map->m_len);
                        WARN_ON(1);
                }

                status = map->m_flags & EXT4_MAP_UNWRITTEN ?
                                EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
                ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
                                            map->m_pblk, status);
                if (ret != 0)
                        retval = ret;
        }

out_unlock:
        up_read((&EXT4_I(inode)->i_data_sem));

        return retval;
}
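
/*
 * Note on the invalid_block sentinel above (informal summary): a
 * delayed buffer must look mapped to the caller, so it is given a fake
 * b_blocknr near the top of the sector_t range; if the filesystem is
 * large enough that ~0xffff could be a real block number, ~0 is used
 * instead.
 */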

/*
 * This is a special get_block_t callback which is used by
 * ext4_da_write_begin().  It will either return mapped block or
 * reserve space for a single block.
 *
 * For delayed buffer_head we have BH_Mapped, BH_New, BH_Delay set.
 * We also have b_blocknr = -1 and b_bdev initialized properly.
 *
 * For unwritten buffer_head we have BH_Mapped, BH_New, BH_Unwritten set.
 * We also have b_blocknr = physicalblock mapping unwritten extent and b_bdev
 * initialized properly.
 */
int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
                           struct buffer_head *bh, int create)
{
        struct ext4_map_blocks map;
        int ret = 0;

        BUG_ON(create == 0);
        BUG_ON(bh->b_size != inode->i_sb->s_blocksize);

        map.m_lblk = iblock;
        map.m_len = 1;

        /*
         * First, we need to know whether the block is allocated already;
         * preallocated blocks are unmapped but should be treated the same
         * as allocated blocks.
         */
        ret = ext4_da_map_blocks(inode, iblock, &map, bh);
        if (ret <= 0)
                return ret;

        map_bh(bh, inode->i_sb, map.m_pblk);
        bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;

        if (buffer_unwritten(bh)) {
                /*
                 * A delayed write to unwritten bh should be marked new and
                 * mapped.  Mapped ensures that we don't do get_block multiple
                 * times when we write to the same offset and new ensures that
                 * we do proper zero out for partial write.
                 */
                set_buffer_new(bh);
                set_buffer_mapped(bh);
        }
        return 0;
}
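
/*
 * Usage sketch (illustrative; the exact call site lives elsewhere in
 * this file): ext4_da_write_begin() hands this function to the generic
 * buffered-write path as its get_block_t, roughly
 *
 *      ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep);
 *
 * so each one-block lookup either maps an existing block or records a
 * delayed reservation.
 */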

static int bget_one(handle_t *handle, struct buffer_head *bh)
{
        get_bh(bh);
        return 0;
}

static int bput_one(handle_t *handle, struct buffer_head *bh)
{
        put_bh(bh);
        return 0;
}

static int __ext4_journalled_writepage(struct page *page,
                                       unsigned int len)
{
        struct address_space *mapping = page->mapping;
        struct inode *inode = mapping->host;
        struct buffer_head *page_bufs = NULL;
        handle_t *handle = NULL;
        int ret = 0, err = 0;
        int inline_data = ext4_has_inline_data(inode);
        struct buffer_head *inode_bh = NULL;

        ClearPageChecked(page);

        if (inline_data) {
                BUG_ON(page->index != 0);
                BUG_ON(len > ext4_get_max_inline_size(inode));
                inode_bh = ext4_journalled_write_inline_data(inode, len, page);
                if (inode_bh == NULL)
                        goto out;
        } else {
                page_bufs = page_buffers(page);
                if (!page_bufs) {
                        BUG();
                        goto out;
                }
                ext4_walk_page_buffers(handle, page_bufs, 0, len,
                                       NULL, bget_one);
        }
        /*
         * As soon as we unlock the page, it can go away, but we have
         * references to buffers so we are safe.
         */
        unlock_page(page);

        handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
                                    ext4_writepage_trans_blocks(inode));
        if (IS_ERR(handle)) {
                ret = PTR_ERR(handle);
                goto out;
        }

        BUG_ON(!ext4_handle_valid(handle));

        if (inline_data) {
                BUFFER_TRACE(inode_bh, "get write access");
                ret = ext4_journal_get_write_access(handle, inode_bh);

                err = ext4_handle_dirty_metadata(handle, inode, inode_bh);

        } else {
                ret = ext4_walk_page_buffers(handle, page_bufs, 0, len, NULL,
                                             do_journal_get_write_access);

                err = ext4_walk_page_buffers(handle, page_bufs, 0, len, NULL,
                                             write_end_fn);
        }
        if (ret == 0)
                ret = err;
        EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid;
        err = ext4_journal_stop(handle);
        if (!ret)
                ret = err;

        if (!ext4_has_inline_data(inode))
                ext4_walk_page_buffers(NULL, page_bufs, 0, len,
                                       NULL, bput_one);
        ext4_set_inode_state(inode, EXT4_STATE_JDATA);
out:
        brelse(inode_bh);
        return ret;
}

/*
 * Note that we don't need to start a transaction unless we're journaling data
 * because we should have holes filled from ext4_page_mkwrite(). We even don't
 * need to file the inode to the transaction's list in ordered mode because if
 * we are writing back data added by write(), the inode is already there and if
 * we are writing back data modified via mmap(), no one guarantees in which
 * transaction the data will hit the disk. In case we are journaling data, we
 * cannot start transaction directly because transaction start ranks above page
 * lock so we have to do some magic.
 *
 * This function can get called via...
 * - ext4_writepages after taking page lock (have journal handle)
 * - journal_submit_inode_data_buffers (no journal handle)
 * - shrink_page_list via the kswapd/direct reclaim (no journal handle)
 * - grab_page_cache when doing write_begin (have journal handle)
 *
 * We don't do any block allocation in this function. If we have a page with
 * multiple blocks we need to write those buffer_heads that are mapped. This
 * is important for mmap-based writes. So if we do, with blocksize 1K,
 *      truncate(f, 1024);
 *      a = mmap(f, 0, 4096);
 *      a[0] = 'a';
 *      truncate(f, 4096);
 * we have the first buffer_head in the page mapped via the page_mkwrite
 * callback, but the other buffer_heads would be unmapped yet dirty (dirtied
 * via do_wp_page). So writepage should write the first block. If we modify
 * the mmap area beyond 1024 we will again get a page fault and the
 * page_mkwrite callback will do the block allocation and mark the
 * buffer_heads mapped.
 *
 * We redirty the page if we have any buffer_heads that are either delayed
 * or unwritten in the page.
 *
 * We can get recursively called as shown below.
 *
 *      ext4_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() ->
 *              ext4_writepage()
 *
 * But since we don't do any block allocation we should not deadlock.
 * The page also has the dirty flag cleared so we don't get recursive
 * page_lock.
 */
static int ext4_writepage(struct page *page,
                          struct writeback_control *wbc)
{
        int ret = 0;
        loff_t size;
        unsigned int len;
        struct buffer_head *page_bufs = NULL;
        struct inode *inode = page->mapping->host;
        struct ext4_io_submit io_submit;
        bool keep_towrite = false;

        trace_ext4_writepage(page);
        size = i_size_read(inode);
        if (page->index == size >> PAGE_CACHE_SHIFT)
                len = size & ~PAGE_CACHE_MASK;
        else
                len = PAGE_CACHE_SIZE;

        page_bufs = page_buffers(page);
        /*
         * We cannot do block allocation or other extent handling in this
         * function. If there are buffers needing that, we have to redirty
         * the page. But we may reach here when we do a journal commit via
         * journal_submit_inode_data_buffers() and in that case we must write
         * allocated buffers to achieve data=ordered mode guarantees.
         */
        if (ext4_walk_page_buffers(NULL, page_bufs, 0, len, NULL,
                                   ext4_bh_delay_or_unwritten)) {
                redirty_page_for_writepage(wbc, page);
                if (current->flags & PF_MEMALLOC) {
                        /*
                         * For memory cleaning there's no point in writing only
                         * some buffers. So just bail out. Warn if we came here
                         * from direct reclaim.
                         */
                        WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD))
                                                        == PF_MEMALLOC);
                        unlock_page(page);
                        return 0;
                }
                keep_towrite = true;
        }

        if (PageChecked(page) && ext4_should_journal_data(inode))
                /*
                 * It's mmapped pagecache.  Add buffers and journal it.  There
                 * doesn't seem much point in redirtying the page here.
                 */
                return __ext4_journalled_writepage(page, len);

        ext4_io_submit_init(&io_submit, wbc);
        io_submit.io_end = ext4_init_io_end(inode, GFP_NOFS);
        if (!io_submit.io_end) {
                redirty_page_for_writepage(wbc, page);
                unlock_page(page);
                return -ENOMEM;
        }
        ret = ext4_bio_write_page(&io_submit, page, len, wbc, keep_towrite);
        ext4_io_submit(&io_submit);
        /* Drop io_end reference we got from init */
        ext4_put_io_end_defer(io_submit.io_end);
        return ret;
}

static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)
{
        int len;
        loff_t size = i_size_read(mpd->inode);
        int err;

        BUG_ON(page->index != mpd->first_page);
        if (page->index == size >> PAGE_CACHE_SHIFT)
                len = size & ~PAGE_CACHE_MASK;
        else
                len = PAGE_CACHE_SIZE;
        clear_page_dirty_for_io(page);
        err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc, false);
        if (!err)
                mpd->wbc->nr_to_write--;
        mpd->first_page++;

        return err;
}

#define BH_FLAGS ((1 << BH_Unwritten) | (1 << BH_Delay))

/*
 * mballoc gives us at most this number of blocks...
 * XXX: That seems to be only a limitation of ext4_mb_normalize_request().
 * The rest of mballoc seems to handle chunks up to full group size.
 */
#define MAX_WRITEPAGES_EXTENT_LEN 2048
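
/*
 * For scale (simple arithmetic, not from the original source): with 4K
 * blocks, 2048 blocks caps a single mapping round at 8 MiB; with 1K
 * blocks it is 2 MiB.
 */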

/*
 * mpage_add_bh_to_extent - try to add bh to extent of blocks to map
 *
 * @mpd - extent of blocks
 * @lblk - logical number of the block in the file
 * @bh - buffer head we want to add to the extent
 *
 * The function is used to collect contiguous blocks in the same state.  If
 * the buffer doesn't require mapping for writeback and we haven't started
 * the extent of buffers to map yet, the function returns 'true' immediately
 * - the caller can write the buffer right away.  Otherwise the function
 * returns true if the block has been added to the extent, false if the
 * block couldn't be added.
 */
static bool mpage_add_bh_to_extent(struct mpage_da_data *mpd, ext4_lblk_t lblk,
                                   struct buffer_head *bh)
{
        struct ext4_map_blocks *map = &mpd->map;

        /* Buffer that doesn't need mapping for writeback? */
        if (!buffer_dirty(bh) || !buffer_mapped(bh) ||
            (!buffer_delay(bh) && !buffer_unwritten(bh))) {
                /* So far no extent to map => we write the buffer right away */
                if (map->m_len == 0)
                        return true;
                return false;
        }

        /* First block in the extent? */
        if (map->m_len == 0) {
                map->m_lblk = lblk;
                map->m_len = 1;
                map->m_flags = bh->b_state & BH_FLAGS;
                return true;
        }

        /* Don't go larger than mballoc is willing to allocate */
        if (map->m_len >= MAX_WRITEPAGES_EXTENT_LEN)
                return false;

        /* Can we merge the block to our big extent? */
        if (lblk == map->m_lblk + map->m_len &&
            (bh->b_state & BH_FLAGS) == map->m_flags) {
                map->m_len++;
                return true;
        }
        return false;
}
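
/*
 * Merge example (illustrative): four dirty delayed buffers at logical
 * blocks 100..103 arrive in order; the first call sets m_lblk = 100,
 * m_len = 1, and each following call matches the lblk == m_lblk + m_len
 * check, leaving m_lblk = 100, m_len = 4.  A buffer at block 105, or
 * one whose BH_FLAGS differ, would return false and end the extent.
 */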

/*
 * mpage_process_page_bufs - submit page buffers for IO or add them to extent
 *
 * @mpd - extent of blocks for mapping
 * @head - the first buffer in the page
 * @bh - buffer we should start processing from
 * @lblk - logical number of the block in the file corresponding to @bh
 *
 * Walk through page buffers from @bh up to @head (exclusive) and either
 * submit the page for IO if all buffers in this page were mapped and there's
 * no accumulated extent of buffers to map, or add buffers in the page to the
 * extent of buffers to map.  The function returns 1 if the caller can
 * continue by processing the next page, 0 if it should stop adding buffers
 * to the extent to map because we cannot extend it anymore.  It can also
 * return value < 0 in case of error during IO submission.
 */
static int mpage_process_page_bufs(struct mpage_da_data *mpd,
                                   struct buffer_head *head,
                                   struct buffer_head *bh,
                                   ext4_lblk_t lblk)
{
        struct inode *inode = mpd->inode;
        int err;
        ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1)
                                                        >> inode->i_blkbits;

        do {
                BUG_ON(buffer_locked(bh));

                if (lblk >= blocks || !mpage_add_bh_to_extent(mpd, lblk, bh)) {
                        /* Found extent to map? */
                        if (mpd->map.m_len)
                                return 0;
                        /* Everything mapped so far and we hit EOF */
                        break;
                }
        } while (lblk++, (bh = bh->b_this_page) != head);
        /* So far everything mapped? Submit the page for IO. */
        if (mpd->map.m_len == 0) {
                err = mpage_submit_page(mpd, head->b_page);
                if (err < 0)
                        return err;
        }
        return lblk < blocks;
}
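
/*
 * The 'blocks' bound above is a plain round-up: e.g. i_size = 10000
 * with 4K blocks (i_blkbits = 12) gives (10000 + 4095) >> 12 = 3, so
 * buffers at lblk 3 and beyond lie past EOF and are never added to the
 * extent.
 */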

/*
 * mpage_map_and_submit_buffers - update buffers corresponding to changed
 *                                extent and submit fully mapped pages for IO
 *
 * @mpd - description of extent to map, on return next extent to map
 *
 * Scan buffers corresponding to changed extent (we expect corresponding pages
 * to be already locked) and update buffer state according to new extent state.
 * We map delalloc buffers to their physical location, clear unwritten bits,
 * and mark buffers as uninit when we perform writes to unwritten extents
 * and do extent conversion after IO is finished.  If the last page is not
 * fully mapped, we update @map to the next extent in the last page that
 * needs mapping.  Otherwise we submit the page for IO.
 */
static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
{
        struct pagevec pvec;
        int nr_pages, i;
        struct inode *inode = mpd->inode;
        struct buffer_head *head, *bh;
        int bpp_bits = PAGE_CACHE_SHIFT - inode->i_blkbits;
        pgoff_t start, end;
        ext4_lblk_t lblk;
        sector_t pblock;
        int err;

        start = mpd->map.m_lblk >> bpp_bits;
        end = (mpd->map.m_lblk + mpd->map.m_len - 1) >> bpp_bits;
        lblk = start << bpp_bits;
        pblock = mpd->map.m_pblk;

        pagevec_init(&pvec, 0);
        while (start <= end) {
                nr_pages = pagevec_lookup(&pvec, inode->i_mapping, start,
                                          PAGEVEC_SIZE);
                if (nr_pages == 0)
                        break;
                for (i = 0; i < nr_pages; i++) {
                        struct page *page = pvec.pages[i];

                        if (page->index > end)
                                break;
                        /* Up to 'end' pages must be contiguous */
                        BUG_ON(page->index != start);
                        bh = head = page_buffers(page);
                        do {
                                if (lblk < mpd->map.m_lblk)
                                        continue;
                                if (lblk >= mpd->map.m_lblk + mpd->map.m_len) {
                                        /*
                                         * Buffer after end of mapped extent.
                                         * Find next buffer in the page to map.
                                         */
                                        mpd->map.m_len = 0;
                                        mpd->map.m_flags = 0;
                                        /*
                                         * FIXME: If dioread_nolock supports
                                         * blocksize < pagesize, we need to make
                                         * sure we add size mapped so far to
                                         * io_end->size as the following call
                                         * can submit the page for IO.
                                         */
                                        err = mpage_process_page_bufs(mpd, head,
                                                                      bh, lblk);
                                        pagevec_release(&pvec);
                                        if (err > 0)
                                                err = 0;
                                        return err;
                                }
                                if (buffer_delay(bh)) {
                                        clear_buffer_delay(bh);
                                        bh->b_blocknr = pblock++;
                                }
                                clear_buffer_unwritten(bh);
                        } while (lblk++, (bh = bh->b_this_page) != head);

                        /*
                         * FIXME: This is going to break if dioread_nolock
                         * supports blocksize < pagesize as we will try to
                         * convert potentially unmapped parts of inode.
                         */
                        mpd->io_submit.io_end->size += PAGE_CACHE_SIZE;
                        /* Page fully mapped - let IO run! */
                        err = mpage_submit_page(mpd, page);
                        if (err < 0) {
                                pagevec_release(&pvec);
                                return err;
                        }
                        start++;
                }
                pagevec_release(&pvec);
        }
        /* Extent fully mapped and matches with page boundary. We are done. */
        mpd->map.m_len = 0;
        mpd->map.m_flags = 0;
        return 0;
}
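
/*
 * Index arithmetic above, by example (assuming 4K pages and 1K blocks,
 * so bpp_bits = 2): an extent with m_lblk = 5 and m_len = 9 covers
 * logical blocks 5..13, i.e. pages start = 5 >> 2 = 1 through
 * end = 13 >> 2 = 3, and the buffer walk begins at lblk = 1 << 2 = 4,
 * skipping blocks below m_lblk via the 'lblk < mpd->map.m_lblk' check.
 */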

static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
{
        struct inode *inode = mpd->inode;
        struct ext4_map_blocks *map = &mpd->map;
        int get_blocks_flags;
        int err, dioread_nolock;

        trace_ext4_da_write_pages_extent(inode, map);
        /*
         * Call ext4_map_blocks() to allocate any delayed allocation blocks, or
         * to convert an unwritten extent to be initialized (in the case
         * where we have written into one or more preallocated blocks). It is
         * possible that we're going to need more metadata blocks than
         * previously reserved. However we must not fail because we're in
         * writeback and there is nothing we can do about it so it might result
         * in data loss. So use reserved blocks to allocate metadata if
         * possible.
         *
         * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE if
         * the blocks in question are delalloc blocks. This indicates
         * that the blocks and quotas have already been checked when
         * the data was copied into the page cache.
         */
        get_blocks_flags = EXT4_GET_BLOCKS_CREATE |
                           EXT4_GET_BLOCKS_METADATA_NOFAIL;
        dioread_nolock = ext4_should_dioread_nolock(inode);
        if (dioread_nolock)
                get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT;
        if (map->m_flags & (1 << BH_Delay))
                get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;

        err = ext4_map_blocks(handle, inode, map, get_blocks_flags);
        if (err < 0)
                return err;
        if (dioread_nolock && (map->m_flags & EXT4_MAP_UNWRITTEN)) {
                if (!mpd->io_submit.io_end->handle &&
                    ext4_handle_valid(handle)) {
                        mpd->io_submit.io_end->handle = handle->h_rsv_handle;
                        handle->h_rsv_handle = NULL;
                }
                ext4_set_io_unwritten_flag(inode, mpd->io_submit.io_end);
        }

        BUG_ON(map->m_len == 0);
        if (map->m_flags & EXT4_MAP_NEW) {
                struct block_device *bdev = inode->i_sb->s_bdev;
                int i;

                for (i = 0; i < map->m_len; i++)
                        unmap_underlying_metadata(bdev, map->m_pblk + i);
        }
        return 0;
}
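
/*
 * Flag composition, spelled out (restating the code above): the base
 * request is CREATE | METADATA_NOFAIL; mounting with dioread_nolock
 * adds IO_CREATE_EXT so the extent is allocated unwritten and converted
 * after IO completes, and a delayed extent adds DELALLOC_RESERVE so the
 * allocation consumes the reservation taken at write time instead of
 * re-checking blocks and quota.
 */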
2023 2030
2024 /* 2031 /*
2025 * mpage_map_and_submit_extent - map extent starting at mpd->lblk of length 2032 * mpage_map_and_submit_extent - map extent starting at mpd->lblk of length
2026 * mpd->len and submit pages underlying it for IO 2033 * mpd->len and submit pages underlying it for IO
2027 * 2034 *
2028 * @handle - handle for journal operations 2035 * @handle - handle for journal operations
2029 * @mpd - extent to map 2036 * @mpd - extent to map
2030 * @give_up_on_write - we set this to true iff there is a fatal error and there 2037 * @give_up_on_write - we set this to true iff there is a fatal error and there
2031 * is no hope of writing the data. The caller should discard 2038 * is no hope of writing the data. The caller should discard
2032 * dirty pages to avoid infinite loops. 2039 * dirty pages to avoid infinite loops.
2033 * 2040 *
2034 * The function maps extent starting at mpd->lblk of length mpd->len. If it is 2041 * The function maps extent starting at mpd->lblk of length mpd->len. If it is
2035 * delayed, blocks are allocated, if it is unwritten, we may need to convert 2042 * delayed, blocks are allocated, if it is unwritten, we may need to convert
2036 * them to initialized or split the described range from larger unwritten 2043 * them to initialized or split the described range from larger unwritten
2037 * extent. Note that we need not map all the described range since allocation 2044 * extent. Note that we need not map all the described range since allocation
2038 * can return less blocks or the range is covered by more unwritten extents. We 2045 * can return less blocks or the range is covered by more unwritten extents. We
2039 * cannot map more because we are limited by reserved transaction credits. On 2046 * cannot map more because we are limited by reserved transaction credits. On
2040 * the other hand we always make sure that the last touched page is fully 2047 * the other hand we always make sure that the last touched page is fully
2041 * mapped so that it can be written out (and thus forward progress is 2048 * mapped so that it can be written out (and thus forward progress is
2042 * guaranteed). After mapping we submit all mapped pages for IO. 2049 * guaranteed). After mapping we submit all mapped pages for IO.
2043 */ 2050 */
static int mpage_map_and_submit_extent(handle_t *handle,
				       struct mpage_da_data *mpd,
				       bool *give_up_on_write)
{
	struct inode *inode = mpd->inode;
	struct ext4_map_blocks *map = &mpd->map;
	int err;
	loff_t disksize;
	int progress = 0;

	mpd->io_submit.io_end->offset =
				((loff_t)map->m_lblk) << inode->i_blkbits;
	do {
		err = mpage_map_one_extent(handle, mpd);
		if (err < 0) {
			struct super_block *sb = inode->i_sb;

			if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)
				goto invalidate_dirty_pages;
			/*
			 * Let the upper layers retry transient errors.
			 * In the case of ENOSPC, if ext4_count_free_clusters()
			 * is non-zero, a commit should free up blocks.
			 */
			if ((err == -ENOMEM) ||
			    (err == -ENOSPC && ext4_count_free_clusters(sb))) {
				if (progress)
					goto update_disksize;
				return err;
			}
			ext4_msg(sb, KERN_CRIT,
				 "Delayed block allocation failed for "
				 "inode %lu at logical offset %llu with"
				 " max blocks %u with error %d",
				 inode->i_ino,
				 (unsigned long long)map->m_lblk,
				 (unsigned)map->m_len, -err);
			ext4_msg(sb, KERN_CRIT,
				 "This should not happen!! Data will "
				 "be lost\n");
			if (err == -ENOSPC)
				ext4_print_free_blocks(inode);
		invalidate_dirty_pages:
			*give_up_on_write = true;
			return err;
		}
		progress = 1;
		/*
		 * Update buffer state, submit mapped pages, and get us new
		 * extent to map
		 */
		err = mpage_map_and_submit_buffers(mpd);
		if (err < 0)
			goto update_disksize;
	} while (map->m_len);

update_disksize:
	/*
	 * Update on-disk size after IO is submitted. Races with
	 * truncate are avoided by checking i_size under i_data_sem.
	 */
	disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT;
	if (disksize > EXT4_I(inode)->i_disksize) {
		int err2;
		loff_t i_size;

		down_write(&EXT4_I(inode)->i_data_sem);
		i_size = i_size_read(inode);
		if (disksize > i_size)
			disksize = i_size;
		if (disksize > EXT4_I(inode)->i_disksize)
			EXT4_I(inode)->i_disksize = disksize;
		err2 = ext4_mark_inode_dirty(handle, inode);
		up_write(&EXT4_I(inode)->i_data_sem);
		if (err2)
			ext4_error(inode->i_sb,
				   "Failed to mark inode %lu dirty",
				   inode->i_ino);
		if (!err)
			err = err2;
	}
	return err;
}

/*
 * Calculate the total number of credits to reserve for one writepages
 * iteration. This is called from ext4_writepages(). We map an extent of
 * up to MAX_WRITEPAGES_EXTENT_LEN blocks and then we go on and finish mapping
 * the last partial page. So in total we can map MAX_WRITEPAGES_EXTENT_LEN +
 * bpp - 1 blocks in bpp different extents.
 */
static int ext4_da_writepages_trans_blocks(struct inode *inode)
{
	int bpp = ext4_journal_blocks_per_page(inode);

	return ext4_meta_trans_blocks(inode,
				MAX_WRITEPAGES_EXTENT_LEN + bpp - 1, bpp);
}
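
/*
 * Illustrative arithmetic (an editorial sketch, not from this file): with
 * 4k pages over 1k blocks, ext4_journal_blocks_per_page() gives bpp = 4,
 * so one iteration reserves credits for MAX_WRITEPAGES_EXTENT_LEN + 3
 * blocks in up to 4 separate extents; the extra bpp - 1 blocks are exactly
 * what it takes to finish mapping the last, possibly partial, page.
 */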

/*
 * mpage_prepare_extent_to_map - find & lock contiguous range of dirty pages
 *				 and underlying extent to map
 *
 * @mpd - where to look for pages
 *
 * Walk dirty pages in the mapping. If they are fully mapped, submit them for
 * IO immediately. When we find a page which isn't mapped we start accumulating
 * an extent of buffers underlying these pages that needs mapping (formed by
 * either delayed or unwritten buffers). We also lock the pages containing
 * these buffers. The extent found is returned in the @mpd structure (starting
 * at mpd->lblk with length mpd->len blocks).
 *
 * Note that this function can attach bios to one io_end structure which are
 * neither logically nor physically contiguous. Although it may seem like an
 * unnecessary complication, it is actually inevitable in the
 * blocksize < pagesize case, as we need to track IO to all buffers underlying
 * a page in one io_end.
 */
static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
{
	struct address_space *mapping = mpd->inode->i_mapping;
	struct pagevec pvec;
	unsigned int nr_pages;
	long left = mpd->wbc->nr_to_write;
	pgoff_t index = mpd->first_page;
	pgoff_t end = mpd->last_page;
	int tag;
	int i, err = 0;
	int blkbits = mpd->inode->i_blkbits;
	ext4_lblk_t lblk;
	struct buffer_head *head;

	if (mpd->wbc->sync_mode == WB_SYNC_ALL || mpd->wbc->tagged_writepages)
		tag = PAGECACHE_TAG_TOWRITE;
	else
		tag = PAGECACHE_TAG_DIRTY;

	pagevec_init(&pvec, 0);
	mpd->map.m_len = 0;
	mpd->next_page = index;
	while (index <= end) {
		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
			      min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
		if (nr_pages == 0)
			goto out;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			/*
			 * At this point, the page may be truncated or
			 * invalidated (changing page->mapping to NULL), or
			 * even swizzled back from swapper_space to tmpfs file
			 * mapping. However, page->index will not change
			 * because we have a reference on the page.
			 */
			if (page->index > end)
				goto out;

			/*
			 * Accumulated enough dirty pages? This doesn't apply
			 * to WB_SYNC_ALL mode. For integrity sync we have to
			 * keep going because someone may be concurrently
			 * dirtying pages, and we might have synced a lot of
			 * newly appeared dirty pages, but have not synced all
			 * of the old dirty pages.
			 */
			if (mpd->wbc->sync_mode == WB_SYNC_NONE && left <= 0)
				goto out;

			/* If we can't merge this page, we are done. */
			if (mpd->map.m_len > 0 && mpd->next_page != page->index)
				goto out;

			lock_page(page);
			/*
			 * If the page is no longer dirty, or its mapping no
			 * longer corresponds to inode we are writing (which
			 * means it has been truncated or invalidated), or the
			 * page is already under writeback and we are not doing
			 * a data integrity writeback, skip the page
			 */
			if (!PageDirty(page) ||
			    (PageWriteback(page) &&
			     (mpd->wbc->sync_mode == WB_SYNC_NONE)) ||
			    unlikely(page->mapping != mapping)) {
				unlock_page(page);
				continue;
			}

			wait_on_page_writeback(page);
			BUG_ON(PageWriteback(page));

			if (mpd->map.m_len == 0)
				mpd->first_page = page->index;
			mpd->next_page = page->index + 1;
			/* Add all dirty buffers to mpd */
			lblk = ((ext4_lblk_t)page->index) <<
				(PAGE_CACHE_SHIFT - blkbits);
			head = page_buffers(page);
			err = mpage_process_page_bufs(mpd, head, head, lblk);
			if (err <= 0)
				goto out;
			err = 0;
			left--;
		}
		pagevec_release(&pvec);
		cond_resched();
	}
	return 0;
out:
	pagevec_release(&pvec);
	return err;
}
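
/*
 * Illustrative arithmetic (an editorial sketch, not from this file): the
 * lblk computation above turns a page index into the logical block of the
 * page's first buffer. With 4k pages and 1k blocks,
 * PAGE_CACHE_SHIFT - blkbits = 12 - 10 = 2, so page index 5 covers
 * logical blocks 20..23.
 */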

static int __writepage(struct page *page, struct writeback_control *wbc,
		       void *data)
{
	struct address_space *mapping = data;
	int ret = ext4_writepage(page, wbc);
	mapping_set_error(mapping, ret);
	return ret;
}

static int ext4_writepages(struct address_space *mapping,
			   struct writeback_control *wbc)
{
	pgoff_t writeback_index = 0;
	long nr_to_write = wbc->nr_to_write;
	int range_whole = 0;
	int cycled = 1;
	handle_t *handle = NULL;
	struct mpage_da_data mpd;
	struct inode *inode = mapping->host;
	int needed_blocks, rsv_blocks = 0, ret = 0;
	struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
	bool done;
	struct blk_plug plug;
	bool give_up_on_write = false;

	trace_ext4_writepages(inode, wbc);

	/*
	 * No pages to write? This is mainly a kludge to avoid starting
	 * a transaction for special inodes like journal inode on last iput()
	 * because that could violate lock ordering on umount
	 */
	if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
		goto out_writepages;

	if (ext4_should_journal_data(inode)) {
		struct blk_plug plug;

		blk_start_plug(&plug);
		ret = write_cache_pages(mapping, wbc, __writepage, mapping);
		blk_finish_plug(&plug);
		goto out_writepages;
	}

	/*
	 * If the filesystem has aborted, it is read-only, so return
	 * right away instead of dumping stack traces later on that
	 * will obscure the real source of the problem. We test
	 * EXT4_MF_FS_ABORTED instead of sb->s_flag's MS_RDONLY because
	 * the latter could be true if the filesystem is mounted
	 * read-only, and in that case, ext4_writepages should
	 * *never* be called, so if that ever happens, we would want
	 * the stack trace.
	 */
	if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) {
		ret = -EROFS;
		goto out_writepages;
	}

	if (ext4_should_dioread_nolock(inode)) {
		/*
		 * We may need to convert up to one extent per block in
		 * the page and we may dirty the inode.
		 */
		rsv_blocks = 1 + (PAGE_CACHE_SIZE >> inode->i_blkbits);
	}

	/*
	 * If we have inline data and arrive here, it means that
	 * we will soon create the block for the 1st page, so
	 * we'd better clear the inline data here.
	 */
	if (ext4_has_inline_data(inode)) {
		/* Just inode will be modified... */
		handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
		if (IS_ERR(handle)) {
			ret = PTR_ERR(handle);
			goto out_writepages;
		}
		BUG_ON(ext4_test_inode_state(inode,
				EXT4_STATE_MAY_INLINE_DATA));
		ext4_destroy_inline_data(handle, inode);
		ext4_journal_stop(handle);
	}

	if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
		range_whole = 1;

	if (wbc->range_cyclic) {
		writeback_index = mapping->writeback_index;
		if (writeback_index)
			cycled = 0;
		mpd.first_page = writeback_index;
		mpd.last_page = -1;
	} else {
		mpd.first_page = wbc->range_start >> PAGE_CACHE_SHIFT;
		mpd.last_page = wbc->range_end >> PAGE_CACHE_SHIFT;
	}

	mpd.inode = inode;
	mpd.wbc = wbc;
	ext4_io_submit_init(&mpd.io_submit, wbc);
retry:
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
		tag_pages_for_writeback(mapping, mpd.first_page, mpd.last_page);
	done = false;
	blk_start_plug(&plug);
	while (!done && mpd.first_page <= mpd.last_page) {
		/* For each extent of pages we use new io_end */
		mpd.io_submit.io_end = ext4_init_io_end(inode, GFP_KERNEL);
		if (!mpd.io_submit.io_end) {
			ret = -ENOMEM;
			break;
		}

		/*
		 * We have two constraints: We find one extent to map and we
		 * must always write out whole page (makes a difference when
		 * blocksize < pagesize) so that we don't block on IO when we
		 * try to write out the rest of the page. Journalled mode is
		 * not supported by delalloc.
		 */
		BUG_ON(ext4_should_journal_data(inode));
		needed_blocks = ext4_da_writepages_trans_blocks(inode);

		/* start a new transaction */
		handle = ext4_journal_start_with_reserve(inode,
				EXT4_HT_WRITE_PAGE, needed_blocks, rsv_blocks);
		if (IS_ERR(handle)) {
			ret = PTR_ERR(handle);
			ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: "
			       "%ld pages, ino %lu; err %d", __func__,
				wbc->nr_to_write, inode->i_ino, ret);
			/* Release allocated io_end */
			ext4_put_io_end(mpd.io_submit.io_end);
			break;
		}

		trace_ext4_da_write_pages(inode, mpd.first_page, mpd.wbc);
		ret = mpage_prepare_extent_to_map(&mpd);
		if (!ret) {
			if (mpd.map.m_len)
				ret = mpage_map_and_submit_extent(handle, &mpd,
					&give_up_on_write);
			else {
				/*
				 * We scanned the whole range (or exhausted
				 * nr_to_write), submitted what was mapped and
				 * didn't find anything needing mapping. We are
				 * done.
				 */
				done = true;
			}
		}
		ext4_journal_stop(handle);
		/* Submit prepared bio */
		ext4_io_submit(&mpd.io_submit);
		/* Unlock pages we didn't use */
		mpage_release_unused_pages(&mpd, give_up_on_write);
		/* Drop our io_end reference we got from init */
		ext4_put_io_end(mpd.io_submit.io_end);

		if (ret == -ENOSPC && sbi->s_journal) {
			/*
			 * Commit the transaction which would
			 * free blocks released in the transaction
			 * and try again
			 */
			jbd2_journal_force_commit_nested(sbi->s_journal);
			ret = 0;
			continue;
		}
		/* Fatal error - ENOMEM, EIO... */
		if (ret)
			break;
	}
	blk_finish_plug(&plug);
	if (!ret && !cycled && wbc->nr_to_write > 0) {
		cycled = 1;
		mpd.last_page = writeback_index - 1;
		mpd.first_page = 0;
		goto retry;
	}

	/* Update index */
	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		/*
		 * Set the writeback_index so that range_cyclic
		 * mode will write it back later
		 */
		mapping->writeback_index = mpd.first_page;

out_writepages:
	trace_ext4_writepages_result(inode, wbc, ret,
				     nr_to_write - wbc->nr_to_write);
	return ret;
}

static int ext4_nonda_switch(struct super_block *sb)
{
	s64 free_clusters, dirty_clusters;
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	/*
	 * Switch to non delalloc mode if we are running low on free
	 * blocks. The free block accounting via percpu counters can get
	 * slightly wrong with percpu_counter_batch getting accumulated
	 * on each CPU without updating global counters, and delalloc
	 * needs accurate free block accounting. So switch to non
	 * delalloc when we are near the error range.
	 */
	free_clusters =
		percpu_counter_read_positive(&sbi->s_freeclusters_counter);
	dirty_clusters =
		percpu_counter_read_positive(&sbi->s_dirtyclusters_counter);
	/*
	 * Start pushing delalloc when 1/2 of free blocks are dirty.
	 */
	if (dirty_clusters && (free_clusters < 2 * dirty_clusters))
		try_to_writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE);

	if (2 * free_clusters < 3 * dirty_clusters ||
	    free_clusters < (dirty_clusters + EXT4_FREECLUSTERS_WATERMARK)) {
		/*
		 * The free block count is less than 150% of dirty blocks,
		 * or free blocks are below the watermark.
		 */
		return 1;
	}
	return 0;
}
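
/*
 * Illustrative sketch (hypothetical helper, not from this file): the two
 * guards above are integer forms of "free < 1.5 * dirty" and
 * "free < dirty + watermark", avoiding floating point. A self-contained
 * equivalent:
 */
#if 0
static int low_on_free_clusters(s64 free, s64 dirty, s64 watermark)
{
	/* 2 * free < 3 * dirty  <=>  free < 1.5 * dirty */
	return 2 * free < 3 * dirty || free < dirty + watermark;
}
#endif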

/* We always reserve for an inode update; the superblock could be there too */
static int ext4_da_write_credits(struct inode *inode, loff_t pos, unsigned len)
{
	if (likely(EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
				EXT4_FEATURE_RO_COMPAT_LARGE_FILE)))
		return 1;

	if (pos + len <= 0x7fffffffULL)
		return 1;

	/* We might need to update the superblock to set LARGE_FILE */
	return 2;
}
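
/*
 * Illustrative note (an editorial sketch, not from this file): 0x7fffffff
 * is the 2^31 - 1 byte limit of pre-LARGE_FILE file sizes. A write ending
 * at or below that offset never needs the feature flag, so one credit for
 * the inode suffices; a write ending beyond it on a filesystem without
 * LARGE_FILE may also have to set the flag in the superblock, hence the
 * second credit.
 */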

static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
			       loff_t pos, unsigned len, unsigned flags,
			       struct page **pagep, void **fsdata)
{
	int ret, retries = 0;
	struct page *page;
	pgoff_t index;
	struct inode *inode = mapping->host;
	handle_t *handle;

	index = pos >> PAGE_CACHE_SHIFT;

	if (ext4_nonda_switch(inode->i_sb)) {
		*fsdata = (void *)FALL_BACK_TO_NONDELALLOC;
		return ext4_write_begin(file, mapping, pos,
					len, flags, pagep, fsdata);
	}
	*fsdata = (void *)0;
	trace_ext4_da_write_begin(inode, pos, len, flags);

	if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
		ret = ext4_da_write_inline_data_begin(mapping, inode,
						      pos, len, flags,
						      pagep, fsdata);
		if (ret < 0)
			return ret;
		if (ret == 1)
			return 0;
	}

	/*
	 * grab_cache_page_write_begin() can take a long time if the
	 * system is thrashing due to memory pressure, or if the page
	 * is being written back. So grab it first before we start
	 * the transaction handle. This also allows us to allocate
	 * the page (if needed) without using GFP_NOFS.
	 */
retry_grab:
	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page)
		return -ENOMEM;
	unlock_page(page);

	/*
	 * With delayed allocation, we don't log the i_disksize update
	 * if there is delayed block allocation. But we still need to
	 * journal the i_disksize update if we write to the end of a
	 * file which has an already mapped buffer.
	 */
retry_journal:
	handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
				ext4_da_write_credits(inode, pos, len));
	if (IS_ERR(handle)) {
		page_cache_release(page);
		return PTR_ERR(handle);
	}

	lock_page(page);
	if (page->mapping != mapping) {
		/* The page got truncated from under us */
		unlock_page(page);
		page_cache_release(page);
		ext4_journal_stop(handle);
		goto retry_grab;
	}
	/* In case writeback began while the page was unlocked */
	wait_for_stable_page(page);

	ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep);
	if (ret < 0) {
		unlock_page(page);
		ext4_journal_stop(handle);
		/*
		 * block_write_begin may have instantiated a few blocks
		 * outside i_size. Trim these off again. Don't need
		 * i_size_read because we hold i_mutex.
		 */
		if (pos + len > inode->i_size)
			ext4_truncate_failed_write(inode);

		if (ret == -ENOSPC &&
		    ext4_should_retry_alloc(inode->i_sb, &retries))
			goto retry_journal;

		page_cache_release(page);
		return ret;
	}

	*pagep = page;
	return ret;
}
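
/*
 * Illustrative note (an editorial sketch, not from this file): the
 * retry_grab/retry_journal pair above orders "grab page, then start
 * handle" so the page allocation never runs under an open handle (which
 * would require GFP_NOFS). Losing the page to truncate while it was
 * unlocked loops back to retry_grab; a retryable ENOSPC from
 * __block_write_begin() loops back to retry_journal with the page kept.
 */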

/*
 * Check if we should update i_disksize when a write to the end of the
 * file does not require block allocation
 */
static int ext4_da_should_update_i_disksize(struct page *page,
					    unsigned long offset)
{
	struct buffer_head *bh;
	struct inode *inode = page->mapping->host;
	unsigned int idx;
	int i;

	bh = page_buffers(page);
	idx = offset >> inode->i_blkbits;

	for (i = 0; i < idx; i++)
		bh = bh->b_this_page;

	if (!buffer_mapped(bh) || (buffer_delay(bh)) || buffer_unwritten(bh))
		return 0;
	return 1;
}
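
/*
 * Illustrative arithmetic (an editorial sketch, not from this file): with
 * 1k blocks, an offset of 3000 bytes into the page gives idx = 2, so the
 * loop above stops at the page's third buffer_head; i_disksize may be
 * updated without allocation only if that buffer is already mapped and
 * neither delayed nor unwritten.
 */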

static int ext4_da_write_end(struct file *file,
			     struct address_space *mapping,
			     loff_t pos, unsigned len, unsigned copied,
			     struct page *page, void *fsdata)
{
	struct inode *inode = mapping->host;
	int ret = 0, ret2;
	handle_t *handle = ext4_journal_current_handle();
	loff_t new_i_size;
	unsigned long start, end;
	int write_mode = (int)(unsigned long)fsdata;

	if (write_mode == FALL_BACK_TO_NONDELALLOC)
		return ext4_write_end(file, mapping, pos,
				      len, copied, page, fsdata);

	trace_ext4_da_write_end(inode, pos, len, copied);
	start = pos & (PAGE_CACHE_SIZE - 1);
	end = start + copied - 1;

	/*
	 * generic_write_end() will run mark_inode_dirty() if i_size
	 * changes. So let's piggyback the i_disksize mark_inode_dirty
	 * into that.
	 */
	new_i_size = pos + copied;
	if (copied && new_i_size > EXT4_I(inode)->i_disksize) {
		if (ext4_has_inline_data(inode) ||
		    ext4_da_should_update_i_disksize(page, end)) {
			ext4_update_i_disksize(inode, new_i_size);
			/*
			 * We need to mark the inode dirty even if
			 * new_i_size is less than inode->i_size but
			 * greater than i_disksize (hint: delalloc).
			 */
			ext4_mark_inode_dirty(handle, inode);
		}
	}

	if (write_mode != CONVERT_INLINE_DATA &&
	    ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA) &&
	    ext4_has_inline_data(inode))
		ret2 = ext4_da_write_inline_data_end(inode, pos, len, copied,
						     page);
	else
		ret2 = generic_write_end(file, mapping, pos, len, copied,
					 page, fsdata);

	copied = ret2;
	if (ret2 < 0)
		ret = ret2;
	ret2 = ext4_journal_stop(handle);
	if (!ret)
		ret = ret2;

	return ret ? ret : copied;
}

static void ext4_da_invalidatepage(struct page *page, unsigned int offset,
				   unsigned int length)
{
	/*
	 * Drop reserved blocks
	 */
	BUG_ON(!PageLocked(page));
	if (!page_has_buffers(page))
		goto out;

	ext4_da_page_release_reservation(page, offset, length);

out:
	ext4_invalidatepage(page, offset, length);

	return;
}

/*
 * Force all delayed allocation blocks to be allocated for a given inode.
 */
int ext4_alloc_da_blocks(struct inode *inode)
{
	trace_ext4_alloc_da_blocks(inode);

	if (!EXT4_I(inode)->i_reserved_data_blocks)
		return 0;

	/*
	 * We do something simple for now. The filemap_flush() will
	 * also start triggering a write of the data blocks, which is
	 * not strictly speaking necessary (and for users of
	 * laptop_mode, not even desirable). However, to do otherwise
	 * would require replicating code paths in:
	 *
	 * ext4_writepages() ->
	 *    write_cache_pages() ---> (via passed in callback function)
	 *        __mpage_da_writepage() -->
	 *           mpage_add_bh_to_extent()
	 *           mpage_da_map_blocks()
	 *
	 * The problem is that write_cache_pages(), located in
	 * mm/page-writeback.c, marks pages clean in preparation for
	 * doing I/O, which is not desirable if we're not planning on
	 * doing I/O at all.
	 *
	 * We could call write_cache_pages(), and then redirty all of
	 * the pages by calling redirty_page_for_writepage() but that
	 * would be ugly in the extreme. So instead we would need to
	 * replicate parts of the code in the above functions,
	 * simplifying them because we wouldn't actually intend to
	 * write out the pages, but rather only collect contiguous
	 * logical block extents, call the multi-block allocator, and
	 * then update the buffer heads with the block allocations.
	 *
	 * For now, though, we'll cheat by calling filemap_flush(),
	 * which will map the blocks, and start the I/O, but not
	 * actually wait for the I/O to complete.
	 */
	return filemap_flush(inode->i_mapping);
}

/*
 * bmap() is special. It gets used by applications such as lilo and by
 * the swapper to find the on-disk block of a specific piece of data.
 *
 * Naturally, this is dangerous if the block concerned is still in the
 * journal. If somebody makes a swapfile on an ext4 data-journaling
 * filesystem and enables swap, then they may get a nasty shock when the
 * data getting swapped to that swapfile suddenly gets overwritten by
 * the original zeros written out previously to the journal and
 * awaiting writeback in the kernel's buffer cache.
 *
 * So, if we see any bmap calls here on a modified, data-journaled file,
 * take extra steps to flush any blocks which might be in the cache.
 */
static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
{
	struct inode *inode = mapping->host;
	journal_t *journal;
	int err;

	/*
	 * We can get here for an inline file via the FIBMAP ioctl
	 */
	if (ext4_has_inline_data(inode))
		return 0;

	if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) &&
	    test_opt(inode->i_sb, DELALLOC)) {
		/*
		 * With delalloc we want to sync the file
		 * so that we can make sure we allocate
		 * blocks for the file
		 */
		filemap_write_and_wait(mapping);
	}

	if (EXT4_JOURNAL(inode) &&
	    ext4_test_inode_state(inode, EXT4_STATE_JDATA)) {
		/*
		 * This is a REALLY heavyweight approach, but the use of
		 * bmap on dirty files is expected to be extremely rare:
		 * only if we run lilo or swapon on a freshly made file
		 * do we expect this to happen.
		 *
		 * (bmap requires CAP_SYS_RAWIO so this does not
		 * represent an unprivileged user DOS attack --- we'd be
		 * in trouble if mortal users could trigger this path at
		 * will.)
		 *
		 * NB. EXT4_STATE_JDATA is not set on files other than
		 * regular files. If somebody wants to bmap a directory
		 * or symlink and gets confused because the buffer
		 * hasn't yet been flushed to disk, they deserve
		 * everything they get.
		 */

		ext4_clear_inode_state(inode, EXT4_STATE_JDATA);
		journal = EXT4_JOURNAL(inode);
		jbd2_journal_lock_updates(journal);
		err = jbd2_journal_flush(journal);
		jbd2_journal_unlock_updates(journal);

		if (err)
			return 0;
	}

	return generic_block_bmap(mapping, block, ext4_get_block);
}
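
/*
 * Illustrative sketch (hypothetical userspace program, not from this
 * file): the path above is what a FIBMAP caller ends up in. Minimal
 * usage, assuming a regular file on ext4 and CAP_SYS_RAWIO:
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/fs.h>

int main(int argc, char **argv)
{
	int block = 0;	/* in: logical block 0; out: physical block */
	int fd = open(argv[1], O_RDONLY);

	if (fd < 0 || ioctl(fd, FIBMAP, &block) < 0) {
		perror("FIBMAP");
		return 1;
	}
	printf("logical block 0 -> physical block %d\n", block);
	return 0;
}
#endif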

static int ext4_readpage(struct file *file, struct page *page)
{
	int ret = -EAGAIN;
	struct inode *inode = page->mapping->host;

	trace_ext4_readpage(page);

	if (ext4_has_inline_data(inode))
		ret = ext4_readpage_inline(inode, page);

	if (ret == -EAGAIN)
		return mpage_readpage(page, ext4_get_block);

	return ret;
}

static int
ext4_readpages(struct file *file, struct address_space *mapping,
		struct list_head *pages, unsigned nr_pages)
{
	struct inode *inode = mapping->host;

	/* If the file has inline data, no need to do readpages. */
	if (ext4_has_inline_data(inode))
		return 0;

	return mpage_readpages(mapping, pages, nr_pages, ext4_get_block);
}

static void ext4_invalidatepage(struct page *page, unsigned int offset,
				unsigned int length)
{
	trace_ext4_invalidatepage(page, offset, length);

	/* No journalling happens on data buffers when this function is used */
	WARN_ON(page_has_buffers(page) && buffer_jbd(page_buffers(page)));

	block_invalidatepage(page, offset, length);
}

static int __ext4_journalled_invalidatepage(struct page *page,
					    unsigned int offset,
					    unsigned int length)
{
	journal_t *journal = EXT4_JOURNAL(page->mapping->host);

	trace_ext4_journalled_invalidatepage(page, offset, length);

	/*
	 * If it's a full truncate we just forget about the pending dirtying
	 */
	if (offset == 0 && length == PAGE_CACHE_SIZE)
		ClearPageChecked(page);

	return jbd2_journal_invalidatepage(journal, page, offset, length);
}

/* Wrapper for aops... */
static void ext4_journalled_invalidatepage(struct page *page,
					   unsigned int offset,
					   unsigned int length)
{
	WARN_ON(__ext4_journalled_invalidatepage(page, offset, length) < 0);
}

static int ext4_releasepage(struct page *page, gfp_t wait)
{
	journal_t *journal = EXT4_JOURNAL(page->mapping->host);

	trace_ext4_releasepage(page);

	/* Page has dirty journalled data -> cannot release */
	if (PageChecked(page))
		return 0;
	if (journal)
		return jbd2_journal_try_to_free_buffers(journal, page, wait);
	else
		return try_to_free_buffers(page);
}

/*
 * ext4_get_block used when preparing for a DIO write or buffer write.
 * We allocate an uninitialized extent if blocks haven't been allocated.
 * The extent will be converted to initialized after the IO is complete.
 */
int ext4_get_block_write(struct inode *inode, sector_t iblock,
			 struct buffer_head *bh_result, int create)
{
	ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n",
		   inode->i_ino, create);
	return _ext4_get_block(inode, iblock, bh_result,
			       EXT4_GET_BLOCKS_IO_CREATE_EXT);
}

static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock,
		   struct buffer_head *bh_result, int create)
{
	ext4_debug("ext4_get_block_write_nolock: inode %lu, create flag %d\n",
		   inode->i_ino, create);
	return _ext4_get_block(inode, iblock, bh_result,
			       EXT4_GET_BLOCKS_NO_LOCK);
}

static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
			    ssize_t size, void *private)
{
	ext4_io_end_t *io_end = iocb->private;

	/* if not async direct IO just return */
	if (!io_end)
		return;

	ext_debug("ext4_end_io_dio(): io_end 0x%p "
		  "for inode %lu, iocb 0x%p, offset %llu, size %zd\n",
		  iocb->private, io_end->inode->i_ino, iocb, offset,
		  size);

	iocb->private = NULL;
	io_end->offset = offset;
	io_end->size = size;
	ext4_put_io_end(io_end);
}
2929 2936
2930 /* 2937 /*
2931 * For ext4 extent files, ext4 will do direct-io write to holes, 2938 * For ext4 extent files, ext4 will do direct-io write to holes,
2932 * preallocated extents, and those write extend the file, no need to 2939 * preallocated extents, and those write extend the file, no need to
2933 * fall back to buffered IO. 2940 * fall back to buffered IO.
2934 * 2941 *
2935 * For holes, we fallocate those blocks, mark them as unwritten 2942 * For holes, we fallocate those blocks, mark them as unwritten
2936 * If those blocks were preallocated, we mark sure they are split, but 2943 * If those blocks were preallocated, we mark sure they are split, but
2937 * still keep the range to write as unwritten. 2944 * still keep the range to write as unwritten.
2938 * 2945 *
2939 * The unwritten extents will be converted to written when DIO is completed. 2946 * The unwritten extents will be converted to written when DIO is completed.
2940 * For async direct IO, since the IO may still be pending when we 2947 * For async direct IO, since the IO may still be pending when we
2941 * return, we set up an end_io callback function, which will do the 2948 * return, we set up an end_io callback function, which will do the
2942 * conversion when the async direct IO has completed. 2949 * conversion when the async direct IO has completed.
2943 * 2950 *
2944 * If the O_DIRECT write will extend the file then add this inode to the 2951 * If the O_DIRECT write will extend the file then add this inode to the
2945 * orphan list, so recovery will truncate it back to the original size 2952 * orphan list, so recovery will truncate it back to the original size
2946 * if the machine crashes during the write. 2953 * if the machine crashes during the write.
2947 * 2954 *
2948 */ 2955 */
2949 static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, 2956 static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
2950 struct iov_iter *iter, loff_t offset) 2957 struct iov_iter *iter, loff_t offset)
2951 { 2958 {
2952 struct file *file = iocb->ki_filp; 2959 struct file *file = iocb->ki_filp;
2953 struct inode *inode = file->f_mapping->host; 2960 struct inode *inode = file->f_mapping->host;
2954 ssize_t ret; 2961 ssize_t ret;
2955 size_t count = iov_iter_count(iter); 2962 size_t count = iov_iter_count(iter);
2956 int overwrite = 0; 2963 int overwrite = 0;
2957 get_block_t *get_block_func = NULL; 2964 get_block_t *get_block_func = NULL;
2958 int dio_flags = 0; 2965 int dio_flags = 0;
2959 loff_t final_size = offset + count; 2966 loff_t final_size = offset + count;
2960 ext4_io_end_t *io_end = NULL; 2967 ext4_io_end_t *io_end = NULL;
2961 2968
2962 /* Use the old path for reads and writes beyond i_size. */ 2969 /* Use the old path for reads and writes beyond i_size. */
2963 if (rw != WRITE || final_size > inode->i_size) 2970 if (rw != WRITE || final_size > inode->i_size)
2964 return ext4_ind_direct_IO(rw, iocb, iter, offset); 2971 return ext4_ind_direct_IO(rw, iocb, iter, offset);
2965 2972
2966 BUG_ON(iocb->private == NULL); 2973 BUG_ON(iocb->private == NULL);
2967 2974
2968 /* 2975 /*
2969 * Make all waiters for direct IO properly wait also for extent 2976 * Make all waiters for direct IO properly wait also for extent
2970 * conversion. This also prevents races between truncate() and 2977 * conversion. This also prevents races between truncate() and
2971 * overwrite DIO, as i_dio_count needs to be incremented under i_mutex. 2978 * overwrite DIO, as i_dio_count needs to be incremented under i_mutex.
2972 */ 2979 */
2973 if (rw == WRITE) 2980 if (rw == WRITE)
2974 atomic_inc(&inode->i_dio_count); 2981 atomic_inc(&inode->i_dio_count);
2975 2982
2976 /* If we do an overwrite DIO, i_mutex locking can be released */ 2983 /* If we do an overwrite DIO, i_mutex locking can be released */
2977 overwrite = *((int *)iocb->private); 2984 overwrite = *((int *)iocb->private);
2978 2985
2979 if (overwrite) { 2986 if (overwrite) {
2980 down_read(&EXT4_I(inode)->i_data_sem); 2987 down_read(&EXT4_I(inode)->i_data_sem);
2981 mutex_unlock(&inode->i_mutex); 2988 mutex_unlock(&inode->i_mutex);
2982 } 2989 }
2983 2990
2984 /* 2991 /*
2985 * We can do direct writes to holes and to fallocated extents. 2992 * We can do direct writes to holes and to fallocated extents.
2986 * 2993 *
2987 * Blocks allocated to fill the hole are marked as unwritten 2994 * Blocks allocated to fill the hole are marked as unwritten
2988 * to prevent a parallel buffered read from exposing stale 2995 * to prevent a parallel buffered read from exposing stale
2989 * data before the DIO completes. 2996 * data before the DIO completes.
2990 * 2997 *
2991 * For previously fallocated extents, ext4 get_block will 2998 * For previously fallocated extents, ext4 get_block will
2992 * simply mark the buffer mapped but still keep the 2999 * simply mark the buffer mapped but still keep the
2993 * extents unwritten. 3000 * extents unwritten.
2994 * 3001 *
2995 * In the non-AIO case, we will convert those unwritten extents 3002 * In the non-AIO case, we will convert those unwritten extents
2996 * to written after returning from blockdev_direct_IO. 3003 * to written after returning from blockdev_direct_IO.
2997 * 3004 *
2998 * For async DIO, the conversion needs to be deferred until the 3005 * For async DIO, the conversion needs to be deferred until the
2999 * IO is completed. The ext4 end_io callback function will be 3006 * IO is completed. The ext4 end_io callback function will be
3000 * called to take care of the conversion work. Here, for the async 3007 * called to take care of the conversion work. Here, for the async
3001 * case, we allocate an io_end structure to hook to the iocb. 3008 * case, we allocate an io_end structure to hook to the iocb.
3002 */ 3009 */
3003 iocb->private = NULL; 3010 iocb->private = NULL;
3004 ext4_inode_aio_set(inode, NULL); 3011 ext4_inode_aio_set(inode, NULL);
3005 if (!is_sync_kiocb(iocb)) { 3012 if (!is_sync_kiocb(iocb)) {
3006 io_end = ext4_init_io_end(inode, GFP_NOFS); 3013 io_end = ext4_init_io_end(inode, GFP_NOFS);
3007 if (!io_end) { 3014 if (!io_end) {
3008 ret = -ENOMEM; 3015 ret = -ENOMEM;
3009 goto retake_lock; 3016 goto retake_lock;
3010 } 3017 }
3011 /* 3018 /*
3012 * Grab reference for DIO. Will be dropped in ext4_end_io_dio() 3019 * Grab reference for DIO. Will be dropped in ext4_end_io_dio()
3013 */ 3020 */
3014 iocb->private = ext4_get_io_end(io_end); 3021 iocb->private = ext4_get_io_end(io_end);
3015 /* 3022 /*
3016 * We save the io structure for the current async direct 3023 * We save the io structure for the current async direct
3017 * IO, so that ext4_map_blocks() can later flag in the 3024 * IO, so that ext4_map_blocks() can later flag in the
3018 * io structure whether there are unwritten extents that 3025 * io structure whether there are unwritten extents that
3019 * need to be converted when the IO is completed. 3026 * need to be converted when the IO is completed.
3020 */ 3027 */
3021 ext4_inode_aio_set(inode, io_end); 3028 ext4_inode_aio_set(inode, io_end);
3022 } 3029 }
3023 3030
3024 if (overwrite) { 3031 if (overwrite) {
3025 get_block_func = ext4_get_block_write_nolock; 3032 get_block_func = ext4_get_block_write_nolock;
3026 } else { 3033 } else {
3027 get_block_func = ext4_get_block_write; 3034 get_block_func = ext4_get_block_write;
3028 dio_flags = DIO_LOCKING; 3035 dio_flags = DIO_LOCKING;
3029 } 3036 }
3030 if (IS_DAX(inode)) 3037 if (IS_DAX(inode))
3031 ret = dax_do_io(rw, iocb, inode, iter, offset, get_block_func, 3038 ret = dax_do_io(rw, iocb, inode, iter, offset, get_block_func,
3032 ext4_end_io_dio, dio_flags); 3039 ext4_end_io_dio, dio_flags);
3033 else 3040 else
3034 ret = __blockdev_direct_IO(rw, iocb, inode, 3041 ret = __blockdev_direct_IO(rw, iocb, inode,
3035 inode->i_sb->s_bdev, iter, offset, 3042 inode->i_sb->s_bdev, iter, offset,
3036 get_block_func, 3043 get_block_func,
3037 ext4_end_io_dio, NULL, dio_flags); 3044 ext4_end_io_dio, NULL, dio_flags);
3038 3045
3039 /* 3046 /*
3040 * Put our reference to io_end. This can free the io_end structure e.g. 3047 * Put our reference to io_end. This can free the io_end structure e.g.
3041 * in sync IO case or in case of error. It can even perform extent 3048 * in sync IO case or in case of error. It can even perform extent
3042 * conversion if all bios we submitted finished before we got here. 3049 * conversion if all bios we submitted finished before we got here.
3043 * Note that in that case iocb->private can be already set to NULL 3050 * Note that in that case iocb->private can be already set to NULL
3044 * here. 3051 * here.
3045 */ 3052 */
3046 if (io_end) { 3053 if (io_end) {
3047 ext4_inode_aio_set(inode, NULL); 3054 ext4_inode_aio_set(inode, NULL);
3048 ext4_put_io_end(io_end); 3055 ext4_put_io_end(io_end);
3049 /* 3056 /*
3050 * When no IO was submitted, ext4_end_io_dio() was not 3057 * When no IO was submitted, ext4_end_io_dio() was not
3051 * called, so we have to put the iocb's reference. 3058 * called, so we have to put the iocb's reference.
3052 */ 3059 */
3053 if (ret <= 0 && ret != -EIOCBQUEUED && iocb->private) { 3060 if (ret <= 0 && ret != -EIOCBQUEUED && iocb->private) {
3054 WARN_ON(iocb->private != io_end); 3061 WARN_ON(iocb->private != io_end);
3055 WARN_ON(io_end->flag & EXT4_IO_END_UNWRITTEN); 3062 WARN_ON(io_end->flag & EXT4_IO_END_UNWRITTEN);
3056 ext4_put_io_end(io_end); 3063 ext4_put_io_end(io_end);
3057 iocb->private = NULL; 3064 iocb->private = NULL;
3058 } 3065 }
3059 } 3066 }
3060 if (ret > 0 && !overwrite && ext4_test_inode_state(inode, 3067 if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
3061 EXT4_STATE_DIO_UNWRITTEN)) { 3068 EXT4_STATE_DIO_UNWRITTEN)) {
3062 int err; 3069 int err;
3063 /* 3070 /*
3064 * In the non-AIO case, since the IO has already 3071 * In the non-AIO case, since the IO has already
3065 * completed, we can do the conversion right here. 3072 * completed, we can do the conversion right here.
3066 */ 3073 */
3067 err = ext4_convert_unwritten_extents(NULL, inode, 3074 err = ext4_convert_unwritten_extents(NULL, inode,
3068 offset, ret); 3075 offset, ret);
3069 if (err < 0) 3076 if (err < 0)
3070 ret = err; 3077 ret = err;
3071 ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); 3078 ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
3072 } 3079 }
3073 3080
3074 retake_lock: 3081 retake_lock:
3075 if (rw == WRITE) 3082 if (rw == WRITE)
3076 inode_dio_done(inode); 3083 inode_dio_done(inode);
3077 /* take i_mutex locking again if we did an overwrite DIO */ 3084 /* take i_mutex locking again if we did an overwrite DIO */
3078 if (overwrite) { 3085 if (overwrite) {
3079 up_read(&EXT4_I(inode)->i_data_sem); 3086 up_read(&EXT4_I(inode)->i_data_sem);
3080 mutex_lock(&inode->i_mutex); 3087 mutex_lock(&inode->i_mutex);
3081 } 3088 }
3082 3089
3083 return ret; 3090 return ret;
3084 } 3091 }
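
To make the path above concrete, here is a minimal userspace sketch (illustrative only; the file name, sizes and offsets are assumptions, not anything mandated by this code) that exercises exactly the case the comments describe: fallocate(2) creates unwritten extents, and an O_DIRECT write into that range goes through ext4_ext_direct_IO(), leaving the extents unwritten until the end_io conversion runs.

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <stdlib.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		void *buf;
		int fd;

		/* O_DIRECT requires block-aligned buffer, offset and length */
		if (posix_memalign(&buf, 4096, 4096))
			return 1;
		memset(buf, 'x', 4096);

		fd = open("testfile", O_RDWR | O_CREAT | O_DIRECT, 0600);
		if (fd < 0)
			return 1;
		/* Preallocate 1 MiB of unwritten extents */
		if (fallocate(fd, 0, 0, 1 << 20))
			return 1;
		/* Direct write into the preallocated range: the extent is
		 * split if needed and converted to written once the IO
		 * completes */
		if (pwrite(fd, buf, 4096, 8192) != 4096)
			return 1;
		close(fd);
		free(buf);
		return 0;
	}
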
3085 3092
3086 static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, 3093 static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
3087 struct iov_iter *iter, loff_t offset) 3094 struct iov_iter *iter, loff_t offset)
3088 { 3095 {
3089 struct file *file = iocb->ki_filp; 3096 struct file *file = iocb->ki_filp;
3090 struct inode *inode = file->f_mapping->host; 3097 struct inode *inode = file->f_mapping->host;
3091 size_t count = iov_iter_count(iter); 3098 size_t count = iov_iter_count(iter);
3092 ssize_t ret; 3099 ssize_t ret;
3093 3100
3094 /* 3101 /*
3095 * If we are doing data journalling we don't support O_DIRECT 3102 * If we are doing data journalling we don't support O_DIRECT
3096 */ 3103 */
3097 if (ext4_should_journal_data(inode)) 3104 if (ext4_should_journal_data(inode))
3098 return 0; 3105 return 0;
3099 3106
3100 /* Let buffer I/O handle the inline data case. */ 3107 /* Let buffer I/O handle the inline data case. */
3101 if (ext4_has_inline_data(inode)) 3108 if (ext4_has_inline_data(inode))
3102 return 0; 3109 return 0;
3103 3110
3104 trace_ext4_direct_IO_enter(inode, offset, count, rw); 3111 trace_ext4_direct_IO_enter(inode, offset, count, rw);
3105 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) 3112 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
3106 ret = ext4_ext_direct_IO(rw, iocb, iter, offset); 3113 ret = ext4_ext_direct_IO(rw, iocb, iter, offset);
3107 else 3114 else
3108 ret = ext4_ind_direct_IO(rw, iocb, iter, offset); 3115 ret = ext4_ind_direct_IO(rw, iocb, iter, offset);
3109 trace_ext4_direct_IO_exit(inode, offset, count, rw, ret); 3116 trace_ext4_direct_IO_exit(inode, offset, count, rw, ret);
3110 return ret; 3117 return ret;
3111 } 3118 }
3112 3119
3113 /* 3120 /*
3114 * Pages can be marked dirty completely asynchronously from ext4's journalling 3121 * Pages can be marked dirty completely asynchronously from ext4's journalling
3115 * activity, by filemap_sync_pte(), try_to_unmap_one(), etc. We cannot do 3122 * activity, by filemap_sync_pte(), try_to_unmap_one(), etc. We cannot do
3116 * much here because ->set_page_dirty is called under VFS locks. The page is 3123 * much here because ->set_page_dirty is called under VFS locks. The page is
3117 * not necessarily locked. 3124 * not necessarily locked.
3118 * 3125 *
3119 * We cannot just dirty the page and leave attached buffers clean, because the 3126 * We cannot just dirty the page and leave attached buffers clean, because the
3120 * buffers' dirty state is "definitive". We cannot just set the buffers dirty 3127 * buffers' dirty state is "definitive". We cannot just set the buffers dirty
3121 * or jbddirty because all the journalling code will explode. 3128 * or jbddirty because all the journalling code will explode.
3122 * 3129 *
3123 * So what we do is to mark the page "pending dirty" and next time writepage 3130 * So what we do is to mark the page "pending dirty" and next time writepage
3124 * is called, propagate that into the buffers appropriately. 3131 * is called, propagate that into the buffers appropriately.
3125 */ 3132 */
3126 static int ext4_journalled_set_page_dirty(struct page *page) 3133 static int ext4_journalled_set_page_dirty(struct page *page)
3127 { 3134 {
3128 SetPageChecked(page); 3135 SetPageChecked(page);
3129 return __set_page_dirty_nobuffers(page); 3136 return __set_page_dirty_nobuffers(page);
3130 } 3137 }
3131 3138
3132 static const struct address_space_operations ext4_aops = { 3139 static const struct address_space_operations ext4_aops = {
3133 .readpage = ext4_readpage, 3140 .readpage = ext4_readpage,
3134 .readpages = ext4_readpages, 3141 .readpages = ext4_readpages,
3135 .writepage = ext4_writepage, 3142 .writepage = ext4_writepage,
3136 .writepages = ext4_writepages, 3143 .writepages = ext4_writepages,
3137 .write_begin = ext4_write_begin, 3144 .write_begin = ext4_write_begin,
3138 .write_end = ext4_write_end, 3145 .write_end = ext4_write_end,
3139 .bmap = ext4_bmap, 3146 .bmap = ext4_bmap,
3140 .invalidatepage = ext4_invalidatepage, 3147 .invalidatepage = ext4_invalidatepage,
3141 .releasepage = ext4_releasepage, 3148 .releasepage = ext4_releasepage,
3142 .direct_IO = ext4_direct_IO, 3149 .direct_IO = ext4_direct_IO,
3143 .migratepage = buffer_migrate_page, 3150 .migratepage = buffer_migrate_page,
3144 .is_partially_uptodate = block_is_partially_uptodate, 3151 .is_partially_uptodate = block_is_partially_uptodate,
3145 .error_remove_page = generic_error_remove_page, 3152 .error_remove_page = generic_error_remove_page,
3146 }; 3153 };
3147 3154
3148 static const struct address_space_operations ext4_journalled_aops = { 3155 static const struct address_space_operations ext4_journalled_aops = {
3149 .readpage = ext4_readpage, 3156 .readpage = ext4_readpage,
3150 .readpages = ext4_readpages, 3157 .readpages = ext4_readpages,
3151 .writepage = ext4_writepage, 3158 .writepage = ext4_writepage,
3152 .writepages = ext4_writepages, 3159 .writepages = ext4_writepages,
3153 .write_begin = ext4_write_begin, 3160 .write_begin = ext4_write_begin,
3154 .write_end = ext4_journalled_write_end, 3161 .write_end = ext4_journalled_write_end,
3155 .set_page_dirty = ext4_journalled_set_page_dirty, 3162 .set_page_dirty = ext4_journalled_set_page_dirty,
3156 .bmap = ext4_bmap, 3163 .bmap = ext4_bmap,
3157 .invalidatepage = ext4_journalled_invalidatepage, 3164 .invalidatepage = ext4_journalled_invalidatepage,
3158 .releasepage = ext4_releasepage, 3165 .releasepage = ext4_releasepage,
3159 .direct_IO = ext4_direct_IO, 3166 .direct_IO = ext4_direct_IO,
3160 .is_partially_uptodate = block_is_partially_uptodate, 3167 .is_partially_uptodate = block_is_partially_uptodate,
3161 .error_remove_page = generic_error_remove_page, 3168 .error_remove_page = generic_error_remove_page,
3162 }; 3169 };
3163 3170
3164 static const struct address_space_operations ext4_da_aops = { 3171 static const struct address_space_operations ext4_da_aops = {
3165 .readpage = ext4_readpage, 3172 .readpage = ext4_readpage,
3166 .readpages = ext4_readpages, 3173 .readpages = ext4_readpages,
3167 .writepage = ext4_writepage, 3174 .writepage = ext4_writepage,
3168 .writepages = ext4_writepages, 3175 .writepages = ext4_writepages,
3169 .write_begin = ext4_da_write_begin, 3176 .write_begin = ext4_da_write_begin,
3170 .write_end = ext4_da_write_end, 3177 .write_end = ext4_da_write_end,
3171 .bmap = ext4_bmap, 3178 .bmap = ext4_bmap,
3172 .invalidatepage = ext4_da_invalidatepage, 3179 .invalidatepage = ext4_da_invalidatepage,
3173 .releasepage = ext4_releasepage, 3180 .releasepage = ext4_releasepage,
3174 .direct_IO = ext4_direct_IO, 3181 .direct_IO = ext4_direct_IO,
3175 .migratepage = buffer_migrate_page, 3182 .migratepage = buffer_migrate_page,
3176 .is_partially_uptodate = block_is_partially_uptodate, 3183 .is_partially_uptodate = block_is_partially_uptodate,
3177 .error_remove_page = generic_error_remove_page, 3184 .error_remove_page = generic_error_remove_page,
3178 }; 3185 };
3179 3186
3180 void ext4_set_aops(struct inode *inode) 3187 void ext4_set_aops(struct inode *inode)
3181 { 3188 {
3182 switch (ext4_inode_journal_mode(inode)) { 3189 switch (ext4_inode_journal_mode(inode)) {
3183 case EXT4_INODE_ORDERED_DATA_MODE: 3190 case EXT4_INODE_ORDERED_DATA_MODE:
3184 ext4_set_inode_state(inode, EXT4_STATE_ORDERED_MODE); 3191 ext4_set_inode_state(inode, EXT4_STATE_ORDERED_MODE);
3185 break; 3192 break;
3186 case EXT4_INODE_WRITEBACK_DATA_MODE: 3193 case EXT4_INODE_WRITEBACK_DATA_MODE:
3187 ext4_clear_inode_state(inode, EXT4_STATE_ORDERED_MODE); 3194 ext4_clear_inode_state(inode, EXT4_STATE_ORDERED_MODE);
3188 break; 3195 break;
3189 case EXT4_INODE_JOURNAL_DATA_MODE: 3196 case EXT4_INODE_JOURNAL_DATA_MODE:
3190 inode->i_mapping->a_ops = &ext4_journalled_aops; 3197 inode->i_mapping->a_ops = &ext4_journalled_aops;
3191 return; 3198 return;
3192 default: 3199 default:
3193 BUG(); 3200 BUG();
3194 } 3201 }
3195 if (test_opt(inode->i_sb, DELALLOC)) 3202 if (test_opt(inode->i_sb, DELALLOC))
3196 inode->i_mapping->a_ops = &ext4_da_aops; 3203 inode->i_mapping->a_ops = &ext4_da_aops;
3197 else 3204 else
3198 inode->i_mapping->a_ops = &ext4_aops; 3205 inode->i_mapping->a_ops = &ext4_aops;
3199 } 3206 }
3200 3207
3201 static int __ext4_block_zero_page_range(handle_t *handle, 3208 static int __ext4_block_zero_page_range(handle_t *handle,
3202 struct address_space *mapping, loff_t from, loff_t length) 3209 struct address_space *mapping, loff_t from, loff_t length)
3203 { 3210 {
3204 ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; 3211 ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT;
3205 unsigned offset = from & (PAGE_CACHE_SIZE-1); 3212 unsigned offset = from & (PAGE_CACHE_SIZE-1);
3206 unsigned blocksize, pos; 3213 unsigned blocksize, pos;
3207 ext4_lblk_t iblock; 3214 ext4_lblk_t iblock;
3208 struct inode *inode = mapping->host; 3215 struct inode *inode = mapping->host;
3209 struct buffer_head *bh; 3216 struct buffer_head *bh;
3210 struct page *page; 3217 struct page *page;
3211 int err = 0; 3218 int err = 0;
3212 3219
3213 page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT, 3220 page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT,
3214 mapping_gfp_mask(mapping) & ~__GFP_FS); 3221 mapping_gfp_mask(mapping) & ~__GFP_FS);
3215 if (!page) 3222 if (!page)
3216 return -ENOMEM; 3223 return -ENOMEM;
3217 3224
3218 blocksize = inode->i_sb->s_blocksize; 3225 blocksize = inode->i_sb->s_blocksize;
3219 3226
3220 iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); 3227 iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
3221 3228
3222 if (!page_has_buffers(page)) 3229 if (!page_has_buffers(page))
3223 create_empty_buffers(page, blocksize, 0); 3230 create_empty_buffers(page, blocksize, 0);
3224 3231
3225 /* Find the buffer that contains "offset" */ 3232 /* Find the buffer that contains "offset" */
3226 bh = page_buffers(page); 3233 bh = page_buffers(page);
3227 pos = blocksize; 3234 pos = blocksize;
3228 while (offset >= pos) { 3235 while (offset >= pos) {
3229 bh = bh->b_this_page; 3236 bh = bh->b_this_page;
3230 iblock++; 3237 iblock++;
3231 pos += blocksize; 3238 pos += blocksize;
3232 } 3239 }
3233 if (buffer_freed(bh)) { 3240 if (buffer_freed(bh)) {
3234 BUFFER_TRACE(bh, "freed: skip"); 3241 BUFFER_TRACE(bh, "freed: skip");
3235 goto unlock; 3242 goto unlock;
3236 } 3243 }
3237 if (!buffer_mapped(bh)) { 3244 if (!buffer_mapped(bh)) {
3238 BUFFER_TRACE(bh, "unmapped"); 3245 BUFFER_TRACE(bh, "unmapped");
3239 ext4_get_block(inode, iblock, bh, 0); 3246 ext4_get_block(inode, iblock, bh, 0);
3240 /* unmapped? It's a hole - nothing to do */ 3247 /* unmapped? It's a hole - nothing to do */
3241 if (!buffer_mapped(bh)) { 3248 if (!buffer_mapped(bh)) {
3242 BUFFER_TRACE(bh, "still unmapped"); 3249 BUFFER_TRACE(bh, "still unmapped");
3243 goto unlock; 3250 goto unlock;
3244 } 3251 }
3245 } 3252 }
3246 3253
3247 /* Ok, it's mapped. Make sure it's up-to-date */ 3254 /* Ok, it's mapped. Make sure it's up-to-date */
3248 if (PageUptodate(page)) 3255 if (PageUptodate(page))
3249 set_buffer_uptodate(bh); 3256 set_buffer_uptodate(bh);
3250 3257
3251 if (!buffer_uptodate(bh)) { 3258 if (!buffer_uptodate(bh)) {
3252 err = -EIO; 3259 err = -EIO;
3253 ll_rw_block(READ, 1, &bh); 3260 ll_rw_block(READ, 1, &bh);
3254 wait_on_buffer(bh); 3261 wait_on_buffer(bh);
3255 /* Uhhuh. Read error. Complain and punt. */ 3262 /* Uhhuh. Read error. Complain and punt. */
3256 if (!buffer_uptodate(bh)) 3263 if (!buffer_uptodate(bh))
3257 goto unlock; 3264 goto unlock;
3258 } 3265 }
3259 if (ext4_should_journal_data(inode)) { 3266 if (ext4_should_journal_data(inode)) {
3260 BUFFER_TRACE(bh, "get write access"); 3267 BUFFER_TRACE(bh, "get write access");
3261 err = ext4_journal_get_write_access(handle, bh); 3268 err = ext4_journal_get_write_access(handle, bh);
3262 if (err) 3269 if (err)
3263 goto unlock; 3270 goto unlock;
3264 } 3271 }
3265 zero_user(page, offset, length); 3272 zero_user(page, offset, length);
3266 BUFFER_TRACE(bh, "zeroed end of block"); 3273 BUFFER_TRACE(bh, "zeroed end of block");
3267 3274
3268 if (ext4_should_journal_data(inode)) { 3275 if (ext4_should_journal_data(inode)) {
3269 err = ext4_handle_dirty_metadata(handle, inode, bh); 3276 err = ext4_handle_dirty_metadata(handle, inode, bh);
3270 } else { 3277 } else {
3271 err = 0; 3278 err = 0;
3272 mark_buffer_dirty(bh); 3279 mark_buffer_dirty(bh);
3273 if (ext4_test_inode_state(inode, EXT4_STATE_ORDERED_MODE)) 3280 if (ext4_test_inode_state(inode, EXT4_STATE_ORDERED_MODE))
3274 err = ext4_jbd2_file_inode(handle, inode); 3281 err = ext4_jbd2_file_inode(handle, inode);
3275 } 3282 }
3276 3283
3277 unlock: 3284 unlock:
3278 unlock_page(page); 3285 unlock_page(page);
3279 page_cache_release(page); 3286 page_cache_release(page);
3280 return err; 3287 return err;
3281 } 3288 }
3282 3289
3283 /* 3290 /*
3284 * ext4_block_zero_page_range() zeros out a mapping of length 'length' 3291 * ext4_block_zero_page_range() zeros out a mapping of length 'length'
3285 * starting from file offset 'from'. The range to be zeroed must 3292 * starting from file offset 'from'. The range to be zeroed must
3286 * be contained within one block. If the specified range exceeds 3293 * be contained within one block. If the specified range exceeds
3287 * the end of the block, it will be shortened to the end of the 3294 * the end of the block, it will be shortened to the end of the
3288 * block that corresponds to 'from'. 3295 * block that corresponds to 'from'.
3289 */ 3296 */
3290 static int ext4_block_zero_page_range(handle_t *handle, 3297 static int ext4_block_zero_page_range(handle_t *handle,
3291 struct address_space *mapping, loff_t from, loff_t length) 3298 struct address_space *mapping, loff_t from, loff_t length)
3292 { 3299 {
3293 struct inode *inode = mapping->host; 3300 struct inode *inode = mapping->host;
3294 unsigned offset = from & (PAGE_CACHE_SIZE-1); 3301 unsigned offset = from & (PAGE_CACHE_SIZE-1);
3295 unsigned blocksize = inode->i_sb->s_blocksize; 3302 unsigned blocksize = inode->i_sb->s_blocksize;
3296 unsigned max = blocksize - (offset & (blocksize - 1)); 3303 unsigned max = blocksize - (offset & (blocksize - 1));
3297 3304
3298 /* 3305 /*
3299 * correct length if it does not fall between 3306 * correct length if it does not fall between
3300 * 'from' and the end of the block 3307 * 'from' and the end of the block
3301 */ 3308 */
3302 if (length > max || length < 0) 3309 if (length > max || length < 0)
3303 length = max; 3310 length = max;
3304 3311
3305 if (IS_DAX(inode)) 3312 if (IS_DAX(inode))
3306 return dax_zero_page_range(inode, from, length, ext4_get_block); 3313 return dax_zero_page_range(inode, from, length, ext4_get_block);
3307 return __ext4_block_zero_page_range(handle, mapping, from, length); 3314 return __ext4_block_zero_page_range(handle, mapping, from, length);
3308 } 3315 }
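
To make the clamping concrete, assume (purely for illustration) a 4096-byte block and page size: a call with from = 5000 and length = 10000 gives offset = 5000 & 4095 = 904, so max = 4096 - 904 = 3192 and length is shortened to 3192. Only bytes 5000..8191, i.e. the tail of the single block containing 'from', are zeroed; the caller is expected to handle the rest of the range block by block.
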
3309 3316
3310 /* 3317 /*
3311 * ext4_block_truncate_page() zeroes out a mapping from file offset `from' 3318 * ext4_block_truncate_page() zeroes out a mapping from file offset `from'
3312 * up to the end of the block which corresponds to `from'. 3319 * up to the end of the block which corresponds to `from'.
3313 * This is required during truncate. We need to physically zero the tail end 3320 * This is required during truncate. We need to physically zero the tail end
3314 * of that block so it doesn't yield old data if the file is later grown. 3321 * of that block so it doesn't yield old data if the file is later grown.
3315 */ 3322 */
3316 static int ext4_block_truncate_page(handle_t *handle, 3323 static int ext4_block_truncate_page(handle_t *handle,
3317 struct address_space *mapping, loff_t from) 3324 struct address_space *mapping, loff_t from)
3318 { 3325 {
3319 unsigned offset = from & (PAGE_CACHE_SIZE-1); 3326 unsigned offset = from & (PAGE_CACHE_SIZE-1);
3320 unsigned length; 3327 unsigned length;
3321 unsigned blocksize; 3328 unsigned blocksize;
3322 struct inode *inode = mapping->host; 3329 struct inode *inode = mapping->host;
3323 3330
3324 blocksize = inode->i_sb->s_blocksize; 3331 blocksize = inode->i_sb->s_blocksize;
3325 length = blocksize - (offset & (blocksize - 1)); 3332 length = blocksize - (offset & (blocksize - 1));
3326 3333
3327 return ext4_block_zero_page_range(handle, mapping, from, length); 3334 return ext4_block_zero_page_range(handle, mapping, from, length);
3328 } 3335 }
3329 3336
3330 int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, 3337 int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
3331 loff_t lstart, loff_t length) 3338 loff_t lstart, loff_t length)
3332 { 3339 {
3333 struct super_block *sb = inode->i_sb; 3340 struct super_block *sb = inode->i_sb;
3334 struct address_space *mapping = inode->i_mapping; 3341 struct address_space *mapping = inode->i_mapping;
3335 unsigned partial_start, partial_end; 3342 unsigned partial_start, partial_end;
3336 ext4_fsblk_t start, end; 3343 ext4_fsblk_t start, end;
3337 loff_t byte_end = (lstart + length - 1); 3344 loff_t byte_end = (lstart + length - 1);
3338 int err = 0; 3345 int err = 0;
3339 3346
3340 partial_start = lstart & (sb->s_blocksize - 1); 3347 partial_start = lstart & (sb->s_blocksize - 1);
3341 partial_end = byte_end & (sb->s_blocksize - 1); 3348 partial_end = byte_end & (sb->s_blocksize - 1);
3342 3349
3343 start = lstart >> sb->s_blocksize_bits; 3350 start = lstart >> sb->s_blocksize_bits;
3344 end = byte_end >> sb->s_blocksize_bits; 3351 end = byte_end >> sb->s_blocksize_bits;
3345 3352
3346 /* Handle partial zero within the single block */ 3353 /* Handle partial zero within the single block */
3347 if (start == end && 3354 if (start == end &&
3348 (partial_start || (partial_end != sb->s_blocksize - 1))) { 3355 (partial_start || (partial_end != sb->s_blocksize - 1))) {
3349 err = ext4_block_zero_page_range(handle, mapping, 3356 err = ext4_block_zero_page_range(handle, mapping,
3350 lstart, length); 3357 lstart, length);
3351 return err; 3358 return err;
3352 } 3359 }
3353 /* Handle partial zero out on the start of the range */ 3360 /* Handle partial zero out on the start of the range */
3354 if (partial_start) { 3361 if (partial_start) {
3355 err = ext4_block_zero_page_range(handle, mapping, 3362 err = ext4_block_zero_page_range(handle, mapping,
3356 lstart, sb->s_blocksize); 3363 lstart, sb->s_blocksize);
3357 if (err) 3364 if (err)
3358 return err; 3365 return err;
3359 } 3366 }
3360 /* Handle partial zero out on the end of the range */ 3367 /* Handle partial zero out on the end of the range */
3361 if (partial_end != sb->s_blocksize - 1) 3368 if (partial_end != sb->s_blocksize - 1)
3362 err = ext4_block_zero_page_range(handle, mapping, 3369 err = ext4_block_zero_page_range(handle, mapping,
3363 byte_end - partial_end, 3370 byte_end - partial_end,
3364 partial_end + 1); 3371 partial_end + 1);
3365 return err; 3372 return err;
3366 } 3373 }
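
A worked example of the partial-block handling, again assuming 4096-byte blocks: for lstart = 1000 and length = 10000, byte_end = 10999, partial_start = 1000 and partial_end = 10999 & 4095 = 2807. Since start = 0 and end = 2 differ, the function zeroes bytes 1000..4095 of the first block and, because partial_end != 4095, bytes 8192..10999 of the last block (offset byte_end - partial_end = 8192, length partial_end + 1 = 2808). The fully covered block in between is left to the block-freeing code.
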
3367 3374
3368 int ext4_can_truncate(struct inode *inode) 3375 int ext4_can_truncate(struct inode *inode)
3369 { 3376 {
3370 if (S_ISREG(inode->i_mode)) 3377 if (S_ISREG(inode->i_mode))
3371 return 1; 3378 return 1;
3372 if (S_ISDIR(inode->i_mode)) 3379 if (S_ISDIR(inode->i_mode))
3373 return 1; 3380 return 1;
3374 if (S_ISLNK(inode->i_mode)) 3381 if (S_ISLNK(inode->i_mode))
3375 return !ext4_inode_is_fast_symlink(inode); 3382 return !ext4_inode_is_fast_symlink(inode);
3376 return 0; 3383 return 0;
3377 } 3384 }
3378 3385
3379 /* 3386 /*
3380 * ext4_punch_hole: punches a hole in a file by releasing the blocks 3387 * ext4_punch_hole: punches a hole in a file by releasing the blocks
3381 * associated with the given offset and length 3388 * associated with the given offset and length
3382 * 3389 *
3383 * @inode: File inode 3390 * @inode: File inode
3384 * @offset: The offset where the hole will begin 3391 * @offset: The offset where the hole will begin
3385 * @len: The length of the hole 3392 * @len: The length of the hole
3386 * 3393 *
3387 * Returns: 0 on success or negative on failure 3394 * Returns: 0 on success or negative on failure
3388 */ 3395 */
3389 3396
3390 int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) 3397 int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
3391 { 3398 {
3392 struct super_block *sb = inode->i_sb; 3399 struct super_block *sb = inode->i_sb;
3393 ext4_lblk_t first_block, stop_block; 3400 ext4_lblk_t first_block, stop_block;
3394 struct address_space *mapping = inode->i_mapping; 3401 struct address_space *mapping = inode->i_mapping;
3395 loff_t first_block_offset, last_block_offset; 3402 loff_t first_block_offset, last_block_offset;
3396 handle_t *handle; 3403 handle_t *handle;
3397 unsigned int credits; 3404 unsigned int credits;
3398 int ret = 0; 3405 int ret = 0;
3399 3406
3400 if (!S_ISREG(inode->i_mode)) 3407 if (!S_ISREG(inode->i_mode))
3401 return -EOPNOTSUPP; 3408 return -EOPNOTSUPP;
3402 3409
3403 trace_ext4_punch_hole(inode, offset, length, 0); 3410 trace_ext4_punch_hole(inode, offset, length, 0);
3404 3411
3405 /* 3412 /*
3406 * Write out all dirty pages to avoid race conditions, 3413 * Write out all dirty pages to avoid race conditions,
3407 * then release them. 3414 * then release them.
3408 */ 3415 */
3409 if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { 3416 if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
3410 ret = filemap_write_and_wait_range(mapping, offset, 3417 ret = filemap_write_and_wait_range(mapping, offset,
3411 offset + length - 1); 3418 offset + length - 1);
3412 if (ret) 3419 if (ret)
3413 return ret; 3420 return ret;
3414 } 3421 }
3415 3422
3416 mutex_lock(&inode->i_mutex); 3423 mutex_lock(&inode->i_mutex);
3417 3424
3418 /* No need to punch hole beyond i_size */ 3425 /* No need to punch hole beyond i_size */
3419 if (offset >= inode->i_size) 3426 if (offset >= inode->i_size)
3420 goto out_mutex; 3427 goto out_mutex;
3421 3428
3422 /* 3429 /*
3423 * If the hole extends beyond i_size, set the hole 3430 * If the hole extends beyond i_size, set the hole
3424 * to end after the page that contains i_size 3431 * to end after the page that contains i_size
3425 */ 3432 */
3426 if (offset + length > inode->i_size) { 3433 if (offset + length > inode->i_size) {
3427 length = inode->i_size + 3434 length = inode->i_size +
3428 PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) - 3435 PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) -
3429 offset; 3436 offset;
3430 } 3437 }
3431 3438
3432 if (offset & (sb->s_blocksize - 1) || 3439 if (offset & (sb->s_blocksize - 1) ||
3433 (offset + length) & (sb->s_blocksize - 1)) { 3440 (offset + length) & (sb->s_blocksize - 1)) {
3434 /* 3441 /*
3435 * Attach jinode to inode for jbd2 if we do any zeroing of 3442 * Attach jinode to inode for jbd2 if we do any zeroing of
3436 * partial block 3443 * partial block
3437 */ 3444 */
3438 ret = ext4_inode_attach_jinode(inode); 3445 ret = ext4_inode_attach_jinode(inode);
3439 if (ret < 0) 3446 if (ret < 0)
3440 goto out_mutex; 3447 goto out_mutex;
3441 3448
3442 } 3449 }
3443 3450
3444 first_block_offset = round_up(offset, sb->s_blocksize); 3451 first_block_offset = round_up(offset, sb->s_blocksize);
3445 last_block_offset = round_down((offset + length), sb->s_blocksize) - 1; 3452 last_block_offset = round_down((offset + length), sb->s_blocksize) - 1;
3446 3453
3447 /* Now release the pages and zero the block-aligned part of the pages */ 3454 /* Now release the pages and zero the block-aligned part of the pages */
3448 if (last_block_offset > first_block_offset) 3455 if (last_block_offset > first_block_offset)
3449 truncate_pagecache_range(inode, first_block_offset, 3456 truncate_pagecache_range(inode, first_block_offset,
3450 last_block_offset); 3457 last_block_offset);
3451 3458
3452 /* Wait for all existing dio workers; newcomers will block on i_mutex */ 3459 /* Wait for all existing dio workers; newcomers will block on i_mutex */
3453 ext4_inode_block_unlocked_dio(inode); 3460 ext4_inode_block_unlocked_dio(inode);
3454 inode_dio_wait(inode); 3461 inode_dio_wait(inode);
3455 3462
3456 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) 3463 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
3457 credits = ext4_writepage_trans_blocks(inode); 3464 credits = ext4_writepage_trans_blocks(inode);
3458 else 3465 else
3459 credits = ext4_blocks_for_truncate(inode); 3466 credits = ext4_blocks_for_truncate(inode);
3460 handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); 3467 handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
3461 if (IS_ERR(handle)) { 3468 if (IS_ERR(handle)) {
3462 ret = PTR_ERR(handle); 3469 ret = PTR_ERR(handle);
3463 ext4_std_error(sb, ret); 3470 ext4_std_error(sb, ret);
3464 goto out_dio; 3471 goto out_dio;
3465 } 3472 }
3466 3473
3467 ret = ext4_zero_partial_blocks(handle, inode, offset, 3474 ret = ext4_zero_partial_blocks(handle, inode, offset,
3468 length); 3475 length);
3469 if (ret) 3476 if (ret)
3470 goto out_stop; 3477 goto out_stop;
3471 3478
3472 first_block = (offset + sb->s_blocksize - 1) >> 3479 first_block = (offset + sb->s_blocksize - 1) >>
3473 EXT4_BLOCK_SIZE_BITS(sb); 3480 EXT4_BLOCK_SIZE_BITS(sb);
3474 stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb); 3481 stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);
3475 3482
3476 /* If there are no blocks to remove, return now */ 3483 /* If there are no blocks to remove, return now */
3477 if (first_block >= stop_block) 3484 if (first_block >= stop_block)
3478 goto out_stop; 3485 goto out_stop;
3479 3486
3480 down_write(&EXT4_I(inode)->i_data_sem); 3487 down_write(&EXT4_I(inode)->i_data_sem);
3481 ext4_discard_preallocations(inode); 3488 ext4_discard_preallocations(inode);
3482 3489
3483 ret = ext4_es_remove_extent(inode, first_block, 3490 ret = ext4_es_remove_extent(inode, first_block,
3484 stop_block - first_block); 3491 stop_block - first_block);
3485 if (ret) { 3492 if (ret) {
3486 up_write(&EXT4_I(inode)->i_data_sem); 3493 up_write(&EXT4_I(inode)->i_data_sem);
3487 goto out_stop; 3494 goto out_stop;
3488 } 3495 }
3489 3496
3490 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) 3497 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
3491 ret = ext4_ext_remove_space(inode, first_block, 3498 ret = ext4_ext_remove_space(inode, first_block,
3492 stop_block - 1); 3499 stop_block - 1);
3493 else 3500 else
3494 ret = ext4_ind_remove_space(handle, inode, first_block, 3501 ret = ext4_ind_remove_space(handle, inode, first_block,
3495 stop_block); 3502 stop_block);
3496 3503
3497 up_write(&EXT4_I(inode)->i_data_sem); 3504 up_write(&EXT4_I(inode)->i_data_sem);
3498 if (IS_SYNC(inode)) 3505 if (IS_SYNC(inode))
3499 ext4_handle_sync(handle); 3506 ext4_handle_sync(handle);
3500 3507
3501 /* Now release the pages again to reduce the race window */ 3508 /* Now release the pages again to reduce the race window */
3502 if (last_block_offset > first_block_offset) 3509 if (last_block_offset > first_block_offset)
3503 truncate_pagecache_range(inode, first_block_offset, 3510 truncate_pagecache_range(inode, first_block_offset,
3504 last_block_offset); 3511 last_block_offset);
3505 3512
3506 inode->i_mtime = inode->i_ctime = ext4_current_time(inode); 3513 inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
3507 ext4_mark_inode_dirty(handle, inode); 3514 ext4_mark_inode_dirty(handle, inode);
3508 out_stop: 3515 out_stop:
3509 ext4_journal_stop(handle); 3516 ext4_journal_stop(handle);
3510 out_dio: 3517 out_dio:
3511 ext4_inode_resume_unlocked_dio(inode); 3518 ext4_inode_resume_unlocked_dio(inode);
3512 out_mutex: 3519 out_mutex:
3513 mutex_unlock(&inode->i_mutex); 3520 mutex_unlock(&inode->i_mutex);
3514 return ret; 3521 return ret;
3515 } 3522 }
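
This path is normally reached from userspace via fallocate(2) with FALLOC_FL_PUNCH_HOLE, which the kernel requires to be combined with FALLOC_FL_KEEP_SIZE. A minimal sketch (the offset and length are illustrative):

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <linux/falloc.h>

	/* Deallocate bytes 4096..8191 of an open file; subsequent reads
	 * of the punched range return zeroes and i_size is unchanged. */
	int punch_hole(int fd)
	{
		return fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
				 4096, 4096);
	}
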
3516 3523
3517 int ext4_inode_attach_jinode(struct inode *inode) 3524 int ext4_inode_attach_jinode(struct inode *inode)
3518 { 3525 {
3519 struct ext4_inode_info *ei = EXT4_I(inode); 3526 struct ext4_inode_info *ei = EXT4_I(inode);
3520 struct jbd2_inode *jinode; 3527 struct jbd2_inode *jinode;
3521 3528
3522 if (ei->jinode || !EXT4_SB(inode->i_sb)->s_journal) 3529 if (ei->jinode || !EXT4_SB(inode->i_sb)->s_journal)
3523 return 0; 3530 return 0;
3524 3531
3525 jinode = jbd2_alloc_inode(GFP_KERNEL); 3532 jinode = jbd2_alloc_inode(GFP_KERNEL);
3526 spin_lock(&inode->i_lock); 3533 spin_lock(&inode->i_lock);
3527 if (!ei->jinode) { 3534 if (!ei->jinode) {
3528 if (!jinode) { 3535 if (!jinode) {
3529 spin_unlock(&inode->i_lock); 3536 spin_unlock(&inode->i_lock);
3530 return -ENOMEM; 3537 return -ENOMEM;
3531 } 3538 }
3532 ei->jinode = jinode; 3539 ei->jinode = jinode;
3533 jbd2_journal_init_jbd_inode(ei->jinode, inode); 3540 jbd2_journal_init_jbd_inode(ei->jinode, inode);
3534 jinode = NULL; 3541 jinode = NULL;
3535 } 3542 }
3536 spin_unlock(&inode->i_lock); 3543 spin_unlock(&inode->i_lock);
3537 if (unlikely(jinode != NULL)) 3544 if (unlikely(jinode != NULL))
3538 jbd2_free_inode(jinode); 3545 jbd2_free_inode(jinode);
3539 return 0; 3546 return 0;
3540 } 3547 }
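
Worth noting in the function above is the allocation pattern: jbd2_alloc_inode() is called with GFP_KERNEL, which may sleep, so the allocation happens before i_lock (a spinlock) is taken. ei->jinode is then re-checked under the lock, and if another thread attached a jinode first, the speculative allocation is freed after the lock is dropped. This is the usual idiom for lazily populating a field protected by a spinlock.
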
3541 3548
3542 /* 3549 /*
3543 * ext4_truncate() 3550 * ext4_truncate()
3544 * 3551 *
3545 * We block out ext4_get_block() block instantiations across the entire 3552 * We block out ext4_get_block() block instantiations across the entire
3546 * transaction, and VFS/VM ensures that ext4_truncate() cannot run 3553 * transaction, and VFS/VM ensures that ext4_truncate() cannot run
3547 * simultaneously on behalf of the same inode. 3554 * simultaneously on behalf of the same inode.
3548 * 3555 *
3549 * As we work through the truncate and commit bits of it to the journal there 3556 * As we work through the truncate and commit bits of it to the journal there
3550 * is one core, guiding principle: the file's tree must always be consistent on 3557 * is one core, guiding principle: the file's tree must always be consistent on
3551 * disk. We must be able to restart the truncate after a crash. 3558 * disk. We must be able to restart the truncate after a crash.
3552 * 3559 *
3553 * The file's tree may be transiently inconsistent in memory (although it 3560 * The file's tree may be transiently inconsistent in memory (although it
3554 * probably isn't), but whenever we close off and commit a journal transaction, 3561 * probably isn't), but whenever we close off and commit a journal transaction,
3555 * the contents of (the filesystem + the journal) must be consistent and 3562 * the contents of (the filesystem + the journal) must be consistent and
3556 * restartable. It's pretty simple, really: bottom up, right to left (although 3563 * restartable. It's pretty simple, really: bottom up, right to left (although
3557 * left-to-right works OK too). 3564 * left-to-right works OK too).
3558 * 3565 *
3559 * Note that at recovery time, journal replay occurs *before* the restart of 3566 * Note that at recovery time, journal replay occurs *before* the restart of
3560 * truncate against the orphan inode list. 3567 * truncate against the orphan inode list.
3561 * 3568 *
3562 * The committed inode has the new, desired i_size (which is the same as 3569 * The committed inode has the new, desired i_size (which is the same as
3563 * i_disksize in this case). After a crash, ext4_orphan_cleanup() will see 3570 * i_disksize in this case). After a crash, ext4_orphan_cleanup() will see
3564 * that this inode's truncate did not complete and it will again call 3571 * that this inode's truncate did not complete and it will again call
3565 * ext4_truncate() to have another go. So there will be instantiated blocks 3572 * ext4_truncate() to have another go. So there will be instantiated blocks
3566 * to the right of the truncation point in a crashed ext4 filesystem. But 3573 * to the right of the truncation point in a crashed ext4 filesystem. But
3567 * that's fine - as long as they are linked from the inode, the post-crash 3574 * that's fine - as long as they are linked from the inode, the post-crash
3568 * ext4_truncate() run will find them and release them. 3575 * ext4_truncate() run will find them and release them.
3569 */ 3576 */
3570 void ext4_truncate(struct inode *inode) 3577 void ext4_truncate(struct inode *inode)
3571 { 3578 {
3572 struct ext4_inode_info *ei = EXT4_I(inode); 3579 struct ext4_inode_info *ei = EXT4_I(inode);
3573 unsigned int credits; 3580 unsigned int credits;
3574 handle_t *handle; 3581 handle_t *handle;
3575 struct address_space *mapping = inode->i_mapping; 3582 struct address_space *mapping = inode->i_mapping;
3576 3583
3577 /* 3584 /*
3578 * There is a possibility that we're either freeing the inode 3585 * There is a possibility that we're either freeing the inode
3579 * or it's a completely new inode. In those cases we might not 3586 * or it's a completely new inode. In those cases we might not
3580 * have i_mutex locked because it's not necessary. 3587 * have i_mutex locked because it's not necessary.
3581 */ 3588 */
3582 if (!(inode->i_state & (I_NEW|I_FREEING))) 3589 if (!(inode->i_state & (I_NEW|I_FREEING)))
3583 WARN_ON(!mutex_is_locked(&inode->i_mutex)); 3590 WARN_ON(!mutex_is_locked(&inode->i_mutex));
3584 trace_ext4_truncate_enter(inode); 3591 trace_ext4_truncate_enter(inode);
3585 3592
3586 if (!ext4_can_truncate(inode)) 3593 if (!ext4_can_truncate(inode))
3587 return; 3594 return;
3588 3595
3589 ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); 3596 ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
3590 3597
3591 if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) 3598 if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
3592 ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE); 3599 ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);
3593 3600
3594 if (ext4_has_inline_data(inode)) { 3601 if (ext4_has_inline_data(inode)) {
3595 int has_inline = 1; 3602 int has_inline = 1;
3596 3603
3597 ext4_inline_data_truncate(inode, &has_inline); 3604 ext4_inline_data_truncate(inode, &has_inline);
3598 if (has_inline) 3605 if (has_inline)
3599 return; 3606 return;
3600 } 3607 }
3601 3608
3602 /* If we zero out the tail of the page, we have to create a jinode for jbd2 */ 3609 /* If we zero out the tail of the page, we have to create a jinode for jbd2 */
3603 if (inode->i_size & (inode->i_sb->s_blocksize - 1)) { 3610 if (inode->i_size & (inode->i_sb->s_blocksize - 1)) {
3604 if (ext4_inode_attach_jinode(inode) < 0) 3611 if (ext4_inode_attach_jinode(inode) < 0)
3605 return; 3612 return;
3606 } 3613 }
3607 3614
3608 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) 3615 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
3609 credits = ext4_writepage_trans_blocks(inode); 3616 credits = ext4_writepage_trans_blocks(inode);
3610 else 3617 else
3611 credits = ext4_blocks_for_truncate(inode); 3618 credits = ext4_blocks_for_truncate(inode);
3612 3619
3613 handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); 3620 handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
3614 if (IS_ERR(handle)) { 3621 if (IS_ERR(handle)) {
3615 ext4_std_error(inode->i_sb, PTR_ERR(handle)); 3622 ext4_std_error(inode->i_sb, PTR_ERR(handle));
3616 return; 3623 return;
3617 } 3624 }
3618 3625
3619 if (inode->i_size & (inode->i_sb->s_blocksize - 1)) 3626 if (inode->i_size & (inode->i_sb->s_blocksize - 1))
3620 ext4_block_truncate_page(handle, mapping, inode->i_size); 3627 ext4_block_truncate_page(handle, mapping, inode->i_size);
3621 3628
3622 /* 3629 /*
3623 * We add the inode to the orphan list, so that if this 3630 * We add the inode to the orphan list, so that if this
3624 * truncate spans multiple transactions, and we crash, we will 3631 * truncate spans multiple transactions, and we crash, we will
3625 * resume the truncate when the filesystem recovers. It also 3632 * resume the truncate when the filesystem recovers. It also
3626 * marks the inode dirty, to catch the new size. 3633 * marks the inode dirty, to catch the new size.
3627 * 3634 *
3628 * Implication: the file must always be in a sane, consistent 3635 * Implication: the file must always be in a sane, consistent
3629 * truncatable state while each transaction commits. 3636 * truncatable state while each transaction commits.
3630 */ 3637 */
3631 if (ext4_orphan_add(handle, inode)) 3638 if (ext4_orphan_add(handle, inode))
3632 goto out_stop; 3639 goto out_stop;
3633 3640
3634 down_write(&EXT4_I(inode)->i_data_sem); 3641 down_write(&EXT4_I(inode)->i_data_sem);
3635 3642
3636 ext4_discard_preallocations(inode); 3643 ext4_discard_preallocations(inode);
3637 3644
3638 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) 3645 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
3639 ext4_ext_truncate(handle, inode); 3646 ext4_ext_truncate(handle, inode);
3640 else 3647 else
3641 ext4_ind_truncate(handle, inode); 3648 ext4_ind_truncate(handle, inode);
3642 3649
3643 up_write(&ei->i_data_sem); 3650 up_write(&ei->i_data_sem);
3644 3651
3645 if (IS_SYNC(inode)) 3652 if (IS_SYNC(inode))
3646 ext4_handle_sync(handle); 3653 ext4_handle_sync(handle);
3647 3654
3648 out_stop: 3655 out_stop:
3649 /* 3656 /*
3650 * If this was a simple ftruncate() and the file will remain alive, 3657 * If this was a simple ftruncate() and the file will remain alive,
3651 * then we need to clear up the orphan record which we created above. 3658 * then we need to clear up the orphan record which we created above.
3652 * However, if this was a real unlink then we were called by 3659 * However, if this was a real unlink then we were called by
3653 * ext4_evict_inode(), and we allow that function to clean up the 3660 * ext4_evict_inode(), and we allow that function to clean up the
3654 * orphan info for us. 3661 * orphan info for us.
3655 */ 3662 */
3656 if (inode->i_nlink) 3663 if (inode->i_nlink)
3657 ext4_orphan_del(handle, inode); 3664 ext4_orphan_del(handle, inode);
3658 3665
3659 inode->i_mtime = inode->i_ctime = ext4_current_time(inode); 3666 inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
3660 ext4_mark_inode_dirty(handle, inode); 3667 ext4_mark_inode_dirty(handle, inode);
3661 ext4_journal_stop(handle); 3668 ext4_journal_stop(handle);
3662 3669
3663 trace_ext4_truncate_exit(inode); 3670 trace_ext4_truncate_exit(inode);
3664 } 3671 }
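
Both entry points into ext4_truncate() are visible from userspace: ftruncate(2)/truncate(2) shrink a live file (so the orphan record added above is removed again at out_stop), while deleting the last link reaches it through ext4_evict_inode(). A minimal sketch of the first case (the target size is illustrative):

	#include <unistd.h>

	/* Shrink an open file to 1000 bytes; with a 4096-byte block size,
	 * ext4 zeroes bytes 1000..4095 of the final block and releases
	 * every block past it. */
	int shrink(int fd)
	{
		return ftruncate(fd, 1000);
	}
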
3665 3672
3666 /* 3673 /*
3667 * ext4_get_inode_loc returns with an extra refcount against the inode's 3674 * ext4_get_inode_loc returns with an extra refcount against the inode's
3668 * underlying buffer_head on success. If 'in_mem' is true, we have all 3675 * underlying buffer_head on success. If 'in_mem' is true, we have all
3669 * data in memory that is needed to recreate the on-disk version of this 3676 * data in memory that is needed to recreate the on-disk version of this
3670 * inode. 3677 * inode.
3671 */ 3678 */
3672 static int __ext4_get_inode_loc(struct inode *inode, 3679 static int __ext4_get_inode_loc(struct inode *inode,
3673 struct ext4_iloc *iloc, int in_mem) 3680 struct ext4_iloc *iloc, int in_mem)
3674 { 3681 {
3675 struct ext4_group_desc *gdp; 3682 struct ext4_group_desc *gdp;
3676 struct buffer_head *bh; 3683 struct buffer_head *bh;
3677 struct super_block *sb = inode->i_sb; 3684 struct super_block *sb = inode->i_sb;
3678 ext4_fsblk_t block; 3685 ext4_fsblk_t block;
3679 int inodes_per_block, inode_offset; 3686 int inodes_per_block, inode_offset;
3680 3687
3681 iloc->bh = NULL; 3688 iloc->bh = NULL;
3682 if (!ext4_valid_inum(sb, inode->i_ino)) 3689 if (!ext4_valid_inum(sb, inode->i_ino))
3683 return -EIO; 3690 return -EIO;
3684 3691
3685 iloc->block_group = (inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb); 3692 iloc->block_group = (inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb);
3686 gdp = ext4_get_group_desc(sb, iloc->block_group, NULL); 3693 gdp = ext4_get_group_desc(sb, iloc->block_group, NULL);
3687 if (!gdp) 3694 if (!gdp)
3688 return -EIO; 3695 return -EIO;
3689 3696
3690 /* 3697 /*
3691 * Figure out the offset within the block group inode table 3698 * Figure out the offset within the block group inode table
3692 */ 3699 */
3693 inodes_per_block = EXT4_SB(sb)->s_inodes_per_block; 3700 inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
3694 inode_offset = ((inode->i_ino - 1) % 3701 inode_offset = ((inode->i_ino - 1) %
3695 EXT4_INODES_PER_GROUP(sb)); 3702 EXT4_INODES_PER_GROUP(sb));
3696 block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block); 3703 block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block);
3697 iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb); 3704 iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb);
3698 3705
3699 bh = sb_getblk(sb, block); 3706 bh = sb_getblk(sb, block);
3700 if (unlikely(!bh)) 3707 if (unlikely(!bh))
3701 return -ENOMEM; 3708 return -ENOMEM;
3702 if (!buffer_uptodate(bh)) { 3709 if (!buffer_uptodate(bh)) {
3703 lock_buffer(bh); 3710 lock_buffer(bh);
3704 3711
3705 /* 3712 /*
3706 * If the buffer has the write error flag, we have failed 3713 * If the buffer has the write error flag, we have failed
3707 * to write out another inode in the same block. In this 3714 * to write out another inode in the same block. In this
3708 * case, we don't read the block from disk: a read might 3715 * case, we don't read the block from disk: a read might
3709 * succeed but would bring back the old inode data. 3716 * succeed but would bring back the old inode data.
3710 */ 3717 */
3711 if (buffer_write_io_error(bh) && !buffer_uptodate(bh)) 3718 if (buffer_write_io_error(bh) && !buffer_uptodate(bh))
3712 set_buffer_uptodate(bh); 3719 set_buffer_uptodate(bh);
3713 3720
3714 if (buffer_uptodate(bh)) { 3721 if (buffer_uptodate(bh)) {
3715 /* someone brought it uptodate while we waited */ 3722 /* someone brought it uptodate while we waited */
3716 unlock_buffer(bh); 3723 unlock_buffer(bh);
3717 goto has_buffer; 3724 goto has_buffer;
3718 } 3725 }
3719 3726
3720 /* 3727 /*
3721 * If we have all information of the inode in memory and this 3728 * If we have all information of the inode in memory and this
3722 * is the only valid inode in the block, we need not read the 3729 * is the only valid inode in the block, we need not read the
3723 * block. 3730 * block.
3724 */ 3731 */
3725 if (in_mem) { 3732 if (in_mem) {
3726 struct buffer_head *bitmap_bh; 3733 struct buffer_head *bitmap_bh;
3727 int i, start; 3734 int i, start;
3728 3735
3729 start = inode_offset & ~(inodes_per_block - 1); 3736 start = inode_offset & ~(inodes_per_block - 1);
3730 3737
3731 /* Is the inode bitmap in cache? */ 3738 /* Is the inode bitmap in cache? */
3732 bitmap_bh = sb_getblk(sb, ext4_inode_bitmap(sb, gdp)); 3739 bitmap_bh = sb_getblk(sb, ext4_inode_bitmap(sb, gdp));
3733 if (unlikely(!bitmap_bh)) 3740 if (unlikely(!bitmap_bh))
3734 goto make_io; 3741 goto make_io;
3735 3742
3736 /* 3743 /*
3737 * If the inode bitmap isn't in cache then the 3744 * If the inode bitmap isn't in cache then the
3738 * optimisation may end up performing two reads instead 3745 * optimisation may end up performing two reads instead
3739 * of one, so skip it. 3746 * of one, so skip it.
3740 */ 3747 */
3741 if (!buffer_uptodate(bitmap_bh)) { 3748 if (!buffer_uptodate(bitmap_bh)) {
3742 brelse(bitmap_bh); 3749 brelse(bitmap_bh);
3743 goto make_io; 3750 goto make_io;
3744 } 3751 }
3745 for (i = start; i < start + inodes_per_block; i++) { 3752 for (i = start; i < start + inodes_per_block; i++) {
3746 if (i == inode_offset) 3753 if (i == inode_offset)
3747 continue; 3754 continue;
3748 if (ext4_test_bit(i, bitmap_bh->b_data)) 3755 if (ext4_test_bit(i, bitmap_bh->b_data))
3749 break; 3756 break;
3750 } 3757 }
3751 brelse(bitmap_bh); 3758 brelse(bitmap_bh);
3752 if (i == start + inodes_per_block) { 3759 if (i == start + inodes_per_block) {
3753 /* all other inodes are free, so skip I/O */ 3760 /* all other inodes are free, so skip I/O */
3754 memset(bh->b_data, 0, bh->b_size); 3761 memset(bh->b_data, 0, bh->b_size);
3755 set_buffer_uptodate(bh); 3762 set_buffer_uptodate(bh);
3756 unlock_buffer(bh); 3763 unlock_buffer(bh);
3757 goto has_buffer; 3764 goto has_buffer;
3758 } 3765 }
3759 } 3766 }
3760 3767
3761 make_io: 3768 make_io:
3762 /* 3769 /*
3763 * If we need to do any I/O, try to pre-readahead extra 3770 * If we need to do any I/O, try to pre-readahead extra
3764 * blocks from the inode table. 3771 * blocks from the inode table.
3765 */ 3772 */
3766 if (EXT4_SB(sb)->s_inode_readahead_blks) { 3773 if (EXT4_SB(sb)->s_inode_readahead_blks) {
3767 ext4_fsblk_t b, end, table; 3774 ext4_fsblk_t b, end, table;
3768 unsigned num; 3775 unsigned num;
3769 __u32 ra_blks = EXT4_SB(sb)->s_inode_readahead_blks; 3776 __u32 ra_blks = EXT4_SB(sb)->s_inode_readahead_blks;
3770 3777
3771 table = ext4_inode_table(sb, gdp); 3778 table = ext4_inode_table(sb, gdp);
3772 /* s_inode_readahead_blks is always a power of 2 */ 3779 /* s_inode_readahead_blks is always a power of 2 */
3773 b = block & ~((ext4_fsblk_t) ra_blks - 1); 3780 b = block & ~((ext4_fsblk_t) ra_blks - 1);
3774 if (table > b) 3781 if (table > b)
3775 b = table; 3782 b = table;
3776 end = b + ra_blks; 3783 end = b + ra_blks;
3777 num = EXT4_INODES_PER_GROUP(sb); 3784 num = EXT4_INODES_PER_GROUP(sb);
3778 if (ext4_has_group_desc_csum(sb)) 3785 if (ext4_has_group_desc_csum(sb))
3779 num -= ext4_itable_unused_count(sb, gdp); 3786 num -= ext4_itable_unused_count(sb, gdp);
3780 table += num / inodes_per_block; 3787 table += num / inodes_per_block;
3781 if (end > table) 3788 if (end > table)
3782 end = table; 3789 end = table;
3783 while (b <= end) 3790 while (b <= end)
3784 sb_breadahead(sb, b++); 3791 sb_breadahead(sb, b++);
3785 } 3792 }
3786 3793
3787 /* 3794 /*
3788 * There are other valid inodes in the buffer, this inode 3795 * There are other valid inodes in the buffer, this inode
3789 * has in-inode xattrs, or we don't have this inode in memory. 3796 * has in-inode xattrs, or we don't have this inode in memory.
3790 * Read the block from disk. 3797 * Read the block from disk.
3791 */ 3798 */
3792 trace_ext4_load_inode(inode); 3799 trace_ext4_load_inode(inode);
3793 get_bh(bh); 3800 get_bh(bh);
3794 bh->b_end_io = end_buffer_read_sync; 3801 bh->b_end_io = end_buffer_read_sync;
3795 submit_bh(READ | REQ_META | REQ_PRIO, bh); 3802 submit_bh(READ | REQ_META | REQ_PRIO, bh);
3796 wait_on_buffer(bh); 3803 wait_on_buffer(bh);
3797 if (!buffer_uptodate(bh)) { 3804 if (!buffer_uptodate(bh)) {
3798 EXT4_ERROR_INODE_BLOCK(inode, block, 3805 EXT4_ERROR_INODE_BLOCK(inode, block,
3799 "unable to read itable block"); 3806 "unable to read itable block");
3800 brelse(bh); 3807 brelse(bh);
3801 return -EIO; 3808 return -EIO;
3802 } 3809 }
3803 } 3810 }
3804 has_buffer: 3811 has_buffer:
3805 iloc->bh = bh; 3812 iloc->bh = bh;
3806 return 0; 3813 return 0;
3807 } 3814 }
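
To see the location arithmetic above in a concrete case, assume (purely for illustration) 4096-byte blocks, 256-byte on-disk inodes — so inodes_per_block = 16 — and 8192 inodes per group. For inode number 50000: block_group = 49999 / 8192 = 6 and inode_offset = 49999 % 8192 = 847, so the inode lives in inode table block ext4_inode_table() + 847 / 16 = table + 52, at byte offset (847 % 16) * 256 = 3840 within that block.
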

int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc)
{
	/* We have all inode data except xattrs in memory here. */
	return __ext4_get_inode_loc(inode, iloc,
		!ext4_test_inode_state(inode, EXT4_STATE_XATTR));
}

void ext4_set_inode_flags(struct inode *inode)
{
	unsigned int flags = EXT4_I(inode)->i_flags;
	unsigned int new_fl = 0;

	if (flags & EXT4_SYNC_FL)
		new_fl |= S_SYNC;
	if (flags & EXT4_APPEND_FL)
		new_fl |= S_APPEND;
	if (flags & EXT4_IMMUTABLE_FL)
		new_fl |= S_IMMUTABLE;
	if (flags & EXT4_NOATIME_FL)
		new_fl |= S_NOATIME;
	if (flags & EXT4_DIRSYNC_FL)
		new_fl |= S_DIRSYNC;
	if (test_opt(inode->i_sb, DAX))
		new_fl |= S_DAX;
	inode_set_flags(inode, new_fl,
			S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX);
}

/* Propagate flags from i_flags to EXT4_I(inode)->i_flags */
void ext4_get_inode_flags(struct ext4_inode_info *ei)
{
	unsigned int vfs_fl;
	unsigned long old_fl, new_fl;

	do {
		vfs_fl = ei->vfs_inode.i_flags;
		old_fl = ei->i_flags;
		new_fl = old_fl & ~(EXT4_SYNC_FL|EXT4_APPEND_FL|
				EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL|
				EXT4_DIRSYNC_FL);
		if (vfs_fl & S_SYNC)
			new_fl |= EXT4_SYNC_FL;
		if (vfs_fl & S_APPEND)
			new_fl |= EXT4_APPEND_FL;
		if (vfs_fl & S_IMMUTABLE)
			new_fl |= EXT4_IMMUTABLE_FL;
		if (vfs_fl & S_NOATIME)
			new_fl |= EXT4_NOATIME_FL;
		if (vfs_fl & S_DIRSYNC)
			new_fl |= EXT4_DIRSYNC_FL;
	} while (cmpxchg(&ei->i_flags, old_fl, new_fl) != old_fl);
}
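
/*
 * The loop above is a lock-free read-modify-write: it snapshots
 * i_flags into old_fl, rebuilds the five VFS-mirrored bits from the
 * in-core inode, and publishes the result with cmpxchg(); if another
 * CPU changed i_flags in the meantime the compare fails and the whole
 * computation is retried against the fresh value.
 */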

static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
				  struct ext4_inode_info *ei)
{
	blkcnt_t i_blocks;
	struct inode *inode = &(ei->vfs_inode);
	struct super_block *sb = inode->i_sb;

	if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
				EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) {
		/* we are using the combined 48 bit field */
		i_blocks = ((u64)le16_to_cpu(raw_inode->i_blocks_high)) << 32 |
					le32_to_cpu(raw_inode->i_blocks_lo);
		if (ext4_test_inode_flag(inode, EXT4_INODE_HUGE_FILE)) {
			/* i_blocks is in units of the file system block size */
			return i_blocks << (inode->i_blkbits - 9);
		} else {
			return i_blocks;
		}
	} else {
		return le32_to_cpu(raw_inode->i_blocks_lo);
	}
}
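
/*
 * Unit conversion example (illustrative): without EXT4_INODE_HUGE_FILE
 * the 48-bit value high:lo counts 512-byte sectors, which is what the
 * VFS expects in inode->i_blocks.  With the flag set it counts file
 * system blocks instead, so for 4 KiB blocks (i_blkbits = 12) the
 * shift by i_blkbits - 9 = 3 multiplies by 8 to get back to sectors.
 */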

static inline void ext4_iget_extra_inode(struct inode *inode,
					 struct ext4_inode *raw_inode,
					 struct ext4_inode_info *ei)
{
	__le32 *magic = (void *)raw_inode +
			EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize;
	if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) {
		ext4_set_inode_state(inode, EXT4_STATE_XATTR);
		ext4_find_inline_data_nolock(inode);
	} else
		EXT4_I(inode)->i_inline_off = 0;
}
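
/*
 * The magic word sits immediately after the fixed 128-byte on-disk
 * inode plus the i_extra_isize area; only when it matches
 * EXT4_XATTR_MAGIC is the in-inode xattr space trusted and scanned for
 * the attribute that marks inline data.
 */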

struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
{
	struct ext4_iloc iloc;
	struct ext4_inode *raw_inode;
	struct ext4_inode_info *ei;
	struct inode *inode;
	journal_t *journal = EXT4_SB(sb)->s_journal;
	long ret;
	int block;
	uid_t i_uid;
	gid_t i_gid;

	inode = iget_locked(sb, ino);
	if (!inode)
		return ERR_PTR(-ENOMEM);
	if (!(inode->i_state & I_NEW))
		return inode;

	ei = EXT4_I(inode);
	iloc.bh = NULL;

	ret = __ext4_get_inode_loc(inode, &iloc, 0);
	if (ret < 0)
		goto bad_inode;
	raw_inode = ext4_raw_inode(&iloc);

	if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
		ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
		if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
		    EXT4_INODE_SIZE(inode->i_sb)) {
			EXT4_ERROR_INODE(inode, "bad extra_isize (%u != %u)",
				EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize,
				EXT4_INODE_SIZE(inode->i_sb));
			ret = -EIO;
			goto bad_inode;
		}
	} else
		ei->i_extra_isize = 0;

	/* Precompute checksum seed for inode metadata */
	if (ext4_has_metadata_csum(sb)) {
		struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
		__u32 csum;
		__le32 inum = cpu_to_le32(inode->i_ino);
		__le32 gen = raw_inode->i_generation;
		csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&inum,
				   sizeof(inum));
		ei->i_csum_seed = ext4_chksum(sbi, csum, (__u8 *)&gen,
					      sizeof(gen));
	}

	if (!ext4_inode_csum_verify(inode, raw_inode, ei)) {
		EXT4_ERROR_INODE(inode, "checksum invalid");
		ret = -EIO;
		goto bad_inode;
	}

	inode->i_mode = le16_to_cpu(raw_inode->i_mode);
	i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
	i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
	if (!(test_opt(inode->i_sb, NO_UID32))) {
		i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
		i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
	}
	i_uid_write(inode, i_uid);
	i_gid_write(inode, i_gid);
	set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));

	ext4_clear_state_flags(ei);	/* Only relevant on 32-bit archs */
	ei->i_inline_off = 0;
	ei->i_dir_start_lookup = 0;
	ei->i_dtime = le32_to_cpu(raw_inode->i_dtime);
	/* We now have enough fields to check if the inode was active or not.
	 * This is needed because nfsd might try to access dead inodes;
	 * the test is the same one that e2fsck uses.
	 * NeilBrown 1999oct15
	 */
	if (inode->i_nlink == 0) {
		if ((inode->i_mode == 0 ||
		     !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) &&
		    ino != EXT4_BOOT_LOADER_INO) {
			/* this inode is deleted */
			ret = -ESTALE;
			goto bad_inode;
		}
		/* The only unlinked inodes we let through here have
		 * valid i_mode and are being read by the orphan
		 * recovery code: that's fine, we're about to complete
		 * the process of deleting those.
		 * OR it is the EXT4_BOOT_LOADER_INO which is
		 * not initialized on a new filesystem. */
	}
	ei->i_flags = le32_to_cpu(raw_inode->i_flags);
	inode->i_blocks = ext4_inode_blocks(raw_inode, ei);
	ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo);
	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT))
		ei->i_file_acl |=
			((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
	inode->i_size = ext4_isize(raw_inode);
	ei->i_disksize = inode->i_size;
#ifdef CONFIG_QUOTA
	ei->i_reserved_quota = 0;
#endif
	inode->i_generation = le32_to_cpu(raw_inode->i_generation);
	ei->i_block_group = iloc.block_group;
	ei->i_last_alloc_group = ~0;
	/*
	 * NOTE! The in-memory inode i_data array is in little-endian order
	 * even on big-endian machines: we do NOT byteswap the block numbers!
	 */
	for (block = 0; block < EXT4_N_BLOCKS; block++)
		ei->i_data[block] = raw_inode->i_block[block];
	INIT_LIST_HEAD(&ei->i_orphan);

	/*
	 * Set transaction id's of transactions that have to be committed
	 * to finish f[data]sync. We set them to currently running transaction
	 * as we cannot be sure that the inode or some of its metadata isn't
	 * part of the transaction - the inode could have been reclaimed and
	 * now it is reread from disk.
	 */
	if (journal) {
		transaction_t *transaction;
		tid_t tid;

		read_lock(&journal->j_state_lock);
		if (journal->j_running_transaction)
			transaction = journal->j_running_transaction;
		else
			transaction = journal->j_committing_transaction;
		if (transaction)
			tid = transaction->t_tid;
		else
			tid = journal->j_commit_sequence;
		read_unlock(&journal->j_state_lock);
		ei->i_sync_tid = tid;
		ei->i_datasync_tid = tid;
	}

	if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
		if (ei->i_extra_isize == 0) {
			/* The extra space is currently unused. Use it. */
			ei->i_extra_isize = sizeof(struct ext4_inode) -
					    EXT4_GOOD_OLD_INODE_SIZE;
		} else {
			ext4_iget_extra_inode(inode, raw_inode, ei);
		}
	}

	EXT4_INODE_GET_XTIME(i_ctime, inode, raw_inode);
	EXT4_INODE_GET_XTIME(i_mtime, inode, raw_inode);
	EXT4_INODE_GET_XTIME(i_atime, inode, raw_inode);
	EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode);

	if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) {
		inode->i_version = le32_to_cpu(raw_inode->i_disk_version);
		if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
			if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi))
				inode->i_version |=
			(__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32;
		}
	}

	ret = 0;
	if (ei->i_file_acl &&
	    !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) {
		EXT4_ERROR_INODE(inode, "bad extended attribute block %llu",
				 ei->i_file_acl);
		ret = -EIO;
		goto bad_inode;
	} else if (!ext4_has_inline_data(inode)) {
		if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
			if ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
			    (S_ISLNK(inode->i_mode) &&
			     !ext4_inode_is_fast_symlink(inode))))
				/* Validate extent which is part of inode */
				ret = ext4_ext_check_inode(inode);
		} else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
			   (S_ISLNK(inode->i_mode) &&
			    !ext4_inode_is_fast_symlink(inode))) {
			/* Validate block references which are part of inode */
			ret = ext4_ind_check_inode(inode);
		}
	}
	if (ret)
		goto bad_inode;

	if (S_ISREG(inode->i_mode)) {
		inode->i_op = &ext4_file_inode_operations;
		if (test_opt(inode->i_sb, DAX))
			inode->i_fop = &ext4_dax_file_operations;
		else
			inode->i_fop = &ext4_file_operations;
		ext4_set_aops(inode);
	} else if (S_ISDIR(inode->i_mode)) {
		inode->i_op = &ext4_dir_inode_operations;
		inode->i_fop = &ext4_dir_operations;
	} else if (S_ISLNK(inode->i_mode)) {
		if (ext4_inode_is_fast_symlink(inode)) {
			inode->i_op = &ext4_fast_symlink_inode_operations;
			nd_terminate_link(ei->i_data, inode->i_size,
				sizeof(ei->i_data) - 1);
		} else {
			inode->i_op = &ext4_symlink_inode_operations;
			ext4_set_aops(inode);
		}
	} else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
		   S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
		inode->i_op = &ext4_special_inode_operations;
		if (raw_inode->i_block[0])
			init_special_inode(inode, inode->i_mode,
			   old_decode_dev(le32_to_cpu(raw_inode->i_block[0])));
		else
			init_special_inode(inode, inode->i_mode,
			   new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
	} else if (ino == EXT4_BOOT_LOADER_INO) {
		make_bad_inode(inode);
	} else {
		ret = -EIO;
		EXT4_ERROR_INODE(inode, "bogus i_mode (%o)", inode->i_mode);
		goto bad_inode;
	}
	brelse(iloc.bh);
	ext4_set_inode_flags(inode);
	unlock_new_inode(inode);
	return inode;

bad_inode:
	brelse(iloc.bh);
	iget_failed(inode);
	return ERR_PTR(ret);
}
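
/*
 * Callers must treat the return value as an ERR_PTR: -ENOMEM if no
 * in-core inode could be allocated, -ESTALE for deleted inodes, -EIO
 * for corruption, and so on, so the result is checked with IS_ERR()
 * rather than against NULL.  A freshly read inode arrives here locked
 * and I_NEW, and is published by unlock_new_inode() just above.
 */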

struct inode *ext4_iget_normal(struct super_block *sb, unsigned long ino)
{
	if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)
		return ERR_PTR(-EIO);
	return ext4_iget(sb, ino);
}
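
/*
 * This wrapper guards lookups whose inode number comes from on-disk
 * data: reserved inodes (everything below EXT4_FIRST_INO except the
 * root) are rejected with -EIO, so a corrupted directory entry cannot
 * be used to load a special inode.
 */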

static int ext4_inode_blocks_set(handle_t *handle,
				 struct ext4_inode *raw_inode,
				 struct ext4_inode_info *ei)
{
	struct inode *inode = &(ei->vfs_inode);
	u64 i_blocks = inode->i_blocks;
	struct super_block *sb = inode->i_sb;

	if (i_blocks <= ~0U) {
		/*
		 * i_blocks can be represented in a 32 bit variable
		 * as a multiple of 512 bytes
		 */
		raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
		raw_inode->i_blocks_high = 0;
		ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE);
		return 0;
	}
	if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE))
		return -EFBIG;

	if (i_blocks <= 0xffffffffffffULL) {
		/*
		 * i_blocks can be represented in a 48 bit variable
		 * as a multiple of 512 bytes
		 */
		raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
		raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32);
		ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE);
	} else {
		ext4_set_inode_flag(inode, EXT4_INODE_HUGE_FILE);
		/* i_blocks is stored in units of the file system block size */
		i_blocks = i_blocks >> (inode->i_blkbits - 9);
		raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
		raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32);
	}
	return 0;
}
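
/*
 * Encoding thresholds (the inverse of ext4_inode_blocks above): up to
 * 2^32 - 1 sectors (just under 2 TiB in 512-byte units) fits in
 * i_blocks_lo alone; up to 2^48 - 1 sectors (about 128 PiB) fits in
 * the high:lo pair; anything larger is rescaled to file system blocks
 * and flagged with EXT4_INODE_HUGE_FILE so readers know the units.
 */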

struct other_inode {
	unsigned long		orig_ino;
	struct ext4_inode	*raw_inode;
};

static int other_inode_match(struct inode * inode, unsigned long ino,
			     void *data)
{
	struct other_inode *oi = (struct other_inode *) data;

	if ((inode->i_ino != ino) ||
	    (inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW |
			       I_DIRTY_SYNC | I_DIRTY_DATASYNC)) ||
	    ((inode->i_state & I_DIRTY_TIME) == 0))
		return 0;
	spin_lock(&inode->i_lock);
	if (((inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW |
				I_DIRTY_SYNC | I_DIRTY_DATASYNC)) == 0) &&
	    (inode->i_state & I_DIRTY_TIME)) {
		struct ext4_inode_info	*ei = EXT4_I(inode);

		inode->i_state &= ~(I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED);
		spin_unlock(&inode->i_lock);

		spin_lock(&ei->i_raw_lock);
		EXT4_INODE_SET_XTIME(i_ctime, inode, oi->raw_inode);
		EXT4_INODE_SET_XTIME(i_mtime, inode, oi->raw_inode);
		EXT4_INODE_SET_XTIME(i_atime, inode, oi->raw_inode);
		ext4_inode_csum_set(inode, oi->raw_inode, ei);
		spin_unlock(&ei->i_raw_lock);
		trace_ext4_other_inode_update_time(inode, oi->orig_ino);
		return -1;
	}
	spin_unlock(&inode->i_lock);
	return -1;
}

/*
 * Opportunistically update the other time fields for other inodes in
 * the same inode table block.
 */
static void ext4_update_other_inodes_time(struct super_block *sb,
					  unsigned long orig_ino, char *buf)
{
	struct other_inode oi;
	unsigned long ino;
	int i, inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
	int inode_size = EXT4_INODE_SIZE(sb);

	oi.orig_ino = orig_ino;
	ino = orig_ino & ~(inodes_per_block - 1);
	for (i = 0; i < inodes_per_block; i++, ino++, buf += inode_size) {
		if (ino == orig_ino)
			continue;
		oi.raw_inode = (struct ext4_inode *) buf;
		(void) find_inode_nowait(sb, ino, other_inode_match, &oi);
	}
}
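
/*
 * Example walk (illustrative values): with 4 KiB blocks and 256-byte
 * inodes there are 16 inodes per itable block, so for orig_ino = 1000
 * the mask yields 1000 & ~15 = 992 and the loop offers inodes
 * 992..1007 (skipping 1000 itself) to other_inode_match(), relying on
 * inodes_per_block being a power of two.
 */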

/*
 * Post the struct inode info into an on-disk inode location in the
 * buffer-cache. This gobbles the caller's reference to the
 * buffer_head in the inode location struct.
 *
 * The caller must have write access to iloc->bh.
 */
static int ext4_do_update_inode(handle_t *handle,
				struct inode *inode,
				struct ext4_iloc *iloc)
{
	struct ext4_inode *raw_inode = ext4_raw_inode(iloc);
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct buffer_head *bh = iloc->bh;
	struct super_block *sb = inode->i_sb;
	int err = 0, rc, block;
	int need_datasync = 0, set_large_file = 0;
	uid_t i_uid;
	gid_t i_gid;

	spin_lock(&ei->i_raw_lock);

	/* For fields not tracked in the in-memory inode,
	 * initialise them to zero for new inodes. */
	if (ext4_test_inode_state(inode, EXT4_STATE_NEW))
		memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);

	ext4_get_inode_flags(ei);
	raw_inode->i_mode = cpu_to_le16(inode->i_mode);
	i_uid = i_uid_read(inode);
	i_gid = i_gid_read(inode);
	if (!(test_opt(inode->i_sb, NO_UID32))) {
		raw_inode->i_uid_low = cpu_to_le16(low_16_bits(i_uid));
		raw_inode->i_gid_low = cpu_to_le16(low_16_bits(i_gid));
		/*
		 * Fix up interoperability with old kernels. Otherwise, old
		 * inodes get re-used with the upper 16 bits of the uid/gid
		 * intact.
		 */
		if (!ei->i_dtime) {
			raw_inode->i_uid_high =
				cpu_to_le16(high_16_bits(i_uid));
			raw_inode->i_gid_high =
				cpu_to_le16(high_16_bits(i_gid));
		} else {
			raw_inode->i_uid_high = 0;
			raw_inode->i_gid_high = 0;
		}
	} else {
		raw_inode->i_uid_low = cpu_to_le16(fs_high2lowuid(i_uid));
		raw_inode->i_gid_low = cpu_to_le16(fs_high2lowgid(i_gid));
		raw_inode->i_uid_high = 0;
		raw_inode->i_gid_high = 0;
	}
	raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);

	EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode);
	EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode);
	EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode);
	EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode);

	err = ext4_inode_blocks_set(handle, raw_inode, ei);
	if (err) {
		spin_unlock(&ei->i_raw_lock);
		goto out_brelse;
	}
	raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
	raw_inode->i_flags = cpu_to_le32(ei->i_flags & 0xFFFFFFFF);
	if (likely(!test_opt2(inode->i_sb, HURD_COMPAT)))
		raw_inode->i_file_acl_high =
			cpu_to_le16(ei->i_file_acl >> 32);
	raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl);
	if (ei->i_disksize != ext4_isize(raw_inode)) {
		ext4_isize_set(raw_inode, ei->i_disksize);
		need_datasync = 1;
	}
	if (ei->i_disksize > 0x7fffffffULL) {
		if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
				EXT4_FEATURE_RO_COMPAT_LARGE_FILE) ||
		    EXT4_SB(sb)->s_es->s_rev_level ==
				cpu_to_le32(EXT4_GOOD_OLD_REV))
			set_large_file = 1;
	}
	raw_inode->i_generation = cpu_to_le32(inode->i_generation);
	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
		if (old_valid_dev(inode->i_rdev)) {
			raw_inode->i_block[0] =
				cpu_to_le32(old_encode_dev(inode->i_rdev));
			raw_inode->i_block[1] = 0;
		} else {
			raw_inode->i_block[0] = 0;
			raw_inode->i_block[1] =
				cpu_to_le32(new_encode_dev(inode->i_rdev));
			raw_inode->i_block[2] = 0;
		}
	} else if (!ext4_has_inline_data(inode)) {
		for (block = 0; block < EXT4_N_BLOCKS; block++)
			raw_inode->i_block[block] = ei->i_data[block];
	}

	if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) {
		raw_inode->i_disk_version = cpu_to_le32(inode->i_version);
		if (ei->i_extra_isize) {
			if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi))
				raw_inode->i_version_hi =
					cpu_to_le32(inode->i_version >> 32);
			raw_inode->i_extra_isize =
				cpu_to_le16(ei->i_extra_isize);
		}
	}
	ext4_inode_csum_set(inode, raw_inode, ei);
	spin_unlock(&ei->i_raw_lock);
	if (inode->i_sb->s_flags & MS_LAZYTIME)
		ext4_update_other_inodes_time(inode->i_sb, inode->i_ino,
					      bh->b_data);

	BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
	rc = ext4_handle_dirty_metadata(handle, NULL, bh);
	if (!err)
		err = rc;
	ext4_clear_inode_state(inode, EXT4_STATE_NEW);
	if (set_large_file) {
		BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get write access");
		err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
		if (err)
			goto out_brelse;
		ext4_update_dynamic_rev(sb);
		EXT4_SET_RO_COMPAT_FEATURE(sb,
					   EXT4_FEATURE_RO_COMPAT_LARGE_FILE);
		ext4_handle_sync(handle);
		err = ext4_handle_dirty_super(handle, sb);
	}
	ext4_update_inode_fsync_trans(handle, inode, need_datasync);
out_brelse:
	brelse(bh);
	ext4_std_error(inode->i_sb, err);
	return err;
}
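
/*
 * The MS_LAZYTIME hook above is the write-side of the lazytime
 * optimization: since the whole inode table block is already in bh and
 * about to be dirtied, timestamps of sibling inodes that are dirty
 * only in I_DIRTY_TIME are copied into the same block for free,
 * instead of costing each of them a separate future writeback.
 */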

/*
 * ext4_write_inode()
 *
 * We are called from a few places:
 *
 * - Within generic_file_aio_write() -> generic_write_sync() for O_SYNC files.
 *   Here, there will be no transaction running. We wait for any running
 *   transaction to commit.
 *
 * - Within flush work (sys_sync(), kupdate and such).
 *   We wait on commit, if told to.
 *
 * - Within iput_final() -> write_inode_now()
 *   We wait on commit, if told to.
 *
 * In all cases it is actually safe for us to return without doing anything,
 * because the inode has been copied into a raw inode buffer in
 * ext4_mark_inode_dirty(). This is a correctness thing for WB_SYNC_ALL
 * writeback.
 *
 * Note that we are absolutely dependent upon all inode dirtiers doing the
 * right thing: they *must* call mark_inode_dirty() after dirtying info in
 * which we are interested.
 *
 * It would be a bug for them to not do this. The code:
 *
 *	mark_inode_dirty(inode)
 *	stuff();
 *	inode->i_size = expr;
 *
 * is in error because write_inode() could occur while `stuff()' is running,
 * and the new i_size will be lost. Plus the inode will no longer be on the
 * superblock's dirty inode list.
 */
int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
{
	int err;

	if (WARN_ON_ONCE(current->flags & PF_MEMALLOC))
		return 0;

	if (EXT4_SB(inode->i_sb)->s_journal) {
		if (ext4_journal_current_handle()) {
			jbd_debug(1, "called recursively, non-PF_MEMALLOC!\n");
			dump_stack();
			return -EIO;
		}

		/*
		 * No need to force transaction in WB_SYNC_NONE mode. Also
		 * ext4_sync_fs() will force the commit after everything is
		 * written.
		 */
		if (wbc->sync_mode != WB_SYNC_ALL || wbc->for_sync)
			return 0;

		err = ext4_force_commit(inode->i_sb);
	} else {
		struct ext4_iloc iloc;

		err = __ext4_get_inode_loc(inode, &iloc, 0);
		if (err)
			return err;
		/*
		 * sync(2) will flush the whole buffer cache. No need to do
		 * it here separately for each inode.
		 */
		if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync)
			sync_dirty_buffer(iloc.bh);
		if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) {
			EXT4_ERROR_INODE_BLOCK(inode, iloc.bh->b_blocknr,
					       "IO error syncing inode");
			err = -EIO;
		}
		brelse(iloc.bh);
	}
	return err;
}

/*
 * In data=journal mode ext4_journalled_invalidatepage() may fail to invalidate
 * buffers that are attached to a page straddling i_size and are undergoing
 * commit. In that case we have to wait for commit to finish and try again.
 */
static void ext4_wait_for_tail_page_commit(struct inode *inode)
{
	struct page *page;
	unsigned offset;
	journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
	tid_t commit_tid = 0;
	int ret;

	offset = inode->i_size & (PAGE_CACHE_SIZE - 1);
	/*
	 * All buffers in the last page remain valid? Then there's nothing to
	 * do. We do the check mainly to optimize the common PAGE_CACHE_SIZE ==
	 * blocksize case
	 */
	if (offset > PAGE_CACHE_SIZE - (1 << inode->i_blkbits))
		return;
	while (1) {
		page = find_lock_page(inode->i_mapping,
				      inode->i_size >> PAGE_CACHE_SHIFT);
		if (!page)
			return;
		ret = __ext4_journalled_invalidatepage(page, offset,
						PAGE_CACHE_SIZE - offset);
		unlock_page(page);
		page_cache_release(page);
		if (ret != -EBUSY)
			return;
		commit_tid = 0;
		read_lock(&journal->j_state_lock);
		if (journal->j_committing_transaction)
			commit_tid = journal->j_committing_transaction->t_tid;
		read_unlock(&journal->j_state_lock);
		if (commit_tid)
			jbd2_log_wait_commit(journal, commit_tid);
	}
}
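
/*
 * Early-return example (illustrative): with 4 KiB pages and 1 KiB
 * blocks the function bails out only when offset > 3072, i.e. when
 * i_size ends inside the page's last block, because then no buffer in
 * the tail page lies wholly beyond i_size and there is nothing to
 * invalidate.  When blocksize == PAGE_CACHE_SIZE the test reduces to
 * offset > 0, covering every non-page-aligned i_size.
 */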
4494 4501
4495 /* 4502 /*
4496 * ext4_setattr() 4503 * ext4_setattr()
4497 * 4504 *
4498 * Called from notify_change. 4505 * Called from notify_change.
4499 * 4506 *
4500 * We want to trap VFS attempts to truncate the file as soon as 4507 * We want to trap VFS attempts to truncate the file as soon as
4501 * possible. In particular, we want to make sure that when the VFS 4508 * possible. In particular, we want to make sure that when the VFS
4502 * shrinks i_size, we put the inode on the orphan list and modify 4509 * shrinks i_size, we put the inode on the orphan list and modify
4503 * i_disksize immediately, so that during the subsequent flushing of 4510 * i_disksize immediately, so that during the subsequent flushing of
4504 * dirty pages and freeing of disk blocks, we can guarantee that any 4511 * dirty pages and freeing of disk blocks, we can guarantee that any
4505 * commit will leave the blocks being flushed in an unused state on 4512 * commit will leave the blocks being flushed in an unused state on
4506 * disk. (On recovery, the inode will get truncated and the blocks will 4513 * disk. (On recovery, the inode will get truncated and the blocks will
4507 * be freed, so we have a strong guarantee that no future commit will 4514 * be freed, so we have a strong guarantee that no future commit will
4508 * leave these blocks visible to the user.) 4515 * leave these blocks visible to the user.)
4509 * 4516 *
4510 * Another thing we have to assure is that if we are in ordered mode 4517 * Another thing we have to assure is that if we are in ordered mode
4511 * and inode is still attached to the committing transaction, we must 4518 * and inode is still attached to the committing transaction, we must
4512 * we start writeout of all the dirty pages which are being truncated. 4519 * we start writeout of all the dirty pages which are being truncated.
4513 * This way we are sure that all the data written in the previous 4520 * This way we are sure that all the data written in the previous
4514 * transaction are already on disk (truncate waits for pages under 4521 * transaction are already on disk (truncate waits for pages under
4515 * writeback). 4522 * writeback).
4516 * 4523 *
4517 * Called with inode->i_mutex down. 4524 * Called with inode->i_mutex down.
4518 */ 4525 */
4519 int ext4_setattr(struct dentry *dentry, struct iattr *attr) 4526 int ext4_setattr(struct dentry *dentry, struct iattr *attr)
4520 { 4527 {
4521 struct inode *inode = dentry->d_inode; 4528 struct inode *inode = dentry->d_inode;
4522 int error, rc = 0; 4529 int error, rc = 0;
4523 int orphan = 0; 4530 int orphan = 0;
4524 const unsigned int ia_valid = attr->ia_valid; 4531 const unsigned int ia_valid = attr->ia_valid;
4525 4532
4526 error = inode_change_ok(inode, attr); 4533 error = inode_change_ok(inode, attr);
4527 if (error) 4534 if (error)
4528 return error; 4535 return error;
4529 4536
4530 if (is_quota_modification(inode, attr)) 4537 if (is_quota_modification(inode, attr))
4531 dquot_initialize(inode); 4538 dquot_initialize(inode);
4532 if ((ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)) || 4539 if ((ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)) ||
4533 (ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) { 4540 (ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) {
4534 handle_t *handle; 4541 handle_t *handle;
4535 4542
4536 /* (user+group)*(old+new) structure, inode write (sb, 4543 /* (user+group)*(old+new) structure, inode write (sb,
4537 * inode block, ? - but truncate inode update has it) */ 4544 * inode block, ? - but truncate inode update has it) */
4538 handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 4545 handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
4539 (EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb) + 4546 (EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb) +
4540 EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb)) + 3); 4547 EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb)) + 3);
4541 if (IS_ERR(handle)) { 4548 if (IS_ERR(handle)) {
4542 error = PTR_ERR(handle); 4549 error = PTR_ERR(handle);
4543 goto err_out; 4550 goto err_out;
4544 } 4551 }
4545 error = dquot_transfer(inode, attr); 4552 error = dquot_transfer(inode, attr);
4546 if (error) { 4553 if (error) {
4547 ext4_journal_stop(handle); 4554 ext4_journal_stop(handle);
4548 return error; 4555 return error;
4549 } 4556 }
4550 /* Update corresponding info in inode so that everything is in 4557 /* Update corresponding info in inode so that everything is in
4551 * one transaction */ 4558 * one transaction */
4552 if (attr->ia_valid & ATTR_UID) 4559 if (attr->ia_valid & ATTR_UID)
4553 inode->i_uid = attr->ia_uid; 4560 inode->i_uid = attr->ia_uid;
4554 if (attr->ia_valid & ATTR_GID) 4561 if (attr->ia_valid & ATTR_GID)
4555 inode->i_gid = attr->ia_gid; 4562 inode->i_gid = attr->ia_gid;
4556 error = ext4_mark_inode_dirty(handle, inode); 4563 error = ext4_mark_inode_dirty(handle, inode);
4557 ext4_journal_stop(handle); 4564 ext4_journal_stop(handle);
4558 } 4565 }
4559 4566
4560 if (attr->ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) { 4567 if (attr->ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) {
4561 handle_t *handle; 4568 handle_t *handle;
4562 4569
4563 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { 4570 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
4564 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 4571 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
4565 4572
4566 if (attr->ia_size > sbi->s_bitmap_maxbytes) 4573 if (attr->ia_size > sbi->s_bitmap_maxbytes)
4567 return -EFBIG; 4574 return -EFBIG;
4568 } 4575 }
4569 4576
4570 if (IS_I_VERSION(inode) && attr->ia_size != inode->i_size) 4577 if (IS_I_VERSION(inode) && attr->ia_size != inode->i_size)
4571 inode_inc_iversion(inode); 4578 inode_inc_iversion(inode);
4572 4579
4573 if (S_ISREG(inode->i_mode) && 4580 if (S_ISREG(inode->i_mode) &&
4574 (attr->ia_size < inode->i_size)) { 4581 (attr->ia_size < inode->i_size)) {
4575 if (ext4_should_order_data(inode)) { 4582 if (ext4_should_order_data(inode)) {
4576 error = ext4_begin_ordered_truncate(inode, 4583 error = ext4_begin_ordered_truncate(inode,
4577 attr->ia_size); 4584 attr->ia_size);
4578 if (error) 4585 if (error)
4579 goto err_out; 4586 goto err_out;
4580 } 4587 }
4581 handle = ext4_journal_start(inode, EXT4_HT_INODE, 3); 4588 handle = ext4_journal_start(inode, EXT4_HT_INODE, 3);
4582 if (IS_ERR(handle)) { 4589 if (IS_ERR(handle)) {
4583 error = PTR_ERR(handle); 4590 error = PTR_ERR(handle);
4584 goto err_out; 4591 goto err_out;
4585 } 4592 }
4586 if (ext4_handle_valid(handle)) { 4593 if (ext4_handle_valid(handle)) {
4587 error = ext4_orphan_add(handle, inode); 4594 error = ext4_orphan_add(handle, inode);
4588 orphan = 1; 4595 orphan = 1;
4589 } 4596 }
4590 down_write(&EXT4_I(inode)->i_data_sem); 4597 down_write(&EXT4_I(inode)->i_data_sem);
4591 EXT4_I(inode)->i_disksize = attr->ia_size; 4598 EXT4_I(inode)->i_disksize = attr->ia_size;
4592 rc = ext4_mark_inode_dirty(handle, inode); 4599 rc = ext4_mark_inode_dirty(handle, inode);
4593 if (!error) 4600 if (!error)
4594 error = rc; 4601 error = rc;
4595 /* 4602 /*
4596 * We have to update i_size under i_data_sem together 4603 * We have to update i_size under i_data_sem together
4597 * with i_disksize to avoid races with writeback code 4604 * with i_disksize to avoid races with writeback code
4598 * running ext4_wb_update_i_disksize(). 4605 * running ext4_wb_update_i_disksize().
4599 */ 4606 */
4600 if (!error) 4607 if (!error)
4601 i_size_write(inode, attr->ia_size); 4608 i_size_write(inode, attr->ia_size);
4602 up_write(&EXT4_I(inode)->i_data_sem); 4609 up_write(&EXT4_I(inode)->i_data_sem);
4603 ext4_journal_stop(handle); 4610 ext4_journal_stop(handle);
4604 if (error) { 4611 if (error) {
4605 ext4_orphan_del(NULL, inode); 4612 ext4_orphan_del(NULL, inode);
4606 goto err_out; 4613 goto err_out;
4607 } 4614 }
4608 } else { 4615 } else {
4609 loff_t oldsize = inode->i_size; 4616 loff_t oldsize = inode->i_size;
4610 4617
4611 i_size_write(inode, attr->ia_size); 4618 i_size_write(inode, attr->ia_size);
4612 pagecache_isize_extended(inode, oldsize, inode->i_size); 4619 pagecache_isize_extended(inode, oldsize, inode->i_size);
4613 } 4620 }
4614 4621
4615 /* 4622 /*
4616 * Blocks are going to be removed from the inode. Wait 4623 * Blocks are going to be removed from the inode. Wait
4617 * for dio in flight. Temporarily disable 4624 * for dio in flight. Temporarily disable
4618 * dioread_nolock to prevent livelock. 4625 * dioread_nolock to prevent livelock.
4619 */ 4626 */
4620 if (orphan) { 4627 if (orphan) {
4621 if (!ext4_should_journal_data(inode)) { 4628 if (!ext4_should_journal_data(inode)) {
4622 ext4_inode_block_unlocked_dio(inode); 4629 ext4_inode_block_unlocked_dio(inode);
4623 inode_dio_wait(inode); 4630 inode_dio_wait(inode);
4624 ext4_inode_resume_unlocked_dio(inode); 4631 ext4_inode_resume_unlocked_dio(inode);
4625 } else 4632 } else
4626 ext4_wait_for_tail_page_commit(inode); 4633 ext4_wait_for_tail_page_commit(inode);
4627 } 4634 }
4628 /* 4635 /*
4629 * Truncate pagecache after we've waited for commit 4636 * Truncate pagecache after we've waited for commit
4630 * in data=journal mode to make pages freeable. 4637 * in data=journal mode to make pages freeable.
4631 */ 4638 */
4632 truncate_pagecache(inode, inode->i_size); 4639 truncate_pagecache(inode, inode->i_size);
4633 } 4640 }
4634 /* 4641 /*
4635 * We want to call ext4_truncate() even if attr->ia_size == 4642 * We want to call ext4_truncate() even if attr->ia_size ==
4636 * inode->i_size for cases like truncation of fallocated space 4643 * inode->i_size for cases like truncation of fallocated space
4637 */ 4644 */
4638 if (attr->ia_valid & ATTR_SIZE) 4645 if (attr->ia_valid & ATTR_SIZE)
4639 ext4_truncate(inode); 4646 ext4_truncate(inode);
4640 4647
4641 if (!rc) { 4648 if (!rc) {
4642 setattr_copy(inode, attr); 4649 setattr_copy(inode, attr);
4643 mark_inode_dirty(inode); 4650 mark_inode_dirty(inode);
4644 } 4651 }
4645 4652
4646 /* 4653 /*
4647 * If the call to ext4_truncate failed to get a transaction handle at 4654 * If the call to ext4_truncate failed to get a transaction handle at
4648 * all, we need to clean up the in-core orphan list manually. 4655 * all, we need to clean up the in-core orphan list manually.
4649 */ 4656 */
4650 if (orphan && inode->i_nlink) 4657 if (orphan && inode->i_nlink)
4651 ext4_orphan_del(NULL, inode); 4658 ext4_orphan_del(NULL, inode);
4652 4659
4653 if (!rc && (ia_valid & ATTR_MODE)) 4660 if (!rc && (ia_valid & ATTR_MODE))
4654 rc = posix_acl_chmod(inode, inode->i_mode); 4661 rc = posix_acl_chmod(inode, inode->i_mode);
4655 4662
4656 err_out: 4663 err_out:
4657 ext4_std_error(inode->i_sb, error); 4664 ext4_std_error(inode->i_sb, error);
4658 if (!error) 4665 if (!error)
4659 error = rc; 4666 error = rc;
4660 return error; 4667 return error;
4661 } 4668 }
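
The ATTR_SIZE handling above is normally driven by truncate(2)/ftruncate(2) through notify_change(). A minimal userspace sketch of what reaches this path; the file name is hypothetical:

#include <fcntl.h>
#include <unistd.h>

int shrink_file(void)
{
	int fd = open("/mnt/ext4/testfile", O_RDWR);

	if (fd < 0)
		return -1;
	/* Shrinking i_size takes the orphan/truncate path in ext4_setattr(). */
	if (ftruncate(fd, 4096) < 0) {
		close(fd);
		return -1;
	}
	return close(fd);
}
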
4662 4669
4663 int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, 4670 int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
4664 struct kstat *stat) 4671 struct kstat *stat)
4665 { 4672 {
4666 struct inode *inode; 4673 struct inode *inode;
4667 unsigned long long delalloc_blocks; 4674 unsigned long long delalloc_blocks;
4668 4675
4669 inode = dentry->d_inode; 4676 inode = dentry->d_inode;
4670 generic_fillattr(inode, stat); 4677 generic_fillattr(inode, stat);
4671 4678
4672 /* 4679 /*
4673 * If there is inline data in the inode, the inode will normally not 4680 * If there is inline data in the inode, the inode will normally not
4674 * have data blocks allocated (it may have an external xattr block). 4681 * have data blocks allocated (it may have an external xattr block).
4675 * Report at least one sector for such files, so that tools like tar, 4682 * Report at least one sector for such files, so that tools like tar,
4676 * rsync and others don't incorrectly think the file is completely sparse. 4683 * rsync and others don't incorrectly think the file is completely sparse.
4677 */ 4684 */
4678 if (unlikely(ext4_has_inline_data(inode))) 4685 if (unlikely(ext4_has_inline_data(inode)))
4679 stat->blocks += (stat->size + 511) >> 9; 4686 stat->blocks += (stat->size + 511) >> 9;
4680 4687
4681 /* 4688 /*
4682 * We can't update i_blocks if the block allocation is delayed 4689 * We can't update i_blocks if the block allocation is delayed
4683 * otherwise in the case of system crash before the real block 4690 * otherwise in the case of system crash before the real block
4684 * allocation is done, we will have i_blocks inconsistent with 4691 * allocation is done, we will have i_blocks inconsistent with
4685 * on-disk file blocks. 4692 * on-disk file blocks.
4686 * We always keep i_blocks updated together with real 4693 * We always keep i_blocks updated together with real
4687 * allocation. But so as not to confuse userspace, stat 4694 * allocation. But so as not to confuse userspace, stat
4688 * returns blocks that include the delayed allocation 4695 * returns blocks that include the delayed allocation
4689 * blocks for this file. 4696 * blocks for this file.
4690 */ 4697 */
4691 delalloc_blocks = EXT4_C2B(EXT4_SB(inode->i_sb), 4698 delalloc_blocks = EXT4_C2B(EXT4_SB(inode->i_sb),
4692 EXT4_I(inode)->i_reserved_data_blocks); 4699 EXT4_I(inode)->i_reserved_data_blocks);
4693 stat->blocks += delalloc_blocks << (inode->i_sb->s_blocksize_bits - 9); 4700 stat->blocks += delalloc_blocks << (inode->i_sb->s_blocksize_bits - 9);
4694 return 0; 4701 return 0;
4695 } 4702 }
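
A worked example of the two adjustments above, with illustrative numbers (bigalloc off, so clusters equal blocks):

/*
 * Illustrative arithmetic for ext4_getattr():
 *  - a 100-byte inline-data file reports (100 + 511) >> 9 == 1 extra
 *    sector, so it no longer looks completely sparse;
 *  - with 8 reserved delalloc blocks on a 4K-block filesystem,
 *    stat->blocks grows by 8 << (12 - 9) == 64 512-byte sectors.
 */
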
4696 4703
4697 static int ext4_index_trans_blocks(struct inode *inode, int lblocks, 4704 static int ext4_index_trans_blocks(struct inode *inode, int lblocks,
4698 int pextents) 4705 int pextents)
4699 { 4706 {
4700 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) 4707 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
4701 return ext4_ind_trans_blocks(inode, lblocks); 4708 return ext4_ind_trans_blocks(inode, lblocks);
4702 return ext4_ext_index_trans_blocks(inode, pextents); 4709 return ext4_ext_index_trans_blocks(inode, pextents);
4703 } 4710 }
4704 4711
4705 /* 4712 /*
4706 * Account for index blocks, block group bitmaps and block group 4713 * Account for index blocks, block group bitmaps and block group
4707 * descriptor blocks if we modify data blocks and index blocks. In the 4714 * descriptor blocks if we modify data blocks and index blocks. In the
4708 * worst case, the index blocks are spread over different block groups. 4715 * worst case, the index blocks are spread over different block groups.
4709 * 4716 *
4710 * If data blocks are discontiguous, they may spread over 4717 * If data blocks are discontiguous, they may spread over
4711 * different block groups too. If they are contiguous, with flexbg, 4718 * different block groups too. If they are contiguous, with flexbg,
4712 * they could still cross a block group boundary. 4719 * they could still cross a block group boundary.
4713 * 4720 *
4714 * Also account for superblock, inode, quota and xattr blocks. 4721 * Also account for superblock, inode, quota and xattr blocks.
4715 */ 4722 */
4716 static int ext4_meta_trans_blocks(struct inode *inode, int lblocks, 4723 static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
4717 int pextents) 4724 int pextents)
4718 { 4725 {
4719 ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb); 4726 ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb);
4720 int gdpblocks; 4727 int gdpblocks;
4721 int idxblocks; 4728 int idxblocks;
4722 int ret = 0; 4729 int ret = 0;
4723 4730
4724 /* 4731 /*
4725 * How many index blocks do we need to touch to map @lblocks logical blocks 4732 * How many index blocks do we need to touch to map @lblocks logical blocks
4726 * to @pextents physical extents? 4733 * to @pextents physical extents?
4727 */ 4734 */
4728 idxblocks = ext4_index_trans_blocks(inode, lblocks, pextents); 4735 idxblocks = ext4_index_trans_blocks(inode, lblocks, pextents);
4729 4736
4730 ret = idxblocks; 4737 ret = idxblocks;
4731 4738
4732 /* 4739 /*
4733 * Now let's see how many group bitmaps and group descriptors we 4740 * Now let's see how many group bitmaps and group descriptors we
4734 * need to account for. 4741 * need to account for.
4735 */ 4742 */
4736 groups = idxblocks + pextents; 4743 groups = idxblocks + pextents;
4737 gdpblocks = groups; 4744 gdpblocks = groups;
4738 if (groups > ngroups) 4745 if (groups > ngroups)
4739 groups = ngroups; 4746 groups = ngroups;
4740 if (groups > EXT4_SB(inode->i_sb)->s_gdb_count) 4747 if (groups > EXT4_SB(inode->i_sb)->s_gdb_count)
4741 gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count; 4748 gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count;
4742 4749
4743 /* bitmaps and block group descriptor blocks */ 4750 /* bitmaps and block group descriptor blocks */
4744 ret += groups + gdpblocks; 4751 ret += groups + gdpblocks;
4745 4752
4746 /* Blocks for super block, inode, quota and xattr blocks */ 4753 /* Blocks for super block, inode, quota and xattr blocks */
4747 ret += EXT4_META_TRANS_BLOCKS(inode->i_sb); 4754 ret += EXT4_META_TRANS_BLOCKS(inode->i_sb);
4748 4755
4749 return ret; 4756 return ret;
4750 } 4757 }
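
A worked pass through ext4_meta_trans_blocks() with illustrative numbers (the value returned by ext4_ext_index_trans_blocks() is assumed):

/*
 * Example: extent-mapped inode, lblocks = 4, pextents = 4,
 * ngroups = 128, s_gdb_count = 16, and assume idxblocks = 5.
 * Then groups = 5 + 4 = 9 (below both limits, so no clamping),
 * gdpblocks = 9, and the result is
 * 5 + (9 + 9) + EXT4_META_TRANS_BLOCKS(sb) credits.
 */
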
4751 4758
4752 /* 4759 /*
4753 * Calculate the total number of credits to reserve to fit 4760 * Calculate the total number of credits to reserve to fit
4754 * the modification of a single page into a single transaction, 4761 * the modification of a single page into a single transaction,
4755 * which may include multiple chunks of block allocations. 4762 * which may include multiple chunks of block allocations.
4756 * 4763 *
4757 * This could be called via ext4_write_begin() 4764 * This could be called via ext4_write_begin()
4758 * 4765 *
4759 * We need to consider the worst case, when 4766 * We need to consider the worst case, when
4760 * there is one new block per extent. 4767 * there is one new block per extent.
4761 */ 4768 */
4762 int ext4_writepage_trans_blocks(struct inode *inode) 4769 int ext4_writepage_trans_blocks(struct inode *inode)
4763 { 4770 {
4764 int bpp = ext4_journal_blocks_per_page(inode); 4771 int bpp = ext4_journal_blocks_per_page(inode);
4765 int ret; 4772 int ret;
4766 4773
4767 ret = ext4_meta_trans_blocks(inode, bpp, bpp); 4774 ret = ext4_meta_trans_blocks(inode, bpp, bpp);
4768 4775
4769 /* Account for data blocks for journalled mode */ 4776 /* Account for data blocks for journalled mode */
4770 if (ext4_should_journal_data(inode)) 4777 if (ext4_should_journal_data(inode))
4771 ret += bpp; 4778 ret += bpp;
4772 return ret; 4779 return ret;
4773 } 4780 }
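
A minimal sketch of the journal start/stop pattern this estimate feeds; the same pairing appears in ext4_page_mkwrite() further down. The function name is illustrative:

/* Sketch: reserve worst-case per-page credits before dirtying buffers. */
static int example_page_transaction(struct inode *inode)
{
	handle_t *handle;

	handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
				    ext4_writepage_trans_blocks(inode));
	if (IS_ERR(handle))
		return PTR_ERR(handle);
	/* ... modify the page's buffers under this handle ... */
	return ext4_journal_stop(handle);
}
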
4774 4781
4775 /* 4782 /*
4776 * Calculate the journal credits for a chunk of data modification. 4783 * Calculate the journal credits for a chunk of data modification.
4777 * 4784 *
4778 * This is called from DIO, fallocate or whoever else calls 4785 * This is called from DIO, fallocate or whoever else calls
4779 * ext4_map_blocks() to map/allocate a chunk of contiguous disk blocks. 4786 * ext4_map_blocks() to map/allocate a chunk of contiguous disk blocks.
4780 * 4787 *
4781 * Journal buffers for data blocks are not included here, as DIO 4788 * Journal buffers for data blocks are not included here, as DIO
4782 * and fallocate do not need to journal data buffers. 4789 * and fallocate do not need to journal data buffers.
4783 */ 4790 */
4784 int ext4_chunk_trans_blocks(struct inode *inode, int nrblocks) 4791 int ext4_chunk_trans_blocks(struct inode *inode, int nrblocks)
4785 { 4792 {
4786 return ext4_meta_trans_blocks(inode, nrblocks, 1); 4793 return ext4_meta_trans_blocks(inode, nrblocks, 1);
4787 } 4794 }
4788 4795
4789 /* 4796 /*
4790 * The caller must have previously called ext4_reserve_inode_write(). 4797 * The caller must have previously called ext4_reserve_inode_write().
4791 * Given this, we know that the caller already has write access to iloc->bh. 4798 * Given this, we know that the caller already has write access to iloc->bh.
4792 */ 4799 */
4793 int ext4_mark_iloc_dirty(handle_t *handle, 4800 int ext4_mark_iloc_dirty(handle_t *handle,
4794 struct inode *inode, struct ext4_iloc *iloc) 4801 struct inode *inode, struct ext4_iloc *iloc)
4795 { 4802 {
4796 int err = 0; 4803 int err = 0;
4797 4804
4798 if (IS_I_VERSION(inode)) 4805 if (IS_I_VERSION(inode))
4799 inode_inc_iversion(inode); 4806 inode_inc_iversion(inode);
4800 4807
4801 /* the do_update_inode consumes one bh->b_count */ 4808 /* the do_update_inode consumes one bh->b_count */
4802 get_bh(iloc->bh); 4809 get_bh(iloc->bh);
4803 4810
4804 /* ext4_do_update_inode() does jbd2_journal_dirty_metadata */ 4811 /* ext4_do_update_inode() does jbd2_journal_dirty_metadata */
4805 err = ext4_do_update_inode(handle, inode, iloc); 4812 err = ext4_do_update_inode(handle, inode, iloc);
4806 put_bh(iloc->bh); 4813 put_bh(iloc->bh);
4807 return err; 4814 return err;
4808 } 4815 }
4809 4816
4810 /* 4817 /*
4811 * On success, we end up with an outstanding reference count against 4818 * On success, we end up with an outstanding reference count against
4812 * iloc->bh. This _must_ be cleaned up later. 4819 * iloc->bh. This _must_ be cleaned up later.
4813 */ 4820 */
4814 4821
4815 int 4822 int
4816 ext4_reserve_inode_write(handle_t *handle, struct inode *inode, 4823 ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
4817 struct ext4_iloc *iloc) 4824 struct ext4_iloc *iloc)
4818 { 4825 {
4819 int err; 4826 int err;
4820 4827
4821 err = ext4_get_inode_loc(inode, iloc); 4828 err = ext4_get_inode_loc(inode, iloc);
4822 if (!err) { 4829 if (!err) {
4823 BUFFER_TRACE(iloc->bh, "get_write_access"); 4830 BUFFER_TRACE(iloc->bh, "get_write_access");
4824 err = ext4_journal_get_write_access(handle, iloc->bh); 4831 err = ext4_journal_get_write_access(handle, iloc->bh);
4825 if (err) { 4832 if (err) {
4826 brelse(iloc->bh); 4833 brelse(iloc->bh);
4827 iloc->bh = NULL; 4834 iloc->bh = NULL;
4828 } 4835 }
4829 } 4836 }
4830 ext4_std_error(inode->i_sb, err); 4837 ext4_std_error(inode->i_sb, err);
4831 return err; 4838 return err;
4832 } 4839 }
4833 4840
4834 /* 4841 /*
4835 * Expand an inode by new_extra_isize bytes. 4842 * Expand an inode by new_extra_isize bytes.
4836 * Returns 0 on success or negative error number on failure. 4843 * Returns 0 on success or negative error number on failure.
4837 */ 4844 */
4838 static int ext4_expand_extra_isize(struct inode *inode, 4845 static int ext4_expand_extra_isize(struct inode *inode,
4839 unsigned int new_extra_isize, 4846 unsigned int new_extra_isize,
4840 struct ext4_iloc iloc, 4847 struct ext4_iloc iloc,
4841 handle_t *handle) 4848 handle_t *handle)
4842 { 4849 {
4843 struct ext4_inode *raw_inode; 4850 struct ext4_inode *raw_inode;
4844 struct ext4_xattr_ibody_header *header; 4851 struct ext4_xattr_ibody_header *header;
4845 4852
4846 if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) 4853 if (EXT4_I(inode)->i_extra_isize >= new_extra_isize)
4847 return 0; 4854 return 0;
4848 4855
4849 raw_inode = ext4_raw_inode(&iloc); 4856 raw_inode = ext4_raw_inode(&iloc);
4850 4857
4851 header = IHDR(inode, raw_inode); 4858 header = IHDR(inode, raw_inode);
4852 4859
4853 /* No extended attributes present */ 4860 /* No extended attributes present */
4854 if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) || 4861 if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) ||
4855 header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) { 4862 header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) {
4856 memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0, 4863 memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0,
4857 new_extra_isize); 4864 new_extra_isize);
4858 EXT4_I(inode)->i_extra_isize = new_extra_isize; 4865 EXT4_I(inode)->i_extra_isize = new_extra_isize;
4859 return 0; 4866 return 0;
4860 } 4867 }
4861 4868
4862 /* try to expand with EAs present */ 4869 /* try to expand with EAs present */
4863 return ext4_expand_extra_isize_ea(inode, new_extra_isize, 4870 return ext4_expand_extra_isize_ea(inode, new_extra_isize,
4864 raw_inode, handle); 4871 raw_inode, handle);
4865 } 4872 }
4866 4873
4867 /* 4874 /*
4868 * What we do here is to mark the in-core inode as clean with respect to inode 4875 * What we do here is to mark the in-core inode as clean with respect to inode
4869 * dirtiness (it may still be data-dirty). 4876 * dirtiness (it may still be data-dirty).
4870 * This means that the in-core inode may be reaped by prune_icache 4877 * This means that the in-core inode may be reaped by prune_icache
4871 * without having to perform any I/O. This is a very good thing, 4878 * without having to perform any I/O. This is a very good thing,
4872 * because *any* task may call prune_icache - even ones which 4879 * because *any* task may call prune_icache - even ones which
4873 * have a transaction open against a different journal. 4880 * have a transaction open against a different journal.
4874 * 4881 *
4875 * Is this cheating? Not really. Sure, we haven't written the 4882 * Is this cheating? Not really. Sure, we haven't written the
4876 * inode out, but prune_icache isn't a user-visible syncing function. 4883 * inode out, but prune_icache isn't a user-visible syncing function.
4877 * Whenever the user wants stuff synced (sys_sync, sys_msync, sys_fsync) 4884 * Whenever the user wants stuff synced (sys_sync, sys_msync, sys_fsync)
4878 * we start and wait on commits. 4885 * we start and wait on commits.
4879 */ 4886 */
4880 int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) 4887 int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
4881 { 4888 {
4882 struct ext4_iloc iloc; 4889 struct ext4_iloc iloc;
4883 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 4890 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
4884 static unsigned int mnt_count; 4891 static unsigned int mnt_count;
4885 int err, ret; 4892 int err, ret;
4886 4893
4887 might_sleep(); 4894 might_sleep();
4888 trace_ext4_mark_inode_dirty(inode, _RET_IP_); 4895 trace_ext4_mark_inode_dirty(inode, _RET_IP_);
4889 err = ext4_reserve_inode_write(handle, inode, &iloc); 4896 err = ext4_reserve_inode_write(handle, inode, &iloc);
4890 if (ext4_handle_valid(handle) && 4897 if (ext4_handle_valid(handle) &&
4891 EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && 4898 EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
4892 !ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) { 4899 !ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) {
4893 /* 4900 /*
4894 * We need extra buffer credits since we may write into EA block 4901 * We need extra buffer credits since we may write into EA block
4895 * with this same handle. If journal_extend fails, then it will 4902 * with this same handle. If journal_extend fails, then it will
4896 * only result in a minor loss of functionality for that inode. 4903 * only result in a minor loss of functionality for that inode.
4897 * If this is felt to be critical, then e2fsck should be run to 4904 * If this is felt to be critical, then e2fsck should be run to
4898 * force a large enough s_min_extra_isize. 4905 * force a large enough s_min_extra_isize.
4899 */ 4906 */
4900 if ((jbd2_journal_extend(handle, 4907 if ((jbd2_journal_extend(handle,
4901 EXT4_DATA_TRANS_BLOCKS(inode->i_sb))) == 0) { 4908 EXT4_DATA_TRANS_BLOCKS(inode->i_sb))) == 0) {
4902 ret = ext4_expand_extra_isize(inode, 4909 ret = ext4_expand_extra_isize(inode,
4903 sbi->s_want_extra_isize, 4910 sbi->s_want_extra_isize,
4904 iloc, handle); 4911 iloc, handle);
4905 if (ret) { 4912 if (ret) {
4906 ext4_set_inode_state(inode, 4913 ext4_set_inode_state(inode,
4907 EXT4_STATE_NO_EXPAND); 4914 EXT4_STATE_NO_EXPAND);
4908 if (mnt_count != 4915 if (mnt_count !=
4909 le16_to_cpu(sbi->s_es->s_mnt_count)) { 4916 le16_to_cpu(sbi->s_es->s_mnt_count)) {
4910 ext4_warning(inode->i_sb, 4917 ext4_warning(inode->i_sb,
4911 "Unable to expand inode %lu. Delete" 4918 "Unable to expand inode %lu. Delete"
4912 " some EAs or run e2fsck.", 4919 " some EAs or run e2fsck.",
4913 inode->i_ino); 4920 inode->i_ino);
4914 mnt_count = 4921 mnt_count =
4915 le16_to_cpu(sbi->s_es->s_mnt_count); 4922 le16_to_cpu(sbi->s_es->s_mnt_count);
4916 } 4923 }
4917 } 4924 }
4918 } 4925 }
4919 } 4926 }
4920 if (!err) 4927 if (!err)
4921 err = ext4_mark_iloc_dirty(handle, inode, &iloc); 4928 err = ext4_mark_iloc_dirty(handle, inode, &iloc);
4922 return err; 4929 return err;
4923 } 4930 }
4924 4931
4925 /* 4932 /*
4926 * ext4_dirty_inode() is called from __mark_inode_dirty() 4933 * ext4_dirty_inode() is called from __mark_inode_dirty()
4927 * 4934 *
4928 * We're really interested in the case where a file is being extended. 4935 * We're really interested in the case where a file is being extended.
4929 * i_size has been changed by generic_commit_write() and we thus need 4936 * i_size has been changed by generic_commit_write() and we thus need
4930 * to include the updated inode in the current transaction. 4937 * to include the updated inode in the current transaction.
4931 * 4938 *
4932 * Also, dquot_alloc_block() will always dirty the inode when blocks 4939 * Also, dquot_alloc_block() will always dirty the inode when blocks
4933 * are allocated to the file. 4940 * are allocated to the file.
4934 * 4941 *
4935 * If the inode is marked synchronous, we don't honour that here - doing 4942 * If the inode is marked synchronous, we don't honour that here - doing
4936 * so would cause a commit on atime updates, which we don't bother doing. 4943 * so would cause a commit on atime updates, which we don't bother doing.
4937 * We handle synchronous inodes at the highest possible level. 4944 * We handle synchronous inodes at the highest possible level.
4938 * 4945 *
4939 * If only the I_DIRTY_TIME flag is set, we can skip everything. If 4946 * If only the I_DIRTY_TIME flag is set, we can skip everything. If
4940 * I_DIRTY_TIME and I_DIRTY_SYNC are set, the only inode fields we need 4947 * I_DIRTY_TIME and I_DIRTY_SYNC are set, the only inode fields we need
4941 * to copy into the on-disk inode structure are the timestamp fields. 4948 * to copy into the on-disk inode structure are the timestamp fields.
4942 */ 4949 */
4943 void ext4_dirty_inode(struct inode *inode, int flags) 4950 void ext4_dirty_inode(struct inode *inode, int flags)
4944 { 4951 {
4945 handle_t *handle; 4952 handle_t *handle;
4946 4953
4947 if (flags == I_DIRTY_TIME) 4954 if (flags == I_DIRTY_TIME)
4948 return; 4955 return;
4949 handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); 4956 handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
4950 if (IS_ERR(handle)) 4957 if (IS_ERR(handle))
4951 goto out; 4958 goto out;
4952 4959
4953 ext4_mark_inode_dirty(handle, inode); 4960 ext4_mark_inode_dirty(handle, inode);
4954 4961
4955 ext4_journal_stop(handle); 4962 ext4_journal_stop(handle);
4956 out: 4963 out:
4957 return; 4964 return;
4958 } 4965 }
4959 4966
4960 #if 0 4967 #if 0
4961 /* 4968 /*
4962 * Bind an inode's backing buffer_head into this transaction, to prevent 4969 * Bind an inode's backing buffer_head into this transaction, to prevent
4963 * it from being flushed to disk early. Unlike 4970 * it from being flushed to disk early. Unlike
4964 * ext4_reserve_inode_write, this leaves behind no bh reference and 4971 * ext4_reserve_inode_write, this leaves behind no bh reference and
4965 * returns no iloc structure, so the caller needs to repeat the iloc 4972 * returns no iloc structure, so the caller needs to repeat the iloc
4966 * lookup to mark the inode dirty later. 4973 * lookup to mark the inode dirty later.
4967 */ 4974 */
4968 static int ext4_pin_inode(handle_t *handle, struct inode *inode) 4975 static int ext4_pin_inode(handle_t *handle, struct inode *inode)
4969 { 4976 {
4970 struct ext4_iloc iloc; 4977 struct ext4_iloc iloc;
4971 4978
4972 int err = 0; 4979 int err = 0;
4973 if (handle) { 4980 if (handle) {
4974 err = ext4_get_inode_loc(inode, &iloc); 4981 err = ext4_get_inode_loc(inode, &iloc);
4975 if (!err) { 4982 if (!err) {
4976 BUFFER_TRACE(iloc.bh, "get_write_access"); 4983 BUFFER_TRACE(iloc.bh, "get_write_access");
4977 err = jbd2_journal_get_write_access(handle, iloc.bh); 4984 err = jbd2_journal_get_write_access(handle, iloc.bh);
4978 if (!err) 4985 if (!err)
4979 err = ext4_handle_dirty_metadata(handle, 4986 err = ext4_handle_dirty_metadata(handle,
4980 NULL, 4987 NULL,
4981 iloc.bh); 4988 iloc.bh);
4982 brelse(iloc.bh); 4989 brelse(iloc.bh);
4983 } 4990 }
4984 } 4991 }
4985 ext4_std_error(inode->i_sb, err); 4992 ext4_std_error(inode->i_sb, err);
4986 return err; 4993 return err;
4987 } 4994 }
4988 #endif 4995 #endif
4989 4996
4990 int ext4_change_inode_journal_flag(struct inode *inode, int val) 4997 int ext4_change_inode_journal_flag(struct inode *inode, int val)
4991 { 4998 {
4992 journal_t *journal; 4999 journal_t *journal;
4993 handle_t *handle; 5000 handle_t *handle;
4994 int err; 5001 int err;
4995 5002
4996 /* 5003 /*
4997 * We have to be very careful here: changing a data block's 5004 * We have to be very careful here: changing a data block's
4998 * journaling status dynamically is dangerous. If we write a 5005 * journaling status dynamically is dangerous. If we write a
4999 * data block to the journal, change the status and then delete 5006 * data block to the journal, change the status and then delete
5000 * that block, we risk forgetting to revoke the old log record 5007 * that block, we risk forgetting to revoke the old log record
5001 * from the journal and so a subsequent replay can corrupt data. 5008 * from the journal and so a subsequent replay can corrupt data.
5002 * So, first we make sure that the journal is empty and that 5009 * So, first we make sure that the journal is empty and that
5003 * nobody is changing anything. 5010 * nobody is changing anything.
5004 */ 5011 */
5005 5012
5006 journal = EXT4_JOURNAL(inode); 5013 journal = EXT4_JOURNAL(inode);
5007 if (!journal) 5014 if (!journal)
5008 return 0; 5015 return 0;
5009 if (is_journal_aborted(journal)) 5016 if (is_journal_aborted(journal))
5010 return -EROFS; 5017 return -EROFS;
5011 /* We have to allocate physical blocks for delalloc blocks 5018 /* We have to allocate physical blocks for delalloc blocks
5012 * before flushing the journal. Otherwise delalloc blocks cannot 5019 * before flushing the journal. Otherwise delalloc blocks cannot
5013 * be allocated any more. Even worse, a truncate on delalloc blocks 5020 * be allocated any more. Even worse, a truncate on delalloc blocks
5014 * could trigger a BUG by flushing delalloc blocks in the journal. 5021 * could trigger a BUG by flushing delalloc blocks in the journal.
5015 * There are no delalloc blocks in non-journal data mode. 5022 * There are no delalloc blocks in non-journal data mode.
5016 */ 5023 */
5017 if (val && test_opt(inode->i_sb, DELALLOC)) { 5024 if (val && test_opt(inode->i_sb, DELALLOC)) {
5018 err = ext4_alloc_da_blocks(inode); 5025 err = ext4_alloc_da_blocks(inode);
5019 if (err < 0) 5026 if (err < 0)
5020 return err; 5027 return err;
5021 } 5028 }
5022 5029
5023 /* Wait for all existing dio workers */ 5030 /* Wait for all existing dio workers */
5024 ext4_inode_block_unlocked_dio(inode); 5031 ext4_inode_block_unlocked_dio(inode);
5025 inode_dio_wait(inode); 5032 inode_dio_wait(inode);
5026 5033
5027 jbd2_journal_lock_updates(journal); 5034 jbd2_journal_lock_updates(journal);
5028 5035
5029 /* 5036 /*
5030 * OK, there are no updates running now, and all cached data is 5037 * OK, there are no updates running now, and all cached data is
5031 * synced to disk. We are now in a completely consistent state 5038 * synced to disk. We are now in a completely consistent state
5032 * which doesn't have anything in the journal, and we know that 5039 * which doesn't have anything in the journal, and we know that
5033 * no filesystem updates are running, so it is safe to modify 5040 * no filesystem updates are running, so it is safe to modify
5034 * the inode's in-core data-journaling state flag now. 5041 * the inode's in-core data-journaling state flag now.
5035 */ 5042 */
5036 5043
5037 if (val) 5044 if (val)
5038 ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); 5045 ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
5039 else { 5046 else {
5040 err = jbd2_journal_flush(journal); 5047 err = jbd2_journal_flush(journal);
5041 if (err < 0) { 5048 if (err < 0) {
5042 jbd2_journal_unlock_updates(journal); 5049 jbd2_journal_unlock_updates(journal);
5043 ext4_inode_resume_unlocked_dio(inode); 5050 ext4_inode_resume_unlocked_dio(inode);
5044 return err; 5051 return err;
5045 } 5052 }
5046 ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); 5053 ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
5047 } 5054 }
5048 ext4_set_aops(inode); 5055 ext4_set_aops(inode);
5049 5056
5050 jbd2_journal_unlock_updates(journal); 5057 jbd2_journal_unlock_updates(journal);
5051 ext4_inode_resume_unlocked_dio(inode); 5058 ext4_inode_resume_unlocked_dio(inode);
5052 5059
5053 /* Finally we can mark the inode as dirty. */ 5060 /* Finally we can mark the inode as dirty. */
5054 5061
5055 handle = ext4_journal_start(inode, EXT4_HT_INODE, 1); 5062 handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
5056 if (IS_ERR(handle)) 5063 if (IS_ERR(handle))
5057 return PTR_ERR(handle); 5064 return PTR_ERR(handle);
5058 5065
5059 err = ext4_mark_inode_dirty(handle, inode); 5066 err = ext4_mark_inode_dirty(handle, inode);
5060 ext4_handle_sync(handle); 5067 ext4_handle_sync(handle);
5061 ext4_journal_stop(handle); 5068 ext4_journal_stop(handle);
5062 ext4_std_error(inode->i_sb, err); 5069 ext4_std_error(inode->i_sb, err);
5063 5070
5064 return err; 5071 return err;
5065 } 5072 }
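
This function is reached when the per-inode data-journaling flag is toggled; the ioctl handler itself is not shown in this diff. A hedged userspace sketch using the generic inode-flags ioctl, which carries the same bit chattr +j manipulates:

#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <unistd.h>

int enable_data_journaling(const char *path)
{
	long flags;
	int fd = open(path, O_RDONLY);

	if (fd < 0)
		return -1;
	if (ioctl(fd, FS_IOC_GETFLAGS, &flags) < 0)
		goto fail;
	flags |= FS_JOURNAL_DATA_FL;	/* same bit as chattr +j */
	if (ioctl(fd, FS_IOC_SETFLAGS, &flags) < 0)
		goto fail;
	return close(fd);
fail:
	close(fd);
	return -1;
}
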
5066 5073
5067 static int ext4_bh_unmapped(handle_t *handle, struct buffer_head *bh) 5074 static int ext4_bh_unmapped(handle_t *handle, struct buffer_head *bh)
5068 { 5075 {
5069 return !buffer_mapped(bh); 5076 return !buffer_mapped(bh);
5070 } 5077 }
5071 5078
5072 int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) 5079 int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
5073 { 5080 {
5074 struct page *page = vmf->page; 5081 struct page *page = vmf->page;
5075 loff_t size; 5082 loff_t size;
5076 unsigned long len; 5083 unsigned long len;
5077 int ret; 5084 int ret;
5078 struct file *file = vma->vm_file; 5085 struct file *file = vma->vm_file;
5079 struct inode *inode = file_inode(file); 5086 struct inode *inode = file_inode(file);
5080 struct address_space *mapping = inode->i_mapping; 5087 struct address_space *mapping = inode->i_mapping;
5081 handle_t *handle; 5088 handle_t *handle;
5082 get_block_t *get_block; 5089 get_block_t *get_block;
5083 int retries = 0; 5090 int retries = 0;
5084 5091
5085 sb_start_pagefault(inode->i_sb); 5092 sb_start_pagefault(inode->i_sb);
5086 file_update_time(vma->vm_file); 5093 file_update_time(vma->vm_file);
5087 /* Delalloc case is easy... */ 5094 /* Delalloc case is easy... */
5088 if (test_opt(inode->i_sb, DELALLOC) && 5095 if (test_opt(inode->i_sb, DELALLOC) &&
5089 !ext4_should_journal_data(inode) && 5096 !ext4_should_journal_data(inode) &&
5090 !ext4_nonda_switch(inode->i_sb)) { 5097 !ext4_nonda_switch(inode->i_sb)) {
5091 do { 5098 do {
5092 ret = __block_page_mkwrite(vma, vmf, 5099 ret = __block_page_mkwrite(vma, vmf,
5093 ext4_da_get_block_prep); 5100 ext4_da_get_block_prep);
5094 } while (ret == -ENOSPC && 5101 } while (ret == -ENOSPC &&
5095 ext4_should_retry_alloc(inode->i_sb, &retries)); 5102 ext4_should_retry_alloc(inode->i_sb, &retries));
5096 goto out_ret; 5103 goto out_ret;
5097 } 5104 }
5098 5105
5099 lock_page(page); 5106 lock_page(page);
5100 size = i_size_read(inode); 5107 size = i_size_read(inode);
5101 /* Page got truncated from under us? */ 5108 /* Page got truncated from under us? */
5102 if (page->mapping != mapping || page_offset(page) > size) { 5109 if (page->mapping != mapping || page_offset(page) > size) {
5103 unlock_page(page); 5110 unlock_page(page);
5104 ret = VM_FAULT_NOPAGE; 5111 ret = VM_FAULT_NOPAGE;
5105 goto out; 5112 goto out;
5106 } 5113 }
5107 5114
5108 if (page->index == size >> PAGE_CACHE_SHIFT) 5115 if (page->index == size >> PAGE_CACHE_SHIFT)
5109 len = size & ~PAGE_CACHE_MASK; 5116 len = size & ~PAGE_CACHE_MASK;
5110 else 5117 else
5111 len = PAGE_CACHE_SIZE; 5118 len = PAGE_CACHE_SIZE;
5112 /* 5119 /*
5113 * Return if we have all the buffers mapped. This avoids the need to do 5120 * Return if we have all the buffers mapped. This avoids the need to do
5114 * journal_start/journal_stop, which can block and take a long time. 5121 * journal_start/journal_stop, which can block and take a long time.
5115 */ 5122 */
5116 if (page_has_buffers(page)) { 5123 if (page_has_buffers(page)) {
5117 if (!ext4_walk_page_buffers(NULL, page_buffers(page), 5124 if (!ext4_walk_page_buffers(NULL, page_buffers(page),
5118 0, len, NULL, 5125 0, len, NULL,
5119 ext4_bh_unmapped)) { 5126 ext4_bh_unmapped)) {
5120 /* Wait so that we don't change page under IO */ 5127 /* Wait so that we don't change page under IO */
5121 wait_for_stable_page(page); 5128 wait_for_stable_page(page);
5122 ret = VM_FAULT_LOCKED; 5129 ret = VM_FAULT_LOCKED;
5123 goto out; 5130 goto out;
5124 } 5131 }
5125 } 5132 }
5126 unlock_page(page); 5133 unlock_page(page);
5127 /* OK, we need to fill the hole... */ 5134 /* OK, we need to fill the hole... */
5128 if (ext4_should_dioread_nolock(inode)) 5135 if (ext4_should_dioread_nolock(inode))
5129 get_block = ext4_get_block_write; 5136 get_block = ext4_get_block_write;
5130 else 5137 else
5131 get_block = ext4_get_block; 5138 get_block = ext4_get_block;
5132 retry_alloc: 5139 retry_alloc:
5133 handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, 5140 handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
5134 ext4_writepage_trans_blocks(inode)); 5141 ext4_writepage_trans_blocks(inode));
5135 if (IS_ERR(handle)) { 5142 if (IS_ERR(handle)) {
5136 ret = VM_FAULT_SIGBUS; 5143 ret = VM_FAULT_SIGBUS;
5137 goto out; 5144 goto out;
5138 } 5145 }
5139 ret = __block_page_mkwrite(vma, vmf, get_block); 5146 ret = __block_page_mkwrite(vma, vmf, get_block);
5140 if (!ret && ext4_should_journal_data(inode)) { 5147 if (!ret && ext4_should_journal_data(inode)) {
5141 if (ext4_walk_page_buffers(handle, page_buffers(page), 0, 5148 if (ext4_walk_page_buffers(handle, page_buffers(page), 0,
5142 PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) { 5149 PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) {
5143 unlock_page(page); 5150 unlock_page(page);
5144 ret = VM_FAULT_SIGBUS; 5151 ret = VM_FAULT_SIGBUS;
5145 ext4_journal_stop(handle); 5152 ext4_journal_stop(handle);
5146 goto out; 5153 goto out;
5147 } 5154 }
5148 ext4_set_inode_state(inode, EXT4_STATE_JDATA); 5155 ext4_set_inode_state(inode, EXT4_STATE_JDATA);
5149 } 5156 }
5150 ext4_journal_stop(handle); 5157 ext4_journal_stop(handle);
5151 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 5158 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
5152 goto retry_alloc; 5159 goto retry_alloc;
5153 out_ret: 5160 out_ret:
5154 ret = block_page_mkwrite_return(ret); 5161 ret = block_page_mkwrite_return(ret);
5155 out: 5162 out:
5156 sb_end_pagefault(inode->i_sb); 5163 sb_end_pagefault(inode->i_sb);
5157 return ret; 5164 return ret;
5158 } 5165 }
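
ext4_page_mkwrite() runs when a page in a shared writable mapping takes its first write fault. A minimal userspace sketch that exercises it (hypothetical path, 4K page size assumed):

#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

int dirty_mapped_page(const char *path)
{
	int fd = open(path, O_RDWR);
	char *p;

	if (fd < 0)
		return -1;
	p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	close(fd);	/* the mapping keeps its own file reference */
	if (p == MAP_FAILED)
		return -1;
	p[0] = 1;	/* first write fault invokes ->page_mkwrite() */
	return munmap(p, 4096);
}
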
5159 5166
1 /* 1 /*
2 * linux/fs/ext4/super.c 2 * linux/fs/ext4/super.c
3 * 3 *
4 * Copyright (C) 1992, 1993, 1994, 1995 4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr) 5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal 6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI) 7 * Universite Pierre et Marie Curie (Paris VI)
8 * 8 *
9 * from 9 * from
10 * 10 *
11 * linux/fs/minix/inode.c 11 * linux/fs/minix/inode.c
12 * 12 *
13 * Copyright (C) 1991, 1992 Linus Torvalds 13 * Copyright (C) 1991, 1992 Linus Torvalds
14 * 14 *
15 * Big-endian to little-endian byte-swapping/bitmaps by 15 * Big-endian to little-endian byte-swapping/bitmaps by
16 * David S. Miller (davem@caip.rutgers.edu), 1995 16 * David S. Miller (davem@caip.rutgers.edu), 1995
17 */ 17 */
18 18
19 #include <linux/module.h> 19 #include <linux/module.h>
20 #include <linux/string.h> 20 #include <linux/string.h>
21 #include <linux/fs.h> 21 #include <linux/fs.h>
22 #include <linux/time.h> 22 #include <linux/time.h>
23 #include <linux/vmalloc.h> 23 #include <linux/vmalloc.h>
24 #include <linux/jbd2.h> 24 #include <linux/jbd2.h>
25 #include <linux/slab.h> 25 #include <linux/slab.h>
26 #include <linux/init.h> 26 #include <linux/init.h>
27 #include <linux/blkdev.h> 27 #include <linux/blkdev.h>
28 #include <linux/parser.h> 28 #include <linux/parser.h>
29 #include <linux/buffer_head.h> 29 #include <linux/buffer_head.h>
30 #include <linux/exportfs.h> 30 #include <linux/exportfs.h>
31 #include <linux/vfs.h> 31 #include <linux/vfs.h>
32 #include <linux/random.h> 32 #include <linux/random.h>
33 #include <linux/mount.h> 33 #include <linux/mount.h>
34 #include <linux/namei.h> 34 #include <linux/namei.h>
35 #include <linux/quotaops.h> 35 #include <linux/quotaops.h>
36 #include <linux/seq_file.h> 36 #include <linux/seq_file.h>
37 #include <linux/proc_fs.h> 37 #include <linux/proc_fs.h>
38 #include <linux/ctype.h> 38 #include <linux/ctype.h>
39 #include <linux/log2.h> 39 #include <linux/log2.h>
40 #include <linux/crc16.h> 40 #include <linux/crc16.h>
41 #include <linux/cleancache.h> 41 #include <linux/cleancache.h>
42 #include <asm/uaccess.h> 42 #include <asm/uaccess.h>
43 43
44 #include <linux/kthread.h> 44 #include <linux/kthread.h>
45 #include <linux/freezer.h> 45 #include <linux/freezer.h>
46 46
47 #include "ext4.h" 47 #include "ext4.h"
48 #include "ext4_extents.h" /* Needed for trace points definition */ 48 #include "ext4_extents.h" /* Needed for trace points definition */
49 #include "ext4_jbd2.h" 49 #include "ext4_jbd2.h"
50 #include "xattr.h" 50 #include "xattr.h"
51 #include "acl.h" 51 #include "acl.h"
52 #include "mballoc.h" 52 #include "mballoc.h"
53 53
54 #define CREATE_TRACE_POINTS 54 #define CREATE_TRACE_POINTS
55 #include <trace/events/ext4.h> 55 #include <trace/events/ext4.h>
56 56
57 static struct proc_dir_entry *ext4_proc_root; 57 static struct proc_dir_entry *ext4_proc_root;
58 static struct kset *ext4_kset; 58 static struct kset *ext4_kset;
59 static struct ext4_lazy_init *ext4_li_info; 59 static struct ext4_lazy_init *ext4_li_info;
60 static struct mutex ext4_li_mtx; 60 static struct mutex ext4_li_mtx;
61 static struct ext4_features *ext4_feat; 61 static struct ext4_features *ext4_feat;
62 static int ext4_mballoc_ready; 62 static int ext4_mballoc_ready;
63 63
64 static int ext4_load_journal(struct super_block *, struct ext4_super_block *, 64 static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
65 unsigned long journal_devnum); 65 unsigned long journal_devnum);
66 static int ext4_show_options(struct seq_file *seq, struct dentry *root); 66 static int ext4_show_options(struct seq_file *seq, struct dentry *root);
67 static int ext4_commit_super(struct super_block *sb, int sync); 67 static int ext4_commit_super(struct super_block *sb, int sync);
68 static void ext4_mark_recovery_complete(struct super_block *sb, 68 static void ext4_mark_recovery_complete(struct super_block *sb,
69 struct ext4_super_block *es); 69 struct ext4_super_block *es);
70 static void ext4_clear_journal_err(struct super_block *sb, 70 static void ext4_clear_journal_err(struct super_block *sb,
71 struct ext4_super_block *es); 71 struct ext4_super_block *es);
72 static int ext4_sync_fs(struct super_block *sb, int wait); 72 static int ext4_sync_fs(struct super_block *sb, int wait);
73 static int ext4_remount(struct super_block *sb, int *flags, char *data); 73 static int ext4_remount(struct super_block *sb, int *flags, char *data);
74 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); 74 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
75 static int ext4_unfreeze(struct super_block *sb); 75 static int ext4_unfreeze(struct super_block *sb);
76 static int ext4_freeze(struct super_block *sb); 76 static int ext4_freeze(struct super_block *sb);
77 static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, 77 static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
78 const char *dev_name, void *data); 78 const char *dev_name, void *data);
79 static inline int ext2_feature_set_ok(struct super_block *sb); 79 static inline int ext2_feature_set_ok(struct super_block *sb);
80 static inline int ext3_feature_set_ok(struct super_block *sb); 80 static inline int ext3_feature_set_ok(struct super_block *sb);
81 static int ext4_feature_set_ok(struct super_block *sb, int readonly); 81 static int ext4_feature_set_ok(struct super_block *sb, int readonly);
82 static void ext4_destroy_lazyinit_thread(void); 82 static void ext4_destroy_lazyinit_thread(void);
83 static void ext4_unregister_li_request(struct super_block *sb); 83 static void ext4_unregister_li_request(struct super_block *sb);
84 static void ext4_clear_request_list(void); 84 static void ext4_clear_request_list(void);
85 static int ext4_reserve_clusters(struct ext4_sb_info *, ext4_fsblk_t); 85 static int ext4_reserve_clusters(struct ext4_sb_info *, ext4_fsblk_t);
86 86
87 #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) 87 #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
88 static struct file_system_type ext2_fs_type = { 88 static struct file_system_type ext2_fs_type = {
89 .owner = THIS_MODULE, 89 .owner = THIS_MODULE,
90 .name = "ext2", 90 .name = "ext2",
91 .mount = ext4_mount, 91 .mount = ext4_mount,
92 .kill_sb = kill_block_super, 92 .kill_sb = kill_block_super,
93 .fs_flags = FS_REQUIRES_DEV, 93 .fs_flags = FS_REQUIRES_DEV,
94 }; 94 };
95 MODULE_ALIAS_FS("ext2"); 95 MODULE_ALIAS_FS("ext2");
96 MODULE_ALIAS("ext2"); 96 MODULE_ALIAS("ext2");
97 #define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type) 97 #define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type)
98 #else 98 #else
99 #define IS_EXT2_SB(sb) (0) 99 #define IS_EXT2_SB(sb) (0)
100 #endif 100 #endif
101 101
102 102
103 #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) 103 #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
104 static struct file_system_type ext3_fs_type = { 104 static struct file_system_type ext3_fs_type = {
105 .owner = THIS_MODULE, 105 .owner = THIS_MODULE,
106 .name = "ext3", 106 .name = "ext3",
107 .mount = ext4_mount, 107 .mount = ext4_mount,
108 .kill_sb = kill_block_super, 108 .kill_sb = kill_block_super,
109 .fs_flags = FS_REQUIRES_DEV, 109 .fs_flags = FS_REQUIRES_DEV,
110 }; 110 };
111 MODULE_ALIAS_FS("ext3"); 111 MODULE_ALIAS_FS("ext3");
112 MODULE_ALIAS("ext3"); 112 MODULE_ALIAS("ext3");
113 #define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type) 113 #define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type)
114 #else 114 #else
115 #define IS_EXT3_SB(sb) (0) 115 #define IS_EXT3_SB(sb) (0)
116 #endif 116 #endif
117 117
118 static int ext4_verify_csum_type(struct super_block *sb, 118 static int ext4_verify_csum_type(struct super_block *sb,
119 struct ext4_super_block *es) 119 struct ext4_super_block *es)
120 { 120 {
121 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, 121 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
122 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 122 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
123 return 1; 123 return 1;
124 124
125 return es->s_checksum_type == EXT4_CRC32C_CHKSUM; 125 return es->s_checksum_type == EXT4_CRC32C_CHKSUM;
126 } 126 }
127 127
128 static __le32 ext4_superblock_csum(struct super_block *sb, 128 static __le32 ext4_superblock_csum(struct super_block *sb,
129 struct ext4_super_block *es) 129 struct ext4_super_block *es)
130 { 130 {
131 struct ext4_sb_info *sbi = EXT4_SB(sb); 131 struct ext4_sb_info *sbi = EXT4_SB(sb);
132 int offset = offsetof(struct ext4_super_block, s_checksum); 132 int offset = offsetof(struct ext4_super_block, s_checksum);
133 __u32 csum; 133 __u32 csum;
134 134
135 csum = ext4_chksum(sbi, ~0, (char *)es, offset); 135 csum = ext4_chksum(sbi, ~0, (char *)es, offset);
136 136
137 return cpu_to_le32(csum); 137 return cpu_to_le32(csum);
138 } 138 }
139 139
140 static int ext4_superblock_csum_verify(struct super_block *sb, 140 static int ext4_superblock_csum_verify(struct super_block *sb,
141 struct ext4_super_block *es) 141 struct ext4_super_block *es)
142 { 142 {
143 if (!ext4_has_metadata_csum(sb)) 143 if (!ext4_has_metadata_csum(sb))
144 return 1; 144 return 1;
145 145
146 return es->s_checksum == ext4_superblock_csum(sb, es); 146 return es->s_checksum == ext4_superblock_csum(sb, es);
147 } 147 }
148 148
149 void ext4_superblock_csum_set(struct super_block *sb) 149 void ext4_superblock_csum_set(struct super_block *sb)
150 { 150 {
151 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 151 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
152 152
153 if (!ext4_has_metadata_csum(sb)) 153 if (!ext4_has_metadata_csum(sb))
154 return; 154 return;
155 155
156 es->s_checksum = ext4_superblock_csum(sb, es); 156 es->s_checksum = ext4_superblock_csum(sb, es);
157 } 157 }
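
The checksum covers every superblock byte before s_checksum itself. A minimal verification sketch in the same style, assuming a crc32c_le() helper that matches the seed and finalization convention of ext4_chksum():

/* Sketch only: crc32c_le() is an assumed helper, not a kernel API. */
static int example_sb_csum_ok(struct ext4_super_block *es)
{
	int offset = offsetof(struct ext4_super_block, s_checksum);
	__u32 csum = crc32c_le(~0, (char *)es, offset);

	return es->s_checksum == cpu_to_le32(csum);
}
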
158 158
159 void *ext4_kvmalloc(size_t size, gfp_t flags) 159 void *ext4_kvmalloc(size_t size, gfp_t flags)
160 { 160 {
161 void *ret; 161 void *ret;
162 162
163 ret = kmalloc(size, flags | __GFP_NOWARN); 163 ret = kmalloc(size, flags | __GFP_NOWARN);
164 if (!ret) 164 if (!ret)
165 ret = __vmalloc(size, flags, PAGE_KERNEL); 165 ret = __vmalloc(size, flags, PAGE_KERNEL);
166 return ret; 166 return ret;
167 } 167 }
168 168
169 void *ext4_kvzalloc(size_t size, gfp_t flags) 169 void *ext4_kvzalloc(size_t size, gfp_t flags)
170 { 170 {
171 void *ret; 171 void *ret;
172 172
173 ret = kzalloc(size, flags | __GFP_NOWARN); 173 ret = kzalloc(size, flags | __GFP_NOWARN);
174 if (!ret) 174 if (!ret)
175 ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL); 175 ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL);
176 return ret; 176 return ret;
177 } 177 }
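
Both helpers try a physically contiguous kmalloc() first (with the allocation-failure warning suppressed) and quietly fall back to vmalloc(). A usage sketch, assuming the caller frees with kvfree() or an equivalent that checks is_vmalloc_addr():

static int example_use_table(unsigned int nelems)
{
	u64 *table;

	table = ext4_kvzalloc(nelems * sizeof(*table), GFP_KERNEL);
	if (!table)
		return -ENOMEM;
	/* ... fill and use the zeroed table ... */
	kvfree(table);	/* assumed: handles kmalloc or vmalloc backing */
	return 0;
}
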
178 178
179 ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, 179 ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
180 struct ext4_group_desc *bg) 180 struct ext4_group_desc *bg)
181 { 181 {
182 return le32_to_cpu(bg->bg_block_bitmap_lo) | 182 return le32_to_cpu(bg->bg_block_bitmap_lo) |
183 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 183 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
184 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0); 184 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
185 } 185 }
186 186
187 ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, 187 ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
188 struct ext4_group_desc *bg) 188 struct ext4_group_desc *bg)
189 { 189 {
190 return le32_to_cpu(bg->bg_inode_bitmap_lo) | 190 return le32_to_cpu(bg->bg_inode_bitmap_lo) |
191 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 191 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
192 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0); 192 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
193 } 193 }
194 194
195 ext4_fsblk_t ext4_inode_table(struct super_block *sb, 195 ext4_fsblk_t ext4_inode_table(struct super_block *sb,
196 struct ext4_group_desc *bg) 196 struct ext4_group_desc *bg)
197 { 197 {
198 return le32_to_cpu(bg->bg_inode_table_lo) | 198 return le32_to_cpu(bg->bg_inode_table_lo) |
199 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 199 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
200 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0); 200 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
201 } 201 }
202 202
203 __u32 ext4_free_group_clusters(struct super_block *sb, 203 __u32 ext4_free_group_clusters(struct super_block *sb,
204 struct ext4_group_desc *bg) 204 struct ext4_group_desc *bg)
205 { 205 {
206 return le16_to_cpu(bg->bg_free_blocks_count_lo) | 206 return le16_to_cpu(bg->bg_free_blocks_count_lo) |
207 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 207 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
208 (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0); 208 (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
209 } 209 }
210 210
211 __u32 ext4_free_inodes_count(struct super_block *sb, 211 __u32 ext4_free_inodes_count(struct super_block *sb,
212 struct ext4_group_desc *bg) 212 struct ext4_group_desc *bg)
213 { 213 {
214 return le16_to_cpu(bg->bg_free_inodes_count_lo) | 214 return le16_to_cpu(bg->bg_free_inodes_count_lo) |
215 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 215 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
216 (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0); 216 (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
217 } 217 }
218 218
219 __u32 ext4_used_dirs_count(struct super_block *sb, 219 __u32 ext4_used_dirs_count(struct super_block *sb,
220 struct ext4_group_desc *bg) 220 struct ext4_group_desc *bg)
221 { 221 {
222 return le16_to_cpu(bg->bg_used_dirs_count_lo) | 222 return le16_to_cpu(bg->bg_used_dirs_count_lo) |
223 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 223 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
224 (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0); 224 (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
225 } 225 }
226 226
227 __u32 ext4_itable_unused_count(struct super_block *sb, 227 __u32 ext4_itable_unused_count(struct super_block *sb,
228 struct ext4_group_desc *bg) 228 struct ext4_group_desc *bg)
229 { 229 {
230 return le16_to_cpu(bg->bg_itable_unused_lo) | 230 return le16_to_cpu(bg->bg_itable_unused_lo) |
231 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 231 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
232 (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0); 232 (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
233 } 233 }
234 234
235 void ext4_block_bitmap_set(struct super_block *sb, 235 void ext4_block_bitmap_set(struct super_block *sb,
236 struct ext4_group_desc *bg, ext4_fsblk_t blk) 236 struct ext4_group_desc *bg, ext4_fsblk_t blk)
237 { 237 {
238 bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk); 238 bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk);
239 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 239 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
240 bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32); 240 bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32);
241 } 241 }
242 242
243 void ext4_inode_bitmap_set(struct super_block *sb, 243 void ext4_inode_bitmap_set(struct super_block *sb,
244 struct ext4_group_desc *bg, ext4_fsblk_t blk) 244 struct ext4_group_desc *bg, ext4_fsblk_t blk)
245 { 245 {
246 bg->bg_inode_bitmap_lo = cpu_to_le32((u32)blk); 246 bg->bg_inode_bitmap_lo = cpu_to_le32((u32)blk);
247 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 247 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
248 bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32); 248 bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32);
249 } 249 }
250 250
251 void ext4_inode_table_set(struct super_block *sb, 251 void ext4_inode_table_set(struct super_block *sb,
252 struct ext4_group_desc *bg, ext4_fsblk_t blk) 252 struct ext4_group_desc *bg, ext4_fsblk_t blk)
253 { 253 {
254 bg->bg_inode_table_lo = cpu_to_le32((u32)blk); 254 bg->bg_inode_table_lo = cpu_to_le32((u32)blk);
255 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 255 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
256 bg->bg_inode_table_hi = cpu_to_le32(blk >> 32); 256 bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
257 } 257 }
258 258
259 void ext4_free_group_clusters_set(struct super_block *sb, 259 void ext4_free_group_clusters_set(struct super_block *sb,
260 struct ext4_group_desc *bg, __u32 count) 260 struct ext4_group_desc *bg, __u32 count)
261 { 261 {
262 bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count); 262 bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);
263 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 263 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
264 bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16); 264 bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16);
265 } 265 }
266 266
267 void ext4_free_inodes_set(struct super_block *sb, 267 void ext4_free_inodes_set(struct super_block *sb,
268 struct ext4_group_desc *bg, __u32 count) 268 struct ext4_group_desc *bg, __u32 count)
269 { 269 {
270 bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count); 270 bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count);
271 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 271 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
272 bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16); 272 bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16);
273 } 273 }
274 274
275 void ext4_used_dirs_set(struct super_block *sb, 275 void ext4_used_dirs_set(struct super_block *sb,
276 struct ext4_group_desc *bg, __u32 count) 276 struct ext4_group_desc *bg, __u32 count)
277 { 277 {
278 bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count); 278 bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count);
279 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 279 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
280 bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16); 280 bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16);
281 } 281 }
282 282
283 void ext4_itable_unused_set(struct super_block *sb, 283 void ext4_itable_unused_set(struct super_block *sb,
284 struct ext4_group_desc *bg, __u32 count) 284 struct ext4_group_desc *bg, __u32 count)
285 { 285 {
286 bg->bg_itable_unused_lo = cpu_to_le16((__u16)count); 286 bg->bg_itable_unused_lo = cpu_to_le16((__u16)count);
287 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 287 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
288 bg->bg_itable_unused_hi = cpu_to_le16(count >> 16); 288 bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
289 } 289 }
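
All the accessors above split wide values the same way. A worked example with an illustrative block number:

/*
 * Worked example: with 64-bit group descriptors, blk = 0x123456789
 * is stored as bg_block_bitmap_lo = 0x23456789 and
 * bg_block_bitmap_hi = 0x1; the getter rebuilds
 * lo | ((ext4_fsblk_t)hi << 32). With old 32-byte descriptors the
 * _hi fields don't exist on disk, so the getters substitute 0.
 */
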
290 290
291 291
292 static void __save_error_info(struct super_block *sb, const char *func, 292 static void __save_error_info(struct super_block *sb, const char *func,
293 unsigned int line) 293 unsigned int line)
294 { 294 {
295 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 295 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
296 296
297 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 297 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
298 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 298 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
299 es->s_last_error_time = cpu_to_le32(get_seconds()); 299 es->s_last_error_time = cpu_to_le32(get_seconds());
300 strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func)); 300 strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
301 es->s_last_error_line = cpu_to_le32(line); 301 es->s_last_error_line = cpu_to_le32(line);
302 if (!es->s_first_error_time) { 302 if (!es->s_first_error_time) {
303 es->s_first_error_time = es->s_last_error_time; 303 es->s_first_error_time = es->s_last_error_time;
304 strncpy(es->s_first_error_func, func, 304 strncpy(es->s_first_error_func, func,
305 sizeof(es->s_first_error_func)); 305 sizeof(es->s_first_error_func));
306 es->s_first_error_line = cpu_to_le32(line); 306 es->s_first_error_line = cpu_to_le32(line);
307 es->s_first_error_ino = es->s_last_error_ino; 307 es->s_first_error_ino = es->s_last_error_ino;
308 es->s_first_error_block = es->s_last_error_block; 308 es->s_first_error_block = es->s_last_error_block;
309 } 309 }
310 /* 310 /*
311 * Start the daily error reporting function if it hasn't been 311 * Start the daily error reporting function if it hasn't been
312 * started already 312 * started already
313 */ 313 */
314 if (!es->s_error_count) 314 if (!es->s_error_count)
315 mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ); 315 mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ);
316 le32_add_cpu(&es->s_error_count, 1); 316 le32_add_cpu(&es->s_error_count, 1);
317 } 317 }
318 318
319 static void save_error_info(struct super_block *sb, const char *func, 319 static void save_error_info(struct super_block *sb, const char *func,
320 unsigned int line) 320 unsigned int line)
321 { 321 {
322 __save_error_info(sb, func, line); 322 __save_error_info(sb, func, line);
323 ext4_commit_super(sb, 1); 323 ext4_commit_super(sb, 1);
324 } 324 }
325 325
326 /* 326 /*
327 * The del_gendisk() function uninitializes the disk-specific data 327 * The del_gendisk() function uninitializes the disk-specific data
328 * structures, including the bdi structure, without telling anyone 328 * structures, including the bdi structure, without telling anyone
329 * else. Once this happens, any attempt to call mark_buffer_dirty() 329 * else. Once this happens, any attempt to call mark_buffer_dirty()
330 * (for example, by ext4_commit_super), will cause a kernel OOPS. 330 * (for example, by ext4_commit_super), will cause a kernel OOPS.
331 * This is a kludge to prevent these oopses until we can put in a proper 331 * This is a kludge to prevent these oopses until we can put in a proper
332 * hook in del_gendisk() to inform the VFS and file system layers. 332 * hook in del_gendisk() to inform the VFS and file system layers.
333 */ 333 */
334 static int block_device_ejected(struct super_block *sb) 334 static int block_device_ejected(struct super_block *sb)
335 { 335 {
336 struct inode *bd_inode = sb->s_bdev->bd_inode; 336 struct inode *bd_inode = sb->s_bdev->bd_inode;
337 struct backing_dev_info *bdi = inode_to_bdi(bd_inode); 337 struct backing_dev_info *bdi = inode_to_bdi(bd_inode);
338 338
339 return bdi->dev == NULL; 339 return bdi->dev == NULL;
340 } 340 }
341 341
342 static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn) 342 static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
343 { 343 {
344 struct super_block *sb = journal->j_private; 344 struct super_block *sb = journal->j_private;
345 struct ext4_sb_info *sbi = EXT4_SB(sb); 345 struct ext4_sb_info *sbi = EXT4_SB(sb);
346 int error = is_journal_aborted(journal); 346 int error = is_journal_aborted(journal);
347 struct ext4_journal_cb_entry *jce; 347 struct ext4_journal_cb_entry *jce;
348 348
349 BUG_ON(txn->t_state == T_FINISHED); 349 BUG_ON(txn->t_state == T_FINISHED);
350 spin_lock(&sbi->s_md_lock); 350 spin_lock(&sbi->s_md_lock);
351 while (!list_empty(&txn->t_private_list)) { 351 while (!list_empty(&txn->t_private_list)) {
352 jce = list_entry(txn->t_private_list.next, 352 jce = list_entry(txn->t_private_list.next,
353 struct ext4_journal_cb_entry, jce_list); 353 struct ext4_journal_cb_entry, jce_list);
354 list_del_init(&jce->jce_list); 354 list_del_init(&jce->jce_list);
355 spin_unlock(&sbi->s_md_lock); 355 spin_unlock(&sbi->s_md_lock);
356 jce->jce_func(sb, jce, error); 356 jce->jce_func(sb, jce, error);
357 spin_lock(&sbi->s_md_lock); 357 spin_lock(&sbi->s_md_lock);
358 } 358 }
359 spin_unlock(&sbi->s_md_lock); 359 spin_unlock(&sbi->s_md_lock);
360 } 360 }
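
A note on the locking shape of the loop above, which may be worth spelling out:

/*
 * Pattern: detach one entry under s_md_lock, drop the lock so the
 * callback may sleep or queue new work, then retake the lock before
 * re-reading the list head. list_del_init() leaves the detached
 * entry in a self-consistent state once the lock is dropped.
 */
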
361 361
362 /* Deal with the reporting of failure conditions on a filesystem such as 362 /* Deal with the reporting of failure conditions on a filesystem such as
363 * inconsistencies detected or read IO failures. 363 * inconsistencies detected or read IO failures.
364 * 364 *
365 * On ext2, we can store the error state of the filesystem in the 365 * On ext2, we can store the error state of the filesystem in the
366 * superblock. That is not possible on ext4, because we may have other 366 * superblock. That is not possible on ext4, because we may have other
367 * write ordering constraints on the superblock which prevent us from 367 * write ordering constraints on the superblock which prevent us from
368 * writing it out straight away; and given that the journal is about to 368 * writing it out straight away; and given that the journal is about to
369 * be aborted, we can't rely on the current, or future, transactions to 369 * be aborted, we can't rely on the current, or future, transactions to
370 * write out the superblock safely. 370 * write out the superblock safely.
371 * 371 *
372 * We'll just use the jbd2_journal_abort() error code to record an error in 372 * We'll just use the jbd2_journal_abort() error code to record an error in
373 * the journal instead. On recovery, the journal will complain about 373 * the journal instead. On recovery, the journal will complain about
374 * that error until we've noted it down and cleared it. 374 * that error until we've noted it down and cleared it.
375 */ 375 */
376 376
377 static void ext4_handle_error(struct super_block *sb) 377 static void ext4_handle_error(struct super_block *sb)
378 { 378 {
379 if (sb->s_flags & MS_RDONLY) 379 if (sb->s_flags & MS_RDONLY)
380 return; 380 return;
381 381
382 if (!test_opt(sb, ERRORS_CONT)) { 382 if (!test_opt(sb, ERRORS_CONT)) {
383 journal_t *journal = EXT4_SB(sb)->s_journal; 383 journal_t *journal = EXT4_SB(sb)->s_journal;
384 384
385 EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED; 385 EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
386 if (journal) 386 if (journal)
387 jbd2_journal_abort(journal, -EIO); 387 jbd2_journal_abort(journal, -EIO);
388 } 388 }
389 if (test_opt(sb, ERRORS_RO)) { 389 if (test_opt(sb, ERRORS_RO)) {
390 ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); 390 ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
391 /* 391 /*
392 * Make sure updated value of ->s_mount_flags will be visible 392 * Make sure updated value of ->s_mount_flags will be visible
393 * before ->s_flags update 393 * before ->s_flags update
394 */ 394 */
395 smp_wmb(); 395 smp_wmb();
396 sb->s_flags |= MS_RDONLY; 396 sb->s_flags |= MS_RDONLY;
397 } 397 }
398 if (test_opt(sb, ERRORS_PANIC)) 398 if (test_opt(sb, ERRORS_PANIC))
399 panic("EXT4-fs (device %s): panic forced after error\n", 399 panic("EXT4-fs (device %s): panic forced after error\n",
400 sb->s_id); 400 sb->s_id);
401 } 401 }
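The three branches above implement the errors=continue, errors=remount-ro and errors=panic mount policies. A hedged userspace sketch of selecting one of them through the data argument of mount(2); the device and mount-point paths are placeholders:

	#include <stdio.h>
	#include <string.h>
	#include <errno.h>
	#include <sys/mount.h>

	int main(void)
	{
		/* Placeholder paths; adjust for a real system (needs root). */
		const char *dev = "/dev/sdb1";
		const char *dir = "/mnt/test";

		/* "errors=remount-ro" selects the ERRORS_RO behaviour above;
		 * "errors=continue" and "errors=panic" select the others. */
		if (mount(dev, dir, "ext4", 0, "errors=remount-ro") != 0) {
			fprintf(stderr, "mount: %s\n", strerror(errno));
			return 1;
		}
		return 0;
	}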
402 402
403 #define ext4_error_ratelimit(sb) \ 403 #define ext4_error_ratelimit(sb) \
404 ___ratelimit(&(EXT4_SB(sb)->s_err_ratelimit_state), \ 404 ___ratelimit(&(EXT4_SB(sb)->s_err_ratelimit_state), \
405 "EXT4-fs error") 405 "EXT4-fs error")
406 406
407 void __ext4_error(struct super_block *sb, const char *function, 407 void __ext4_error(struct super_block *sb, const char *function,
408 unsigned int line, const char *fmt, ...) 408 unsigned int line, const char *fmt, ...)
409 { 409 {
410 struct va_format vaf; 410 struct va_format vaf;
411 va_list args; 411 va_list args;
412 412
413 if (ext4_error_ratelimit(sb)) { 413 if (ext4_error_ratelimit(sb)) {
414 va_start(args, fmt); 414 va_start(args, fmt);
415 vaf.fmt = fmt; 415 vaf.fmt = fmt;
416 vaf.va = &args; 416 vaf.va = &args;
417 printk(KERN_CRIT 417 printk(KERN_CRIT
418 "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n", 418 "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
419 sb->s_id, function, line, current->comm, &vaf); 419 sb->s_id, function, line, current->comm, &vaf);
420 va_end(args); 420 va_end(args);
421 } 421 }
422 save_error_info(sb, function, line); 422 save_error_info(sb, function, line);
423 ext4_handle_error(sb); 423 ext4_handle_error(sb);
424 } 424 }
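__ext4_error() packs the caller's format string and va_list into a struct va_format and lets printk expand it via the kernel-only %pV specifier. A standalone analog of the same one-pass variadic wrapper, using vfprintf in place of %pV:

	#include <stdarg.h>
	#include <stdio.h>

	/* Userspace analog of __ext4_error(): one vararg-taking reporter
	 * that prefixes every message with device/function/line context. */
	static void report_error(const char *dev, const char *function,
				 unsigned int line, const char *fmt, ...)
	{
		va_list args;

		fprintf(stderr, "EXT4-fs error (device %s): %s:%u: ",
			dev, function, line);
		va_start(args, fmt);
		vfprintf(stderr, fmt, args);
		va_end(args);
		fputc('\n', stderr);
	}

	int main(void)
	{
		report_error("sda1", __func__, __LINE__,
			     "bad block %llu", 12345ULL);
		return 0;
	}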
425 425
426 void __ext4_error_inode(struct inode *inode, const char *function, 426 void __ext4_error_inode(struct inode *inode, const char *function,
427 unsigned int line, ext4_fsblk_t block, 427 unsigned int line, ext4_fsblk_t block,
428 const char *fmt, ...) 428 const char *fmt, ...)
429 { 429 {
430 va_list args; 430 va_list args;
431 struct va_format vaf; 431 struct va_format vaf;
432 struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; 432 struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
433 433
434 es->s_last_error_ino = cpu_to_le32(inode->i_ino); 434 es->s_last_error_ino = cpu_to_le32(inode->i_ino);
435 es->s_last_error_block = cpu_to_le64(block); 435 es->s_last_error_block = cpu_to_le64(block);
436 if (ext4_error_ratelimit(inode->i_sb)) { 436 if (ext4_error_ratelimit(inode->i_sb)) {
437 va_start(args, fmt); 437 va_start(args, fmt);
438 vaf.fmt = fmt; 438 vaf.fmt = fmt;
439 vaf.va = &args; 439 vaf.va = &args;
440 if (block) 440 if (block)
441 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: " 441 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
442 "inode #%lu: block %llu: comm %s: %pV\n", 442 "inode #%lu: block %llu: comm %s: %pV\n",
443 inode->i_sb->s_id, function, line, inode->i_ino, 443 inode->i_sb->s_id, function, line, inode->i_ino,
444 block, current->comm, &vaf); 444 block, current->comm, &vaf);
445 else 445 else
446 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: " 446 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
447 "inode #%lu: comm %s: %pV\n", 447 "inode #%lu: comm %s: %pV\n",
448 inode->i_sb->s_id, function, line, inode->i_ino, 448 inode->i_sb->s_id, function, line, inode->i_ino,
449 current->comm, &vaf); 449 current->comm, &vaf);
450 va_end(args); 450 va_end(args);
451 } 451 }
452 save_error_info(inode->i_sb, function, line); 452 save_error_info(inode->i_sb, function, line);
453 ext4_handle_error(inode->i_sb); 453 ext4_handle_error(inode->i_sb);
454 } 454 }
455 455
456 void __ext4_error_file(struct file *file, const char *function, 456 void __ext4_error_file(struct file *file, const char *function,
457 unsigned int line, ext4_fsblk_t block, 457 unsigned int line, ext4_fsblk_t block,
458 const char *fmt, ...) 458 const char *fmt, ...)
459 { 459 {
460 va_list args; 460 va_list args;
461 struct va_format vaf; 461 struct va_format vaf;
462 struct ext4_super_block *es; 462 struct ext4_super_block *es;
463 struct inode *inode = file_inode(file); 463 struct inode *inode = file_inode(file);
464 char pathname[80], *path; 464 char pathname[80], *path;
465 465
466 es = EXT4_SB(inode->i_sb)->s_es; 466 es = EXT4_SB(inode->i_sb)->s_es;
467 es->s_last_error_ino = cpu_to_le32(inode->i_ino); 467 es->s_last_error_ino = cpu_to_le32(inode->i_ino);
468 if (ext4_error_ratelimit(inode->i_sb)) { 468 if (ext4_error_ratelimit(inode->i_sb)) {
469 path = d_path(&(file->f_path), pathname, sizeof(pathname)); 469 path = d_path(&(file->f_path), pathname, sizeof(pathname));
470 if (IS_ERR(path)) 470 if (IS_ERR(path))
471 path = "(unknown)"; 471 path = "(unknown)";
472 va_start(args, fmt); 472 va_start(args, fmt);
473 vaf.fmt = fmt; 473 vaf.fmt = fmt;
474 vaf.va = &args; 474 vaf.va = &args;
475 if (block) 475 if (block)
476 printk(KERN_CRIT 476 printk(KERN_CRIT
477 "EXT4-fs error (device %s): %s:%d: inode #%lu: " 477 "EXT4-fs error (device %s): %s:%d: inode #%lu: "
478 "block %llu: comm %s: path %s: %pV\n", 478 "block %llu: comm %s: path %s: %pV\n",
479 inode->i_sb->s_id, function, line, inode->i_ino, 479 inode->i_sb->s_id, function, line, inode->i_ino,
480 block, current->comm, path, &vaf); 480 block, current->comm, path, &vaf);
481 else 481 else
482 printk(KERN_CRIT 482 printk(KERN_CRIT
483 "EXT4-fs error (device %s): %s:%d: inode #%lu: " 483 "EXT4-fs error (device %s): %s:%d: inode #%lu: "
484 "comm %s: path %s: %pV\n", 484 "comm %s: path %s: %pV\n",
485 inode->i_sb->s_id, function, line, inode->i_ino, 485 inode->i_sb->s_id, function, line, inode->i_ino,
486 current->comm, path, &vaf); 486 current->comm, path, &vaf);
487 va_end(args); 487 va_end(args);
488 } 488 }
489 save_error_info(inode->i_sb, function, line); 489 save_error_info(inode->i_sb, function, line);
490 ext4_handle_error(inode->i_sb); 490 ext4_handle_error(inode->i_sb);
491 } 491 }
492 492
493 const char *ext4_decode_error(struct super_block *sb, int errno, 493 const char *ext4_decode_error(struct super_block *sb, int errno,
494 char nbuf[16]) 494 char nbuf[16])
495 { 495 {
496 char *errstr = NULL; 496 char *errstr = NULL;
497 497
498 switch (errno) { 498 switch (errno) {
499 case -EIO: 499 case -EIO:
500 errstr = "IO failure"; 500 errstr = "IO failure";
501 break; 501 break;
502 case -ENOMEM: 502 case -ENOMEM:
503 errstr = "Out of memory"; 503 errstr = "Out of memory";
504 break; 504 break;
505 case -EROFS: 505 case -EROFS:
506 if (!sb || (EXT4_SB(sb)->s_journal && 506 if (!sb || (EXT4_SB(sb)->s_journal &&
507 EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT)) 507 EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT))
508 errstr = "Journal has aborted"; 508 errstr = "Journal has aborted";
509 else 509 else
510 errstr = "Readonly filesystem"; 510 errstr = "Readonly filesystem";
511 break; 511 break;
512 default: 512 default:
513 /* If the caller passed in an extra buffer for unknown 513 /* If the caller passed in an extra buffer for unknown
514 * errors, textualise them now. Else we just return 514 * errors, textualise them now. Else we just return
515 * NULL. */ 515 * NULL. */
516 if (nbuf) { 516 if (nbuf) {
517 /* Check for truncated error codes... */ 517 /* Check for truncated error codes... */
518 if (snprintf(nbuf, 16, "error %d", -errno) >= 0) 518 if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
519 errstr = nbuf; 519 errstr = nbuf;
520 } 520 }
521 break; 521 break;
522 } 522 }
523 523
524 return errstr; 524 return errstr;
525 } 525 }
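Callers hand ext4_decode_error() a 16-byte scratch buffer so that even an unrecognized errno yields a printable string. A self-contained sketch of the same decode-with-fallback idiom (simplified: the journal-abort special case for -EROFS is omitted):

	#include <stdio.h>
	#include <errno.h>

	/* Known errors map to fixed strings; anything else is textualised
	 * into the caller-supplied 16-byte buffer. */
	static const char *decode_error(int errnum, char nbuf[16])
	{
		switch (errnum) {
		case -EIO:
			return "IO failure";
		case -ENOMEM:
			return "Out of memory";
		case -EROFS:
			return "Readonly filesystem";
		default:
			snprintf(nbuf, 16, "error %d", -errnum);
			return nbuf;
		}
	}

	int main(void)
	{
		char nbuf[16];

		printf("%s\n", decode_error(-EIO, nbuf));	/* IO failure */
		printf("%s\n", decode_error(-EINVAL, nbuf));	/* error 22 */
		return 0;
	}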
526 526
527 /* __ext4_std_error decodes expected errors from journaling functions 527 /* __ext4_std_error decodes expected errors from journaling functions
528 * automatically and invokes the appropriate error response. */ 528 * automatically and invokes the appropriate error response. */
529 529
530 void __ext4_std_error(struct super_block *sb, const char *function, 530 void __ext4_std_error(struct super_block *sb, const char *function,
531 unsigned int line, int errno) 531 unsigned int line, int errno)
532 { 532 {
533 char nbuf[16]; 533 char nbuf[16];
534 const char *errstr; 534 const char *errstr;
535 535
536 /* Special case: if the error is EROFS, and we're not already 536 /* Special case: if the error is EROFS, and we're not already
537 * inside a transaction, then there's really no point in logging 537 * inside a transaction, then there's really no point in logging
538 * an error. */ 538 * an error. */
539 if (errno == -EROFS && journal_current_handle() == NULL && 539 if (errno == -EROFS && journal_current_handle() == NULL &&
540 (sb->s_flags & MS_RDONLY)) 540 (sb->s_flags & MS_RDONLY))
541 return; 541 return;
542 542
543 if (ext4_error_ratelimit(sb)) { 543 if (ext4_error_ratelimit(sb)) {
544 errstr = ext4_decode_error(sb, errno, nbuf); 544 errstr = ext4_decode_error(sb, errno, nbuf);
545 printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n", 545 printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
546 sb->s_id, function, line, errstr); 546 sb->s_id, function, line, errstr);
547 } 547 }
548 548
549 save_error_info(sb, function, line); 549 save_error_info(sb, function, line);
550 ext4_handle_error(sb); 550 ext4_handle_error(sb);
551 } 551 }
552 552
553 /* 553 /*
554 * ext4_abort is a much stronger failure handler than ext4_error. The 554 * ext4_abort is a much stronger failure handler than ext4_error. The
555 * abort function may be used to deal with unrecoverable failures such 555 * abort function may be used to deal with unrecoverable failures such
556 * as journal IO errors or ENOMEM at a critical moment in log management. 556 * as journal IO errors or ENOMEM at a critical moment in log management.
557 * 557 *
558 * We unconditionally force the filesystem into an ABORT|READONLY state, 558 * We unconditionally force the filesystem into an ABORT|READONLY state,
559 * unless the error response on the fs has been set to panic, in which 559 * unless the error response on the fs has been set to panic, in which
560 * case we take the easy way out and panic immediately. 560 * case we take the easy way out and panic immediately.
561 */ 561 */
562 562
563 void __ext4_abort(struct super_block *sb, const char *function, 563 void __ext4_abort(struct super_block *sb, const char *function,
564 unsigned int line, const char *fmt, ...) 564 unsigned int line, const char *fmt, ...)
565 { 565 {
566 va_list args; 566 va_list args;
567 567
568 save_error_info(sb, function, line); 568 save_error_info(sb, function, line);
569 va_start(args, fmt); 569 va_start(args, fmt);
570 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: ", sb->s_id, 570 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: ", sb->s_id,
571 function, line); 571 function, line);
572 vprintk(fmt, args); 572 vprintk(fmt, args);
573 printk("\n"); 573 printk("\n");
574 va_end(args); 574 va_end(args);
575 575
576 if ((sb->s_flags & MS_RDONLY) == 0) { 576 if ((sb->s_flags & MS_RDONLY) == 0) {
577 ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); 577 ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
578 EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED; 578 EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
579 /* 579 /*
580 * Make sure updated value of ->s_mount_flags will be visible 580 * Make sure updated value of ->s_mount_flags will be visible
581 * before ->s_flags update 581 * before ->s_flags update
582 */ 582 */
583 smp_wmb(); 583 smp_wmb();
584 sb->s_flags |= MS_RDONLY; 584 sb->s_flags |= MS_RDONLY;
585 if (EXT4_SB(sb)->s_journal) 585 if (EXT4_SB(sb)->s_journal)
586 jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); 586 jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
587 save_error_info(sb, function, line); 587 save_error_info(sb, function, line);
588 } 588 }
589 if (test_opt(sb, ERRORS_PANIC)) 589 if (test_opt(sb, ERRORS_PANIC))
590 panic("EXT4-fs panic from previous error\n"); 590 panic("EXT4-fs panic from previous error\n");
591 } 591 }
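Both ext4_handle_error() and __ext4_abort() publish EXT4_MF_FS_ABORTED before MS_RDONLY, separated by smp_wmb(), so a reader that observes the read-only flag and issues the matching read barrier is guaranteed to observe the abort flag as well. A C11 userspace sketch of that store/load pairing; the flag names are illustrative:

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>

	static atomic_bool fs_aborted;	/* analog of EXT4_MF_FS_ABORTED */
	static atomic_bool fs_readonly;	/* analog of MS_RDONLY */

	static void writer(void)
	{
		atomic_store_explicit(&fs_aborted, true, memory_order_relaxed);
		atomic_thread_fence(memory_order_release);	/* ~smp_wmb() */
		atomic_store_explicit(&fs_readonly, true, memory_order_relaxed);
	}

	static void reader(void)
	{
		if (atomic_load_explicit(&fs_readonly, memory_order_relaxed)) {
			atomic_thread_fence(memory_order_acquire); /* ~smp_rmb() */
			/* Guaranteed to see the abort flag set as well. */
			printf("aborted=%d\n",
			       atomic_load_explicit(&fs_aborted,
						    memory_order_relaxed));
		}
	}

	int main(void)
	{
		writer();
		reader();
		return 0;
	}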
592 592
593 void __ext4_msg(struct super_block *sb, 593 void __ext4_msg(struct super_block *sb,
594 const char *prefix, const char *fmt, ...) 594 const char *prefix, const char *fmt, ...)
595 { 595 {
596 struct va_format vaf; 596 struct va_format vaf;
597 va_list args; 597 va_list args;
598 598
599 if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state), "EXT4-fs")) 599 if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state), "EXT4-fs"))
600 return; 600 return;
601 601
602 va_start(args, fmt); 602 va_start(args, fmt);
603 vaf.fmt = fmt; 603 vaf.fmt = fmt;
604 vaf.va = &args; 604 vaf.va = &args;
605 printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf); 605 printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
606 va_end(args); 606 va_end(args);
607 } 607 }
608 608
609 void __ext4_warning(struct super_block *sb, const char *function, 609 void __ext4_warning(struct super_block *sb, const char *function,
610 unsigned int line, const char *fmt, ...) 610 unsigned int line, const char *fmt, ...)
611 { 611 {
612 struct va_format vaf; 612 struct va_format vaf;
613 va_list args; 613 va_list args;
614 614
615 if (!___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state), 615 if (!___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state),
616 "EXT4-fs warning")) 616 "EXT4-fs warning"))
617 return; 617 return;
618 618
619 va_start(args, fmt); 619 va_start(args, fmt);
620 vaf.fmt = fmt; 620 vaf.fmt = fmt;
621 vaf.va = &args; 621 vaf.va = &args;
622 printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: %pV\n", 622 printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: %pV\n",
623 sb->s_id, function, line, &vaf); 623 sb->s_id, function, line, &vaf);
624 va_end(args); 624 va_end(args);
625 } 625 }
626 626
627 void __ext4_grp_locked_error(const char *function, unsigned int line, 627 void __ext4_grp_locked_error(const char *function, unsigned int line,
628 struct super_block *sb, ext4_group_t grp, 628 struct super_block *sb, ext4_group_t grp,
629 unsigned long ino, ext4_fsblk_t block, 629 unsigned long ino, ext4_fsblk_t block,
630 const char *fmt, ...) 630 const char *fmt, ...)
631 __releases(bitlock) 631 __releases(bitlock)
632 __acquires(bitlock) 632 __acquires(bitlock)
633 { 633 {
634 struct va_format vaf; 634 struct va_format vaf;
635 va_list args; 635 va_list args;
636 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 636 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
637 637
638 es->s_last_error_ino = cpu_to_le32(ino); 638 es->s_last_error_ino = cpu_to_le32(ino);
639 es->s_last_error_block = cpu_to_le64(block); 639 es->s_last_error_block = cpu_to_le64(block);
640 __save_error_info(sb, function, line); 640 __save_error_info(sb, function, line);
641 641
642 if (ext4_error_ratelimit(sb)) { 642 if (ext4_error_ratelimit(sb)) {
643 va_start(args, fmt); 643 va_start(args, fmt);
644 vaf.fmt = fmt; 644 vaf.fmt = fmt;
645 vaf.va = &args; 645 vaf.va = &args;
646 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ", 646 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
647 sb->s_id, function, line, grp); 647 sb->s_id, function, line, grp);
648 if (ino) 648 if (ino)
649 printk(KERN_CONT "inode %lu: ", ino); 649 printk(KERN_CONT "inode %lu: ", ino);
650 if (block) 650 if (block)
651 printk(KERN_CONT "block %llu:", 651 printk(KERN_CONT "block %llu:",
652 (unsigned long long) block); 652 (unsigned long long) block);
653 printk(KERN_CONT "%pV\n", &vaf); 653 printk(KERN_CONT "%pV\n", &vaf);
654 va_end(args); 654 va_end(args);
655 } 655 }
656 656
657 if (test_opt(sb, ERRORS_CONT)) { 657 if (test_opt(sb, ERRORS_CONT)) {
658 ext4_commit_super(sb, 0); 658 ext4_commit_super(sb, 0);
659 return; 659 return;
660 } 660 }
661 661
662 ext4_unlock_group(sb, grp); 662 ext4_unlock_group(sb, grp);
663 ext4_handle_error(sb); 663 ext4_handle_error(sb);
664 /* 664 /*
665 * We only get here in the ERRORS_RO case; relocking the group 665 * We only get here in the ERRORS_RO case; relocking the group
666 * may be dangerous, but nothing bad will happen since the 666 * may be dangerous, but nothing bad will happen since the
667 * filesystem will have already been marked read-only and the 667 * filesystem will have already been marked read-only and the
668 * journal has been aborted. We return 1 as a hint to callers 668 * journal has been aborted. We return 1 as a hint to callers
669 * who might want to use the return value from 669 * who might want to use the return value from
670 * ext4_grp_locked_error() to distinguish between the 670 * ext4_grp_locked_error() to distinguish between the
671 * ERRORS_CONT and ERRORS_RO case, and perhaps return more 671 * ERRORS_CONT and ERRORS_RO case, and perhaps return more
672 * aggressively from the ext4 function in question, with a 672 * aggressively from the ext4 function in question, with a
673 * more appropriate error code. 673 * more appropriate error code.
674 */ 674 */
675 ext4_lock_group(sb, grp); 675 ext4_lock_group(sb, grp);
676 return; 676 return;
677 } 677 }
678 678
679 void ext4_update_dynamic_rev(struct super_block *sb) 679 void ext4_update_dynamic_rev(struct super_block *sb)
680 { 680 {
681 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 681 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
682 682
683 if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV) 683 if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
684 return; 684 return;
685 685
686 ext4_warning(sb, 686 ext4_warning(sb,
687 "updating to rev %d because of new feature flag, " 687 "updating to rev %d because of new feature flag, "
688 "running e2fsck is recommended", 688 "running e2fsck is recommended",
689 EXT4_DYNAMIC_REV); 689 EXT4_DYNAMIC_REV);
690 690
691 es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO); 691 es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO);
692 es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE); 692 es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE);
693 es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV); 693 es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV);
694 /* leave es->s_feature_*compat flags alone */ 694 /* leave es->s_feature_*compat flags alone */
695 /* es->s_uuid will be set by e2fsck if empty */ 695 /* es->s_uuid will be set by e2fsck if empty */
696 696
697 /* 697 /*
698 * The rest of the superblock fields should be zero, and if not it 698 * The rest of the superblock fields should be zero, and if not it
699 * means they are likely already in use, so leave them alone. We 699 * means they are likely already in use, so leave them alone. We
700 * can leave it up to e2fsck to clean up any inconsistencies there. 700 * can leave it up to e2fsck to clean up any inconsistencies there.
701 */ 701 */
702 } 702 }
703 703
704 /* 704 /*
705 * Open the external journal device 705 * Open the external journal device
706 */ 706 */
707 static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb) 707 static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
708 { 708 {
709 struct block_device *bdev; 709 struct block_device *bdev;
710 char b[BDEVNAME_SIZE]; 710 char b[BDEVNAME_SIZE];
711 711
712 bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb); 712 bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb);
713 if (IS_ERR(bdev)) 713 if (IS_ERR(bdev))
714 goto fail; 714 goto fail;
715 return bdev; 715 return bdev;
716 716
717 fail: 717 fail:
718 ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld", 718 ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld",
719 __bdevname(dev, b), PTR_ERR(bdev)); 719 __bdevname(dev, b), PTR_ERR(bdev));
720 return NULL; 720 return NULL;
721 } 721 }
722 722
723 /* 723 /*
724 * Release the journal device 724 * Release the journal device
725 */ 725 */
726 static void ext4_blkdev_put(struct block_device *bdev) 726 static void ext4_blkdev_put(struct block_device *bdev)
727 { 727 {
728 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); 728 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
729 } 729 }
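blkdev_get_by_dev() is called with FMODE_EXCL and the super_block as holder, so the journal device is claimed exclusively for this mount. The closest userspace analog is opening a block device with O_EXCL, which fails with EBUSY while the device is mounted or otherwise claimed; the device path below is a placeholder:

	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <errno.h>
	#include <unistd.h>

	int main(void)
	{
		/* On block devices, O_EXCL fails with EBUSY while the device
		 * is mounted or claimed -- the userspace counterpart of the
		 * FMODE_EXCL claim above. */
		int fd = open("/dev/sdb1", O_RDWR | O_EXCL);

		if (fd < 0) {
			fprintf(stderr, "open: %s\n", strerror(errno));
			return 1;
		}
		close(fd);
		return 0;
	}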
730 730
731 static void ext4_blkdev_remove(struct ext4_sb_info *sbi) 731 static void ext4_blkdev_remove(struct ext4_sb_info *sbi)
732 { 732 {
733 struct block_device *bdev; 733 struct block_device *bdev;
734 bdev = sbi->journal_bdev; 734 bdev = sbi->journal_bdev;
735 if (bdev) { 735 if (bdev) {
736 ext4_blkdev_put(bdev); 736 ext4_blkdev_put(bdev);
737 sbi->journal_bdev = NULL; 737 sbi->journal_bdev = NULL;
738 } 738 }
739 } 739 }
740 740
741 static inline struct inode *orphan_list_entry(struct list_head *l) 741 static inline struct inode *orphan_list_entry(struct list_head *l)
742 { 742 {
743 return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode; 743 return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode;
744 } 744 }
745 745
746 static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi) 746 static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
747 { 747 {
748 struct list_head *l; 748 struct list_head *l;
749 749
750 ext4_msg(sb, KERN_ERR, "sb orphan head is %d", 750 ext4_msg(sb, KERN_ERR, "sb orphan head is %d",
751 le32_to_cpu(sbi->s_es->s_last_orphan)); 751 le32_to_cpu(sbi->s_es->s_last_orphan));
752 752
753 printk(KERN_ERR "sb_info orphan list:\n"); 753 printk(KERN_ERR "sb_info orphan list:\n");
754 list_for_each(l, &sbi->s_orphan) { 754 list_for_each(l, &sbi->s_orphan) {
755 struct inode *inode = orphan_list_entry(l); 755 struct inode *inode = orphan_list_entry(l);
756 printk(KERN_ERR " " 756 printk(KERN_ERR " "
757 "inode %s:%lu at %p: mode %o, nlink %d, next %d\n", 757 "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
758 inode->i_sb->s_id, inode->i_ino, inode, 758 inode->i_sb->s_id, inode->i_ino, inode,
759 inode->i_mode, inode->i_nlink, 759 inode->i_mode, inode->i_nlink,
760 NEXT_ORPHAN(inode)); 760 NEXT_ORPHAN(inode));
761 } 761 }
762 } 762 }
763 763
764 static void ext4_put_super(struct super_block *sb) 764 static void ext4_put_super(struct super_block *sb)
765 { 765 {
766 struct ext4_sb_info *sbi = EXT4_SB(sb); 766 struct ext4_sb_info *sbi = EXT4_SB(sb);
767 struct ext4_super_block *es = sbi->s_es; 767 struct ext4_super_block *es = sbi->s_es;
768 int i, err; 768 int i, err;
769 769
770 ext4_unregister_li_request(sb); 770 ext4_unregister_li_request(sb);
771 dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); 771 dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
772 772
773 flush_workqueue(sbi->rsv_conversion_wq); 773 flush_workqueue(sbi->rsv_conversion_wq);
774 destroy_workqueue(sbi->rsv_conversion_wq); 774 destroy_workqueue(sbi->rsv_conversion_wq);
775 775
776 if (sbi->s_journal) { 776 if (sbi->s_journal) {
777 err = jbd2_journal_destroy(sbi->s_journal); 777 err = jbd2_journal_destroy(sbi->s_journal);
778 sbi->s_journal = NULL; 778 sbi->s_journal = NULL;
779 if (err < 0) 779 if (err < 0)
780 ext4_abort(sb, "Couldn't clean up the journal"); 780 ext4_abort(sb, "Couldn't clean up the journal");
781 } 781 }
782 782
783 ext4_es_unregister_shrinker(sbi); 783 ext4_es_unregister_shrinker(sbi);
784 del_timer_sync(&sbi->s_err_report); 784 del_timer_sync(&sbi->s_err_report);
785 ext4_release_system_zone(sb); 785 ext4_release_system_zone(sb);
786 ext4_mb_release(sb); 786 ext4_mb_release(sb);
787 ext4_ext_release(sb); 787 ext4_ext_release(sb);
788 ext4_xattr_put_super(sb); 788 ext4_xattr_put_super(sb);
789 789
790 if (!(sb->s_flags & MS_RDONLY)) { 790 if (!(sb->s_flags & MS_RDONLY)) {
791 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 791 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
792 es->s_state = cpu_to_le16(sbi->s_mount_state); 792 es->s_state = cpu_to_le16(sbi->s_mount_state);
793 } 793 }
794 if (!(sb->s_flags & MS_RDONLY)) 794 if (!(sb->s_flags & MS_RDONLY))
795 ext4_commit_super(sb, 1); 795 ext4_commit_super(sb, 1);
796 796
797 if (sbi->s_proc) { 797 if (sbi->s_proc) {
798 remove_proc_entry("options", sbi->s_proc); 798 remove_proc_entry("options", sbi->s_proc);
799 remove_proc_entry(sb->s_id, ext4_proc_root); 799 remove_proc_entry(sb->s_id, ext4_proc_root);
800 } 800 }
801 kobject_del(&sbi->s_kobj); 801 kobject_del(&sbi->s_kobj);
802 802
803 for (i = 0; i < sbi->s_gdb_count; i++) 803 for (i = 0; i < sbi->s_gdb_count; i++)
804 brelse(sbi->s_group_desc[i]); 804 brelse(sbi->s_group_desc[i]);
805 kvfree(sbi->s_group_desc); 805 kvfree(sbi->s_group_desc);
806 kvfree(sbi->s_flex_groups); 806 kvfree(sbi->s_flex_groups);
807 percpu_counter_destroy(&sbi->s_freeclusters_counter); 807 percpu_counter_destroy(&sbi->s_freeclusters_counter);
808 percpu_counter_destroy(&sbi->s_freeinodes_counter); 808 percpu_counter_destroy(&sbi->s_freeinodes_counter);
809 percpu_counter_destroy(&sbi->s_dirs_counter); 809 percpu_counter_destroy(&sbi->s_dirs_counter);
810 percpu_counter_destroy(&sbi->s_dirtyclusters_counter); 810 percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
811 brelse(sbi->s_sbh); 811 brelse(sbi->s_sbh);
812 #ifdef CONFIG_QUOTA 812 #ifdef CONFIG_QUOTA
813 for (i = 0; i < EXT4_MAXQUOTAS; i++) 813 for (i = 0; i < EXT4_MAXQUOTAS; i++)
814 kfree(sbi->s_qf_names[i]); 814 kfree(sbi->s_qf_names[i]);
815 #endif 815 #endif
816 816
817 /* Debugging code just in case the in-memory inode orphan list 817 /* Debugging code just in case the in-memory inode orphan list
818 * isn't empty. The on-disk one can be non-empty if we've 818 * isn't empty. The on-disk one can be non-empty if we've
819 * detected an error and taken the fs readonly, but the 819 * detected an error and taken the fs readonly, but the
820 * in-memory list had better be clean by this point. */ 820 * in-memory list had better be clean by this point. */
821 if (!list_empty(&sbi->s_orphan)) 821 if (!list_empty(&sbi->s_orphan))
822 dump_orphan_list(sb, sbi); 822 dump_orphan_list(sb, sbi);
823 J_ASSERT(list_empty(&sbi->s_orphan)); 823 J_ASSERT(list_empty(&sbi->s_orphan));
824 824
825 invalidate_bdev(sb->s_bdev); 825 invalidate_bdev(sb->s_bdev);
826 if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) { 826 if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
827 /* 827 /*
828 * Invalidate the journal device's buffers. We don't want them 828 * Invalidate the journal device's buffers. We don't want them
829 * floating about in memory - the physical journal device may be 829 * floating about in memory - the physical journal device may be
830 * hotswapped, and it breaks the `ro-after' testing code. 830 * hotswapped, and it breaks the `ro-after' testing code.
831 */ 831 */
832 sync_blockdev(sbi->journal_bdev); 832 sync_blockdev(sbi->journal_bdev);
833 invalidate_bdev(sbi->journal_bdev); 833 invalidate_bdev(sbi->journal_bdev);
834 ext4_blkdev_remove(sbi); 834 ext4_blkdev_remove(sbi);
835 } 835 }
836 if (sbi->s_mb_cache) { 836 if (sbi->s_mb_cache) {
837 ext4_xattr_destroy_cache(sbi->s_mb_cache); 837 ext4_xattr_destroy_cache(sbi->s_mb_cache);
838 sbi->s_mb_cache = NULL; 838 sbi->s_mb_cache = NULL;
839 } 839 }
840 if (sbi->s_mmp_tsk) 840 if (sbi->s_mmp_tsk)
841 kthread_stop(sbi->s_mmp_tsk); 841 kthread_stop(sbi->s_mmp_tsk);
842 sb->s_fs_info = NULL; 842 sb->s_fs_info = NULL;
843 /* 843 /*
844 * Now that we are completely done shutting down the 844 * Now that we are completely done shutting down the
845 * superblock, we need to actually destroy the kobject. 845 * superblock, we need to actually destroy the kobject.
846 */ 846 */
847 kobject_put(&sbi->s_kobj); 847 kobject_put(&sbi->s_kobj);
848 wait_for_completion(&sbi->s_kobj_unregister); 848 wait_for_completion(&sbi->s_kobj_unregister);
849 if (sbi->s_chksum_driver) 849 if (sbi->s_chksum_driver)
850 crypto_free_shash(sbi->s_chksum_driver); 850 crypto_free_shash(sbi->s_chksum_driver);
851 kfree(sbi->s_blockgroup_lock); 851 kfree(sbi->s_blockgroup_lock);
852 kfree(sbi); 852 kfree(sbi);
853 } 853 }
854 854
855 static struct kmem_cache *ext4_inode_cachep; 855 static struct kmem_cache *ext4_inode_cachep;
856 856
857 /* 857 /*
858 * Called inside a transaction, so use GFP_NOFS 858 * Called inside a transaction, so use GFP_NOFS
859 */ 859 */
860 static struct inode *ext4_alloc_inode(struct super_block *sb) 860 static struct inode *ext4_alloc_inode(struct super_block *sb)
861 { 861 {
862 struct ext4_inode_info *ei; 862 struct ext4_inode_info *ei;
863 863
864 ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS); 864 ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
865 if (!ei) 865 if (!ei)
866 return NULL; 866 return NULL;
867 867
868 ei->vfs_inode.i_version = 1; 868 ei->vfs_inode.i_version = 1;
869 spin_lock_init(&ei->i_raw_lock); 869 spin_lock_init(&ei->i_raw_lock);
870 INIT_LIST_HEAD(&ei->i_prealloc_list); 870 INIT_LIST_HEAD(&ei->i_prealloc_list);
871 spin_lock_init(&ei->i_prealloc_lock); 871 spin_lock_init(&ei->i_prealloc_lock);
872 ext4_es_init_tree(&ei->i_es_tree); 872 ext4_es_init_tree(&ei->i_es_tree);
873 rwlock_init(&ei->i_es_lock); 873 rwlock_init(&ei->i_es_lock);
874 INIT_LIST_HEAD(&ei->i_es_list); 874 INIT_LIST_HEAD(&ei->i_es_list);
875 ei->i_es_all_nr = 0; 875 ei->i_es_all_nr = 0;
876 ei->i_es_shk_nr = 0; 876 ei->i_es_shk_nr = 0;
877 ei->i_es_shrink_lblk = 0; 877 ei->i_es_shrink_lblk = 0;
878 ei->i_reserved_data_blocks = 0; 878 ei->i_reserved_data_blocks = 0;
879 ei->i_reserved_meta_blocks = 0; 879 ei->i_reserved_meta_blocks = 0;
880 ei->i_allocated_meta_blocks = 0; 880 ei->i_allocated_meta_blocks = 0;
881 ei->i_da_metadata_calc_len = 0; 881 ei->i_da_metadata_calc_len = 0;
882 ei->i_da_metadata_calc_last_lblock = 0; 882 ei->i_da_metadata_calc_last_lblock = 0;
883 spin_lock_init(&(ei->i_block_reservation_lock)); 883 spin_lock_init(&(ei->i_block_reservation_lock));
884 #ifdef CONFIG_QUOTA 884 #ifdef CONFIG_QUOTA
885 ei->i_reserved_quota = 0; 885 ei->i_reserved_quota = 0;
886 memset(&ei->i_dquot, 0, sizeof(ei->i_dquot)); 886 memset(&ei->i_dquot, 0, sizeof(ei->i_dquot));
887 #endif 887 #endif
888 ei->jinode = NULL; 888 ei->jinode = NULL;
889 INIT_LIST_HEAD(&ei->i_rsv_conversion_list); 889 INIT_LIST_HEAD(&ei->i_rsv_conversion_list);
890 spin_lock_init(&ei->i_completed_io_lock); 890 spin_lock_init(&ei->i_completed_io_lock);
891 ei->i_sync_tid = 0; 891 ei->i_sync_tid = 0;
892 ei->i_datasync_tid = 0; 892 ei->i_datasync_tid = 0;
893 atomic_set(&ei->i_ioend_count, 0); 893 atomic_set(&ei->i_ioend_count, 0);
894 atomic_set(&ei->i_unwritten, 0); 894 atomic_set(&ei->i_unwritten, 0);
895 INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work); 895 INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);
896 896
897 return &ei->vfs_inode; 897 return &ei->vfs_inode;
898 } 898 }
899 899
900 static int ext4_drop_inode(struct inode *inode) 900 static int ext4_drop_inode(struct inode *inode)
901 { 901 {
902 int drop = generic_drop_inode(inode); 902 int drop = generic_drop_inode(inode);
903 903
904 trace_ext4_drop_inode(inode, drop); 904 trace_ext4_drop_inode(inode, drop);
905 return drop; 905 return drop;
906 } 906 }
907 907
908 static void ext4_i_callback(struct rcu_head *head) 908 static void ext4_i_callback(struct rcu_head *head)
909 { 909 {
910 struct inode *inode = container_of(head, struct inode, i_rcu); 910 struct inode *inode = container_of(head, struct inode, i_rcu);
911 kmem_cache_free(ext4_inode_cachep, EXT4_I(inode)); 911 kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
912 } 912 }
913 913
914 static void ext4_destroy_inode(struct inode *inode) 914 static void ext4_destroy_inode(struct inode *inode)
915 { 915 {
916 if (!list_empty(&(EXT4_I(inode)->i_orphan))) { 916 if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
917 ext4_msg(inode->i_sb, KERN_ERR, 917 ext4_msg(inode->i_sb, KERN_ERR,
918 "Inode %lu (%p): orphan list check failed!", 918 "Inode %lu (%p): orphan list check failed!",
919 inode->i_ino, EXT4_I(inode)); 919 inode->i_ino, EXT4_I(inode));
920 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4, 920 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
921 EXT4_I(inode), sizeof(struct ext4_inode_info), 921 EXT4_I(inode), sizeof(struct ext4_inode_info),
922 true); 922 true);
923 dump_stack(); 923 dump_stack();
924 } 924 }
925 call_rcu(&inode->i_rcu, ext4_i_callback); 925 call_rcu(&inode->i_rcu, ext4_i_callback);
926 } 926 }
927 927
928 static void init_once(void *foo) 928 static void init_once(void *foo)
929 { 929 {
930 struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; 930 struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;
931 931
932 INIT_LIST_HEAD(&ei->i_orphan); 932 INIT_LIST_HEAD(&ei->i_orphan);
933 init_rwsem(&ei->xattr_sem); 933 init_rwsem(&ei->xattr_sem);
934 init_rwsem(&ei->i_data_sem); 934 init_rwsem(&ei->i_data_sem);
935 inode_init_once(&ei->vfs_inode); 935 inode_init_once(&ei->vfs_inode);
936 } 936 }
937 937
938 static int __init init_inodecache(void) 938 static int __init init_inodecache(void)
939 { 939 {
940 ext4_inode_cachep = kmem_cache_create("ext4_inode_cache", 940 ext4_inode_cachep = kmem_cache_create("ext4_inode_cache",
941 sizeof(struct ext4_inode_info), 941 sizeof(struct ext4_inode_info),
942 0, (SLAB_RECLAIM_ACCOUNT| 942 0, (SLAB_RECLAIM_ACCOUNT|
943 SLAB_MEM_SPREAD), 943 SLAB_MEM_SPREAD),
944 init_once); 944 init_once);
945 if (ext4_inode_cachep == NULL) 945 if (ext4_inode_cachep == NULL)
946 return -ENOMEM; 946 return -ENOMEM;
947 return 0; 947 return 0;
948 } 948 }
949 949
950 static void destroy_inodecache(void) 950 static void destroy_inodecache(void)
951 { 951 {
952 /* 952 /*
953 * Make sure all delayed rcu free inodes are flushed before we 953 * Make sure all delayed rcu free inodes are flushed before we
954 * destroy the cache. 954 * destroy the cache.
955 */ 955 */
956 rcu_barrier(); 956 rcu_barrier();
957 kmem_cache_destroy(ext4_inode_cachep); 957 kmem_cache_destroy(ext4_inode_cachep);
958 } 958 }
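Note that init_once() above is a kmem_cache constructor: it runs when the cache allocates fresh backing memory, not on every ext4_alloc_inode() call, which is why objects must be returned to the cache with those fields still in a valid state. A small userspace sketch of the constructor-runs-once idiom, with an obj type and free list invented for illustration:

	#include <stdlib.h>
	#include <stdio.h>

	struct obj {
		struct obj *next_free;
		int ctor_runs;		/* counts constructor invocations */
	};

	static struct obj *free_list;

	static void init_once_obj(struct obj *o)
	{
		o->ctor_runs++;
	}

	static struct obj *cache_alloc(void)
	{
		struct obj *o = free_list;

		if (o) {
			free_list = o->next_free;
			return o;	/* constructor NOT re-run on reuse */
		}
		o = calloc(1, sizeof(*o));
		if (o)
			init_once_obj(o);	/* runs once per object */
		return o;
	}

	static void cache_free(struct obj *o)
	{
		o->next_free = free_list;
		free_list = o;
	}

	int main(void)
	{
		struct obj *o = cache_alloc();

		cache_free(o);
		o = cache_alloc();		/* reused object */
		printf("ctor ran %d time(s)\n", o->ctor_runs);	/* 1 */
		return 0;
	}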
959 959
960 void ext4_clear_inode(struct inode *inode) 960 void ext4_clear_inode(struct inode *inode)
961 { 961 {
962 invalidate_inode_buffers(inode); 962 invalidate_inode_buffers(inode);
963 clear_inode(inode); 963 clear_inode(inode);
964 dquot_drop(inode); 964 dquot_drop(inode);
965 ext4_discard_preallocations(inode); 965 ext4_discard_preallocations(inode);
966 ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); 966 ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
967 if (EXT4_I(inode)->jinode) { 967 if (EXT4_I(inode)->jinode) {
968 jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode), 968 jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
969 EXT4_I(inode)->jinode); 969 EXT4_I(inode)->jinode);
970 jbd2_free_inode(EXT4_I(inode)->jinode); 970 jbd2_free_inode(EXT4_I(inode)->jinode);
971 EXT4_I(inode)->jinode = NULL; 971 EXT4_I(inode)->jinode = NULL;
972 } 972 }
973 } 973 }
974 974
975 static struct inode *ext4_nfs_get_inode(struct super_block *sb, 975 static struct inode *ext4_nfs_get_inode(struct super_block *sb,
976 u64 ino, u32 generation) 976 u64 ino, u32 generation)
977 { 977 {
978 struct inode *inode; 978 struct inode *inode;
979 979
980 if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO) 980 if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)
981 return ERR_PTR(-ESTALE); 981 return ERR_PTR(-ESTALE);
982 if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)) 982 if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))
983 return ERR_PTR(-ESTALE); 983 return ERR_PTR(-ESTALE);
984 984
985 /* iget isn't really right if the inode is currently unallocated!! 985 /* iget isn't really right if the inode is currently unallocated!!
986 * 986 *
987 * ext4_read_inode will return a bad_inode if the inode had been 987 * ext4_read_inode will return a bad_inode if the inode had been
988 * deleted, so we should be safe. 988 * deleted, so we should be safe.
989 * 989 *
990 * Currently we don't know the generation for the parent directory, so 990 * Currently we don't know the generation for the parent directory, so
991 * a generation of 0 means "accept any" 991 * a generation of 0 means "accept any"
992 */ 992 */
993 inode = ext4_iget_normal(sb, ino); 993 inode = ext4_iget_normal(sb, ino);
994 if (IS_ERR(inode)) 994 if (IS_ERR(inode))
995 return ERR_CAST(inode); 995 return ERR_CAST(inode);
996 if (generation && inode->i_generation != generation) { 996 if (generation && inode->i_generation != generation) {
997 iput(inode); 997 iput(inode);
998 return ERR_PTR(-ESTALE); 998 return ERR_PTR(-ESTALE);
999 } 999 }
1000 1000
1001 return inode; 1001 return inode;
1002 } 1002 }
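The handle validation above treats a zero generation as a wildcard and anything else as a strict match against the inode's current generation. A tiny sketch of that check, with an invented fh type:

	#include <stdio.h>

	struct fh { unsigned long ino; unsigned int gen; };

	/* Generation 0 means "accept any"; otherwise it must match the
	 * inode's current generation or the handle is stale. */
	static int handle_is_stale(const struct fh *fh, unsigned int cur_gen)
	{
		return fh->gen && fh->gen != cur_gen;
	}

	int main(void)
	{
		struct fh any = { 12, 0 }, old = { 12, 3 };

		printf("%d %d\n", handle_is_stale(&any, 7),	/* 0: accept any */
		       handle_is_stale(&old, 7));		/* 1: stale */
		return 0;
	}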
1003 1003
1004 static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid, 1004 static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid,
1005 int fh_len, int fh_type) 1005 int fh_len, int fh_type)
1006 { 1006 {
1007 return generic_fh_to_dentry(sb, fid, fh_len, fh_type, 1007 return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
1008 ext4_nfs_get_inode); 1008 ext4_nfs_get_inode);
1009 } 1009 }
1010 1010
1011 static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid, 1011 static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
1012 int fh_len, int fh_type) 1012 int fh_len, int fh_type)
1013 { 1013 {
1014 return generic_fh_to_parent(sb, fid, fh_len, fh_type, 1014 return generic_fh_to_parent(sb, fid, fh_len, fh_type,
1015 ext4_nfs_get_inode); 1015 ext4_nfs_get_inode);
1016 } 1016 }
1017 1017
1018 /* 1018 /*
1019 * Try to release metadata pages (indirect blocks, directories) which are 1019 * Try to release metadata pages (indirect blocks, directories) which are
1020 * mapped via the block device. Since these pages could have journal heads 1020 * mapped via the block device. Since these pages could have journal heads
1021 * which would prevent try_to_free_buffers() from freeing them, we must use 1021 * which would prevent try_to_free_buffers() from freeing them, we must use
1022 * jbd2 layer's try_to_free_buffers() function to release them. 1022 * jbd2 layer's try_to_free_buffers() function to release them.
1023 */ 1023 */
1024 static int bdev_try_to_free_page(struct super_block *sb, struct page *page, 1024 static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
1025 gfp_t wait) 1025 gfp_t wait)
1026 { 1026 {
1027 journal_t *journal = EXT4_SB(sb)->s_journal; 1027 journal_t *journal = EXT4_SB(sb)->s_journal;
1028 1028
1029 WARN_ON(PageChecked(page)); 1029 WARN_ON(PageChecked(page));
1030 if (!page_has_buffers(page)) 1030 if (!page_has_buffers(page))
1031 return 0; 1031 return 0;
1032 if (journal) 1032 if (journal)
1033 return jbd2_journal_try_to_free_buffers(journal, page, 1033 return jbd2_journal_try_to_free_buffers(journal, page,
1034 wait & ~__GFP_WAIT); 1034 wait & ~__GFP_WAIT);
1035 return try_to_free_buffers(page); 1035 return try_to_free_buffers(page);
1036 } 1036 }
1037 1037
1038 #ifdef CONFIG_QUOTA 1038 #ifdef CONFIG_QUOTA
1039 #define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group") 1039 #define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group")
1040 #define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) 1040 #define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
1041 1041
1042 static int ext4_write_dquot(struct dquot *dquot); 1042 static int ext4_write_dquot(struct dquot *dquot);
1043 static int ext4_acquire_dquot(struct dquot *dquot); 1043 static int ext4_acquire_dquot(struct dquot *dquot);
1044 static int ext4_release_dquot(struct dquot *dquot); 1044 static int ext4_release_dquot(struct dquot *dquot);
1045 static int ext4_mark_dquot_dirty(struct dquot *dquot); 1045 static int ext4_mark_dquot_dirty(struct dquot *dquot);
1046 static int ext4_write_info(struct super_block *sb, int type); 1046 static int ext4_write_info(struct super_block *sb, int type);
1047 static int ext4_quota_on(struct super_block *sb, int type, int format_id, 1047 static int ext4_quota_on(struct super_block *sb, int type, int format_id,
1048 struct path *path); 1048 struct path *path);
1049 static int ext4_quota_off(struct super_block *sb, int type); 1049 static int ext4_quota_off(struct super_block *sb, int type);
1050 static int ext4_quota_on_mount(struct super_block *sb, int type); 1050 static int ext4_quota_on_mount(struct super_block *sb, int type);
1051 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, 1051 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
1052 size_t len, loff_t off); 1052 size_t len, loff_t off);
1053 static ssize_t ext4_quota_write(struct super_block *sb, int type, 1053 static ssize_t ext4_quota_write(struct super_block *sb, int type,
1054 const char *data, size_t len, loff_t off); 1054 const char *data, size_t len, loff_t off);
1055 static int ext4_quota_enable(struct super_block *sb, int type, int format_id, 1055 static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
1056 unsigned int flags); 1056 unsigned int flags);
1057 static int ext4_enable_quotas(struct super_block *sb); 1057 static int ext4_enable_quotas(struct super_block *sb);
1058 1058
1059 static struct dquot **ext4_get_dquots(struct inode *inode) 1059 static struct dquot **ext4_get_dquots(struct inode *inode)
1060 { 1060 {
1061 return EXT4_I(inode)->i_dquot; 1061 return EXT4_I(inode)->i_dquot;
1062 } 1062 }
1063 1063
1064 static const struct dquot_operations ext4_quota_operations = { 1064 static const struct dquot_operations ext4_quota_operations = {
1065 .get_reserved_space = ext4_get_reserved_space, 1065 .get_reserved_space = ext4_get_reserved_space,
1066 .write_dquot = ext4_write_dquot, 1066 .write_dquot = ext4_write_dquot,
1067 .acquire_dquot = ext4_acquire_dquot, 1067 .acquire_dquot = ext4_acquire_dquot,
1068 .release_dquot = ext4_release_dquot, 1068 .release_dquot = ext4_release_dquot,
1069 .mark_dirty = ext4_mark_dquot_dirty, 1069 .mark_dirty = ext4_mark_dquot_dirty,
1070 .write_info = ext4_write_info, 1070 .write_info = ext4_write_info,
1071 .alloc_dquot = dquot_alloc, 1071 .alloc_dquot = dquot_alloc,
1072 .destroy_dquot = dquot_destroy, 1072 .destroy_dquot = dquot_destroy,
1073 }; 1073 };
1074 1074
1075 static const struct quotactl_ops ext4_qctl_operations = { 1075 static const struct quotactl_ops ext4_qctl_operations = {
1076 .quota_on = ext4_quota_on, 1076 .quota_on = ext4_quota_on,
1077 .quota_off = ext4_quota_off, 1077 .quota_off = ext4_quota_off,
1078 .quota_sync = dquot_quota_sync, 1078 .quota_sync = dquot_quota_sync,
1079 .get_info = dquot_get_dqinfo, 1079 .get_info = dquot_get_dqinfo,
1080 .set_info = dquot_set_dqinfo, 1080 .set_info = dquot_set_dqinfo,
1081 .get_dqblk = dquot_get_dqblk, 1081 .get_dqblk = dquot_get_dqblk,
1082 .set_dqblk = dquot_set_dqblk 1082 .set_dqblk = dquot_set_dqblk
1083 }; 1083 };
1084 #endif 1084 #endif
1085 1085
1086 static const struct super_operations ext4_sops = { 1086 static const struct super_operations ext4_sops = {
1087 .alloc_inode = ext4_alloc_inode, 1087 .alloc_inode = ext4_alloc_inode,
1088 .destroy_inode = ext4_destroy_inode, 1088 .destroy_inode = ext4_destroy_inode,
1089 .write_inode = ext4_write_inode, 1089 .write_inode = ext4_write_inode,
1090 .dirty_inode = ext4_dirty_inode, 1090 .dirty_inode = ext4_dirty_inode,
1091 .drop_inode = ext4_drop_inode, 1091 .drop_inode = ext4_drop_inode,
1092 .evict_inode = ext4_evict_inode, 1092 .evict_inode = ext4_evict_inode,
1093 .put_super = ext4_put_super, 1093 .put_super = ext4_put_super,
1094 .sync_fs = ext4_sync_fs, 1094 .sync_fs = ext4_sync_fs,
1095 .freeze_fs = ext4_freeze, 1095 .freeze_fs = ext4_freeze,
1096 .unfreeze_fs = ext4_unfreeze, 1096 .unfreeze_fs = ext4_unfreeze,
1097 .statfs = ext4_statfs, 1097 .statfs = ext4_statfs,
1098 .remount_fs = ext4_remount, 1098 .remount_fs = ext4_remount,
1099 .show_options = ext4_show_options, 1099 .show_options = ext4_show_options,
1100 #ifdef CONFIG_QUOTA 1100 #ifdef CONFIG_QUOTA
1101 .quota_read = ext4_quota_read, 1101 .quota_read = ext4_quota_read,
1102 .quota_write = ext4_quota_write, 1102 .quota_write = ext4_quota_write,
1103 .get_dquots = ext4_get_dquots, 1103 .get_dquots = ext4_get_dquots,
1104 #endif 1104 #endif
1105 .bdev_try_to_free_page = bdev_try_to_free_page, 1105 .bdev_try_to_free_page = bdev_try_to_free_page,
1106 }; 1106 };
1107 1107
1108 static const struct export_operations ext4_export_ops = { 1108 static const struct export_operations ext4_export_ops = {
1109 .fh_to_dentry = ext4_fh_to_dentry, 1109 .fh_to_dentry = ext4_fh_to_dentry,
1110 .fh_to_parent = ext4_fh_to_parent, 1110 .fh_to_parent = ext4_fh_to_parent,
1111 .get_parent = ext4_get_parent, 1111 .get_parent = ext4_get_parent,
1112 }; 1112 };
1113 1113
1114 enum { 1114 enum {
1115 Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, 1115 Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
1116 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, 1116 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
1117 Opt_nouid32, Opt_debug, Opt_removed, 1117 Opt_nouid32, Opt_debug, Opt_removed,
1118 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, 1118 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
1119 Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload, 1119 Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload,
1120 Opt_commit, Opt_min_batch_time, Opt_max_batch_time, Opt_journal_dev, 1120 Opt_commit, Opt_min_batch_time, Opt_max_batch_time, Opt_journal_dev,
1121 Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit, 1121 Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit,
1122 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 1122 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
1123 Opt_data_err_abort, Opt_data_err_ignore, 1123 Opt_data_err_abort, Opt_data_err_ignore,
1124 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 1124 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
1125 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, 1125 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
1126 Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err, 1126 Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
1127 Opt_usrquota, Opt_grpquota, Opt_i_version, Opt_dax, 1127 Opt_usrquota, Opt_grpquota, Opt_i_version, Opt_dax,
1128 Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit, 1128 Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
1129 Opt_lazytime, Opt_nolazytime, 1129 Opt_lazytime, Opt_nolazytime,
1130 Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity, 1130 Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
1131 Opt_inode_readahead_blks, Opt_journal_ioprio, 1131 Opt_inode_readahead_blks, Opt_journal_ioprio,
1132 Opt_dioread_nolock, Opt_dioread_lock, 1132 Opt_dioread_nolock, Opt_dioread_lock,
1133 Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable, 1133 Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
1134 Opt_max_dir_size_kb, Opt_nojournal_checksum, 1134 Opt_max_dir_size_kb, Opt_nojournal_checksum,
1135 }; 1135 };
1136 1136
1137 static const match_table_t tokens = { 1137 static const match_table_t tokens = {
1138 {Opt_bsd_df, "bsddf"}, 1138 {Opt_bsd_df, "bsddf"},
1139 {Opt_minix_df, "minixdf"}, 1139 {Opt_minix_df, "minixdf"},
1140 {Opt_grpid, "grpid"}, 1140 {Opt_grpid, "grpid"},
1141 {Opt_grpid, "bsdgroups"}, 1141 {Opt_grpid, "bsdgroups"},
1142 {Opt_nogrpid, "nogrpid"}, 1142 {Opt_nogrpid, "nogrpid"},
1143 {Opt_nogrpid, "sysvgroups"}, 1143 {Opt_nogrpid, "sysvgroups"},
1144 {Opt_resgid, "resgid=%u"}, 1144 {Opt_resgid, "resgid=%u"},
1145 {Opt_resuid, "resuid=%u"}, 1145 {Opt_resuid, "resuid=%u"},
1146 {Opt_sb, "sb=%u"}, 1146 {Opt_sb, "sb=%u"},
1147 {Opt_err_cont, "errors=continue"}, 1147 {Opt_err_cont, "errors=continue"},
1148 {Opt_err_panic, "errors=panic"}, 1148 {Opt_err_panic, "errors=panic"},
1149 {Opt_err_ro, "errors=remount-ro"}, 1149 {Opt_err_ro, "errors=remount-ro"},
1150 {Opt_nouid32, "nouid32"}, 1150 {Opt_nouid32, "nouid32"},
1151 {Opt_debug, "debug"}, 1151 {Opt_debug, "debug"},
1152 {Opt_removed, "oldalloc"}, 1152 {Opt_removed, "oldalloc"},
1153 {Opt_removed, "orlov"}, 1153 {Opt_removed, "orlov"},
1154 {Opt_user_xattr, "user_xattr"}, 1154 {Opt_user_xattr, "user_xattr"},
1155 {Opt_nouser_xattr, "nouser_xattr"}, 1155 {Opt_nouser_xattr, "nouser_xattr"},
1156 {Opt_acl, "acl"}, 1156 {Opt_acl, "acl"},
1157 {Opt_noacl, "noacl"}, 1157 {Opt_noacl, "noacl"},
1158 {Opt_noload, "norecovery"}, 1158 {Opt_noload, "norecovery"},
1159 {Opt_noload, "noload"}, 1159 {Opt_noload, "noload"},
1160 {Opt_removed, "nobh"}, 1160 {Opt_removed, "nobh"},
1161 {Opt_removed, "bh"}, 1161 {Opt_removed, "bh"},
1162 {Opt_commit, "commit=%u"}, 1162 {Opt_commit, "commit=%u"},
1163 {Opt_min_batch_time, "min_batch_time=%u"}, 1163 {Opt_min_batch_time, "min_batch_time=%u"},
1164 {Opt_max_batch_time, "max_batch_time=%u"}, 1164 {Opt_max_batch_time, "max_batch_time=%u"},
1165 {Opt_journal_dev, "journal_dev=%u"}, 1165 {Opt_journal_dev, "journal_dev=%u"},
1166 {Opt_journal_path, "journal_path=%s"}, 1166 {Opt_journal_path, "journal_path=%s"},
1167 {Opt_journal_checksum, "journal_checksum"}, 1167 {Opt_journal_checksum, "journal_checksum"},
1168 {Opt_nojournal_checksum, "nojournal_checksum"}, 1168 {Opt_nojournal_checksum, "nojournal_checksum"},
1169 {Opt_journal_async_commit, "journal_async_commit"}, 1169 {Opt_journal_async_commit, "journal_async_commit"},
1170 {Opt_abort, "abort"}, 1170 {Opt_abort, "abort"},
1171 {Opt_data_journal, "data=journal"}, 1171 {Opt_data_journal, "data=journal"},
1172 {Opt_data_ordered, "data=ordered"}, 1172 {Opt_data_ordered, "data=ordered"},
1173 {Opt_data_writeback, "data=writeback"}, 1173 {Opt_data_writeback, "data=writeback"},
1174 {Opt_data_err_abort, "data_err=abort"}, 1174 {Opt_data_err_abort, "data_err=abort"},
1175 {Opt_data_err_ignore, "data_err=ignore"}, 1175 {Opt_data_err_ignore, "data_err=ignore"},
1176 {Opt_offusrjquota, "usrjquota="}, 1176 {Opt_offusrjquota, "usrjquota="},
1177 {Opt_usrjquota, "usrjquota=%s"}, 1177 {Opt_usrjquota, "usrjquota=%s"},
1178 {Opt_offgrpjquota, "grpjquota="}, 1178 {Opt_offgrpjquota, "grpjquota="},
1179 {Opt_grpjquota, "grpjquota=%s"}, 1179 {Opt_grpjquota, "grpjquota=%s"},
1180 {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, 1180 {Opt_jqfmt_vfsold, "jqfmt=vfsold"},
1181 {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, 1181 {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
1182 {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"}, 1182 {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"},
1183 {Opt_grpquota, "grpquota"}, 1183 {Opt_grpquota, "grpquota"},
1184 {Opt_noquota, "noquota"}, 1184 {Opt_noquota, "noquota"},
1185 {Opt_quota, "quota"}, 1185 {Opt_quota, "quota"},
1186 {Opt_usrquota, "usrquota"}, 1186 {Opt_usrquota, "usrquota"},
1187 {Opt_barrier, "barrier=%u"}, 1187 {Opt_barrier, "barrier=%u"},
1188 {Opt_barrier, "barrier"}, 1188 {Opt_barrier, "barrier"},
1189 {Opt_nobarrier, "nobarrier"}, 1189 {Opt_nobarrier, "nobarrier"},
1190 {Opt_i_version, "i_version"}, 1190 {Opt_i_version, "i_version"},
1191 {Opt_dax, "dax"}, 1191 {Opt_dax, "dax"},
1192 {Opt_stripe, "stripe=%u"}, 1192 {Opt_stripe, "stripe=%u"},
1193 {Opt_delalloc, "delalloc"}, 1193 {Opt_delalloc, "delalloc"},
1194 {Opt_lazytime, "lazytime"}, 1194 {Opt_lazytime, "lazytime"},
1195 {Opt_nolazytime, "nolazytime"}, 1195 {Opt_nolazytime, "nolazytime"},
1196 {Opt_nodelalloc, "nodelalloc"}, 1196 {Opt_nodelalloc, "nodelalloc"},
1197 {Opt_removed, "mblk_io_submit"}, 1197 {Opt_removed, "mblk_io_submit"},
1198 {Opt_removed, "nomblk_io_submit"}, 1198 {Opt_removed, "nomblk_io_submit"},
1199 {Opt_block_validity, "block_validity"}, 1199 {Opt_block_validity, "block_validity"},
1200 {Opt_noblock_validity, "noblock_validity"}, 1200 {Opt_noblock_validity, "noblock_validity"},
1201 {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, 1201 {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
1202 {Opt_journal_ioprio, "journal_ioprio=%u"}, 1202 {Opt_journal_ioprio, "journal_ioprio=%u"},
1203 {Opt_auto_da_alloc, "auto_da_alloc=%u"}, 1203 {Opt_auto_da_alloc, "auto_da_alloc=%u"},
1204 {Opt_auto_da_alloc, "auto_da_alloc"}, 1204 {Opt_auto_da_alloc, "auto_da_alloc"},
1205 {Opt_noauto_da_alloc, "noauto_da_alloc"}, 1205 {Opt_noauto_da_alloc, "noauto_da_alloc"},
1206 {Opt_dioread_nolock, "dioread_nolock"}, 1206 {Opt_dioread_nolock, "dioread_nolock"},
1207 {Opt_dioread_lock, "dioread_lock"}, 1207 {Opt_dioread_lock, "dioread_lock"},
1208 {Opt_discard, "discard"}, 1208 {Opt_discard, "discard"},
1209 {Opt_nodiscard, "nodiscard"}, 1209 {Opt_nodiscard, "nodiscard"},
1210 {Opt_init_itable, "init_itable=%u"}, 1210 {Opt_init_itable, "init_itable=%u"},
1211 {Opt_init_itable, "init_itable"}, 1211 {Opt_init_itable, "init_itable"},
1212 {Opt_noinit_itable, "noinit_itable"}, 1212 {Opt_noinit_itable, "noinit_itable"},
1213 {Opt_max_dir_size_kb, "max_dir_size_kb=%u"}, 1213 {Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
1214 {Opt_removed, "check=none"}, /* mount option from ext2/3 */ 1214 {Opt_removed, "check=none"}, /* mount option from ext2/3 */
1215 {Opt_removed, "nocheck"}, /* mount option from ext2/3 */ 1215 {Opt_removed, "nocheck"}, /* mount option from ext2/3 */
1216 {Opt_removed, "reservation"}, /* mount option from ext2/3 */ 1216 {Opt_removed, "reservation"}, /* mount option from ext2/3 */
1217 {Opt_removed, "noreservation"}, /* mount option from ext2/3 */ 1217 {Opt_removed, "noreservation"}, /* mount option from ext2/3 */
1218 {Opt_removed, "journal=%u"}, /* mount option from ext2/3 */ 1218 {Opt_removed, "journal=%u"}, /* mount option from ext2/3 */
1219 {Opt_err, NULL}, 1219 {Opt_err, NULL},
1220 }; 1220 };
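Each entry above pairs an Opt_* token with a pattern for the kernel's match_token() parser; %u patterns capture a numeric argument, and the first matching pattern wins. A minimal standalone sketch of such a first-match table, using sscanf for the %u case (the table contents here are a tiny illustrative subset):

	#include <stdio.h>
	#include <string.h>

	enum { Opt_commit, Opt_barrier, Opt_nobarrier, Opt_err };

	static const struct { int token; const char *pattern; } tokens[] = {
		{ Opt_commit,	"commit=%u" },
		{ Opt_barrier,	"barrier" },
		{ Opt_nobarrier, "nobarrier" },
		{ Opt_err,	NULL },
	};

	static int match_token(const char *opt, unsigned int *arg)
	{
		for (int i = 0; tokens[i].pattern; i++) {
			if (strchr(tokens[i].pattern, '%')) {
				/* pattern carries a %u argument */
				if (sscanf(opt, tokens[i].pattern, arg) == 1)
					return tokens[i].token;
			} else if (strcmp(opt, tokens[i].pattern) == 0) {
				return tokens[i].token;
			}
		}
		return Opt_err;
	}

	int main(void)
	{
		unsigned int arg = 0;

		printf("%d\n", match_token("commit=30", &arg)); /* Opt_commit */
		printf("arg=%u\n", arg);			/* 30 */
		printf("%d\n", match_token("nobarrier", &arg));	/* Opt_nobarrier */
		return 0;
	}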
1221 1221
1222 static ext4_fsblk_t get_sb_block(void **data) 1222 static ext4_fsblk_t get_sb_block(void **data)
1223 { 1223 {
1224 ext4_fsblk_t sb_block; 1224 ext4_fsblk_t sb_block;
1225 char *options = (char *) *data; 1225 char *options = (char *) *data;
1226 1226
1227 if (!options || strncmp(options, "sb=", 3) != 0) 1227 if (!options || strncmp(options, "sb=", 3) != 0)
1228 return 1; /* Default location */ 1228 return 1; /* Default location */
1229 1229
1230 options += 3; 1230 options += 3;
1231 /* TODO: use simple_strtoll with >32bit ext4 */ 1231 /* TODO: use simple_strtoll with >32bit ext4 */
1232 sb_block = simple_strtoul(options, &options, 0); 1232 sb_block = simple_strtoul(options, &options, 0);
1233 if (*options && *options != ',') { 1233 if (*options && *options != ',') {
1234 printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n", 1234 printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n",
1235 (char *) *data); 1235 (char *) *data);
1236 return 1; 1236 return 1;
1237 } 1237 }
1238 if (*options == ',') 1238 if (*options == ',')
1239 options++; 1239 options++;
1240 *data = (void *) options; 1240 *data = (void *) options;
1241 1241
1242 return sb_block; 1242 return sb_block;
1243 } 1243 }
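get_sb_block() peels a leading "sb=<block>" off the mount-option string, defaults to block 1, and leaves *data pointing at the remaining options. A standalone sketch of the same peel-and-advance parse:

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	static unsigned long get_sb_block(const char **data)
	{
		const char *options = *data;
		char *end;
		unsigned long sb_block;

		if (!options || strncmp(options, "sb=", 3) != 0)
			return 1;	/* default superblock location */

		options += 3;
		sb_block = strtoul(options, &end, 0);
		if (*end && *end != ',') {
			fprintf(stderr, "invalid sb specification: %s\n", *data);
			return 1;
		}
		if (*end == ',')
			end++;		/* skip the separator */
		*data = end;
		return sb_block;
	}

	int main(void)
	{
		const char *opts = "sb=8193,errors=remount-ro";
		unsigned long blk = get_sb_block(&opts);

		printf("sb block %lu, remaining options \"%s\"\n", blk, opts);
		return 0;
	}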

#define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
static char deprecated_msg[] = "Mount option \"%s\" will be removed by %s\n"
	"Contact linux-ext4@vger.kernel.org if you think we should keep it.\n";

#ifdef CONFIG_QUOTA
static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	char *qname;
	int ret = -1;

	if (sb_any_quota_loaded(sb) &&
	    !sbi->s_qf_names[qtype]) {
		ext4_msg(sb, KERN_ERR,
			 "Cannot change journaled "
			 "quota options when quota turned on");
		return -1;
	}
	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) {
		ext4_msg(sb, KERN_ERR, "Cannot set journaled quota options "
			 "when QUOTA feature is enabled");
		return -1;
	}
	qname = match_strdup(args);
	if (!qname) {
		ext4_msg(sb, KERN_ERR,
			 "Not enough memory for storing quotafile name");
		return -1;
	}
	if (sbi->s_qf_names[qtype]) {
		if (strcmp(sbi->s_qf_names[qtype], qname) == 0)
			ret = 1;
		else
			ext4_msg(sb, KERN_ERR,
				 "%s quota file already specified",
				 QTYPE2NAME(qtype));
		goto errout;
	}
	if (strchr(qname, '/')) {
		ext4_msg(sb, KERN_ERR,
			 "quotafile must be on filesystem root");
		goto errout;
	}
	sbi->s_qf_names[qtype] = qname;
	set_opt(sb, QUOTA);
	return 1;
errout:
	kfree(qname);
	return ret;
}

static int clear_qf_name(struct super_block *sb, int qtype)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	if (sb_any_quota_loaded(sb) &&
	    sbi->s_qf_names[qtype]) {
		ext4_msg(sb, KERN_ERR, "Cannot change journaled quota options"
			 " when quota turned on");
		return -1;
	}
	kfree(sbi->s_qf_names[qtype]);
	sbi->s_qf_names[qtype] = NULL;
	return 1;
}
#endif
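
set_qf_name()/clear_qf_name() back the usrjquota=/grpjquota= mount options: naming a file enables journaled quota for that type, and the off* variants turn it back off. A hedged userspace illustration of turning the feature on via mount(2) (the device, mount point, and quota file name are made up, and the call needs CAP_SYS_ADMIN):

#include <sys/mount.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical device/mountpoint; the quota file must live in
	 * the filesystem root, per the strchr(qname, '/') check above. */
	if (mount("/dev/sda1", "/mnt", "ext4", 0,
		  "usrjquota=aquota.user,jqfmt=vfsv0") != 0)
		perror("mount");
	return 0;
}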

#define MOPT_SET	0x0001
#define MOPT_CLEAR	0x0002
#define MOPT_NOSUPPORT	0x0004
#define MOPT_EXPLICIT	0x0008
#define MOPT_CLEAR_ERR	0x0010
#define MOPT_GTE0	0x0020
#ifdef CONFIG_QUOTA
#define MOPT_Q		0
#define MOPT_QFMT	0x0040
#else
#define MOPT_Q		MOPT_NOSUPPORT
#define MOPT_QFMT	MOPT_NOSUPPORT
#endif
#define MOPT_DATAJ	0x0080
#define MOPT_NO_EXT2	0x0100
#define MOPT_NO_EXT3	0x0200
#define MOPT_EXT4_ONLY	(MOPT_NO_EXT2 | MOPT_NO_EXT3)
#define MOPT_STRING	0x0400

static const struct mount_opts {
	int	token;
	int	mount_opt;
	int	flags;
} ext4_mount_opts[] = {
	{Opt_minix_df, EXT4_MOUNT_MINIX_DF, MOPT_SET},
	{Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR},
	{Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET},
	{Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR},
	{Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET},
	{Opt_noblock_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_CLEAR},
	{Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK,
	 MOPT_EXT4_ONLY | MOPT_SET},
	{Opt_dioread_lock, EXT4_MOUNT_DIOREAD_NOLOCK,
	 MOPT_EXT4_ONLY | MOPT_CLEAR},
	{Opt_discard, EXT4_MOUNT_DISCARD, MOPT_SET},
	{Opt_nodiscard, EXT4_MOUNT_DISCARD, MOPT_CLEAR},
	{Opt_delalloc, EXT4_MOUNT_DELALLOC,
	 MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
	{Opt_nodelalloc, EXT4_MOUNT_DELALLOC,
	 MOPT_EXT4_ONLY | MOPT_CLEAR},
	{Opt_nojournal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
	 MOPT_EXT4_ONLY | MOPT_CLEAR},
	{Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
	 MOPT_EXT4_ONLY | MOPT_SET},
	{Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT |
				    EXT4_MOUNT_JOURNAL_CHECKSUM),
	 MOPT_EXT4_ONLY | MOPT_SET},
	{Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_NO_EXT2 | MOPT_SET},
	{Opt_err_panic, EXT4_MOUNT_ERRORS_PANIC, MOPT_SET | MOPT_CLEAR_ERR},
	{Opt_err_ro, EXT4_MOUNT_ERRORS_RO, MOPT_SET | MOPT_CLEAR_ERR},
	{Opt_err_cont, EXT4_MOUNT_ERRORS_CONT, MOPT_SET | MOPT_CLEAR_ERR},
	{Opt_data_err_abort, EXT4_MOUNT_DATA_ERR_ABORT,
	 MOPT_NO_EXT2 | MOPT_SET},
	{Opt_data_err_ignore, EXT4_MOUNT_DATA_ERR_ABORT,
	 MOPT_NO_EXT2 | MOPT_CLEAR},
	{Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET},
	{Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR},
	{Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET},
	{Opt_auto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_CLEAR},
	{Opt_noinit_itable, EXT4_MOUNT_INIT_INODE_TABLE, MOPT_CLEAR},
	{Opt_commit, 0, MOPT_GTE0},
	{Opt_max_batch_time, 0, MOPT_GTE0},
	{Opt_min_batch_time, 0, MOPT_GTE0},
	{Opt_inode_readahead_blks, 0, MOPT_GTE0},
	{Opt_init_itable, 0, MOPT_GTE0},
	{Opt_dax, EXT4_MOUNT_DAX, MOPT_SET},
	{Opt_stripe, 0, MOPT_GTE0},
	{Opt_resuid, 0, MOPT_GTE0},
	{Opt_resgid, 0, MOPT_GTE0},
	{Opt_journal_dev, 0, MOPT_GTE0},
	{Opt_journal_path, 0, MOPT_STRING},
	{Opt_journal_ioprio, 0, MOPT_GTE0},
	{Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
	{Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
	{Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA,
	 MOPT_NO_EXT2 | MOPT_DATAJ},
	{Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET},
	{Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR},
#ifdef CONFIG_EXT4_FS_POSIX_ACL
	{Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET},
	{Opt_noacl, EXT4_MOUNT_POSIX_ACL, MOPT_CLEAR},
#else
	{Opt_acl, 0, MOPT_NOSUPPORT},
	{Opt_noacl, 0, MOPT_NOSUPPORT},
#endif
	{Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET},
	{Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET},
	{Opt_quota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, MOPT_SET | MOPT_Q},
	{Opt_usrquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA,
	 MOPT_SET | MOPT_Q},
	{Opt_grpquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_GRPQUOTA,
	 MOPT_SET | MOPT_Q},
	{Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
		       EXT4_MOUNT_GRPQUOTA), MOPT_CLEAR | MOPT_Q},
	{Opt_usrjquota, 0, MOPT_Q},
	{Opt_grpjquota, 0, MOPT_Q},
	{Opt_offusrjquota, 0, MOPT_Q},
	{Opt_offgrpjquota, 0, MOPT_Q},
	{Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT},
	{Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT},
	{Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
	{Opt_max_dir_size_kb, 0, MOPT_GTE0},
	{Opt_err, 0, 0}
};
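
Most options reduce to setting or clearing one bit in s_mount_opt, which is why the table pairs each token with a mask plus MOPT_SET or MOPT_CLEAR. A compact standalone sketch of that table-driven dispatch (toy token values and masks, not the kernel's):

#include <stdio.h>

#define MOPT_SET   0x0001
#define MOPT_CLEAR 0x0002

enum { Opt_discard, Opt_nodiscard, Opt_err };
#define MNT_DISCARD 0x0100	/* stand-in for EXT4_MOUNT_DISCARD */

static const struct { int token, mask, flags; } opts[] = {
	{Opt_discard,   MNT_DISCARD, MOPT_SET},
	{Opt_nodiscard, MNT_DISCARD, MOPT_CLEAR},
	{Opt_err, 0, 0}
};

static void apply(unsigned *mount_opt, int token)
{
	int i;

	for (i = 0; opts[i].token != Opt_err; i++) {
		if (opts[i].token != token)
			continue;
		if (opts[i].flags & MOPT_SET)
			*mount_opt |= opts[i].mask;	/* "discard" */
		else if (opts[i].flags & MOPT_CLEAR)
			*mount_opt &= ~opts[i].mask;	/* "nodiscard" */
		return;
	}
}

int main(void)
{
	unsigned mo = 0;

	apply(&mo, Opt_discard);
	printf("after discard:   %#x\n", mo);	/* 0x100 */
	apply(&mo, Opt_nodiscard);
	printf("after nodiscard: %#x\n", mo);	/* 0 */
	return 0;
}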

static int handle_mount_opt(struct super_block *sb, char *opt, int token,
			    substring_t *args, unsigned long *journal_devnum,
			    unsigned int *journal_ioprio, int is_remount)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	const struct mount_opts *m;
	kuid_t uid;
	kgid_t gid;
	int arg = 0;

#ifdef CONFIG_QUOTA
	if (token == Opt_usrjquota)
		return set_qf_name(sb, USRQUOTA, &args[0]);
	else if (token == Opt_grpjquota)
		return set_qf_name(sb, GRPQUOTA, &args[0]);
	else if (token == Opt_offusrjquota)
		return clear_qf_name(sb, USRQUOTA);
	else if (token == Opt_offgrpjquota)
		return clear_qf_name(sb, GRPQUOTA);
#endif
	switch (token) {
	case Opt_noacl:
	case Opt_nouser_xattr:
		ext4_msg(sb, KERN_WARNING, deprecated_msg, opt, "3.5");
		break;
	case Opt_sb:
		return 1;	/* handled by get_sb_block() */
	case Opt_removed:
		ext4_msg(sb, KERN_WARNING, "Ignoring removed %s option", opt);
		return 1;
	case Opt_abort:
		sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
		return 1;
	case Opt_i_version:
		sb->s_flags |= MS_I_VERSION;
		return 1;
	case Opt_lazytime:
		sb->s_flags |= MS_LAZYTIME;
		return 1;
	case Opt_nolazytime:
		sb->s_flags &= ~MS_LAZYTIME;
		return 1;
	}

	for (m = ext4_mount_opts; m->token != Opt_err; m++)
		if (token == m->token)
			break;

	if (m->token == Opt_err) {
		ext4_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" "
			 "or missing value", opt);
		return -1;
	}

	if ((m->flags & MOPT_NO_EXT2) && IS_EXT2_SB(sb)) {
		ext4_msg(sb, KERN_ERR,
			 "Mount option \"%s\" incompatible with ext2", opt);
		return -1;
	}
	if ((m->flags & MOPT_NO_EXT3) && IS_EXT3_SB(sb)) {
		ext4_msg(sb, KERN_ERR,
			 "Mount option \"%s\" incompatible with ext3", opt);
		return -1;
	}

	if (args->from && !(m->flags & MOPT_STRING) && match_int(args, &arg))
		return -1;
	if (args->from && (m->flags & MOPT_GTE0) && (arg < 0))
		return -1;
	if (m->flags & MOPT_EXPLICIT)
		set_opt2(sb, EXPLICIT_DELALLOC);
	if (m->flags & MOPT_CLEAR_ERR)
		clear_opt(sb, ERRORS_MASK);
	if (token == Opt_noquota && sb_any_quota_loaded(sb)) {
		ext4_msg(sb, KERN_ERR, "Cannot change quota "
			 "options when quota turned on");
		return -1;
	}

	if (m->flags & MOPT_NOSUPPORT) {
		ext4_msg(sb, KERN_ERR, "%s option not supported", opt);
	} else if (token == Opt_commit) {
		if (arg == 0)
			arg = JBD2_DEFAULT_MAX_COMMIT_AGE;
		sbi->s_commit_interval = HZ * arg;
	} else if (token == Opt_max_batch_time) {
		sbi->s_max_batch_time = arg;
	} else if (token == Opt_min_batch_time) {
		sbi->s_min_batch_time = arg;
	} else if (token == Opt_inode_readahead_blks) {
		if (arg && (arg > (1 << 30) || !is_power_of_2(arg))) {
			ext4_msg(sb, KERN_ERR,
				 "EXT4-fs: inode_readahead_blks must be "
				 "0 or a power of 2 smaller than 2^31");
			return -1;
		}
		sbi->s_inode_readahead_blks = arg;
	} else if (token == Opt_init_itable) {
		set_opt(sb, INIT_INODE_TABLE);
		if (!args->from)
			arg = EXT4_DEF_LI_WAIT_MULT;
		sbi->s_li_wait_mult = arg;
	} else if (token == Opt_max_dir_size_kb) {
		sbi->s_max_dir_size_kb = arg;
	} else if (token == Opt_stripe) {
		sbi->s_stripe = arg;
	} else if (token == Opt_resuid) {
		uid = make_kuid(current_user_ns(), arg);
		if (!uid_valid(uid)) {
			ext4_msg(sb, KERN_ERR, "Invalid uid value %d", arg);
			return -1;
		}
		sbi->s_resuid = uid;
	} else if (token == Opt_resgid) {
		gid = make_kgid(current_user_ns(), arg);
		if (!gid_valid(gid)) {
			ext4_msg(sb, KERN_ERR, "Invalid gid value %d", arg);
			return -1;
		}
		sbi->s_resgid = gid;
	} else if (token == Opt_journal_dev) {
		if (is_remount) {
			ext4_msg(sb, KERN_ERR,
				 "Cannot specify journal on remount");
			return -1;
		}
		*journal_devnum = arg;
	} else if (token == Opt_journal_path) {
		char *journal_path;
		struct inode *journal_inode;
		struct path path;
		int error;

		if (is_remount) {
			ext4_msg(sb, KERN_ERR,
				 "Cannot specify journal on remount");
			return -1;
		}
		journal_path = match_strdup(&args[0]);
		if (!journal_path) {
			ext4_msg(sb, KERN_ERR, "error: could not dup "
				 "journal device string");
			return -1;
		}

		error = kern_path(journal_path, LOOKUP_FOLLOW, &path);
		if (error) {
			ext4_msg(sb, KERN_ERR, "error: could not find "
				 "journal device path: error %d", error);
			kfree(journal_path);
			return -1;
		}

		journal_inode = path.dentry->d_inode;
		if (!S_ISBLK(journal_inode->i_mode)) {
			ext4_msg(sb, KERN_ERR, "error: journal path %s "
				 "is not a block device", journal_path);
			path_put(&path);
			kfree(journal_path);
			return -1;
		}

		*journal_devnum = new_encode_dev(journal_inode->i_rdev);
		path_put(&path);
		kfree(journal_path);
	} else if (token == Opt_journal_ioprio) {
		if (arg > 7) {
			ext4_msg(sb, KERN_ERR, "Invalid journal IO priority"
				 " (must be 0-7)");
			return -1;
		}
		*journal_ioprio =
			IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg);
	} else if (m->flags & MOPT_DATAJ) {
		if (is_remount) {
			if (!sbi->s_journal)
				ext4_msg(sb, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option");
			else if (test_opt(sb, DATA_FLAGS) != m->mount_opt) {
				ext4_msg(sb, KERN_ERR,
					 "Cannot change data mode on remount");
				return -1;
			}
		} else {
			clear_opt(sb, DATA_FLAGS);
			sbi->s_mount_opt |= m->mount_opt;
		}
#ifdef CONFIG_QUOTA
	} else if (m->flags & MOPT_QFMT) {
		if (sb_any_quota_loaded(sb) &&
		    sbi->s_jquota_fmt != m->mount_opt) {
			ext4_msg(sb, KERN_ERR, "Cannot change journaled "
				 "quota options when quota turned on");
			return -1;
		}
		if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
					       EXT4_FEATURE_RO_COMPAT_QUOTA)) {
			ext4_msg(sb, KERN_ERR,
				 "Cannot set journaled quota options "
				 "when QUOTA feature is enabled");
			return -1;
		}
		sbi->s_jquota_fmt = m->mount_opt;
#endif
#ifndef CONFIG_FS_DAX
	} else if (token == Opt_dax) {
		ext4_msg(sb, KERN_INFO, "dax option not supported");
		return -1;
#endif
	} else {
		if (!args->from)
			arg = 1;
		if (m->flags & MOPT_CLEAR)
			arg = !arg;
		else if (unlikely(!(m->flags & MOPT_SET))) {
			ext4_msg(sb, KERN_WARNING,
				 "buggy handling of option %s", opt);
			WARN_ON(1);
			return -1;
		}
		if (arg != 0)
			sbi->s_mount_opt |= m->mount_opt;
		else
			sbi->s_mount_opt &= ~m->mount_opt;
	}
	return 1;
}

static int parse_options(char *options, struct super_block *sb,
			 unsigned long *journal_devnum,
			 unsigned int *journal_ioprio,
			 int is_remount)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	char *p;
	substring_t args[MAX_OPT_ARGS];
	int token;

	if (!options)
		return 1;

	while ((p = strsep(&options, ",")) != NULL) {
		if (!*p)
			continue;
		/*
		 * Initialize args struct so we know whether arg was
		 * found; some options take optional arguments.
		 */
		args[0].to = args[0].from = NULL;
		token = match_token(p, tokens, args);
		if (handle_mount_opt(sb, p, token, args, journal_devnum,
				     journal_ioprio, is_remount) < 0)
			return 0;
	}
#ifdef CONFIG_QUOTA
	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) &&
	    (test_opt(sb, USRQUOTA) || test_opt(sb, GRPQUOTA))) {
		ext4_msg(sb, KERN_ERR, "Cannot set quota options when QUOTA "
			 "feature is enabled");
		return 0;
	}
	if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
		if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA])
			clear_opt(sb, USRQUOTA);

		if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA])
			clear_opt(sb, GRPQUOTA);

		if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) {
			ext4_msg(sb, KERN_ERR, "old and new quota "
				 "format mixing");
			return 0;
		}

		if (!sbi->s_jquota_fmt) {
			ext4_msg(sb, KERN_ERR, "journaled quota format "
				 "not specified");
			return 0;
		}
	}
#endif
	if (test_opt(sb, DIOREAD_NOLOCK)) {
		int blocksize =
			BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);

		if (blocksize < PAGE_CACHE_SIZE) {
			ext4_msg(sb, KERN_ERR, "can't mount with "
				 "dioread_nolock if block size != PAGE_SIZE");
			return 0;
		}
	}
	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA &&
	    test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
		ext4_msg(sb, KERN_ERR, "can't mount with journal_async_commit "
			 "in data=ordered mode");
		return 0;
	}
	return 1;
}
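
parse_options() itself is a thin driver: strsep() splits the comma-separated option string in place and each token is dispatched through handle_mount_opt(). A minimal userspace sketch of that split (strsep(3) from the C library behaves like the kernel helper here; _DEFAULT_SOURCE is needed for it on glibc):

#define _DEFAULT_SOURCE
#include <stdio.h>
#include <string.h>

int main(void)
{
	char buf[] = "noatime,commit=30,,data=ordered";
	char *options = buf;
	char *p;

	while ((p = strsep(&options, ",")) != NULL) {
		if (!*p)
			continue;	/* mirrors the kernel's empty-token skip */
		printf("token: \"%s\"\n", p);	/* would go to handle_mount_opt() */
	}
	return 0;
}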

static inline void ext4_show_quota_options(struct seq_file *seq,
					   struct super_block *sb)
{
#if defined(CONFIG_QUOTA)
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	if (sbi->s_jquota_fmt) {
		char *fmtname = "";

		switch (sbi->s_jquota_fmt) {
		case QFMT_VFS_OLD:
			fmtname = "vfsold";
			break;
		case QFMT_VFS_V0:
			fmtname = "vfsv0";
			break;
		case QFMT_VFS_V1:
			fmtname = "vfsv1";
			break;
		}
		seq_printf(seq, ",jqfmt=%s", fmtname);
	}

	if (sbi->s_qf_names[USRQUOTA])
		seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);

	if (sbi->s_qf_names[GRPQUOTA])
		seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]);
#endif
}

static const char *token2str(int token)
{
	const struct match_token *t;

	for (t = tokens; t->token != Opt_err; t++)
		if (t->token == token && !strchr(t->pattern, '='))
			break;
	return t->pattern;
}

/*
 * Show an option if
 *  - it's set to a non-default value OR
 *  - if the per-sb default is different from the global default
 */
static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
			      int nodefs)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_super_block *es = sbi->s_es;
	int def_errors, def_mount_opt = nodefs ? 0 : sbi->s_def_mount_opt;
	const struct mount_opts *m;
	char sep = nodefs ? '\n' : ',';

#define SEQ_OPTS_PUTS(str) seq_printf(seq, "%c" str, sep)
#define SEQ_OPTS_PRINT(str, arg) seq_printf(seq, "%c" str, sep, arg)

	if (sbi->s_sb_block != 1)
		SEQ_OPTS_PRINT("sb=%llu", sbi->s_sb_block);

	for (m = ext4_mount_opts; m->token != Opt_err; m++) {
		int want_set = m->flags & MOPT_SET;
		if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) ||
		    (m->flags & MOPT_CLEAR_ERR))
			continue;
		if (!(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt)))
			continue; /* skip if same as the default */
		if ((want_set &&
		     (sbi->s_mount_opt & m->mount_opt) != m->mount_opt) ||
		    (!want_set && (sbi->s_mount_opt & m->mount_opt)))
			continue; /* select Opt_noFoo vs Opt_Foo */
		SEQ_OPTS_PRINT("%s", token2str(m->token));
	}

	if (nodefs || !uid_eq(sbi->s_resuid, make_kuid(&init_user_ns, EXT4_DEF_RESUID)) ||
	    le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID)
		SEQ_OPTS_PRINT("resuid=%u",
			       from_kuid_munged(&init_user_ns, sbi->s_resuid));
	if (nodefs || !gid_eq(sbi->s_resgid, make_kgid(&init_user_ns, EXT4_DEF_RESGID)) ||
	    le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID)
		SEQ_OPTS_PRINT("resgid=%u",
			       from_kgid_munged(&init_user_ns, sbi->s_resgid));
	def_errors = nodefs ? -1 : le16_to_cpu(es->s_errors);
	if (test_opt(sb, ERRORS_RO) && def_errors != EXT4_ERRORS_RO)
		SEQ_OPTS_PUTS("errors=remount-ro");
	if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE)
		SEQ_OPTS_PUTS("errors=continue");
	if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC)
		SEQ_OPTS_PUTS("errors=panic");
	if (nodefs || sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ)
		SEQ_OPTS_PRINT("commit=%lu", sbi->s_commit_interval / HZ);
	if (nodefs || sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME)
		SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time);
	if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME)
		SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time);
	if (sb->s_flags & MS_I_VERSION)
		SEQ_OPTS_PUTS("i_version");
	if (nodefs || sbi->s_stripe)
		SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe);
	if (EXT4_MOUNT_DATA_FLAGS & (sbi->s_mount_opt ^ def_mount_opt)) {
		if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
			SEQ_OPTS_PUTS("data=journal");
		else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
			SEQ_OPTS_PUTS("data=ordered");
		else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
			SEQ_OPTS_PUTS("data=writeback");
	}
	if (nodefs ||
	    sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
		SEQ_OPTS_PRINT("inode_readahead_blks=%u",
			       sbi->s_inode_readahead_blks);

	if (nodefs || (test_opt(sb, INIT_INODE_TABLE) &&
		       (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)))
		SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult);
	if (nodefs || sbi->s_max_dir_size_kb)
		SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb);

	ext4_show_quota_options(seq, sb);
	return 0;
}
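
The `sbi->s_mount_opt ^ def_mount_opt` test is the whole "only show non-defaults" policy: XOR leaves exactly the bits that differ between the current options and the defaults, and masking with m->mount_opt asks whether this particular option is among them. A tiny standalone check of that identity (toy bit values, not the real EXT4_MOUNT_* masks):

#include <assert.h>
#include <stdio.h>

int main(void)
{
	unsigned def = 0x0014;	/* toy default option bits */
	unsigned cur = 0x0114;	/* same, plus one extra option set */
	unsigned opt = 0x0100;	/* mask of the option we might print */

	/* Nonzero iff 'opt' differs from the default -> worth printing. */
	assert((opt & (cur ^ def)) != 0);

	/* An option equal to its default XORs away to nothing. */
	assert((0x0010 & (cur ^ def)) == 0);

	printf("diff bits: %#x\n", cur ^ def);	/* 0x100 */
	return 0;
}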

static int ext4_show_options(struct seq_file *seq, struct dentry *root)
{
	return _ext4_show_options(seq, root->d_sb, 0);
}

static int options_seq_show(struct seq_file *seq, void *offset)
{
	struct super_block *sb = seq->private;
	int rc;

	seq_puts(seq, (sb->s_flags & MS_RDONLY) ? "ro" : "rw");
	rc = _ext4_show_options(seq, sb, 1);
	seq_puts(seq, "\n");
	return rc;
}

static int options_open_fs(struct inode *inode, struct file *file)
{
	return single_open(file, options_seq_show, PDE_DATA(inode));
}

static const struct file_operations ext4_seq_options_fops = {
	.owner = THIS_MODULE,
	.open = options_open_fs,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
			    int read_only)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	int res = 0;

	if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
		ext4_msg(sb, KERN_ERR, "revision level too high, "
			 "forcing read-only mode");
		res = MS_RDONLY;
	}
	if (read_only)
		goto done;
	if (!(sbi->s_mount_state & EXT4_VALID_FS))
		ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, "
			 "running e2fsck is recommended");
	else if (sbi->s_mount_state & EXT4_ERROR_FS)
		ext4_msg(sb, KERN_WARNING,
			 "warning: mounting fs with errors, "
			 "running e2fsck is recommended");
	else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 &&
		 le16_to_cpu(es->s_mnt_count) >=
		 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
		ext4_msg(sb, KERN_WARNING,
			 "warning: maximal mount count reached, "
			 "running e2fsck is recommended");
	else if (le32_to_cpu(es->s_checkinterval) &&
		 (le32_to_cpu(es->s_lastcheck) +
		  le32_to_cpu(es->s_checkinterval) <= get_seconds()))
		ext4_msg(sb, KERN_WARNING,
			 "warning: checktime reached, "
			 "running e2fsck is recommended");
	if (!sbi->s_journal)
		es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
	if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
		es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
	le16_add_cpu(&es->s_mnt_count, 1);
	es->s_mtime = cpu_to_le32(get_seconds());
	ext4_update_dynamic_rev(sb);
	if (sbi->s_journal)
		EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);

	ext4_commit_super(sb, 1);
done:
	if (test_opt(sb, DEBUG))
		printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
				"bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n",
			sb->s_blocksize,
			sbi->s_groups_count,
			EXT4_BLOCKS_PER_GROUP(sb),
			EXT4_INODES_PER_GROUP(sb),
			sbi->s_mount_opt, sbi->s_mount_opt2);

	cleancache_init_fs(sb);
	return res;
}
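
The repeated `(__s16)` casts on s_max_mnt_count matter: the field is stored on disk as a little-endian u16, but a value of -1 (0xFFFF) means "never force a periodic check", so it must be reinterpreted as signed before the `> 0` comparison. A two-line demonstration of that reinterpretation:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint16_t s_max_mnt_count = 0xFFFF;	/* on-disk: checks disabled */

	/* Unsigned, 0xFFFF looks like a huge positive mount limit... */
	printf("unsigned: %u\n", s_max_mnt_count);
	/* ...but reinterpreted as signed 16-bit it is -1, so the
	 * "maximal mount count reached" branch is never taken. */
	printf("signed:   %d\n", (int16_t) s_max_mnt_count);
	return 0;
}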

int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct flex_groups *new_groups;
	int size;

	if (!sbi->s_log_groups_per_flex)
		return 0;

	size = ext4_flex_group(sbi, ngroup - 1) + 1;
	if (size <= sbi->s_flex_groups_allocated)
		return 0;

	size = roundup_pow_of_two(size * sizeof(struct flex_groups));
	new_groups = ext4_kvzalloc(size, GFP_KERNEL);
	if (!new_groups) {
		ext4_msg(sb, KERN_ERR, "not enough memory for %d flex groups",
			 size / (int) sizeof(struct flex_groups));
		return -ENOMEM;
	}

	if (sbi->s_flex_groups) {
		memcpy(new_groups, sbi->s_flex_groups,
		       (sbi->s_flex_groups_allocated *
			sizeof(struct flex_groups)));
		kvfree(sbi->s_flex_groups);
	}
	sbi->s_flex_groups = new_groups;
	sbi->s_flex_groups_allocated = size / sizeof(struct flex_groups);
	return 0;
}
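
Rounding the byte size up to a power of two means the array at least doubles whenever it grows, so repeated online resizes trigger only O(log n) reallocations. A userspace sketch of the same grow-and-copy pattern (plain calloc/free standing in for ext4_kvzalloc/kvfree, and a made-up stats struct):

#include <stdlib.h>
#include <string.h>

struct flex_stats { long free_inodes, free_clusters, used_dirs; };

/* Grow *arr to hold at least 'needed' entries; returns 0 or -1 on ENOMEM. */
static int grow_array(struct flex_stats **arr, size_t *allocated, size_t needed)
{
	size_t bytes, n;
	struct flex_stats *tmp;

	if (needed <= *allocated)
		return 0;	/* already big enough */
	/* round the byte size up to the next power of two */
	for (bytes = 1; bytes < needed * sizeof(struct flex_stats); bytes <<= 1)
		;
	n = bytes / sizeof(struct flex_stats);
	tmp = calloc(n, sizeof(struct flex_stats));	/* zeroed, like kvzalloc */
	if (!tmp)
		return -1;
	if (*arr) {
		memcpy(tmp, *arr, *allocated * sizeof(struct flex_stats));
		free(*arr);
	}
	*arr = tmp;
	*allocated = n;
	return 0;
}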

static int ext4_fill_flex_info(struct super_block *sb)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_group_desc *gdp = NULL;
	ext4_group_t flex_group;
	int i, err;

	sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
	if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) {
		sbi->s_log_groups_per_flex = 0;
		return 1;
	}

	err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count);
	if (err)
		goto failed;

	for (i = 0; i < sbi->s_groups_count; i++) {
		gdp = ext4_get_group_desc(sb, i, NULL);

		flex_group = ext4_flex_group(sbi, i);
		atomic_add(ext4_free_inodes_count(sb, gdp),
			   &sbi->s_flex_groups[flex_group].free_inodes);
		atomic64_add(ext4_free_group_clusters(sb, gdp),
			     &sbi->s_flex_groups[flex_group].free_clusters);
		atomic_add(ext4_used_dirs_count(sb, gdp),
			   &sbi->s_flex_groups[flex_group].used_dirs);
	}

	return 1;
failed:
	return 0;
}
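
A flex group is just 2^s_log_groups_per_flex consecutive block groups, so mapping a block group to its flex group is a right shift (which is what ext4_flex_group() amounts to). For example, with log_groups_per_flex = 4:

#include <stdio.h>

int main(void)
{
	unsigned log_groups_per_flex = 4;	/* 16 block groups per flex group */
	unsigned group;

	for (group = 14; group <= 17; group++)
		printf("block group %u -> flex group %u\n",
		       group, group >> log_groups_per_flex);
	/* groups 0-15 map to flex group 0, 16-31 to flex group 1, ... */
	return 0;
}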

static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
				   struct ext4_group_desc *gdp)
{
	int offset;
	__u16 crc = 0;
	__le32 le_group = cpu_to_le32(block_group);

	if (ext4_has_metadata_csum(sbi->s_sb)) {
		/* Use new metadata_csum algorithm */
		__le16 save_csum;
		__u32 csum32;

		save_csum = gdp->bg_checksum;
		gdp->bg_checksum = 0;
		csum32 = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&le_group,
				     sizeof(le_group));
		csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp,
				     sbi->s_desc_size);
		gdp->bg_checksum = save_csum;

		crc = csum32 & 0xFFFF;
		goto out;
	}

	/* old crc16 code */
	if (!(sbi->s_es->s_feature_ro_compat &
	      cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)))
		return 0;

	offset = offsetof(struct ext4_group_desc, bg_checksum);

	crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
	crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
	crc = crc16(crc, (__u8 *)gdp, offset);
	offset += sizeof(gdp->bg_checksum); /* skip checksum */
	/* for checksum of struct ext4_group_desc do the rest...*/
	if ((sbi->s_es->s_feature_incompat &
	     cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) &&
	    offset < le16_to_cpu(sbi->s_es->s_desc_size))
		crc = crc16(crc, (__u8 *)gdp + offset,
			    le16_to_cpu(sbi->s_es->s_desc_size) -
				offset);

out:
	return cpu_to_le16(crc);
}
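
Both algorithms must exclude the checksum field itself from the bytes being summed: the metadata_csum path zeroes bg_checksum and restores it afterwards, while the crc16 path checksums up to the field and then skips over it. A self-contained sketch of the zero-then-restore pattern, with a trivial additive stand-in for the kernel's crc32c/crc16 helpers (struct and field names here are invented):

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

struct desc {
	uint32_t a, b;
	uint16_t checksum;	/* must not feed into its own checksum */
	uint16_t c;
};

/* Toy additive checksum; the kernel uses crc32c or crc16 here. */
static uint16_t toy_sum(const void *p, size_t len)
{
	const uint8_t *bytes = p;
	uint16_t s = 0;

	while (len--)
		s += *bytes++;
	return s;
}

static uint16_t desc_csum(struct desc *d)
{
	uint16_t save = d->checksum;
	uint16_t crc;

	d->checksum = 0;	/* zero the field so it doesn't sum itself */
	crc = toy_sum(d, sizeof(*d));
	d->checksum = save;	/* restore the caller's value */
	return crc;
}

int main(void)
{
	struct desc d = { 1, 2, 0xdead, 3 };

	/* Same result no matter what the stored checksum currently is. */
	printf("%u\n", desc_csum(&d));
	d.checksum = 0;
	printf("%u\n", desc_csum(&d));
	return 0;
}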

int ext4_group_desc_csum_verify(struct super_block *sb, __u32 block_group,
				struct ext4_group_desc *gdp)
{
	if (ext4_has_group_desc_csum(sb) &&
	    (gdp->bg_checksum != ext4_group_desc_csum(EXT4_SB(sb),
						      block_group, gdp)))
		return 0;

	return 1;
}

void ext4_group_desc_csum_set(struct super_block *sb, __u32 block_group,
			      struct ext4_group_desc *gdp)
{
	if (!ext4_has_group_desc_csum(sb))
		return;
	gdp->bg_checksum = ext4_group_desc_csum(EXT4_SB(sb), block_group, gdp);
}
2057 2057
2058 /* Called at mount-time, super-block is locked */ 2058 /* Called at mount-time, super-block is locked */
2059 static int ext4_check_descriptors(struct super_block *sb, 2059 static int ext4_check_descriptors(struct super_block *sb,
2060 ext4_group_t *first_not_zeroed) 2060 ext4_group_t *first_not_zeroed)
2061 { 2061 {
2062 struct ext4_sb_info *sbi = EXT4_SB(sb); 2062 struct ext4_sb_info *sbi = EXT4_SB(sb);
2063 ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); 2063 ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
2064 ext4_fsblk_t last_block; 2064 ext4_fsblk_t last_block;
2065 ext4_fsblk_t block_bitmap; 2065 ext4_fsblk_t block_bitmap;
2066 ext4_fsblk_t inode_bitmap; 2066 ext4_fsblk_t inode_bitmap;
2067 ext4_fsblk_t inode_table; 2067 ext4_fsblk_t inode_table;
2068 int flexbg_flag = 0; 2068 int flexbg_flag = 0;
2069 ext4_group_t i, grp = sbi->s_groups_count; 2069 ext4_group_t i, grp = sbi->s_groups_count;
2070 2070
2071 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) 2071 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
2072 flexbg_flag = 1; 2072 flexbg_flag = 1;
2073 2073
2074 ext4_debug("Checking group descriptors"); 2074 ext4_debug("Checking group descriptors");
2075 2075
2076 for (i = 0; i < sbi->s_groups_count; i++) { 2076 for (i = 0; i < sbi->s_groups_count; i++) {
2077 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); 2077 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
2078 2078
2079 if (i == sbi->s_groups_count - 1 || flexbg_flag) 2079 if (i == sbi->s_groups_count - 1 || flexbg_flag)
2080 last_block = ext4_blocks_count(sbi->s_es) - 1; 2080 last_block = ext4_blocks_count(sbi->s_es) - 1;
2081 else 2081 else
2082 last_block = first_block + 2082 last_block = first_block +
2083 (EXT4_BLOCKS_PER_GROUP(sb) - 1); 2083 (EXT4_BLOCKS_PER_GROUP(sb) - 1);
2084 2084
2085 if ((grp == sbi->s_groups_count) && 2085 if ((grp == sbi->s_groups_count) &&
2086 !(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))) 2086 !(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
2087 grp = i; 2087 grp = i;
2088 2088
2089 block_bitmap = ext4_block_bitmap(sb, gdp); 2089 block_bitmap = ext4_block_bitmap(sb, gdp);
2090 if (block_bitmap < first_block || block_bitmap > last_block) { 2090 if (block_bitmap < first_block || block_bitmap > last_block) {
2091 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " 2091 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2092 "Block bitmap for group %u not in group " 2092 "Block bitmap for group %u not in group "
2093 "(block %llu)!", i, block_bitmap); 2093 "(block %llu)!", i, block_bitmap);
2094 return 0; 2094 return 0;
2095 } 2095 }
2096 inode_bitmap = ext4_inode_bitmap(sb, gdp); 2096 inode_bitmap = ext4_inode_bitmap(sb, gdp);
2097 if (inode_bitmap < first_block || inode_bitmap > last_block) { 2097 if (inode_bitmap < first_block || inode_bitmap > last_block) {
2098 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " 2098 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2099 "Inode bitmap for group %u not in group " 2099 "Inode bitmap for group %u not in group "
2100 "(block %llu)!", i, inode_bitmap); 2100 "(block %llu)!", i, inode_bitmap);
2101 return 0; 2101 return 0;
2102 } 2102 }
2103 inode_table = ext4_inode_table(sb, gdp); 2103 inode_table = ext4_inode_table(sb, gdp);
2104 if (inode_table < first_block || 2104 if (inode_table < first_block ||
2105 inode_table + sbi->s_itb_per_group - 1 > last_block) { 2105 inode_table + sbi->s_itb_per_group - 1 > last_block) {
2106 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " 2106 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2107 "Inode table for group %u not in group " 2107 "Inode table for group %u not in group "
2108 "(block %llu)!", i, inode_table); 2108 "(block %llu)!", i, inode_table);
2109 return 0; 2109 return 0;
2110 } 2110 }
2111 ext4_lock_group(sb, i); 2111 ext4_lock_group(sb, i);
2112 if (!ext4_group_desc_csum_verify(sb, i, gdp)) { 2112 if (!ext4_group_desc_csum_verify(sb, i, gdp)) {
2113 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " 2113 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2114 "Checksum for group %u failed (%u!=%u)", 2114 "Checksum for group %u failed (%u!=%u)",
2115 i, le16_to_cpu(ext4_group_desc_csum(sbi, i, 2115 i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
2116 gdp)), le16_to_cpu(gdp->bg_checksum)); 2116 gdp)), le16_to_cpu(gdp->bg_checksum));
2117 if (!(sb->s_flags & MS_RDONLY)) { 2117 if (!(sb->s_flags & MS_RDONLY)) {
2118 ext4_unlock_group(sb, i); 2118 ext4_unlock_group(sb, i);
2119 return 0; 2119 return 0;
2120 } 2120 }
2121 } 2121 }
2122 ext4_unlock_group(sb, i); 2122 ext4_unlock_group(sb, i);
2123 if (!flexbg_flag) 2123 if (!flexbg_flag)
2124 first_block += EXT4_BLOCKS_PER_GROUP(sb); 2124 first_block += EXT4_BLOCKS_PER_GROUP(sb);
2125 } 2125 }
2126 if (first_not_zeroed) 2126 if (first_not_zeroed)
2127 *first_not_zeroed = grp; 2127 *first_not_zeroed = grp;
2128 return 1; 2128 return 1;
2129 } 2129 }
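
The loop above reduces to an interval check: for each group, the block bitmap, the inode bitmap, and the whole inode table must lie inside [first_block, last_block] for that group. Below is a minimal userspace sketch of that check; the type, geometry, and block numbers are hypothetical stand-ins, not the kernel's values.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for ext4_fsblk_t and one group's geometry. */
typedef uint64_t fsblk_t;

static int in_group(fsblk_t blk, fsblk_t first, fsblk_t last)
{
	return blk >= first && blk <= last;
}

int main(void)
{
	fsblk_t first = 0, last = 32767;	/* 32768 blocks per group */
	fsblk_t block_bitmap = 3, inode_bitmap = 4, inode_table = 5;
	unsigned int itb_per_group = 512;	/* inode table blocks */

	if (!in_group(block_bitmap, first, last) ||
	    !in_group(inode_bitmap, first, last) ||
	    !in_group(inode_table, first, last) ||
	    !in_group(inode_table + itb_per_group - 1, first, last))
		puts("descriptor outside its group -- reject the mount");
	else
		puts("group descriptors OK");
	return 0;
}
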
2130 2130
2131 /* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at 2131 /* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at
2132 * the superblock) which were deleted from all directories, but held open by 2132 * the superblock) which were deleted from all directories, but held open by
2133 * a process at the time of a crash. We walk the list and try to delete these 2133 * a process at the time of a crash. We walk the list and try to delete these
2134 * inodes at recovery time (only with a read-write filesystem). 2134 * inodes at recovery time (only with a read-write filesystem).
2135 * 2135 *
2136 * In order to keep the orphan inode chain consistent during traversal (in 2136 * In order to keep the orphan inode chain consistent during traversal (in
2137 * case of crash during recovery), we link each inode into the superblock 2137 * case of crash during recovery), we link each inode into the superblock
2138 * orphan list_head and handle it the same way as an inode deletion during 2138 * orphan list_head and handle it the same way as an inode deletion during
2139 * normal operation (which journals the operations for us). 2139 * normal operation (which journals the operations for us).
2140 * 2140 *
2141 * We only do an iget() and an iput() on each inode, which is very safe if we 2141 * We only do an iget() and an iput() on each inode, which is very safe if we
2142 * accidentally point at an in-use or already deleted inode. The worst that 2142 * accidentally point at an in-use or already deleted inode. The worst that
2143 * can happen in this case is that we get a "bit already cleared" message from 2143 * can happen in this case is that we get a "bit already cleared" message from
2144 * ext4_free_inode(). The only reason we would point at a wrong inode is if 2144 * ext4_free_inode(). The only reason we would point at a wrong inode is if
2145 * e2fsck was run on this filesystem, and it must have already done the orphan 2145 * e2fsck was run on this filesystem, and it must have already done the orphan
2146 * inode cleanup for us, so we can safely abort without any further action. 2146 * inode cleanup for us, so we can safely abort without any further action.
2147 */ 2147 */
2148 static void ext4_orphan_cleanup(struct super_block *sb, 2148 static void ext4_orphan_cleanup(struct super_block *sb,
2149 struct ext4_super_block *es) 2149 struct ext4_super_block *es)
2150 { 2150 {
2151 unsigned int s_flags = sb->s_flags; 2151 unsigned int s_flags = sb->s_flags;
2152 int nr_orphans = 0, nr_truncates = 0; 2152 int nr_orphans = 0, nr_truncates = 0;
2153 #ifdef CONFIG_QUOTA 2153 #ifdef CONFIG_QUOTA
2154 int i; 2154 int i;
2155 #endif 2155 #endif
2156 if (!es->s_last_orphan) { 2156 if (!es->s_last_orphan) {
2157 jbd_debug(4, "no orphan inodes to clean up\n"); 2157 jbd_debug(4, "no orphan inodes to clean up\n");
2158 return; 2158 return;
2159 } 2159 }
2160 2160
2161 if (bdev_read_only(sb->s_bdev)) { 2161 if (bdev_read_only(sb->s_bdev)) {
2162 ext4_msg(sb, KERN_ERR, "write access " 2162 ext4_msg(sb, KERN_ERR, "write access "
2163 "unavailable, skipping orphan cleanup"); 2163 "unavailable, skipping orphan cleanup");
2164 return; 2164 return;
2165 } 2165 }
2166 2166
2167 /* Check if feature set would not allow a r/w mount */ 2167 /* Check if feature set would not allow a r/w mount */
2168 if (!ext4_feature_set_ok(sb, 0)) { 2168 if (!ext4_feature_set_ok(sb, 0)) {
2169 ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to " 2169 ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to "
2170 "unknown ROCOMPAT features"); 2170 "unknown ROCOMPAT features");
2171 return; 2171 return;
2172 } 2172 }
2173 2173
2174 if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { 2174 if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
2175 /* don't clear list on RO mount w/ errors */ 2175 /* don't clear list on RO mount w/ errors */
2176 if (es->s_last_orphan && !(s_flags & MS_RDONLY)) { 2176 if (es->s_last_orphan && !(s_flags & MS_RDONLY)) {
2177 ext4_msg(sb, KERN_INFO, "Errors on filesystem, " 2177 ext4_msg(sb, KERN_INFO, "Errors on filesystem, "
2178 "clearing orphan list.\n"); 2178 "clearing orphan list.\n");
2179 es->s_last_orphan = 0; 2179 es->s_last_orphan = 0;
2180 } 2180 }
2181 jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); 2181 jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
2182 return; 2182 return;
2183 } 2183 }
2184 2184
2185 if (s_flags & MS_RDONLY) { 2185 if (s_flags & MS_RDONLY) {
2186 ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs"); 2186 ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs");
2187 sb->s_flags &= ~MS_RDONLY; 2187 sb->s_flags &= ~MS_RDONLY;
2188 } 2188 }
2189 #ifdef CONFIG_QUOTA 2189 #ifdef CONFIG_QUOTA
2190 /* Needed for iput() to work correctly and not trash data */ 2190 /* Needed for iput() to work correctly and not trash data */
2191 sb->s_flags |= MS_ACTIVE; 2191 sb->s_flags |= MS_ACTIVE;
2192 /* Turn on quotas so that they are updated correctly */ 2192 /* Turn on quotas so that they are updated correctly */
2193 for (i = 0; i < EXT4_MAXQUOTAS; i++) { 2193 for (i = 0; i < EXT4_MAXQUOTAS; i++) {
2194 if (EXT4_SB(sb)->s_qf_names[i]) { 2194 if (EXT4_SB(sb)->s_qf_names[i]) {
2195 int ret = ext4_quota_on_mount(sb, i); 2195 int ret = ext4_quota_on_mount(sb, i);
2196 if (ret < 0) 2196 if (ret < 0)
2197 ext4_msg(sb, KERN_ERR, 2197 ext4_msg(sb, KERN_ERR,
2198 "Cannot turn on journaled " 2198 "Cannot turn on journaled "
2199 "quota: error %d", ret); 2199 "quota: error %d", ret);
2200 } 2200 }
2201 } 2201 }
2202 #endif 2202 #endif
2203 2203
2204 while (es->s_last_orphan) { 2204 while (es->s_last_orphan) {
2205 struct inode *inode; 2205 struct inode *inode;
2206 2206
2207 inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan)); 2207 inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
2208 if (IS_ERR(inode)) { 2208 if (IS_ERR(inode)) {
2209 es->s_last_orphan = 0; 2209 es->s_last_orphan = 0;
2210 break; 2210 break;
2211 } 2211 }
2212 2212
2213 list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); 2213 list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
2214 dquot_initialize(inode); 2214 dquot_initialize(inode);
2215 if (inode->i_nlink) { 2215 if (inode->i_nlink) {
2216 if (test_opt(sb, DEBUG)) 2216 if (test_opt(sb, DEBUG))
2217 ext4_msg(sb, KERN_DEBUG, 2217 ext4_msg(sb, KERN_DEBUG,
2218 "%s: truncating inode %lu to %lld bytes", 2218 "%s: truncating inode %lu to %lld bytes",
2219 __func__, inode->i_ino, inode->i_size); 2219 __func__, inode->i_ino, inode->i_size);
2220 jbd_debug(2, "truncating inode %lu to %lld bytes\n", 2220 jbd_debug(2, "truncating inode %lu to %lld bytes\n",
2221 inode->i_ino, inode->i_size); 2221 inode->i_ino, inode->i_size);
2222 mutex_lock(&inode->i_mutex); 2222 mutex_lock(&inode->i_mutex);
2223 truncate_inode_pages(inode->i_mapping, inode->i_size); 2223 truncate_inode_pages(inode->i_mapping, inode->i_size);
2224 ext4_truncate(inode); 2224 ext4_truncate(inode);
2225 mutex_unlock(&inode->i_mutex); 2225 mutex_unlock(&inode->i_mutex);
2226 nr_truncates++; 2226 nr_truncates++;
2227 } else { 2227 } else {
2228 if (test_opt(sb, DEBUG)) 2228 if (test_opt(sb, DEBUG))
2229 ext4_msg(sb, KERN_DEBUG, 2229 ext4_msg(sb, KERN_DEBUG,
2230 "%s: deleting unreferenced inode %lu", 2230 "%s: deleting unreferenced inode %lu",
2231 __func__, inode->i_ino); 2231 __func__, inode->i_ino);
2232 jbd_debug(2, "deleting unreferenced inode %lu\n", 2232 jbd_debug(2, "deleting unreferenced inode %lu\n",
2233 inode->i_ino); 2233 inode->i_ino);
2234 nr_orphans++; 2234 nr_orphans++;
2235 } 2235 }
2236 iput(inode); /* The delete magic happens here! */ 2236 iput(inode); /* The delete magic happens here! */
2237 } 2237 }
2238 2238
2239 #define PLURAL(x) (x), ((x) == 1) ? "" : "s" 2239 #define PLURAL(x) (x), ((x) == 1) ? "" : "s"
2240 2240
2241 if (nr_orphans) 2241 if (nr_orphans)
2242 ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted", 2242 ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted",
2243 PLURAL(nr_orphans)); 2243 PLURAL(nr_orphans));
2244 if (nr_truncates) 2244 if (nr_truncates)
2245 ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up", 2245 ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
2246 PLURAL(nr_truncates)); 2246 PLURAL(nr_truncates));
2247 #ifdef CONFIG_QUOTA 2247 #ifdef CONFIG_QUOTA
2248 /* Turn quotas off */ 2248 /* Turn quotas off */
2249 for (i = 0; i < EXT4_MAXQUOTAS; i++) { 2249 for (i = 0; i < EXT4_MAXQUOTAS; i++) {
2250 if (sb_dqopt(sb)->files[i]) 2250 if (sb_dqopt(sb)->files[i])
2251 dquot_quota_off(sb, i); 2251 dquot_quota_off(sb, i);
2252 } 2252 }
2253 #endif 2253 #endif
2254 sb->s_flags = s_flags; /* Restore MS_RDONLY status */ 2254 sb->s_flags = s_flags; /* Restore MS_RDONLY status */
2255 } 2255 }
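
The on-disk orphan list is a singly-linked chain: the superblock holds the inode number of the head, and each orphan inode records the next. The toy userspace model below walks such a chain and applies the same truncate-versus-delete decision as the loop above; the structures and numbers are hypothetical, and the real code goes through iget()/iput() and the journal.

#include <stdio.h>

/* Hypothetical in-memory model of the on-disk orphan chain. */
struct toy_inode {
	unsigned int ino;
	unsigned int next_orphan;	/* 0 terminates the chain */
	unsigned int nlink;		/* >0: truncate; 0: delete */
};

static struct toy_inode table[] = {
	{ 12, 15, 0 },	/* unreferenced: delete   */
	{ 15,  0, 1 },	/* still linked: truncate */
};

static struct toy_inode *toy_iget(unsigned int ino)
{
	unsigned int i;

	for (i = 0; i < sizeof(table) / sizeof(table[0]); i++)
		if (table[i].ino == ino)
			return &table[i];
	return NULL;
}

int main(void)
{
	unsigned int last_orphan = 12;	/* plays es->s_last_orphan */

	while (last_orphan) {
		struct toy_inode *inode = toy_iget(last_orphan);

		if (!inode)
			break;	/* bad pointer: stop, like the IS_ERR() case */
		last_orphan = inode->next_orphan;
		if (inode->nlink)
			printf("truncating inode %u\n", inode->ino);
		else
			printf("deleting unreferenced inode %u\n", inode->ino);
	}
	return 0;
}
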
2256 2256
2257 /* 2257 /*
2258 * Maximal extent format file size. 2258 * Maximal extent format file size.
2259 * Resulting logical blkno at s_maxbytes must fit in our on-disk 2259 * Resulting logical blkno at s_maxbytes must fit in our on-disk
2260 * extent format containers, within a sector_t, and within i_blocks 2260 * extent format containers, within a sector_t, and within i_blocks
2261 * in the vfs. ext4 inode has 48 bits of i_block in fsblock units, 2261 * in the vfs. ext4 inode has 48 bits of i_block in fsblock units,
2262 * so that won't be a limiting factor. 2262 * so that won't be a limiting factor.
2263 * 2263 *
2264 * However, there is another limiting factor: we store an extent as a 2264 * However, there is another limiting factor: we store an extent as a
2265 * starting block plus a length, so the length of an extent covering the 2265 * starting block plus a length, so the length of an extent covering the
2266 * maximum file size must also fit into the on-disk format containers. 2266 * maximum file size must also fit into the on-disk format containers.
2267 * Given that the length is always one unit bigger than the maximum unit 2267 * Given that the length is always one unit bigger than the maximum unit
2268 * (because we count 0 as well), we have to lower s_maxbytes by one fs block. 2268 * (because we count 0 as well), we have to lower s_maxbytes by one fs block.
2269 * 2269 *
2270 * Note, this does *not* consider any metadata overhead for vfs i_blocks. 2270 * Note, this does *not* consider any metadata overhead for vfs i_blocks.
2271 */ 2271 */
2272 static loff_t ext4_max_size(int blkbits, int has_huge_files) 2272 static loff_t ext4_max_size(int blkbits, int has_huge_files)
2273 { 2273 {
2274 loff_t res; 2274 loff_t res;
2275 loff_t upper_limit = MAX_LFS_FILESIZE; 2275 loff_t upper_limit = MAX_LFS_FILESIZE;
2276 2276
2277 /* small i_blocks in vfs inode? */ 2277 /* small i_blocks in vfs inode? */
2278 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { 2278 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
2279 /* 2279 /*
2280 * !has_huge_files or CONFIG_LBDAF not enabled implies that the 2280 * !has_huge_files or CONFIG_LBDAF not enabled implies that the
2281 * inode i_blocks field represents total blocks in 512-byte units; 2281 * inode i_blocks field represents total blocks in 512-byte units;
2282 * 32 bits == size of the vfs inode i_blocks field * 8 2282 * 32 bits == size of the vfs inode i_blocks field * 8
2283 */ 2283 */
2284 upper_limit = (1LL << 32) - 1; 2284 upper_limit = (1LL << 32) - 1;
2285 2285
2286 /* total blocks in file system block size */ 2286 /* total blocks in file system block size */
2287 upper_limit >>= (blkbits - 9); 2287 upper_limit >>= (blkbits - 9);
2288 upper_limit <<= blkbits; 2288 upper_limit <<= blkbits;
2289 } 2289 }
2290 2290
2291 /* 2291 /*
2292 * 32-bit extent-start container, ee_block. We lower the maxbytes 2292 * 32-bit extent-start container, ee_block. We lower the maxbytes
2293 * by one fs block, so ee_len can cover the extent of maximum file 2293 * by one fs block, so ee_len can cover the extent of maximum file
2294 * size 2294 * size
2295 */ 2295 */
2296 res = (1LL << 32) - 1; 2296 res = (1LL << 32) - 1;
2297 res <<= blkbits; 2297 res <<= blkbits;
2298 2298
2299 /* Sanity check against vm- & vfs- imposed limits */ 2299 /* Sanity check against vm- & vfs- imposed limits */
2300 if (res > upper_limit) 2300 if (res > upper_limit)
2301 res = upper_limit; 2301 res = upper_limit;
2302 2302
2303 return res; 2303 return res;
2304 } 2304 }
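
As a worked example, with 4 KiB blocks (blkbits == 12) and huge files enabled, the function above yields res = (2^32 - 1) << 12, just under 16 TiB. A standalone sketch of the same arithmetic, assuming that block size:

#include <stdio.h>

int main(void)
{
	int blkbits = 12;			/* 4 KiB blocks (assumed) */
	long long res = (1LL << 32) - 1;	/* 32-bit ee_block limit */

	res <<= blkbits;
	printf("extent max size: %lld bytes (floor %lld TiB)\n",
	       res, res >> 40);
	return 0;
}
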
2305 2305
2306 /* 2306 /*
2307 * Maximal bitmap file size. There is a direct, and {,double-,triple-}indirect 2307 * Maximal bitmap file size. There is a direct, and {,double-,triple-}indirect
2308 * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks. 2308 * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks.
2309 * We need to be 1 filesystem block less than the 2^48 sector limit. 2309 * We need to be 1 filesystem block less than the 2^48 sector limit.
2310 */ 2310 */
2311 static loff_t ext4_max_bitmap_size(int bits, int has_huge_files) 2311 static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
2312 { 2312 {
2313 loff_t res = EXT4_NDIR_BLOCKS; 2313 loff_t res = EXT4_NDIR_BLOCKS;
2314 int meta_blocks; 2314 int meta_blocks;
2315 loff_t upper_limit; 2315 loff_t upper_limit;
2316 /* This is calculated to be the largest file size for a dense, block 2316 /* This is calculated to be the largest file size for a dense, block
2317 * mapped file such that the file's total number of 512-byte sectors, 2317 * mapped file such that the file's total number of 512-byte sectors,
2318 * including data and all indirect blocks, does not exceed (2^48 - 1). 2318 * including data and all indirect blocks, does not exceed (2^48 - 1).
2319 * 2319 *
2320 * __u32 i_blocks_lo and __u16 i_blocks_high represent the total 2320 * __u32 i_blocks_lo and __u16 i_blocks_high represent the total
2321 * number of 512-byte sectors of the file. 2321 * number of 512-byte sectors of the file.
2322 */ 2322 */
2323 2323
2324 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { 2324 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
2325 /* 2325 /*
2326 * !has_huge_files or CONFIG_LBDAF not enabled implies that 2326 * !has_huge_files or CONFIG_LBDAF not enabled implies that
2327 * the inode i_block field represents total file blocks in 2327 * the inode i_block field represents total file blocks in
2328 * 2^32 512-byte sectors == size of vfs inode i_blocks * 8 2328 * 2^32 512-byte sectors == size of vfs inode i_blocks * 8
2329 */ 2329 */
2330 upper_limit = (1LL << 32) - 1; 2330 upper_limit = (1LL << 32) - 1;
2331 2331
2332 /* total blocks in file system block size */ 2332 /* total blocks in file system block size */
2333 upper_limit >>= (bits - 9); 2333 upper_limit >>= (bits - 9);
2334 2334
2335 } else { 2335 } else {
2336 /* 2336 /*
2337 * We use 48 bit ext4_inode i_blocks 2337 * We use 48 bit ext4_inode i_blocks
2338 * With EXT4_HUGE_FILE_FL set the i_blocks 2338 * With EXT4_HUGE_FILE_FL set the i_blocks
2339 * represent total number of blocks in 2339 * represent total number of blocks in
2340 * file system block size 2340 * file system block size
2341 */ 2341 */
2342 upper_limit = (1LL << 48) - 1; 2342 upper_limit = (1LL << 48) - 1;
2343 2343
2344 } 2344 }
2345 2345
2346 /* indirect blocks */ 2346 /* indirect blocks */
2347 meta_blocks = 1; 2347 meta_blocks = 1;
2348 /* double indirect blocks */ 2348 /* double indirect blocks */
2349 meta_blocks += 1 + (1LL << (bits-2)); 2349 meta_blocks += 1 + (1LL << (bits-2));
2350 /* triple indirect blocks */ 2350 /* triple indirect blocks */
2351 meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2))); 2351 meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));
2352 2352
2353 upper_limit -= meta_blocks; 2353 upper_limit -= meta_blocks;
2354 upper_limit <<= bits; 2354 upper_limit <<= bits;
2355 2355
2356 res += 1LL << (bits-2); 2356 res += 1LL << (bits-2);
2357 res += 1LL << (2*(bits-2)); 2357 res += 1LL << (2*(bits-2));
2358 res += 1LL << (3*(bits-2)); 2358 res += 1LL << (3*(bits-2));
2359 res <<= bits; 2359 res <<= bits;
2360 if (res > upper_limit) 2360 if (res > upper_limit)
2361 res = upper_limit; 2361 res = upper_limit;
2362 2362
2363 if (res > MAX_LFS_FILESIZE) 2363 if (res > MAX_LFS_FILESIZE)
2364 res = MAX_LFS_FILESIZE; 2364 res = MAX_LFS_FILESIZE;
2365 2365
2366 return res; 2366 return res;
2367 } 2367 }
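
For the same 4 KiB block size (bits == 12), each indirect block holds 2^(bits-2) = 1024 pointers, so the sum above comes to roughly 2^30 blocks, about 4 TiB. A hedged standalone restatement of that sum:

#include <stdio.h>

#define EXT4_NDIR_BLOCKS 12	/* direct blocks in the on-disk inode */

int main(void)
{
	int bits = 12;			/* 4 KiB filesystem blocks (assumed) */
	int p = bits - 2;		/* log2(pointers per block) = 10 */
	long long res = EXT4_NDIR_BLOCKS;

	res += 1LL << p;		/* single indirect */
	res += 1LL << (2 * p);		/* double indirect */
	res += 1LL << (3 * p);		/* triple indirect */
	res <<= bits;			/* blocks -> bytes */
	printf("bitmap-mapped max size: %lld bytes (~%lld GiB)\n",
	       res, res >> 30);
	return 0;
}
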
2368 2368
2369 static ext4_fsblk_t descriptor_loc(struct super_block *sb, 2369 static ext4_fsblk_t descriptor_loc(struct super_block *sb,
2370 ext4_fsblk_t logical_sb_block, int nr) 2370 ext4_fsblk_t logical_sb_block, int nr)
2371 { 2371 {
2372 struct ext4_sb_info *sbi = EXT4_SB(sb); 2372 struct ext4_sb_info *sbi = EXT4_SB(sb);
2373 ext4_group_t bg, first_meta_bg; 2373 ext4_group_t bg, first_meta_bg;
2374 int has_super = 0; 2374 int has_super = 0;
2375 2375
2376 first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); 2376 first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
2377 2377
2378 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) || 2378 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
2379 nr < first_meta_bg) 2379 nr < first_meta_bg)
2380 return logical_sb_block + nr + 1; 2380 return logical_sb_block + nr + 1;
2381 bg = sbi->s_desc_per_block * nr; 2381 bg = sbi->s_desc_per_block * nr;
2382 if (ext4_bg_has_super(sb, bg)) 2382 if (ext4_bg_has_super(sb, bg))
2383 has_super = 1; 2383 has_super = 1;
2384 2384
2385 /* 2385 /*
2386 * If we have a meta_bg fs with 1k blocks, group 0's GDT is at 2386 * If we have a meta_bg fs with 1k blocks, group 0's GDT is at
2387 * block 2, not 1. If s_first_data_block == 0 (bigalloc is enabled 2387 * block 2, not 1. If s_first_data_block == 0 (bigalloc is enabled
2388 * on modern mke2fs or blksize > 1k on older mke2fs) then we must 2388 * on modern mke2fs or blksize > 1k on older mke2fs) then we must
2389 * compensate. 2389 * compensate.
2390 */ 2390 */
2391 if (sb->s_blocksize == 1024 && nr == 0 && 2391 if (sb->s_blocksize == 1024 && nr == 0 &&
2392 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) == 0) 2392 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) == 0)
2393 has_super++; 2393 has_super++;
2394 2394
2395 return (has_super + ext4_group_first_block_no(sb, bg)); 2395 return (has_super + ext4_group_first_block_no(sb, bg));
2396 } 2396 }
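
In the classic (non-META_BG) layout, backup descriptor block nr simply follows the backup superblock, hence logical_sb_block + nr + 1; with META_BG, descriptor block nr lives at the start of block group s_desc_per_block * nr, offset by one if that group carries a superblock copy. The sketch below models both branches with hypothetical geometry and a crude stand-in for ext4_bg_has_super():

#include <stdio.h>

/* Hypothetical geometry: 4 KiB blocks, 32768 blocks per group,
 * s_first_data_block == 0. */
#define BLOCKS_PER_GROUP 32768ULL

static unsigned long long group_first_block(unsigned int bg)
{
	return bg * BLOCKS_PER_GROUP;
}

int main(void)
{
	unsigned long long sb_block = 0;	/* logical superblock block */
	unsigned int desc_per_block = 128;	/* 4096 / 32-byte descriptors */
	unsigned int first_meta_bg = 2;
	unsigned int nr = 3;			/* which descriptor block */

	if (nr < first_meta_bg) {
		/* classic layout: descriptors follow the superblock */
		printf("block %llu\n", sb_block + nr + 1);
	} else {
		/* meta_bg: the descriptor block lives in its own group */
		unsigned int bg = desc_per_block * nr;
		int has_super = (bg == 0);	/* crude ext4_bg_has_super() */

		printf("block %llu\n", group_first_block(bg) + has_super);
	}
	return 0;
}
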
2397 2397
2398 /** 2398 /**
2399 * ext4_get_stripe_size: Get the stripe size. 2399 * ext4_get_stripe_size: Get the stripe size.
2400 * @sbi: In memory super block info 2400 * @sbi: In memory super block info
2401 * 2401 *
2402 * If the stripe size was specified via a mount option, use 2402 * If the stripe size was specified via a mount option, use
2403 * that value. If the value specified at mount time is greater than 2403 * that value. If the value specified at mount time is greater than
2404 * the blocks per group, fall back to the super block value. 2404 * the blocks per group, fall back to the super block value.
2405 * If the super block value is also greater than blocks per group, 2405 * If the super block value is also greater than blocks per group,
2406 * return 0; the allocator needs it to be less than blocks per group. 2406 * return 0; the allocator needs it to be less than blocks per group.
2407 * 2407 *
2408 */ 2408 */
2409 static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi) 2409 static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
2410 { 2410 {
2411 unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride); 2411 unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride);
2412 unsigned long stripe_width = 2412 unsigned long stripe_width =
2413 le32_to_cpu(sbi->s_es->s_raid_stripe_width); 2413 le32_to_cpu(sbi->s_es->s_raid_stripe_width);
2414 int ret; 2414 int ret;
2415 2415
2416 if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group) 2416 if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group)
2417 ret = sbi->s_stripe; 2417 ret = sbi->s_stripe;
2418 else if (stripe_width <= sbi->s_blocks_per_group) 2418 else if (stripe_width <= sbi->s_blocks_per_group)
2419 ret = stripe_width; 2419 ret = stripe_width;
2420 else if (stride <= sbi->s_blocks_per_group) 2420 else if (stride <= sbi->s_blocks_per_group)
2421 ret = stride; 2421 ret = stride;
2422 else 2422 else
2423 ret = 0; 2423 ret = 0;
2424 2424
2425 /* 2425 /*
2426 * If the resulting stripe size is 0 or 1, striping makes no sense 2426 * If the resulting stripe size is 0 or 1, striping makes no sense
2427 * and we set it to 0 to turn off the stripe handling code. 2427 * and we set it to 0 to turn off the stripe handling code.
2428 */ 2428 */
2429 if (ret <= 1) 2429 if (ret <= 1)
2430 ret = 0; 2430 ret = 0;
2431 2431
2432 return ret; 2432 return ret;
2433 } 2433 }
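
So the precedence is: the mount-time stripe option first, then the superblock stripe width, then the RAID stride, each accepted only if it fits within a block group, and any result of 0 or 1 disables striping. A compact userspace restatement of that policy, with hypothetical values:

#include <stdio.h>

static unsigned long stripe_size(unsigned long opt, unsigned long width,
				 unsigned long stride, unsigned long per_group)
{
	unsigned long ret;

	if (opt && opt <= per_group)
		ret = opt;
	else if (width <= per_group)
		ret = width;
	else if (stride <= per_group)
		ret = stride;
	else
		ret = 0;

	return ret <= 1 ? 0 : ret;	/* 0 or 1: striping off */
}

int main(void)
{
	/* mount option wins; falls back to width, then stride */
	printf("%lu\n", stripe_size(64, 128, 16, 32768));	/* 64 */
	printf("%lu\n", stripe_size(0, 128, 16, 32768));	/* 128 */
	printf("%lu\n", stripe_size(0, 1, 16, 32768));		/* 1 -> 0 */
	return 0;
}
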
2434 2434
2435 /* sysfs support */ 2435 /* sysfs support */
2436 2436
2437 struct ext4_attr { 2437 struct ext4_attr {
2438 struct attribute attr; 2438 struct attribute attr;
2439 ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *); 2439 ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *);
2440 ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *, 2440 ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *,
2441 const char *, size_t); 2441 const char *, size_t);
2442 union { 2442 union {
2443 int offset; 2443 int offset;
2444 int deprecated_val; 2444 int deprecated_val;
2445 } u; 2445 } u;
2446 }; 2446 };
2447 2447
2448 static int parse_strtoull(const char *buf, 2448 static int parse_strtoull(const char *buf,
2449 unsigned long long max, unsigned long long *value) 2449 unsigned long long max, unsigned long long *value)
2450 { 2450 {
2451 int ret; 2451 int ret;
2452 2452
2453 ret = kstrtoull(skip_spaces(buf), 0, value); 2453 ret = kstrtoull(skip_spaces(buf), 0, value);
2454 if (!ret && *value > max) 2454 if (!ret && *value > max)
2455 ret = -EINVAL; 2455 ret = -EINVAL;
2456 return ret; 2456 return ret;
2457 } 2457 }
2458 2458
2459 static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a, 2459 static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a,
2460 struct ext4_sb_info *sbi, 2460 struct ext4_sb_info *sbi,
2461 char *buf) 2461 char *buf)
2462 { 2462 {
2463 return snprintf(buf, PAGE_SIZE, "%llu\n", 2463 return snprintf(buf, PAGE_SIZE, "%llu\n",
2464 (s64) EXT4_C2B(sbi, 2464 (s64) EXT4_C2B(sbi,
2465 percpu_counter_sum(&sbi->s_dirtyclusters_counter))); 2465 percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
2466 } 2466 }
2467 2467
2468 static ssize_t session_write_kbytes_show(struct ext4_attr *a, 2468 static ssize_t session_write_kbytes_show(struct ext4_attr *a,
2469 struct ext4_sb_info *sbi, char *buf) 2469 struct ext4_sb_info *sbi, char *buf)
2470 { 2470 {
2471 struct super_block *sb = sbi->s_buddy_cache->i_sb; 2471 struct super_block *sb = sbi->s_buddy_cache->i_sb;
2472 2472
2473 if (!sb->s_bdev->bd_part) 2473 if (!sb->s_bdev->bd_part)
2474 return snprintf(buf, PAGE_SIZE, "0\n"); 2474 return snprintf(buf, PAGE_SIZE, "0\n");
2475 return snprintf(buf, PAGE_SIZE, "%lu\n", 2475 return snprintf(buf, PAGE_SIZE, "%lu\n",
2476 (part_stat_read(sb->s_bdev->bd_part, sectors[1]) - 2476 (part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
2477 sbi->s_sectors_written_start) >> 1); 2477 sbi->s_sectors_written_start) >> 1);
2478 } 2478 }
2479 2479
2480 static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a, 2480 static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
2481 struct ext4_sb_info *sbi, char *buf) 2481 struct ext4_sb_info *sbi, char *buf)
2482 { 2482 {
2483 struct super_block *sb = sbi->s_buddy_cache->i_sb; 2483 struct super_block *sb = sbi->s_buddy_cache->i_sb;
2484 2484
2485 if (!sb->s_bdev->bd_part) 2485 if (!sb->s_bdev->bd_part)
2486 return snprintf(buf, PAGE_SIZE, "0\n"); 2486 return snprintf(buf, PAGE_SIZE, "0\n");
2487 return snprintf(buf, PAGE_SIZE, "%llu\n", 2487 return snprintf(buf, PAGE_SIZE, "%llu\n",
2488 (unsigned long long)(sbi->s_kbytes_written + 2488 (unsigned long long)(sbi->s_kbytes_written +
2489 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - 2489 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
2490 EXT4_SB(sb)->s_sectors_written_start) >> 1))); 2490 EXT4_SB(sb)->s_sectors_written_start) >> 1)));
2491 } 2491 }
2492 2492
2493 static ssize_t inode_readahead_blks_store(struct ext4_attr *a, 2493 static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
2494 struct ext4_sb_info *sbi, 2494 struct ext4_sb_info *sbi,
2495 const char *buf, size_t count) 2495 const char *buf, size_t count)
2496 { 2496 {
2497 unsigned long t; 2497 unsigned long t;
2498 int ret; 2498 int ret;
2499 2499
2500 ret = kstrtoul(skip_spaces(buf), 0, &t); 2500 ret = kstrtoul(skip_spaces(buf), 0, &t);
2501 if (ret) 2501 if (ret)
2502 return ret; 2502 return ret;
2503 2503
2504 if (t && (!is_power_of_2(t) || t > 0x40000000)) 2504 if (t && (!is_power_of_2(t) || t > 0x40000000))
2505 return -EINVAL; 2505 return -EINVAL;
2506 2506
2507 sbi->s_inode_readahead_blks = t; 2507 sbi->s_inode_readahead_blks = t;
2508 return count; 2508 return count;
2509 } 2509 }
2510 2510
2511 static ssize_t sbi_ui_show(struct ext4_attr *a, 2511 static ssize_t sbi_ui_show(struct ext4_attr *a,
2512 struct ext4_sb_info *sbi, char *buf) 2512 struct ext4_sb_info *sbi, char *buf)
2513 { 2513 {
2514 unsigned int *ui = (unsigned int *) (((char *) sbi) + a->u.offset); 2514 unsigned int *ui = (unsigned int *) (((char *) sbi) + a->u.offset);
2515 2515
2516 return snprintf(buf, PAGE_SIZE, "%u\n", *ui); 2516 return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
2517 } 2517 }
2518 2518
2519 static ssize_t sbi_ui_store(struct ext4_attr *a, 2519 static ssize_t sbi_ui_store(struct ext4_attr *a,
2520 struct ext4_sb_info *sbi, 2520 struct ext4_sb_info *sbi,
2521 const char *buf, size_t count) 2521 const char *buf, size_t count)
2522 { 2522 {
2523 unsigned int *ui = (unsigned int *) (((char *) sbi) + a->u.offset); 2523 unsigned int *ui = (unsigned int *) (((char *) sbi) + a->u.offset);
2524 unsigned long t; 2524 unsigned long t;
2525 int ret; 2525 int ret;
2526 2526
2527 ret = kstrtoul(skip_spaces(buf), 0, &t); 2527 ret = kstrtoul(skip_spaces(buf), 0, &t);
2528 if (ret) 2528 if (ret)
2529 return ret; 2529 return ret;
2530 *ui = t; 2530 *ui = t;
2531 return count; 2531 return count;
2532 } 2532 }
2533 2533
2534 static ssize_t es_ui_show(struct ext4_attr *a, 2534 static ssize_t es_ui_show(struct ext4_attr *a,
2535 struct ext4_sb_info *sbi, char *buf) 2535 struct ext4_sb_info *sbi, char *buf)
2536 { 2536 {
2537 2537
2538 unsigned int *ui = (unsigned int *) (((char *) sbi->s_es) + 2538 unsigned int *ui = (unsigned int *) (((char *) sbi->s_es) +
2539 a->u.offset); 2539 a->u.offset);
2540 2540
2541 return snprintf(buf, PAGE_SIZE, "%u\n", *ui); 2541 return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
2542 } 2542 }
2543 2543
2544 static ssize_t reserved_clusters_show(struct ext4_attr *a, 2544 static ssize_t reserved_clusters_show(struct ext4_attr *a,
2545 struct ext4_sb_info *sbi, char *buf) 2545 struct ext4_sb_info *sbi, char *buf)
2546 { 2546 {
2547 return snprintf(buf, PAGE_SIZE, "%llu\n", 2547 return snprintf(buf, PAGE_SIZE, "%llu\n",
2548 (unsigned long long) atomic64_read(&sbi->s_resv_clusters)); 2548 (unsigned long long) atomic64_read(&sbi->s_resv_clusters));
2549 } 2549 }
2550 2550
2551 static ssize_t reserved_clusters_store(struct ext4_attr *a, 2551 static ssize_t reserved_clusters_store(struct ext4_attr *a,
2552 struct ext4_sb_info *sbi, 2552 struct ext4_sb_info *sbi,
2553 const char *buf, size_t count) 2553 const char *buf, size_t count)
2554 { 2554 {
2555 unsigned long long val; 2555 unsigned long long val;
2556 int ret; 2556 int ret;
2557 2557
2558 if (parse_strtoull(buf, -1ULL, &val)) 2558 if (parse_strtoull(buf, -1ULL, &val))
2559 return -EINVAL; 2559 return -EINVAL;
2560 ret = ext4_reserve_clusters(sbi, val); 2560 ret = ext4_reserve_clusters(sbi, val);
2561 2561
2562 return ret ? ret : count; 2562 return ret ? ret : count;
2563 } 2563 }
2564 2564
2565 static ssize_t trigger_test_error(struct ext4_attr *a, 2565 static ssize_t trigger_test_error(struct ext4_attr *a,
2566 struct ext4_sb_info *sbi, 2566 struct ext4_sb_info *sbi,
2567 const char *buf, size_t count) 2567 const char *buf, size_t count)
2568 { 2568 {
2569 int len = count; 2569 int len = count;
2570 2570
2571 if (!capable(CAP_SYS_ADMIN)) 2571 if (!capable(CAP_SYS_ADMIN))
2572 return -EPERM; 2572 return -EPERM;
2573 2573
2574 if (len && buf[len-1] == '\n') 2574 if (len && buf[len-1] == '\n')
2575 len--; 2575 len--;
2576 2576
2577 if (len) 2577 if (len)
2578 ext4_error(sbi->s_sb, "%.*s", len, buf); 2578 ext4_error(sbi->s_sb, "%.*s", len, buf);
2579 return count; 2579 return count;
2580 } 2580 }
2581 2581
2582 static ssize_t sbi_deprecated_show(struct ext4_attr *a, 2582 static ssize_t sbi_deprecated_show(struct ext4_attr *a,
2583 struct ext4_sb_info *sbi, char *buf) 2583 struct ext4_sb_info *sbi, char *buf)
2584 { 2584 {
2585 return snprintf(buf, PAGE_SIZE, "%d\n", a->u.deprecated_val); 2585 return snprintf(buf, PAGE_SIZE, "%d\n", a->u.deprecated_val);
2586 } 2586 }
2587 2587
2588 #define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \ 2588 #define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \
2589 static struct ext4_attr ext4_attr_##_name = { \ 2589 static struct ext4_attr ext4_attr_##_name = { \
2590 .attr = {.name = __stringify(_name), .mode = _mode }, \ 2590 .attr = {.name = __stringify(_name), .mode = _mode }, \
2591 .show = _show, \ 2591 .show = _show, \
2592 .store = _store, \ 2592 .store = _store, \
2593 .u = { \ 2593 .u = { \
2594 .offset = offsetof(struct ext4_sb_info, _elname),\ 2594 .offset = offsetof(struct ext4_sb_info, _elname),\
2595 }, \ 2595 }, \
2596 } 2596 }
2597 2597
2598 #define EXT4_ATTR_OFFSET_ES(_name,_mode,_show,_store,_elname) \ 2598 #define EXT4_ATTR_OFFSET_ES(_name,_mode,_show,_store,_elname) \
2599 static struct ext4_attr ext4_attr_##_name = { \ 2599 static struct ext4_attr ext4_attr_##_name = { \
2600 .attr = {.name = __stringify(_name), .mode = _mode }, \ 2600 .attr = {.name = __stringify(_name), .mode = _mode }, \
2601 .show = _show, \ 2601 .show = _show, \
2602 .store = _store, \ 2602 .store = _store, \
2603 .u = { \ 2603 .u = { \
2604 .offset = offsetof(struct ext4_super_block, _elname), \ 2604 .offset = offsetof(struct ext4_super_block, _elname), \
2605 }, \ 2605 }, \
2606 } 2606 }
2607 2607
2608 #define EXT4_ATTR(name, mode, show, store) \ 2608 #define EXT4_ATTR(name, mode, show, store) \
2609 static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) 2609 static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store)
2610 2610
2611 #define EXT4_INFO_ATTR(name) EXT4_ATTR(name, 0444, NULL, NULL) 2611 #define EXT4_INFO_ATTR(name) EXT4_ATTR(name, 0444, NULL, NULL)
2612 #define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL) 2612 #define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL)
2613 #define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store) 2613 #define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store)
2614 2614
2615 #define EXT4_RO_ATTR_ES_UI(name, elname) \ 2615 #define EXT4_RO_ATTR_ES_UI(name, elname) \
2616 EXT4_ATTR_OFFSET_ES(name, 0444, es_ui_show, NULL, elname) 2616 EXT4_ATTR_OFFSET_ES(name, 0444, es_ui_show, NULL, elname)
2617 #define EXT4_RW_ATTR_SBI_UI(name, elname) \ 2617 #define EXT4_RW_ATTR_SBI_UI(name, elname) \
2618 EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname) 2618 EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname)
2619 2619
2620 #define ATTR_LIST(name) &ext4_attr_##name.attr 2620 #define ATTR_LIST(name) &ext4_attr_##name.attr
2621 #define EXT4_DEPRECATED_ATTR(_name, _val) \ 2621 #define EXT4_DEPRECATED_ATTR(_name, _val) \
2622 static struct ext4_attr ext4_attr_##_name = { \ 2622 static struct ext4_attr ext4_attr_##_name = { \
2623 .attr = {.name = __stringify(_name), .mode = 0444 }, \ 2623 .attr = {.name = __stringify(_name), .mode = 0444 }, \
2624 .show = sbi_deprecated_show, \ 2624 .show = sbi_deprecated_show, \
2625 .u = { \ 2625 .u = { \
2626 .deprecated_val = _val, \ 2626 .deprecated_val = _val, \
2627 }, \ 2627 }, \
2628 } 2628 }
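
These macros wire one generic show/store pair to many fields by recording each field's byte offset inside ext4_sb_info and doing pointer arithmetic at access time, as sbi_ui_show() does above. The self-contained miniature below demonstrates the same offsetof() pattern; the toy types and field names are hypothetical, not the kernel's.

#include <stddef.h>
#include <stdio.h>

/* Hypothetical miniatures of ext4_sb_info and ext4_attr. */
struct toy_sb_info {
	unsigned int s_inode_goal;
	unsigned int s_mb_stats;
};

struct toy_attr {
	const char *name;
	int offset;	/* byte offset of the field inside toy_sb_info */
};

#define TOY_RW_ATTR_UI(_name, _elname) \
	{ #_name, offsetof(struct toy_sb_info, _elname) }

static struct toy_attr attrs[] = {
	TOY_RW_ATTR_UI(inode_goal, s_inode_goal),
	TOY_RW_ATTR_UI(mb_stats, s_mb_stats),
};

static unsigned int toy_show(struct toy_sb_info *sbi, struct toy_attr *a)
{
	/* same pointer arithmetic as sbi_ui_show() */
	return *(unsigned int *)((char *)sbi + a->offset);
}

int main(void)
{
	struct toy_sb_info sbi = { .s_inode_goal = 42, .s_mb_stats = 1 };
	unsigned int i;

	for (i = 0; i < sizeof(attrs) / sizeof(attrs[0]); i++)
		printf("%s = %u\n", attrs[i].name, toy_show(&sbi, &attrs[i]));
	return 0;
}
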
2629 2629
2630 EXT4_RO_ATTR(delayed_allocation_blocks); 2630 EXT4_RO_ATTR(delayed_allocation_blocks);
2631 EXT4_RO_ATTR(session_write_kbytes); 2631 EXT4_RO_ATTR(session_write_kbytes);
2632 EXT4_RO_ATTR(lifetime_write_kbytes); 2632 EXT4_RO_ATTR(lifetime_write_kbytes);
2633 EXT4_RW_ATTR(reserved_clusters); 2633 EXT4_RW_ATTR(reserved_clusters);
2634 EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show, 2634 EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show,
2635 inode_readahead_blks_store, s_inode_readahead_blks); 2635 inode_readahead_blks_store, s_inode_readahead_blks);
2636 EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal); 2636 EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
2637 EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats); 2637 EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
2638 EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan); 2638 EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
2639 EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan); 2639 EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
2640 EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); 2640 EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
2641 EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); 2641 EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
2642 EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); 2642 EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
2643 EXT4_DEPRECATED_ATTR(max_writeback_mb_bump, 128); 2643 EXT4_DEPRECATED_ATTR(max_writeback_mb_bump, 128);
2644 EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb); 2644 EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb);
2645 EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error); 2645 EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error);
2646 EXT4_RW_ATTR_SBI_UI(err_ratelimit_interval_ms, s_err_ratelimit_state.interval); 2646 EXT4_RW_ATTR_SBI_UI(err_ratelimit_interval_ms, s_err_ratelimit_state.interval);
2647 EXT4_RW_ATTR_SBI_UI(err_ratelimit_burst, s_err_ratelimit_state.burst); 2647 EXT4_RW_ATTR_SBI_UI(err_ratelimit_burst, s_err_ratelimit_state.burst);
2648 EXT4_RW_ATTR_SBI_UI(warning_ratelimit_interval_ms, s_warning_ratelimit_state.interval); 2648 EXT4_RW_ATTR_SBI_UI(warning_ratelimit_interval_ms, s_warning_ratelimit_state.interval);
2649 EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst); 2649 EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst);
2650 EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval); 2650 EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval);
2651 EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst); 2651 EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst);
2652 EXT4_RO_ATTR_ES_UI(errors_count, s_error_count); 2652 EXT4_RO_ATTR_ES_UI(errors_count, s_error_count);
2653 EXT4_RO_ATTR_ES_UI(first_error_time, s_first_error_time); 2653 EXT4_RO_ATTR_ES_UI(first_error_time, s_first_error_time);
2654 EXT4_RO_ATTR_ES_UI(last_error_time, s_last_error_time); 2654 EXT4_RO_ATTR_ES_UI(last_error_time, s_last_error_time);
2655 2655
2656 static struct attribute *ext4_attrs[] = { 2656 static struct attribute *ext4_attrs[] = {
2657 ATTR_LIST(delayed_allocation_blocks), 2657 ATTR_LIST(delayed_allocation_blocks),
2658 ATTR_LIST(session_write_kbytes), 2658 ATTR_LIST(session_write_kbytes),
2659 ATTR_LIST(lifetime_write_kbytes), 2659 ATTR_LIST(lifetime_write_kbytes),
2660 ATTR_LIST(reserved_clusters), 2660 ATTR_LIST(reserved_clusters),
2661 ATTR_LIST(inode_readahead_blks), 2661 ATTR_LIST(inode_readahead_blks),
2662 ATTR_LIST(inode_goal), 2662 ATTR_LIST(inode_goal),
2663 ATTR_LIST(mb_stats), 2663 ATTR_LIST(mb_stats),
2664 ATTR_LIST(mb_max_to_scan), 2664 ATTR_LIST(mb_max_to_scan),
2665 ATTR_LIST(mb_min_to_scan), 2665 ATTR_LIST(mb_min_to_scan),
2666 ATTR_LIST(mb_order2_req), 2666 ATTR_LIST(mb_order2_req),
2667 ATTR_LIST(mb_stream_req), 2667 ATTR_LIST(mb_stream_req),
2668 ATTR_LIST(mb_group_prealloc), 2668 ATTR_LIST(mb_group_prealloc),
2669 ATTR_LIST(max_writeback_mb_bump), 2669 ATTR_LIST(max_writeback_mb_bump),
2670 ATTR_LIST(extent_max_zeroout_kb), 2670 ATTR_LIST(extent_max_zeroout_kb),
2671 ATTR_LIST(trigger_fs_error), 2671 ATTR_LIST(trigger_fs_error),
2672 ATTR_LIST(err_ratelimit_interval_ms), 2672 ATTR_LIST(err_ratelimit_interval_ms),
2673 ATTR_LIST(err_ratelimit_burst), 2673 ATTR_LIST(err_ratelimit_burst),
2674 ATTR_LIST(warning_ratelimit_interval_ms), 2674 ATTR_LIST(warning_ratelimit_interval_ms),
2675 ATTR_LIST(warning_ratelimit_burst), 2675 ATTR_LIST(warning_ratelimit_burst),
2676 ATTR_LIST(msg_ratelimit_interval_ms), 2676 ATTR_LIST(msg_ratelimit_interval_ms),
2677 ATTR_LIST(msg_ratelimit_burst), 2677 ATTR_LIST(msg_ratelimit_burst),
2678 ATTR_LIST(errors_count), 2678 ATTR_LIST(errors_count),
2679 ATTR_LIST(first_error_time), 2679 ATTR_LIST(first_error_time),
2680 ATTR_LIST(last_error_time), 2680 ATTR_LIST(last_error_time),
2681 NULL, 2681 NULL,
2682 }; 2682 };
2683 2683
2684 /* Features this copy of ext4 supports */ 2684 /* Features this copy of ext4 supports */
2685 EXT4_INFO_ATTR(lazy_itable_init); 2685 EXT4_INFO_ATTR(lazy_itable_init);
2686 EXT4_INFO_ATTR(batched_discard); 2686 EXT4_INFO_ATTR(batched_discard);
2687 EXT4_INFO_ATTR(meta_bg_resize); 2687 EXT4_INFO_ATTR(meta_bg_resize);
2688 2688
2689 static struct attribute *ext4_feat_attrs[] = { 2689 static struct attribute *ext4_feat_attrs[] = {
2690 ATTR_LIST(lazy_itable_init), 2690 ATTR_LIST(lazy_itable_init),
2691 ATTR_LIST(batched_discard), 2691 ATTR_LIST(batched_discard),
2692 ATTR_LIST(meta_bg_resize), 2692 ATTR_LIST(meta_bg_resize),
2693 NULL, 2693 NULL,
2694 }; 2694 };
2695 2695
2696 static ssize_t ext4_attr_show(struct kobject *kobj, 2696 static ssize_t ext4_attr_show(struct kobject *kobj,
2697 struct attribute *attr, char *buf) 2697 struct attribute *attr, char *buf)
2698 { 2698 {
2699 struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, 2699 struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
2700 s_kobj); 2700 s_kobj);
2701 struct ext4_attr *a = container_of(attr, struct ext4_attr, attr); 2701 struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
2702 2702
2703 return a->show ? a->show(a, sbi, buf) : 0; 2703 return a->show ? a->show(a, sbi, buf) : 0;
2704 } 2704 }
2705 2705
2706 static ssize_t ext4_attr_store(struct kobject *kobj, 2706 static ssize_t ext4_attr_store(struct kobject *kobj,
2707 struct attribute *attr, 2707 struct attribute *attr,
2708 const char *buf, size_t len) 2708 const char *buf, size_t len)
2709 { 2709 {
2710 struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, 2710 struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
2711 s_kobj); 2711 s_kobj);
2712 struct ext4_attr *a = container_of(attr, struct ext4_attr, attr); 2712 struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
2713 2713
2714 return a->store ? a->store(a, sbi, buf, len) : 0; 2714 return a->store ? a->store(a, sbi, buf, len) : 0;
2715 } 2715 }
2716 2716
2717 static void ext4_sb_release(struct kobject *kobj) 2717 static void ext4_sb_release(struct kobject *kobj)
2718 { 2718 {
2719 struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, 2719 struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
2720 s_kobj); 2720 s_kobj);
2721 complete(&sbi->s_kobj_unregister); 2721 complete(&sbi->s_kobj_unregister);
2722 } 2722 }
2723 2723
2724 static const struct sysfs_ops ext4_attr_ops = { 2724 static const struct sysfs_ops ext4_attr_ops = {
2725 .show = ext4_attr_show, 2725 .show = ext4_attr_show,
2726 .store = ext4_attr_store, 2726 .store = ext4_attr_store,
2727 }; 2727 };
2728 2728
2729 static struct kobj_type ext4_ktype = { 2729 static struct kobj_type ext4_ktype = {
2730 .default_attrs = ext4_attrs, 2730 .default_attrs = ext4_attrs,
2731 .sysfs_ops = &ext4_attr_ops, 2731 .sysfs_ops = &ext4_attr_ops,
2732 .release = ext4_sb_release, 2732 .release = ext4_sb_release,
2733 }; 2733 };
2734 2734
2735 static void ext4_feat_release(struct kobject *kobj) 2735 static void ext4_feat_release(struct kobject *kobj)
2736 { 2736 {
2737 complete(&ext4_feat->f_kobj_unregister); 2737 complete(&ext4_feat->f_kobj_unregister);
2738 } 2738 }
2739 2739
2740 static ssize_t ext4_feat_show(struct kobject *kobj, 2740 static ssize_t ext4_feat_show(struct kobject *kobj,
2741 struct attribute *attr, char *buf) 2741 struct attribute *attr, char *buf)
2742 { 2742 {
2743 return snprintf(buf, PAGE_SIZE, "supported\n"); 2743 return snprintf(buf, PAGE_SIZE, "supported\n");
2744 } 2744 }
2745 2745
2746 /* 2746 /*
2747 * We cannot use ext4_attr_show/store because they rely on the kobject 2747 * We cannot use ext4_attr_show/store because they rely on the kobject
2748 * being embedded in the ext4_sb_info structure, which is definitely 2748 * being embedded in the ext4_sb_info structure, which is definitely
2749 * not the case here. 2749 * not the case here.
2750 */ 2750 */
2751 static const struct sysfs_ops ext4_feat_ops = { 2751 static const struct sysfs_ops ext4_feat_ops = {
2752 .show = ext4_feat_show, 2752 .show = ext4_feat_show,
2753 .store = NULL, 2753 .store = NULL,
2754 }; 2754 };
2755 2755
2756 static struct kobj_type ext4_feat_ktype = { 2756 static struct kobj_type ext4_feat_ktype = {
2757 .default_attrs = ext4_feat_attrs, 2757 .default_attrs = ext4_feat_attrs,
2758 .sysfs_ops = &ext4_feat_ops, 2758 .sysfs_ops = &ext4_feat_ops,
2759 .release = ext4_feat_release, 2759 .release = ext4_feat_release,
2760 }; 2760 };
2761 2761
2762 /* 2762 /*
2763 * Check whether this filesystem can be mounted based on 2763 * Check whether this filesystem can be mounted based on
2764 * the features present and the RDONLY/RDWR mount requested. 2764 * the features present and the RDONLY/RDWR mount requested.
2765 * Returns 1 if this filesystem can be mounted as requested, 2765 * Returns 1 if this filesystem can be mounted as requested,
2766 * 0 if it cannot be. 2766 * 0 if it cannot be.
2767 */ 2767 */
2768 static int ext4_feature_set_ok(struct super_block *sb, int readonly) 2768 static int ext4_feature_set_ok(struct super_block *sb, int readonly)
2769 { 2769 {
2770 if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP)) { 2770 if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP)) {
2771 ext4_msg(sb, KERN_ERR, 2771 ext4_msg(sb, KERN_ERR,
2772 "Couldn't mount because of " 2772 "Couldn't mount because of "
2773 "unsupported optional features (%x)", 2773 "unsupported optional features (%x)",
2774 (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) & 2774 (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
2775 ~EXT4_FEATURE_INCOMPAT_SUPP)); 2775 ~EXT4_FEATURE_INCOMPAT_SUPP));
2776 return 0; 2776 return 0;
2777 } 2777 }
2778 2778
2779 if (readonly) 2779 if (readonly)
2780 return 1; 2780 return 1;
2781 2781
2782 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_READONLY)) {
2783 ext4_msg(sb, KERN_INFO, "filesystem is read-only");
2784 sb->s_flags |= MS_RDONLY;
2785 return 1;
2786 }
2787
2782 /* Check that feature set is OK for a read-write mount */ 2788 /* Check that feature set is OK for a read-write mount */
2783 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP)) { 2789 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP)) {
2784 ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of " 2790 ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of "
2785 "unsupported optional features (%x)", 2791 "unsupported optional features (%x)",
2786 (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) & 2792 (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
2787 ~EXT4_FEATURE_RO_COMPAT_SUPP)); 2793 ~EXT4_FEATURE_RO_COMPAT_SUPP));
2788 return 0; 2794 return 0;
2789 } 2795 }
2790 /* 2796 /*
2791 * Large file size enabled file system can only be mounted 2797 * Large file size enabled file system can only be mounted
2792 * read-write on 32-bit systems if kernel is built with CONFIG_LBDAF 2798 * read-write on 32-bit systems if kernel is built with CONFIG_LBDAF
2793 */ 2799 */
2794 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) { 2800 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) {
2795 if (sizeof(blkcnt_t) < sizeof(u64)) { 2801 if (sizeof(blkcnt_t) < sizeof(u64)) {
2796 ext4_msg(sb, KERN_ERR, "Filesystem with huge files " 2802 ext4_msg(sb, KERN_ERR, "Filesystem with huge files "
2797 "cannot be mounted RDWR without " 2803 "cannot be mounted RDWR without "
2798 "CONFIG_LBDAF"); 2804 "CONFIG_LBDAF");
2799 return 0; 2805 return 0;
2800 } 2806 }
2801 } 2807 }
2802 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC) && 2808 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC) &&
2803 !EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) { 2809 !EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
2804 ext4_msg(sb, KERN_ERR, 2810 ext4_msg(sb, KERN_ERR,
2805 "Can't support bigalloc feature without " 2811 "Can't support bigalloc feature without "
2806 "extents feature\n"); 2812 "extents feature\n");
2807 return 0; 2813 return 0;
2808 } 2814 }
2809 2815
2810 #ifndef CONFIG_QUOTA 2816 #ifndef CONFIG_QUOTA
2811 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) && 2817 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) &&
2812 !readonly) { 2818 !readonly) {
2813 ext4_msg(sb, KERN_ERR, 2819 ext4_msg(sb, KERN_ERR,
2814 "Filesystem with quota feature cannot be mounted RDWR " 2820 "Filesystem with quota feature cannot be mounted RDWR "
2815 "without CONFIG_QUOTA"); 2821 "without CONFIG_QUOTA");
2816 return 0; 2822 return 0;
2817 } 2823 }
2818 #endif /* CONFIG_QUOTA */ 2824 #endif /* CONFIG_QUOTA */
2819 return 1; 2825 return 1;
2820 } 2826 }
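
The newly added RO_COMPAT_READONLY branch is the heart of the read-only-images support: a recognized ro-compat bit that forces MS_RDONLY and skips the remaining read-write checks. The toy model below restates that decision order; the bit values and the helper are hypothetical, not ext4's actual constants.

#include <stdio.h>

/* Hypothetical feature bits standing in for EXT4_FEATURE_RO_COMPAT_*. */
#define RO_COMPAT_READONLY	0x1000u
#define RO_COMPAT_SUPP		0x0fffu	/* every ro-compat bit we understand */

static int feature_set_ok(unsigned int ro_compat, int readonly, int *force_ro)
{
	*force_ro = 0;
	if (readonly)
		return 1;	/* ro mounts skip the ro-compat checks */
	if (ro_compat & RO_COMPAT_READONLY) {
		*force_ro = 1;	/* models sb->s_flags |= MS_RDONLY */
		return 1;
	}
	/* reject a r/w mount if any unknown ro-compat bit is set */
	return !(ro_compat & ~RO_COMPAT_SUPP);
}

int main(void)
{
	int force_ro, ok;

	/* image marked read-only: mountable, but forced RO */
	ok = feature_set_ok(RO_COMPAT_READONLY, 0, &force_ro);
	printf("readonly image: ok=%d forced_ro=%d\n", ok, force_ro);

	/* unknown ro-compat bit on a r/w mount: rejected */
	ok = feature_set_ok(0x2000u, 0, &force_ro);
	printf("unknown ro-compat bit: ok=%d\n", ok);
	return 0;
}
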
2821 2827
2822 /* 2828 /*
2823 * This function is called once a day if we have errors logged 2829 * This function is called once a day if we have errors logged
2824 * on the file system 2830 * on the file system
2825 */ 2831 */
2826 static void print_daily_error_info(unsigned long arg) 2832 static void print_daily_error_info(unsigned long arg)
2827 { 2833 {
2828 struct super_block *sb = (struct super_block *) arg; 2834 struct super_block *sb = (struct super_block *) arg;
2829 struct ext4_sb_info *sbi; 2835 struct ext4_sb_info *sbi;
2830 struct ext4_super_block *es; 2836 struct ext4_super_block *es;
2831 2837
2832 sbi = EXT4_SB(sb); 2838 sbi = EXT4_SB(sb);
2833 es = sbi->s_es; 2839 es = sbi->s_es;
2834 2840
2835 if (es->s_error_count) 2841 if (es->s_error_count)
2836 /* fsck newer than v1.41.13 is needed to clear this condition. */ 2842 /* fsck newer than v1.41.13 is needed to clear this condition. */
2837 ext4_msg(sb, KERN_NOTICE, "error count since last fsck: %u", 2843 ext4_msg(sb, KERN_NOTICE, "error count since last fsck: %u",
2838 le32_to_cpu(es->s_error_count)); 2844 le32_to_cpu(es->s_error_count));
2839 if (es->s_first_error_time) { 2845 if (es->s_first_error_time) {
2840 printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %u: %.*s:%d", 2846 printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %u: %.*s:%d",
2841 sb->s_id, le32_to_cpu(es->s_first_error_time), 2847 sb->s_id, le32_to_cpu(es->s_first_error_time),
2842 (int) sizeof(es->s_first_error_func), 2848 (int) sizeof(es->s_first_error_func),
2843 es->s_first_error_func, 2849 es->s_first_error_func,
2844 le32_to_cpu(es->s_first_error_line)); 2850 le32_to_cpu(es->s_first_error_line));
2845 if (es->s_first_error_ino) 2851 if (es->s_first_error_ino)
2846 printk(": inode %u", 2852 printk(": inode %u",
2847 le32_to_cpu(es->s_first_error_ino)); 2853 le32_to_cpu(es->s_first_error_ino));
2848 if (es->s_first_error_block) 2854 if (es->s_first_error_block)
2849 printk(": block %llu", (unsigned long long) 2855 printk(": block %llu", (unsigned long long)
2850 le64_to_cpu(es->s_first_error_block)); 2856 le64_to_cpu(es->s_first_error_block));
2851 printk("\n"); 2857 printk("\n");
2852 } 2858 }
2853 if (es->s_last_error_time) { 2859 if (es->s_last_error_time) {
2854 printk(KERN_NOTICE "EXT4-fs (%s): last error at time %u: %.*s:%d", 2860 printk(KERN_NOTICE "EXT4-fs (%s): last error at time %u: %.*s:%d",
2855 sb->s_id, le32_to_cpu(es->s_last_error_time), 2861 sb->s_id, le32_to_cpu(es->s_last_error_time),
2856 (int) sizeof(es->s_last_error_func), 2862 (int) sizeof(es->s_last_error_func),
2857 es->s_last_error_func, 2863 es->s_last_error_func,
2858 le32_to_cpu(es->s_last_error_line)); 2864 le32_to_cpu(es->s_last_error_line));
2859 if (es->s_last_error_ino) 2865 if (es->s_last_error_ino)
2860 printk(": inode %u", 2866 printk(": inode %u",
2861 le32_to_cpu(es->s_last_error_ino)); 2867 le32_to_cpu(es->s_last_error_ino));
2862 if (es->s_last_error_block) 2868 if (es->s_last_error_block)
2863 printk(": block %llu", (unsigned long long) 2869 printk(": block %llu", (unsigned long long)
2864 le64_to_cpu(es->s_last_error_block)); 2870 le64_to_cpu(es->s_last_error_block));
2865 printk("\n"); 2871 printk("\n");
2866 } 2872 }
2867 mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ); /* Once a day */ 2873 mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ); /* Once a day */
2868 } 2874 }
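
print_daily_error_info() runs off a kernel timer that re-arms itself with the mod_timer() call above. The arming side, which this series converts from init_timer() to setup_timer(), looks roughly like the fragment below; this is a kernel-API sketch assuming the sb/sbi context of ext4_fill_super(), not a standalone program.

	/* Assumed context: struct super_block *sb, struct ext4_sb_info *sbi. */
	setup_timer(&sbi->s_err_report, print_daily_error_info,
		    (unsigned long)sb);
	/* arm the first report; the handler re-arms itself daily */
	mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);
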
2869 2875
2870 /* Find next suitable group and run ext4_init_inode_table */ 2876 /* Find next suitable group and run ext4_init_inode_table */
2871 static int ext4_run_li_request(struct ext4_li_request *elr) 2877 static int ext4_run_li_request(struct ext4_li_request *elr)
2872 { 2878 {
2873 struct ext4_group_desc *gdp = NULL; 2879 struct ext4_group_desc *gdp = NULL;
2874 ext4_group_t group, ngroups; 2880 ext4_group_t group, ngroups;
2875 struct super_block *sb; 2881 struct super_block *sb;
2876 unsigned long timeout = 0; 2882 unsigned long timeout = 0;
2877 int ret = 0; 2883 int ret = 0;
2878 2884
2879 sb = elr->lr_super; 2885 sb = elr->lr_super;
2880 ngroups = EXT4_SB(sb)->s_groups_count; 2886 ngroups = EXT4_SB(sb)->s_groups_count;
2881 2887
2882 sb_start_write(sb); 2888 sb_start_write(sb);
2883 for (group = elr->lr_next_group; group < ngroups; group++) { 2889 for (group = elr->lr_next_group; group < ngroups; group++) {
2884 gdp = ext4_get_group_desc(sb, group, NULL); 2890 gdp = ext4_get_group_desc(sb, group, NULL);
2885 if (!gdp) { 2891 if (!gdp) {
2886 ret = 1; 2892 ret = 1;
2887 break; 2893 break;
2888 } 2894 }
2889 2895
2890 if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))) 2896 if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
2891 break; 2897 break;
2892 } 2898 }
2893 2899
2894 if (group >= ngroups) 2900 if (group >= ngroups)
2895 ret = 1; 2901 ret = 1;
2896 2902
2897 if (!ret) { 2903 if (!ret) {
2898 timeout = jiffies; 2904 timeout = jiffies;
2899 ret = ext4_init_inode_table(sb, group, 2905 ret = ext4_init_inode_table(sb, group,
2900 elr->lr_timeout ? 0 : 1); 2906 elr->lr_timeout ? 0 : 1);
2901 if (elr->lr_timeout == 0) { 2907 if (elr->lr_timeout == 0) {
2902 timeout = (jiffies - timeout) * 2908 timeout = (jiffies - timeout) *
2903 elr->lr_sbi->s_li_wait_mult; 2909 elr->lr_sbi->s_li_wait_mult;
2904 elr->lr_timeout = timeout; 2910 elr->lr_timeout = timeout;
2905 } 2911 }
2906 elr->lr_next_sched = jiffies + elr->lr_timeout; 2912 elr->lr_next_sched = jiffies + elr->lr_timeout;
2907 elr->lr_next_group = group + 1; 2913 elr->lr_next_group = group + 1;
2908 } 2914 }
2909 sb_end_write(sb); 2915 sb_end_write(sb);
2910 2916
2911 return ret; 2917 return ret;
2912 } 2918 }
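
The throttling rule above is: time one ext4_init_inode_table() pass, multiply by s_li_wait_mult, and sleep that long before the next group, so slower devices get polled proportionally less often. In standalone form, with hypothetical numbers:

#include <stdio.h>

int main(void)
{
	/* Hypothetical timing: one zeroing pass took 50 jiffies. */
	unsigned long start = 1000, now = 1050;
	unsigned long wait_mult = 10;	/* plays s_li_wait_mult */
	unsigned long timeout = (now - start) * wait_mult;

	printf("sleep %lu jiffies before the next group\n", timeout);
	return 0;
}
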
2913 2919
2914 /* 2920 /*
2915 * Remove lr_request from the list_request and free the 2921 * Remove lr_request from the list_request and free the
2916 * request structure. Should be called with li_list_mtx held 2922 * request structure. Should be called with li_list_mtx held
2917 */ 2923 */
2918 static void ext4_remove_li_request(struct ext4_li_request *elr) 2924 static void ext4_remove_li_request(struct ext4_li_request *elr)
2919 { 2925 {
2920 struct ext4_sb_info *sbi; 2926 struct ext4_sb_info *sbi;
2921 2927
2922 if (!elr) 2928 if (!elr)
2923 return; 2929 return;
2924 2930
2925 sbi = elr->lr_sbi; 2931 sbi = elr->lr_sbi;
2926 2932
2927 list_del(&elr->lr_request); 2933 list_del(&elr->lr_request);
2928 sbi->s_li_request = NULL; 2934 sbi->s_li_request = NULL;
2929 kfree(elr); 2935 kfree(elr);
2930 } 2936 }
2931 2937
2932 static void ext4_unregister_li_request(struct super_block *sb) 2938 static void ext4_unregister_li_request(struct super_block *sb)
2933 { 2939 {
2934 mutex_lock(&ext4_li_mtx); 2940 mutex_lock(&ext4_li_mtx);
2935 if (!ext4_li_info) { 2941 if (!ext4_li_info) {
2936 mutex_unlock(&ext4_li_mtx); 2942 mutex_unlock(&ext4_li_mtx);
2937 return; 2943 return;
2938 } 2944 }
2939 2945
2940 mutex_lock(&ext4_li_info->li_list_mtx); 2946 mutex_lock(&ext4_li_info->li_list_mtx);
2941 ext4_remove_li_request(EXT4_SB(sb)->s_li_request); 2947 ext4_remove_li_request(EXT4_SB(sb)->s_li_request);
2942 mutex_unlock(&ext4_li_info->li_list_mtx); 2948 mutex_unlock(&ext4_li_info->li_list_mtx);
2943 mutex_unlock(&ext4_li_mtx); 2949 mutex_unlock(&ext4_li_mtx);
2944 } 2950 }
2945 2951
2946 static struct task_struct *ext4_lazyinit_task; 2952 static struct task_struct *ext4_lazyinit_task;
2947 2953
2948 /* 2954 /*
2949 * This is the function in which the ext4lazyinit thread lives. It walks 2955 * This is the function in which the ext4lazyinit thread lives. It walks
2950 * through the request list searching for the next scheduled filesystem. 2956 * through the request list searching for the next scheduled filesystem.
2951 * When one is found, it runs the lazy initialization request 2957 * When one is found, it runs the lazy initialization request
2952 * (ext4_run_li_request) and keeps track of the time spent in this 2958 * (ext4_run_li_request) and keeps track of the time spent in this
2953 * function. Based on that time we compute the next schedule time for 2959 * function. Based on that time we compute the next schedule time for
2954 * the request. When the walk through the list is complete, the thread 2960 * the request. When the walk through the list is complete, the thread
2955 * computes the next wakeup time and puts itself to sleep. 2961 * computes the next wakeup time and puts itself to sleep.
2956 */ 2962 */
2957 static int ext4_lazyinit_thread(void *arg) 2963 static int ext4_lazyinit_thread(void *arg)
2958 { 2964 {
2959 struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg; 2965 struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg;
2960 struct list_head *pos, *n; 2966 struct list_head *pos, *n;
2961 struct ext4_li_request *elr; 2967 struct ext4_li_request *elr;
2962 unsigned long next_wakeup, cur; 2968 unsigned long next_wakeup, cur;
2963 2969
2964 BUG_ON(NULL == eli); 2970 BUG_ON(NULL == eli);
2965 2971
2966 cont_thread: 2972 cont_thread:
2967 while (true) { 2973 while (true) {
2968 next_wakeup = MAX_JIFFY_OFFSET; 2974 next_wakeup = MAX_JIFFY_OFFSET;
2969 2975
2970 mutex_lock(&eli->li_list_mtx); 2976 mutex_lock(&eli->li_list_mtx);
2971 if (list_empty(&eli->li_request_list)) { 2977 if (list_empty(&eli->li_request_list)) {
2972 mutex_unlock(&eli->li_list_mtx); 2978 mutex_unlock(&eli->li_list_mtx);
2973 goto exit_thread; 2979 goto exit_thread;
2974 } 2980 }
2975 2981
2976 list_for_each_safe(pos, n, &eli->li_request_list) { 2982 list_for_each_safe(pos, n, &eli->li_request_list) {
2977 elr = list_entry(pos, struct ext4_li_request, 2983 elr = list_entry(pos, struct ext4_li_request,
2978 lr_request); 2984 lr_request);
2979 2985
2980 if (time_after_eq(jiffies, elr->lr_next_sched)) { 2986 if (time_after_eq(jiffies, elr->lr_next_sched)) {
2981 if (ext4_run_li_request(elr) != 0) { 2987 if (ext4_run_li_request(elr) != 0) {
2982 /* error, remove the lazy_init job */ 2988 /* error, remove the lazy_init job */
2983 ext4_remove_li_request(elr); 2989 ext4_remove_li_request(elr);
2984 continue; 2990 continue;
2985 } 2991 }
2986 } 2992 }
2987 2993
2988 if (time_before(elr->lr_next_sched, next_wakeup)) 2994 if (time_before(elr->lr_next_sched, next_wakeup))
2989 next_wakeup = elr->lr_next_sched; 2995 next_wakeup = elr->lr_next_sched;
2990 } 2996 }
2991 mutex_unlock(&eli->li_list_mtx); 2997 mutex_unlock(&eli->li_list_mtx);
2992 2998
2993 try_to_freeze(); 2999 try_to_freeze();
2994 3000
2995 cur = jiffies; 3001 cur = jiffies;
2996 if ((time_after_eq(cur, next_wakeup)) || 3002 if ((time_after_eq(cur, next_wakeup)) ||
2997 (MAX_JIFFY_OFFSET == next_wakeup)) { 3003 (MAX_JIFFY_OFFSET == next_wakeup)) {
2998 cond_resched(); 3004 cond_resched();
2999 continue; 3005 continue;
3000 } 3006 }
3001 3007
3002 schedule_timeout_interruptible(next_wakeup - cur); 3008 schedule_timeout_interruptible(next_wakeup - cur);
3003 3009
3004 if (kthread_should_stop()) { 3010 if (kthread_should_stop()) {
3005 ext4_clear_request_list(); 3011 ext4_clear_request_list();
3006 goto exit_thread; 3012 goto exit_thread;
3007 } 3013 }
3008 } 3014 }
3009 3015
3010 exit_thread: 3016 exit_thread:
3011 /* 3017 /*
3012 * It looks like the request list is empty, but we need 3018 * It looks like the request list is empty, but we need
3013 * to check it under the li_list_mtx lock, to prevent any 3019 * to check it under the li_list_mtx lock, to prevent any
3014 * additions to it, and of course we should lock ext4_li_mtx 3020 * additions to it, and of course we should lock ext4_li_mtx
3015 * to atomically free the list and ext4_li_info, because at 3021 * to atomically free the list and ext4_li_info, because at
3016 * this point another ext4 filesystem could be registering 3022 * this point another ext4 filesystem could be registering
3017 * a new one. 3023 * a new one.
3018 */ 3024 */
3019 mutex_lock(&ext4_li_mtx); 3025 mutex_lock(&ext4_li_mtx);
3020 mutex_lock(&eli->li_list_mtx); 3026 mutex_lock(&eli->li_list_mtx);
3021 if (!list_empty(&eli->li_request_list)) { 3027 if (!list_empty(&eli->li_request_list)) {
3022 mutex_unlock(&eli->li_list_mtx); 3028 mutex_unlock(&eli->li_list_mtx);
3023 mutex_unlock(&ext4_li_mtx); 3029 mutex_unlock(&ext4_li_mtx);
3024 goto cont_thread; 3030 goto cont_thread;
3025 } 3031 }
3026 mutex_unlock(&eli->li_list_mtx); 3032 mutex_unlock(&eli->li_list_mtx);
3027 kfree(ext4_li_info); 3033 kfree(ext4_li_info);
3028 ext4_li_info = NULL; 3034 ext4_li_info = NULL;
3029 mutex_unlock(&ext4_li_mtx); 3035 mutex_unlock(&ext4_li_mtx);
3030 3036
3031 return 0; 3037 return 0;
3032 } 3038 }
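
The loop above boils down to: run every request whose lr_next_sched has passed, track the minimum deadline over the remaining ones, and sleep until that deadline. A minimal user-space sketch of the same scheduling pattern, with time(2) seconds standing in for jiffies and a hypothetical li_request type in place of struct ext4_li_request:

#include <stdio.h>
#include <time.h>
#include <unistd.h>

/* Hypothetical stand-in for struct ext4_li_request. */
struct li_request {
    time_t next_sched;          /* next time this request is due */
};

static void run_request(struct li_request *r)
{
    /* The real thread would zero a chunk of inode table here. */
    r->next_sched = time(NULL) + 5;     /* reschedule five seconds out */
}

int main(void)
{
    struct li_request reqs[2] = {
        { .next_sched = time(NULL) + 1 },
        { .next_sched = time(NULL) + 2 },
    };
    int rounds;

    for (rounds = 0; rounds < 3; rounds++) {
        time_t now = time(NULL);
        time_t next_wakeup = 0;
        int i, have_next = 0;

        for (i = 0; i < 2; i++) {
            if (now >= reqs[i].next_sched)
                run_request(&reqs[i]);  /* due: run it now */
            if (!have_next || reqs[i].next_sched < next_wakeup) {
                next_wakeup = reqs[i].next_sched;
                have_next = 1;
            }
        }
        /* analogue of schedule_timeout_interruptible() */
        if (have_next && next_wakeup > now)
            sleep((unsigned int)(next_wakeup - now));
    }
    return 0;
}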
3033 3039
3034 static void ext4_clear_request_list(void) 3040 static void ext4_clear_request_list(void)
3035 { 3041 {
3036 struct list_head *pos, *n; 3042 struct list_head *pos, *n;
3037 struct ext4_li_request *elr; 3043 struct ext4_li_request *elr;
3038 3044
3039 mutex_lock(&ext4_li_info->li_list_mtx); 3045 mutex_lock(&ext4_li_info->li_list_mtx);
3040 list_for_each_safe(pos, n, &ext4_li_info->li_request_list) { 3046 list_for_each_safe(pos, n, &ext4_li_info->li_request_list) {
3041 elr = list_entry(pos, struct ext4_li_request, 3047 elr = list_entry(pos, struct ext4_li_request,
3042 lr_request); 3048 lr_request);
3043 ext4_remove_li_request(elr); 3049 ext4_remove_li_request(elr);
3044 } 3050 }
3045 mutex_unlock(&ext4_li_info->li_list_mtx); 3051 mutex_unlock(&ext4_li_info->li_list_mtx);
3046 } 3052 }
3047 3053
3048 static int ext4_run_lazyinit_thread(void) 3054 static int ext4_run_lazyinit_thread(void)
3049 { 3055 {
3050 ext4_lazyinit_task = kthread_run(ext4_lazyinit_thread, 3056 ext4_lazyinit_task = kthread_run(ext4_lazyinit_thread,
3051 ext4_li_info, "ext4lazyinit"); 3057 ext4_li_info, "ext4lazyinit");
3052 if (IS_ERR(ext4_lazyinit_task)) { 3058 if (IS_ERR(ext4_lazyinit_task)) {
3053 int err = PTR_ERR(ext4_lazyinit_task); 3059 int err = PTR_ERR(ext4_lazyinit_task);
3054 ext4_clear_request_list(); 3060 ext4_clear_request_list();
3055 kfree(ext4_li_info); 3061 kfree(ext4_li_info);
3056 ext4_li_info = NULL; 3062 ext4_li_info = NULL;
3057 printk(KERN_CRIT "EXT4-fs: error %d creating inode table " 3063 printk(KERN_CRIT "EXT4-fs: error %d creating inode table "
3058 "initialization thread\n", 3064 "initialization thread\n",
3059 err); 3065 err);
3060 return err; 3066 return err;
3061 } 3067 }
3062 ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING; 3068 ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING;
3063 return 0; 3069 return 0;
3064 } 3070 }
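
Note that kthread_run() reports failure through the pointer value itself rather than by returning NULL, which is why the caller tests IS_ERR() and decodes the errno with PTR_ERR(). A rough illustration of that error-pointer convention, using simplified user-space stand-ins for the kernel's ERR_PTR()/IS_ERR()/PTR_ERR() macros:

#include <stdio.h>
#include <errno.h>

#define MAX_ERRNO   4095

/* Simplified forms of the kernel's error-pointer helpers. */
static inline void *ERR_PTR(long error)
{
    return (void *)error;
}

static inline long PTR_ERR(const void *ptr)
{
    return (long)ptr;
}

static inline int IS_ERR(const void *ptr)
{
    /* Error codes live in the last page of the address space. */
    return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

/* Hypothetical creator that can fail, like kthread_run(). */
static void *create_task(int fail)
{
    static int task_object;
    return fail ? ERR_PTR(-ENOMEM) : (void *)&task_object;
}

int main(void)
{
    void *task = create_task(1);

    if (IS_ERR(task)) {
        printf("creation failed: error %ld\n", PTR_ERR(task));
        return 1;
    }
    return 0;
}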
3065 3071
3066 /* 3072 /*
3067 * Check whether it makes sense to run the itable init thread or not. 3073 * Check whether it makes sense to run the itable init thread or not.
3068 * If there is at least one uninitialized inode table, return the 3074 * If there is at least one uninitialized inode table, return the
3069 * corresponding group number; otherwise the loop goes through all 3075 * corresponding group number; otherwise the loop goes through all
3070 * groups and returns the total number of groups. 3076 * groups and returns the total number of groups.
3071 */ 3077 */
3072 static ext4_group_t ext4_has_uninit_itable(struct super_block *sb) 3078 static ext4_group_t ext4_has_uninit_itable(struct super_block *sb)
3073 { 3079 {
3074 ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count; 3080 ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count;
3075 struct ext4_group_desc *gdp = NULL; 3081 struct ext4_group_desc *gdp = NULL;
3076 3082
3077 for (group = 0; group < ngroups; group++) { 3083 for (group = 0; group < ngroups; group++) {
3078 gdp = ext4_get_group_desc(sb, group, NULL); 3084 gdp = ext4_get_group_desc(sb, group, NULL);
3079 if (!gdp) 3085 if (!gdp)
3080 continue; 3086 continue;
3081 3087
3082 if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))) 3088 if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
3083 break; 3089 break;
3084 } 3090 }
3085 3091
3086 return group; 3092 return group;
3087 } 3093 }
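
Because bg_flags is a little-endian on-disk field, the flag constant is byte-swapped once with cpu_to_le16() instead of swapping every descriptor inside the loop. A self-contained sketch of the same scan, with a hand-rolled cpu_to_le16() and a plain array of flag words standing in for the group descriptors:

#include <stdio.h>
#include <stdint.h>

#define BG_INODE_ZEROED 0x0004  /* mirrors EXT4_BG_INODE_ZEROED */

/* Minimal cpu_to_le16 stand-in; the kernel gets this from asm/byteorder.h. */
static uint16_t cpu_to_le16(uint16_t v)
{
    union { uint16_t u16; uint8_t u8[2]; } t = { .u16 = 1 };
    return t.u8[0] ? v : (uint16_t)((v << 8) | (v >> 8));
}

/* Return the first group whose inode table is not yet zeroed,
 * or ngroups if all of them are initialized. */
static unsigned find_uninit(const uint16_t *bg_flags, unsigned ngroups)
{
    unsigned group;

    for (group = 0; group < ngroups; group++)
        if (!(bg_flags[group] & cpu_to_le16(BG_INODE_ZEROED)))
            break;
    return group;
}

int main(void)
{
    uint16_t flags[4] = { cpu_to_le16(BG_INODE_ZEROED),
                          cpu_to_le16(BG_INODE_ZEROED), 0, 0 };

    printf("first uninitialized group: %u\n", find_uninit(flags, 4));
    return 0;
}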
3088 3094
3089 static int ext4_li_info_new(void) 3095 static int ext4_li_info_new(void)
3090 { 3096 {
3091 struct ext4_lazy_init *eli = NULL; 3097 struct ext4_lazy_init *eli = NULL;
3092 3098
3093 eli = kzalloc(sizeof(*eli), GFP_KERNEL); 3099 eli = kzalloc(sizeof(*eli), GFP_KERNEL);
3094 if (!eli) 3100 if (!eli)
3095 return -ENOMEM; 3101 return -ENOMEM;
3096 3102
3097 INIT_LIST_HEAD(&eli->li_request_list); 3103 INIT_LIST_HEAD(&eli->li_request_list);
3098 mutex_init(&eli->li_list_mtx); 3104 mutex_init(&eli->li_list_mtx);
3099 3105
3100 eli->li_state |= EXT4_LAZYINIT_QUIT; 3106 eli->li_state |= EXT4_LAZYINIT_QUIT;
3101 3107
3102 ext4_li_info = eli; 3108 ext4_li_info = eli;
3103 3109
3104 return 0; 3110 return 0;
3105 } 3111 }
3106 3112
3107 static struct ext4_li_request *ext4_li_request_new(struct super_block *sb, 3113 static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
3108 ext4_group_t start) 3114 ext4_group_t start)
3109 { 3115 {
3110 struct ext4_sb_info *sbi = EXT4_SB(sb); 3116 struct ext4_sb_info *sbi = EXT4_SB(sb);
3111 struct ext4_li_request *elr; 3117 struct ext4_li_request *elr;
3112 3118
3113 elr = kzalloc(sizeof(*elr), GFP_KERNEL); 3119 elr = kzalloc(sizeof(*elr), GFP_KERNEL);
3114 if (!elr) 3120 if (!elr)
3115 return NULL; 3121 return NULL;
3116 3122
3117 elr->lr_super = sb; 3123 elr->lr_super = sb;
3118 elr->lr_sbi = sbi; 3124 elr->lr_sbi = sbi;
3119 elr->lr_next_group = start; 3125 elr->lr_next_group = start;
3120 3126
3121 /* 3127 /*
3122 * Randomize the first schedule time of the request to 3128 * Randomize the first schedule time of the request to
3123 * spread out the inode table initialization requests 3129 * spread out the inode table initialization requests
3124 * more evenly. 3130 * more evenly.
3125 */ 3131 */
3126 elr->lr_next_sched = jiffies + (prandom_u32() % 3132 elr->lr_next_sched = jiffies + (prandom_u32() %
3127 (EXT4_DEF_LI_MAX_START_DELAY * HZ)); 3133 (EXT4_DEF_LI_MAX_START_DELAY * HZ));
3128 return elr; 3134 return elr;
3129 } 3135 }
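
The first schedule time lands at a random point within EXT4_DEF_LI_MAX_START_DELAY seconds of mount, so several filesystems mounted together do not all start zeroing inode tables at once. A user-space sketch of that computation, with rand() standing in for prandom_u32() and plain seconds for jiffies (the 5-second constant is assumed to mirror the default in this version of the code):

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define LI_MAX_START_DELAY  5   /* assumed EXT4_DEF_LI_MAX_START_DELAY */

int main(void)
{
    time_t now = time(NULL);
    time_t first_sched;

    srand((unsigned)now);
    /* jiffies + prandom_u32() % (DELAY * HZ), in seconds */
    first_sched = now + rand() % LI_MAX_START_DELAY;
    printf("first run scheduled %ld s from now\n",
           (long)(first_sched - now));
    return 0;
}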
3130 3136
3131 int ext4_register_li_request(struct super_block *sb, 3137 int ext4_register_li_request(struct super_block *sb,
3132 ext4_group_t first_not_zeroed) 3138 ext4_group_t first_not_zeroed)
3133 { 3139 {
3134 struct ext4_sb_info *sbi = EXT4_SB(sb); 3140 struct ext4_sb_info *sbi = EXT4_SB(sb);
3135 struct ext4_li_request *elr = NULL; 3141 struct ext4_li_request *elr = NULL;
3136 ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; 3142 ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
3137 int ret = 0; 3143 int ret = 0;
3138 3144
3139 mutex_lock(&ext4_li_mtx); 3145 mutex_lock(&ext4_li_mtx);
3140 if (sbi->s_li_request != NULL) { 3146 if (sbi->s_li_request != NULL) {
3141 /* 3147 /*
3142 * Reset timeout so it can be computed again, because 3148 * Reset timeout so it can be computed again, because
3143 * s_li_wait_mult might have changed. 3149 * s_li_wait_mult might have changed.
3144 */ 3150 */
3145 sbi->s_li_request->lr_timeout = 0; 3151 sbi->s_li_request->lr_timeout = 0;
3146 goto out; 3152 goto out;
3147 } 3153 }
3148 3154
3149 if (first_not_zeroed == ngroups || 3155 if (first_not_zeroed == ngroups ||
3150 (sb->s_flags & MS_RDONLY) || 3156 (sb->s_flags & MS_RDONLY) ||
3151 !test_opt(sb, INIT_INODE_TABLE)) 3157 !test_opt(sb, INIT_INODE_TABLE))
3152 goto out; 3158 goto out;
3153 3159
3154 elr = ext4_li_request_new(sb, first_not_zeroed); 3160 elr = ext4_li_request_new(sb, first_not_zeroed);
3155 if (!elr) { 3161 if (!elr) {
3156 ret = -ENOMEM; 3162 ret = -ENOMEM;
3157 goto out; 3163 goto out;
3158 } 3164 }
3159 3165
3160 if (NULL == ext4_li_info) { 3166 if (NULL == ext4_li_info) {
3161 ret = ext4_li_info_new(); 3167 ret = ext4_li_info_new();
3162 if (ret) 3168 if (ret)
3163 goto out; 3169 goto out;
3164 } 3170 }
3165 3171
3166 mutex_lock(&ext4_li_info->li_list_mtx); 3172 mutex_lock(&ext4_li_info->li_list_mtx);
3167 list_add(&elr->lr_request, &ext4_li_info->li_request_list); 3173 list_add(&elr->lr_request, &ext4_li_info->li_request_list);
3168 mutex_unlock(&ext4_li_info->li_list_mtx); 3174 mutex_unlock(&ext4_li_info->li_list_mtx);
3169 3175
3170 sbi->s_li_request = elr; 3176 sbi->s_li_request = elr;
3171 /* 3177 /*
3172 * Set elr to NULL here since it has been inserted into 3178 * Set elr to NULL here since it has been inserted into
3173 * the request_list, and its removal and freeing are 3179 * the request_list, and its removal and freeing are
3174 * handled by ext4_clear_request_list from now on. 3180 * handled by ext4_clear_request_list from now on.
3175 */ 3181 */
3176 elr = NULL; 3182 elr = NULL;
3177 3183
3178 if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) { 3184 if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) {
3179 ret = ext4_run_lazyinit_thread(); 3185 ret = ext4_run_lazyinit_thread();
3180 if (ret) 3186 if (ret)
3181 goto out; 3187 goto out;
3182 } 3188 }
3183 out: 3189 out:
3184 mutex_unlock(&ext4_li_mtx); 3190 mutex_unlock(&ext4_li_mtx);
3185 if (ret) 3191 if (ret)
3186 kfree(elr); 3192 kfree(elr);
3187 return ret; 3193 return ret;
3188 } 3194 }
3189 3195
3190 /* 3196 /*
3191 * We do not need to lock anything since this is called on 3197 * We do not need to lock anything since this is called on
3192 * module unload. 3198 * module unload.
3193 */ 3199 */
3194 static void ext4_destroy_lazyinit_thread(void) 3200 static void ext4_destroy_lazyinit_thread(void)
3195 { 3201 {
3196 /* 3202 /*
3197 * If the thread exited earlier, 3203 * If the thread exited earlier,
3198 * there's nothing to be done. 3204 * there's nothing to be done.
3199 */ 3205 */
3200 if (!ext4_li_info || !ext4_lazyinit_task) 3206 if (!ext4_li_info || !ext4_lazyinit_task)
3201 return; 3207 return;
3202 3208
3203 kthread_stop(ext4_lazyinit_task); 3209 kthread_stop(ext4_lazyinit_task);
3204 } 3210 }
3205 3211
3206 static int set_journal_csum_feature_set(struct super_block *sb) 3212 static int set_journal_csum_feature_set(struct super_block *sb)
3207 { 3213 {
3208 int ret = 1; 3214 int ret = 1;
3209 int compat, incompat; 3215 int compat, incompat;
3210 struct ext4_sb_info *sbi = EXT4_SB(sb); 3216 struct ext4_sb_info *sbi = EXT4_SB(sb);
3211 3217
3212 if (ext4_has_metadata_csum(sb)) { 3218 if (ext4_has_metadata_csum(sb)) {
3213 /* journal checksum v3 */ 3219 /* journal checksum v3 */
3214 compat = 0; 3220 compat = 0;
3215 incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3; 3221 incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3;
3216 } else { 3222 } else {
3217 /* journal checksum v1 */ 3223 /* journal checksum v1 */
3218 compat = JBD2_FEATURE_COMPAT_CHECKSUM; 3224 compat = JBD2_FEATURE_COMPAT_CHECKSUM;
3219 incompat = 0; 3225 incompat = 0;
3220 } 3226 }
3221 3227
3222 jbd2_journal_clear_features(sbi->s_journal, 3228 jbd2_journal_clear_features(sbi->s_journal,
3223 JBD2_FEATURE_COMPAT_CHECKSUM, 0, 3229 JBD2_FEATURE_COMPAT_CHECKSUM, 0,
3224 JBD2_FEATURE_INCOMPAT_CSUM_V3 | 3230 JBD2_FEATURE_INCOMPAT_CSUM_V3 |
3225 JBD2_FEATURE_INCOMPAT_CSUM_V2); 3231 JBD2_FEATURE_INCOMPAT_CSUM_V2);
3226 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { 3232 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
3227 ret = jbd2_journal_set_features(sbi->s_journal, 3233 ret = jbd2_journal_set_features(sbi->s_journal,
3228 compat, 0, 3234 compat, 0,
3229 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | 3235 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT |
3230 incompat); 3236 incompat);
3231 } else if (test_opt(sb, JOURNAL_CHECKSUM)) { 3237 } else if (test_opt(sb, JOURNAL_CHECKSUM)) {
3232 ret = jbd2_journal_set_features(sbi->s_journal, 3238 ret = jbd2_journal_set_features(sbi->s_journal,
3233 compat, 0, 3239 compat, 0,
3234 incompat); 3240 incompat);
3235 jbd2_journal_clear_features(sbi->s_journal, 0, 0, 3241 jbd2_journal_clear_features(sbi->s_journal, 0, 0,
3236 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); 3242 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
3237 } else { 3243 } else {
3238 jbd2_journal_clear_features(sbi->s_journal, 0, 0, 3244 jbd2_journal_clear_features(sbi->s_journal, 0, 0,
3239 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); 3245 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
3240 } 3246 }
3241 3247
3242 return ret; 3248 return ret;
3243 } 3249 }
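
The effect of the calls above is a small decision table: metadata_csum selects journal checksum v3 (an incompat bit), otherwise v1 (a compat bit); journal_async_commit additionally requests the async-commit bit; and if neither checksum option is in force, everything is cleared. A simplified user-space model of that table (the bit values mirror the JBD2_FEATURE_* constants, and the function is a sketch, not the kernel's actual helper):

#include <stdio.h>

#define COMPAT_CHECKSUM         0x00000001
#define INCOMPAT_ASYNC_COMMIT   0x00000004
#define INCOMPAT_CSUM_V2        0x00000008
#define INCOMPAT_CSUM_V3        0x00000010

/* Which journal feature bits would a mount request? A simplified
 * model of set_journal_csum_feature_set(). */
static void journal_csum_features(int metadata_csum, int async_commit,
                                  int journal_checksum,
                                  unsigned *compat, unsigned *incompat)
{
    if (metadata_csum) {        /* journal checksum v3 */
        *compat = 0;
        *incompat = INCOMPAT_CSUM_V3;
    } else {                    /* journal checksum v1 */
        *compat = COMPAT_CHECKSUM;
        *incompat = 0;
    }
    if (async_commit)
        *incompat |= INCOMPAT_ASYNC_COMMIT;
    else if (!journal_checksum) {
        *compat = 0;            /* no checksumming requested at all */
        *incompat = 0;
    }
}

int main(void)
{
    unsigned c, i;

    journal_csum_features(1, 0, 1, &c, &i);
    printf("compat=%#x incompat=%#x\n", c, i);
    return 0;
}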
3244 3250
3245 /* 3251 /*
3246 * Note: calculating the overhead so we can be compatible with 3252 * Note: calculating the overhead so we can be compatible with
3247 * historical BSD practice is quite difficult in the face of 3253 * historical BSD practice is quite difficult in the face of
3248 * clusters/bigalloc. This is because multiple metadata blocks from 3254 * clusters/bigalloc. This is because multiple metadata blocks from
3249 * different block group can end up in the same allocation cluster. 3255 * different block group can end up in the same allocation cluster.
3250 * Calculating the exact overhead in the face of clustered allocation 3256 * Calculating the exact overhead in the face of clustered allocation
3251 * requires either O(all block bitmaps) in memory or O(number of block 3257 * requires either O(all block bitmaps) in memory or O(number of block
3252 * groups**2) in time. We will still calculate the overhead for 3258 * groups**2) in time. We will still calculate the overhead for
3253 * older file systems --- and if we come across a bigalloc file 3259 * older file systems --- and if we come across a bigalloc file
3254 * system with zero in s_overhead_clusters the estimate will be close to 3260 * system with zero in s_overhead_clusters the estimate will be close to
3255 * correct especially for very large cluster sizes --- but for newer 3261 * correct especially for very large cluster sizes --- but for newer
3256 * file systems, it's better to calculate this figure once at mkfs 3262 * file systems, it's better to calculate this figure once at mkfs
3257 * time, and store it in the superblock. If the superblock value is 3263 * time, and store it in the superblock. If the superblock value is
3258 * present (even for non-bigalloc file systems), we will use it. 3264 * present (even for non-bigalloc file systems), we will use it.
3259 */ 3265 */
3260 static int count_overhead(struct super_block *sb, ext4_group_t grp, 3266 static int count_overhead(struct super_block *sb, ext4_group_t grp,
3261 char *buf) 3267 char *buf)
3262 { 3268 {
3263 struct ext4_sb_info *sbi = EXT4_SB(sb); 3269 struct ext4_sb_info *sbi = EXT4_SB(sb);
3264 struct ext4_group_desc *gdp; 3270 struct ext4_group_desc *gdp;
3265 ext4_fsblk_t first_block, last_block, b; 3271 ext4_fsblk_t first_block, last_block, b;
3266 ext4_group_t i, ngroups = ext4_get_groups_count(sb); 3272 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
3267 int s, j, count = 0; 3273 int s, j, count = 0;
3268 3274
3269 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC)) 3275 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC))
3270 return (ext4_bg_has_super(sb, grp) + ext4_bg_num_gdb(sb, grp) + 3276 return (ext4_bg_has_super(sb, grp) + ext4_bg_num_gdb(sb, grp) +
3271 sbi->s_itb_per_group + 2); 3277 sbi->s_itb_per_group + 2);
3272 3278
3273 first_block = le32_to_cpu(sbi->s_es->s_first_data_block) + 3279 first_block = le32_to_cpu(sbi->s_es->s_first_data_block) +
3274 (grp * EXT4_BLOCKS_PER_GROUP(sb)); 3280 (grp * EXT4_BLOCKS_PER_GROUP(sb));
3275 last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1; 3281 last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1;
3276 for (i = 0; i < ngroups; i++) { 3282 for (i = 0; i < ngroups; i++) {
3277 gdp = ext4_get_group_desc(sb, i, NULL); 3283 gdp = ext4_get_group_desc(sb, i, NULL);
3278 b = ext4_block_bitmap(sb, gdp); 3284 b = ext4_block_bitmap(sb, gdp);
3279 if (b >= first_block && b <= last_block) { 3285 if (b >= first_block && b <= last_block) {
3280 ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf); 3286 ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
3281 count++; 3287 count++;
3282 } 3288 }
3283 b = ext4_inode_bitmap(sb, gdp); 3289 b = ext4_inode_bitmap(sb, gdp);
3284 if (b >= first_block && b <= last_block) { 3290 if (b >= first_block && b <= last_block) {
3285 ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf); 3291 ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
3286 count++; 3292 count++;
3287 } 3293 }
3288 b = ext4_inode_table(sb, gdp); 3294 b = ext4_inode_table(sb, gdp);
3289 if (b >= first_block && b + sbi->s_itb_per_group <= last_block) 3295 if (b >= first_block && b + sbi->s_itb_per_group <= last_block)
3290 for (j = 0; j < sbi->s_itb_per_group; j++, b++) { 3296 for (j = 0; j < sbi->s_itb_per_group; j++, b++) {
3291 int c = EXT4_B2C(sbi, b - first_block); 3297 int c = EXT4_B2C(sbi, b - first_block);
3292 ext4_set_bit(c, buf); 3298 ext4_set_bit(c, buf);
3293 count++; 3299 count++;
3294 } 3300 }
3295 if (i != grp) 3301 if (i != grp)
3296 continue; 3302 continue;
3297 s = 0; 3303 s = 0;
3298 if (ext4_bg_has_super(sb, grp)) { 3304 if (ext4_bg_has_super(sb, grp)) {
3299 ext4_set_bit(s++, buf); 3305 ext4_set_bit(s++, buf);
3300 count++; 3306 count++;
3301 } 3307 }
3302 for (j = ext4_bg_num_gdb(sb, grp); j > 0; j--) { 3308 for (j = ext4_bg_num_gdb(sb, grp); j > 0; j--) {
3303 ext4_set_bit(EXT4_B2C(sbi, s++), buf); 3309 ext4_set_bit(EXT4_B2C(sbi, s++), buf);
3304 count++; 3310 count++;
3305 } 3311 }
3306 } 3312 }
3307 if (!count) 3313 if (!count)
3308 return 0; 3314 return 0;
3309 return EXT4_CLUSTERS_PER_GROUP(sb) - 3315 return EXT4_CLUSTERS_PER_GROUP(sb) -
3310 ext4_count_free(buf, EXT4_CLUSTERS_PER_GROUP(sb) / 8); 3316 ext4_count_free(buf, EXT4_CLUSTERS_PER_GROUP(sb) / 8);
3311 } 3317 }
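
count_overhead() marks one bit per metadata cluster in a scratch bitmap and then subtracts the number of still-zero bits from the clusters per group, which is what the ext4_count_free() call computes. A minimal sketch of that counting step, with a naive bit loop standing in for ext4_count_free():

#include <stdio.h>
#include <string.h>

/* Count zero bits in a bitmap of 'nbytes' bytes, like ext4_count_free(). */
static int count_free(const unsigned char *bitmap, int nbytes)
{
    int i, b, nfree = 0;

    for (i = 0; i < nbytes; i++)
        for (b = 0; b < 8; b++)
            if (!(bitmap[i] & (1 << b)))
                nfree++;
    return nfree;
}

int main(void)
{
    unsigned char buf[4];       /* scratch bitmap: 32 clusters */
    int clusters_per_group = 32;

    memset(buf, 0, sizeof(buf));
    buf[0] |= 0x07;             /* pretend clusters 0-2 hold metadata */

    printf("overhead clusters: %d\n",
           clusters_per_group - count_free(buf, sizeof(buf)));
    return 0;
}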
3312 3318
3313 /* 3319 /*
3314 * Compute the overhead and stash it in sbi->s_overhead 3320 * Compute the overhead and stash it in sbi->s_overhead
3315 */ 3321 */
3316 int ext4_calculate_overhead(struct super_block *sb) 3322 int ext4_calculate_overhead(struct super_block *sb)
3317 { 3323 {
3318 struct ext4_sb_info *sbi = EXT4_SB(sb); 3324 struct ext4_sb_info *sbi = EXT4_SB(sb);
3319 struct ext4_super_block *es = sbi->s_es; 3325 struct ext4_super_block *es = sbi->s_es;
3320 ext4_group_t i, ngroups = ext4_get_groups_count(sb); 3326 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
3321 ext4_fsblk_t overhead = 0; 3327 ext4_fsblk_t overhead = 0;
3322 char *buf = (char *) get_zeroed_page(GFP_NOFS); 3328 char *buf = (char *) get_zeroed_page(GFP_NOFS);
3323 3329
3324 if (!buf) 3330 if (!buf)
3325 return -ENOMEM; 3331 return -ENOMEM;
3326 3332
3327 /* 3333 /*
3328 * Compute the overhead (FS structures). This is constant 3334 * Compute the overhead (FS structures). This is constant
3329 * for a given filesystem unless the number of block groups 3335 * for a given filesystem unless the number of block groups
3330 * changes, so we cache the previous value until it does. 3336 * changes, so we cache the previous value until it does.
3331 */ 3337 */
3332 3338
3333 /* 3339 /*
3334 * All of the blocks before first_data_block are overhead 3340 * All of the blocks before first_data_block are overhead
3335 */ 3341 */
3336 overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block)); 3342 overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block));
3337 3343
3338 /* 3344 /*
3339 * Add the overhead found in each block group 3345 * Add the overhead found in each block group
3340 */ 3346 */
3341 for (i = 0; i < ngroups; i++) { 3347 for (i = 0; i < ngroups; i++) {
3342 int blks; 3348 int blks;
3343 3349
3344 blks = count_overhead(sb, i, buf); 3350 blks = count_overhead(sb, i, buf);
3345 overhead += blks; 3351 overhead += blks;
3346 if (blks) 3352 if (blks)
3347 memset(buf, 0, PAGE_SIZE); 3353 memset(buf, 0, PAGE_SIZE);
3348 cond_resched(); 3354 cond_resched();
3349 } 3355 }
3350 /* Add the internal journal blocks as well */ 3356 /* Add the internal journal blocks as well */
3351 if (sbi->s_journal && !sbi->journal_bdev) 3357 if (sbi->s_journal && !sbi->journal_bdev)
3352 overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen); 3358 overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen);
3353 3359
3354 sbi->s_overhead = overhead; 3360 sbi->s_overhead = overhead;
3355 smp_wmb(); 3361 smp_wmb();
3356 free_page((unsigned long) buf); 3362 free_page((unsigned long) buf);
3357 return 0; 3363 return 0;
3358 } 3364 }
3359 3365
3360 3366
3361 static ext4_fsblk_t ext4_calculate_resv_clusters(struct super_block *sb) 3367 static ext4_fsblk_t ext4_calculate_resv_clusters(struct super_block *sb)
3362 { 3368 {
3363 ext4_fsblk_t resv_clusters; 3369 ext4_fsblk_t resv_clusters;
3364 3370
3365 /* 3371 /*
3366 * There's no need to reserve anything when we aren't using extents. 3372 * There's no need to reserve anything when we aren't using extents.
3367 * The space estimates are exact, there are no unwritten extents, 3373 * The space estimates are exact, there are no unwritten extents,
3368 * hole punching doesn't need new metadata... This is especially 3374 * hole punching doesn't need new metadata... This is especially
3369 * important for ext2/3 backward compatibility. 3375 * important for ext2/3 backward compatibility.
3370 */ 3376 */
3371 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) 3377 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS))
3372 return 0; 3378 return 0;
3373 /* 3379 /*
3374 * By default we reserve 2% or 4096 clusters, whichever is smaller. 3380 * By default we reserve 2% or 4096 clusters, whichever is smaller.
3375 * This should cover the situations where we cannot afford to run 3381 * This should cover the situations where we cannot afford to run
3376 * out of space, such as punching a hole or converting 3382 * out of space, such as punching a hole or converting
3377 * unwritten extents in the delalloc path. In most cases such an 3383 * unwritten extents in the delalloc path. In most cases such an
3378 * allocation would require 1 or 2 blocks; higher numbers are 3384 * allocation would require 1 or 2 blocks; higher numbers are
3379 * very rare. 3385 * very rare.
3380 */ 3386 */
3381 resv_clusters = ext4_blocks_count(EXT4_SB(sb)->s_es) >> 3387 resv_clusters = ext4_blocks_count(EXT4_SB(sb)->s_es) >>
3382 EXT4_SB(sb)->s_cluster_bits; 3388 EXT4_SB(sb)->s_cluster_bits;
3383 3389
3384 do_div(resv_clusters, 50); 3390 do_div(resv_clusters, 50);
3385 resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096); 3391 resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096);
3386 3392
3387 return resv_clusters; 3393 return resv_clusters;
3388 } 3394 }
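
So the reservation is simply min(2% of the cluster count, 4096). A standalone sketch of the same arithmetic; the 1 TiB example volume is hypothetical:

#include <stdio.h>
#include <stdint.h>

/* Reserve 2% of clusters, capped at 4096 - the policy above. */
static uint64_t resv_clusters(uint64_t total_blocks, unsigned cluster_bits)
{
    uint64_t resv = (total_blocks >> cluster_bits) / 50;    /* 2% */

    return resv < 4096 ? resv : 4096;
}

int main(void)
{
    /* 1 TiB of 4 KiB blocks, no bigalloc: 268435456 blocks. */
    printf("%llu clusters reserved\n",
           (unsigned long long)resv_clusters(268435456ULL, 0));
    return 0;
}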
3389 3395
3390 3396
3391 static int ext4_reserve_clusters(struct ext4_sb_info *sbi, ext4_fsblk_t count) 3397 static int ext4_reserve_clusters(struct ext4_sb_info *sbi, ext4_fsblk_t count)
3392 { 3398 {
3393 ext4_fsblk_t clusters = ext4_blocks_count(sbi->s_es) >> 3399 ext4_fsblk_t clusters = ext4_blocks_count(sbi->s_es) >>
3394 sbi->s_cluster_bits; 3400 sbi->s_cluster_bits;
3395 3401
3396 if (count >= clusters) 3402 if (count >= clusters)
3397 return -EINVAL; 3403 return -EINVAL;
3398 3404
3399 atomic64_set(&sbi->s_resv_clusters, count); 3405 atomic64_set(&sbi->s_resv_clusters, count);
3400 return 0; 3406 return 0;
3401 } 3407 }
3402 3408
3403 static int ext4_fill_super(struct super_block *sb, void *data, int silent) 3409 static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3404 { 3410 {
3405 char *orig_data = kstrdup(data, GFP_KERNEL); 3411 char *orig_data = kstrdup(data, GFP_KERNEL);
3406 struct buffer_head *bh; 3412 struct buffer_head *bh;
3407 struct ext4_super_block *es = NULL; 3413 struct ext4_super_block *es = NULL;
3408 struct ext4_sb_info *sbi; 3414 struct ext4_sb_info *sbi;
3409 ext4_fsblk_t block; 3415 ext4_fsblk_t block;
3410 ext4_fsblk_t sb_block = get_sb_block(&data); 3416 ext4_fsblk_t sb_block = get_sb_block(&data);
3411 ext4_fsblk_t logical_sb_block; 3417 ext4_fsblk_t logical_sb_block;
3412 unsigned long offset = 0; 3418 unsigned long offset = 0;
3413 unsigned long journal_devnum = 0; 3419 unsigned long journal_devnum = 0;
3414 unsigned long def_mount_opts; 3420 unsigned long def_mount_opts;
3415 struct inode *root; 3421 struct inode *root;
3416 char *cp; 3422 char *cp;
3417 const char *descr; 3423 const char *descr;
3418 int ret = -ENOMEM; 3424 int ret = -ENOMEM;
3419 int blocksize, clustersize; 3425 int blocksize, clustersize;
3420 unsigned int db_count; 3426 unsigned int db_count;
3421 unsigned int i; 3427 unsigned int i;
3422 int needs_recovery, has_huge_files, has_bigalloc; 3428 int needs_recovery, has_huge_files, has_bigalloc;
3423 __u64 blocks_count; 3429 __u64 blocks_count;
3424 int err = 0; 3430 int err = 0;
3425 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; 3431 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
3426 ext4_group_t first_not_zeroed; 3432 ext4_group_t first_not_zeroed;
3427 3433
3428 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 3434 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
3429 if (!sbi) 3435 if (!sbi)
3430 goto out_free_orig; 3436 goto out_free_orig;
3431 3437
3432 sbi->s_blockgroup_lock = 3438 sbi->s_blockgroup_lock =
3433 kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL); 3439 kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
3434 if (!sbi->s_blockgroup_lock) { 3440 if (!sbi->s_blockgroup_lock) {
3435 kfree(sbi); 3441 kfree(sbi);
3436 goto out_free_orig; 3442 goto out_free_orig;
3437 } 3443 }
3438 sb->s_fs_info = sbi; 3444 sb->s_fs_info = sbi;
3439 sbi->s_sb = sb; 3445 sbi->s_sb = sb;
3440 sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; 3446 sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
3441 sbi->s_sb_block = sb_block; 3447 sbi->s_sb_block = sb_block;
3442 if (sb->s_bdev->bd_part) 3448 if (sb->s_bdev->bd_part)
3443 sbi->s_sectors_written_start = 3449 sbi->s_sectors_written_start =
3444 part_stat_read(sb->s_bdev->bd_part, sectors[1]); 3450 part_stat_read(sb->s_bdev->bd_part, sectors[1]);
3445 3451
3446 /* Cleanup superblock name */ 3452 /* Cleanup superblock name */
3447 for (cp = sb->s_id; (cp = strchr(cp, '/'));) 3453 for (cp = sb->s_id; (cp = strchr(cp, '/'));)
3448 *cp = '!'; 3454 *cp = '!';
3449 3455
3450 /* -EINVAL is default */ 3456 /* -EINVAL is default */
3451 ret = -EINVAL; 3457 ret = -EINVAL;
3452 blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); 3458 blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
3453 if (!blocksize) { 3459 if (!blocksize) {
3454 ext4_msg(sb, KERN_ERR, "unable to set blocksize"); 3460 ext4_msg(sb, KERN_ERR, "unable to set blocksize");
3455 goto out_fail; 3461 goto out_fail;
3456 } 3462 }
3457 3463
3458 /* 3464 /*
3459 * The ext4 superblock will not be buffer aligned for block sizes 3465 * The ext4 superblock will not be buffer aligned for block sizes
3460 * other than 1kB. We need to calculate the offset from the buffer start. 3466 * other than 1kB. We need to calculate the offset from the buffer start.
3461 */ 3467 */
3462 if (blocksize != EXT4_MIN_BLOCK_SIZE) { 3468 if (blocksize != EXT4_MIN_BLOCK_SIZE) {
3463 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; 3469 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
3464 offset = do_div(logical_sb_block, blocksize); 3470 offset = do_div(logical_sb_block, blocksize);
3465 } else { 3471 } else {
3466 logical_sb_block = sb_block; 3472 logical_sb_block = sb_block;
3467 } 3473 }
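
Since sb_block is expressed in 1 KiB units, the superblock's byte address is sb_block * 1024; dividing by the larger device block size yields the buffer to read, and the remainder is the superblock's offset inside that buffer. A worked user-space sketch with the default sb_block of 1 and a 4 KiB block size (do_div() replaced by plain division):

#include <stdio.h>
#include <stdint.h>

#define EXT4_MIN_BLOCK_SIZE 1024

int main(void)
{
    uint64_t sb_block = 1;      /* default: 1 KiB into the device */
    unsigned blocksize = 4096;  /* chosen device block size */
    uint64_t logical_sb_block;
    unsigned offset;

    if (blocksize != EXT4_MIN_BLOCK_SIZE) {
        uint64_t bytes = sb_block * EXT4_MIN_BLOCK_SIZE;

        logical_sb_block = bytes / blocksize;   /* do_div quotient */
        offset = (unsigned)(bytes % blocksize); /* do_div remainder */
    } else {
        logical_sb_block = sb_block;
        offset = 0;
    }
    /* With 4 KiB blocks: read block 0, superblock at byte 1024. */
    printf("block %llu, offset %u\n",
           (unsigned long long)logical_sb_block, offset);
    return 0;
}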
3468 3474
3469 if (!(bh = sb_bread_unmovable(sb, logical_sb_block))) { 3475 if (!(bh = sb_bread_unmovable(sb, logical_sb_block))) {
3470 ext4_msg(sb, KERN_ERR, "unable to read superblock"); 3476 ext4_msg(sb, KERN_ERR, "unable to read superblock");
3471 goto out_fail; 3477 goto out_fail;
3472 } 3478 }
3473 /* 3479 /*
3474 * Note: s_es must be initialized as soon as possible because 3480 * Note: s_es must be initialized as soon as possible because
3475 * some ext4 macros depend on its value 3481 * some ext4 macros depend on its value
3476 */ 3482 */
3477 es = (struct ext4_super_block *) (bh->b_data + offset); 3483 es = (struct ext4_super_block *) (bh->b_data + offset);
3478 sbi->s_es = es; 3484 sbi->s_es = es;
3479 sb->s_magic = le16_to_cpu(es->s_magic); 3485 sb->s_magic = le16_to_cpu(es->s_magic);
3480 if (sb->s_magic != EXT4_SUPER_MAGIC) 3486 if (sb->s_magic != EXT4_SUPER_MAGIC)
3481 goto cantfind_ext4; 3487 goto cantfind_ext4;
3482 sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written); 3488 sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written);
3483 3489
3484 /* Warn if metadata_csum and gdt_csum are both set. */ 3490 /* Warn if metadata_csum and gdt_csum are both set. */
3485 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 3491 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
3486 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) && 3492 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
3487 EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) 3493 EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
3488 ext4_warning(sb, "metadata_csum and uninit_bg are " 3494 ext4_warning(sb, "metadata_csum and uninit_bg are "
3489 "redundant flags; please run fsck."); 3495 "redundant flags; please run fsck.");
3490 3496
3491 /* Check for a known checksum algorithm */ 3497 /* Check for a known checksum algorithm */
3492 if (!ext4_verify_csum_type(sb, es)) { 3498 if (!ext4_verify_csum_type(sb, es)) {
3493 ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with " 3499 ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
3494 "unknown checksum algorithm."); 3500 "unknown checksum algorithm.");
3495 silent = 1; 3501 silent = 1;
3496 goto cantfind_ext4; 3502 goto cantfind_ext4;
3497 } 3503 }
3498 3504
3499 /* Load the checksum driver */ 3505 /* Load the checksum driver */
3500 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 3506 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
3501 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { 3507 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
3502 sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); 3508 sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
3503 if (IS_ERR(sbi->s_chksum_driver)) { 3509 if (IS_ERR(sbi->s_chksum_driver)) {
3504 ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver."); 3510 ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver.");
3505 ret = PTR_ERR(sbi->s_chksum_driver); 3511 ret = PTR_ERR(sbi->s_chksum_driver);
3506 sbi->s_chksum_driver = NULL; 3512 sbi->s_chksum_driver = NULL;
3507 goto failed_mount; 3513 goto failed_mount;
3508 } 3514 }
3509 } 3515 }
3510 3516
3511 /* Check superblock checksum */ 3517 /* Check superblock checksum */
3512 if (!ext4_superblock_csum_verify(sb, es)) { 3518 if (!ext4_superblock_csum_verify(sb, es)) {
3513 ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with " 3519 ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
3514 "invalid superblock checksum. Run e2fsck?"); 3520 "invalid superblock checksum. Run e2fsck?");
3515 silent = 1; 3521 silent = 1;
3516 goto cantfind_ext4; 3522 goto cantfind_ext4;
3517 } 3523 }
3518 3524
3519 /* Precompute checksum seed for all metadata */ 3525 /* Precompute checksum seed for all metadata */
3520 if (ext4_has_metadata_csum(sb)) 3526 if (ext4_has_metadata_csum(sb))
3521 sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid, 3527 sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid,
3522 sizeof(es->s_uuid)); 3528 sizeof(es->s_uuid));
3523 3529
3524 /* Set defaults before we parse the mount options */ 3530 /* Set defaults before we parse the mount options */
3525 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 3531 def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
3526 set_opt(sb, INIT_INODE_TABLE); 3532 set_opt(sb, INIT_INODE_TABLE);
3527 if (def_mount_opts & EXT4_DEFM_DEBUG) 3533 if (def_mount_opts & EXT4_DEFM_DEBUG)
3528 set_opt(sb, DEBUG); 3534 set_opt(sb, DEBUG);
3529 if (def_mount_opts & EXT4_DEFM_BSDGROUPS) 3535 if (def_mount_opts & EXT4_DEFM_BSDGROUPS)
3530 set_opt(sb, GRPID); 3536 set_opt(sb, GRPID);
3531 if (def_mount_opts & EXT4_DEFM_UID16) 3537 if (def_mount_opts & EXT4_DEFM_UID16)
3532 set_opt(sb, NO_UID32); 3538 set_opt(sb, NO_UID32);
3533 /* xattr user namespace & acls are now defaulted on */ 3539 /* xattr user namespace & acls are now defaulted on */
3534 set_opt(sb, XATTR_USER); 3540 set_opt(sb, XATTR_USER);
3535 #ifdef CONFIG_EXT4_FS_POSIX_ACL 3541 #ifdef CONFIG_EXT4_FS_POSIX_ACL
3536 set_opt(sb, POSIX_ACL); 3542 set_opt(sb, POSIX_ACL);
3537 #endif 3543 #endif
3538 /* don't forget to enable journal_csum when metadata_csum is enabled. */ 3544 /* don't forget to enable journal_csum when metadata_csum is enabled. */
3539 if (ext4_has_metadata_csum(sb)) 3545 if (ext4_has_metadata_csum(sb))
3540 set_opt(sb, JOURNAL_CHECKSUM); 3546 set_opt(sb, JOURNAL_CHECKSUM);
3541 3547
3542 if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) 3548 if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
3543 set_opt(sb, JOURNAL_DATA); 3549 set_opt(sb, JOURNAL_DATA);
3544 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) 3550 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
3545 set_opt(sb, ORDERED_DATA); 3551 set_opt(sb, ORDERED_DATA);
3546 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK) 3552 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
3547 set_opt(sb, WRITEBACK_DATA); 3553 set_opt(sb, WRITEBACK_DATA);
3548 3554
3549 if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC) 3555 if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC)
3550 set_opt(sb, ERRORS_PANIC); 3556 set_opt(sb, ERRORS_PANIC);
3551 else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE) 3557 else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE)
3552 set_opt(sb, ERRORS_CONT); 3558 set_opt(sb, ERRORS_CONT);
3553 else 3559 else
3554 set_opt(sb, ERRORS_RO); 3560 set_opt(sb, ERRORS_RO);
3555 /* block_validity enabled by default; disable with noblock_validity */ 3561 /* block_validity enabled by default; disable with noblock_validity */
3556 set_opt(sb, BLOCK_VALIDITY); 3562 set_opt(sb, BLOCK_VALIDITY);
3557 if (def_mount_opts & EXT4_DEFM_DISCARD) 3563 if (def_mount_opts & EXT4_DEFM_DISCARD)
3558 set_opt(sb, DISCARD); 3564 set_opt(sb, DISCARD);
3559 3565
3560 sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid)); 3566 sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid));
3561 sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid)); 3567 sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid));
3562 sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ; 3568 sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
3563 sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; 3569 sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
3564 sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; 3570 sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
3565 3571
3566 if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0) 3572 if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0)
3567 set_opt(sb, BARRIER); 3573 set_opt(sb, BARRIER);
3568 3574
3569 /* 3575 /*
3570 * Enable delayed allocation by default. 3576 * Enable delayed allocation by default.
3571 * Use -o nodelalloc to turn it off. 3577 * Use -o nodelalloc to turn it off.
3572 */ 3578 */
3573 if (!IS_EXT3_SB(sb) && !IS_EXT2_SB(sb) && 3579 if (!IS_EXT3_SB(sb) && !IS_EXT2_SB(sb) &&
3574 ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0)) 3580 ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
3575 set_opt(sb, DELALLOC); 3581 set_opt(sb, DELALLOC);
3576 3582
3577 /* 3583 /*
3578 * Set the default s_li_wait_mult for lazyinit, in case no 3584 * Set the default s_li_wait_mult for lazyinit, in case no
3579 * mount option is specified. 3585 * mount option is specified.
3580 */ 3586 */
3581 sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT; 3587 sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
3582 3588
3583 if (!parse_options((char *) sbi->s_es->s_mount_opts, sb, 3589 if (!parse_options((char *) sbi->s_es->s_mount_opts, sb,
3584 &journal_devnum, &journal_ioprio, 0)) { 3590 &journal_devnum, &journal_ioprio, 0)) {
3585 ext4_msg(sb, KERN_WARNING, 3591 ext4_msg(sb, KERN_WARNING,
3586 "failed to parse options in superblock: %s", 3592 "failed to parse options in superblock: %s",
3587 sbi->s_es->s_mount_opts); 3593 sbi->s_es->s_mount_opts);
3588 } 3594 }
3589 sbi->s_def_mount_opt = sbi->s_mount_opt; 3595 sbi->s_def_mount_opt = sbi->s_mount_opt;
3590 if (!parse_options((char *) data, sb, &journal_devnum, 3596 if (!parse_options((char *) data, sb, &journal_devnum,
3591 &journal_ioprio, 0)) 3597 &journal_ioprio, 0))
3592 goto failed_mount; 3598 goto failed_mount;
3593 3599
3594 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { 3600 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
3595 printk_once(KERN_WARNING "EXT4-fs: Warning: mounting " 3601 printk_once(KERN_WARNING "EXT4-fs: Warning: mounting "
3596 "with data=journal disables delayed " 3602 "with data=journal disables delayed "
3597 "allocation and O_DIRECT support!\n"); 3603 "allocation and O_DIRECT support!\n");
3598 if (test_opt2(sb, EXPLICIT_DELALLOC)) { 3604 if (test_opt2(sb, EXPLICIT_DELALLOC)) {
3599 ext4_msg(sb, KERN_ERR, "can't mount with " 3605 ext4_msg(sb, KERN_ERR, "can't mount with "
3600 "both data=journal and delalloc"); 3606 "both data=journal and delalloc");
3601 goto failed_mount; 3607 goto failed_mount;
3602 } 3608 }
3603 if (test_opt(sb, DIOREAD_NOLOCK)) { 3609 if (test_opt(sb, DIOREAD_NOLOCK)) {
3604 ext4_msg(sb, KERN_ERR, "can't mount with " 3610 ext4_msg(sb, KERN_ERR, "can't mount with "
3605 "both data=journal and dioread_nolock"); 3611 "both data=journal and dioread_nolock");
3606 goto failed_mount; 3612 goto failed_mount;
3607 } 3613 }
3608 if (test_opt(sb, DAX)) { 3614 if (test_opt(sb, DAX)) {
3609 ext4_msg(sb, KERN_ERR, "can't mount with " 3615 ext4_msg(sb, KERN_ERR, "can't mount with "
3610 "both data=journal and dax"); 3616 "both data=journal and dax");
3611 goto failed_mount; 3617 goto failed_mount;
3612 } 3618 }
3613 if (test_opt(sb, DELALLOC)) 3619 if (test_opt(sb, DELALLOC))
3614 clear_opt(sb, DELALLOC); 3620 clear_opt(sb, DELALLOC);
3615 } 3621 }
3616 3622
3617 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 3623 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
3618 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); 3624 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
3619 3625
3620 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV && 3626 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV &&
3621 (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) || 3627 (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) ||
3622 EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) || 3628 EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
3623 EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U))) 3629 EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U)))
3624 ext4_msg(sb, KERN_WARNING, 3630 ext4_msg(sb, KERN_WARNING,
3625 "feature flags set on rev 0 fs, " 3631 "feature flags set on rev 0 fs, "
3626 "running e2fsck is recommended"); 3632 "running e2fsck is recommended");
3627 3633
3628 if (es->s_creator_os == cpu_to_le32(EXT4_OS_HURD)) { 3634 if (es->s_creator_os == cpu_to_le32(EXT4_OS_HURD)) {
3629 set_opt2(sb, HURD_COMPAT); 3635 set_opt2(sb, HURD_COMPAT);
3630 if (EXT4_HAS_INCOMPAT_FEATURE(sb, 3636 if (EXT4_HAS_INCOMPAT_FEATURE(sb,
3631 EXT4_FEATURE_INCOMPAT_64BIT)) { 3637 EXT4_FEATURE_INCOMPAT_64BIT)) {
3632 ext4_msg(sb, KERN_ERR, 3638 ext4_msg(sb, KERN_ERR,
3633 "The Hurd can't support 64-bit file systems"); 3639 "The Hurd can't support 64-bit file systems");
3634 goto failed_mount; 3640 goto failed_mount;
3635 } 3641 }
3636 } 3642 }
3637 3643
3638 if (IS_EXT2_SB(sb)) { 3644 if (IS_EXT2_SB(sb)) {
3639 if (ext2_feature_set_ok(sb)) 3645 if (ext2_feature_set_ok(sb))
3640 ext4_msg(sb, KERN_INFO, "mounting ext2 file system " 3646 ext4_msg(sb, KERN_INFO, "mounting ext2 file system "
3641 "using the ext4 subsystem"); 3647 "using the ext4 subsystem");
3642 else { 3648 else {
3643 ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due " 3649 ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due "
3644 "to feature incompatibilities"); 3650 "to feature incompatibilities");
3645 goto failed_mount; 3651 goto failed_mount;
3646 } 3652 }
3647 } 3653 }
3648 3654
3649 if (IS_EXT3_SB(sb)) { 3655 if (IS_EXT3_SB(sb)) {
3650 if (ext3_feature_set_ok(sb)) 3656 if (ext3_feature_set_ok(sb))
3651 ext4_msg(sb, KERN_INFO, "mounting ext3 file system " 3657 ext4_msg(sb, KERN_INFO, "mounting ext3 file system "
3652 "using the ext4 subsystem"); 3658 "using the ext4 subsystem");
3653 else { 3659 else {
3654 ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due " 3660 ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due "
3655 "to feature incompatibilities"); 3661 "to feature incompatibilities");
3656 goto failed_mount; 3662 goto failed_mount;
3657 } 3663 }
3658 } 3664 }
3659 3665
3660 /* 3666 /*
3661 * Check feature flags regardless of the revision level, since we 3667 * Check feature flags regardless of the revision level, since we
3662 * previously didn't change the revision level when setting the flags, 3668 * previously didn't change the revision level when setting the flags,
3663 * so there is a chance incompat flags are set on a rev 0 filesystem. 3669 * so there is a chance incompat flags are set on a rev 0 filesystem.
3664 */ 3670 */
3665 if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY))) 3671 if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY)))
3666 goto failed_mount; 3672 goto failed_mount;
3667 3673
3668 blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); 3674 blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
3669 if (blocksize < EXT4_MIN_BLOCK_SIZE || 3675 if (blocksize < EXT4_MIN_BLOCK_SIZE ||
3670 blocksize > EXT4_MAX_BLOCK_SIZE) { 3676 blocksize > EXT4_MAX_BLOCK_SIZE) {
3671 ext4_msg(sb, KERN_ERR, 3677 ext4_msg(sb, KERN_ERR,
3672 "Unsupported filesystem blocksize %d", blocksize); 3678 "Unsupported filesystem blocksize %d", blocksize);
3673 goto failed_mount; 3679 goto failed_mount;
3674 } 3680 }
3675 3681
3676 if (sbi->s_mount_opt & EXT4_MOUNT_DAX) { 3682 if (sbi->s_mount_opt & EXT4_MOUNT_DAX) {
3677 if (blocksize != PAGE_SIZE) { 3683 if (blocksize != PAGE_SIZE) {
3678 ext4_msg(sb, KERN_ERR, 3684 ext4_msg(sb, KERN_ERR,
3679 "error: unsupported blocksize for dax"); 3685 "error: unsupported blocksize for dax");
3680 goto failed_mount; 3686 goto failed_mount;
3681 } 3687 }
3682 if (!sb->s_bdev->bd_disk->fops->direct_access) { 3688 if (!sb->s_bdev->bd_disk->fops->direct_access) {
3683 ext4_msg(sb, KERN_ERR, 3689 ext4_msg(sb, KERN_ERR,
3684 "error: device does not support dax"); 3690 "error: device does not support dax");
3685 goto failed_mount; 3691 goto failed_mount;
3686 } 3692 }
3687 } 3693 }
3688 3694
3689 if (sb->s_blocksize != blocksize) { 3695 if (sb->s_blocksize != blocksize) {
3690 /* Validate the filesystem blocksize */ 3696 /* Validate the filesystem blocksize */
3691 if (!sb_set_blocksize(sb, blocksize)) { 3697 if (!sb_set_blocksize(sb, blocksize)) {
3692 ext4_msg(sb, KERN_ERR, "bad block size %d", 3698 ext4_msg(sb, KERN_ERR, "bad block size %d",
3693 blocksize); 3699 blocksize);
3694 goto failed_mount; 3700 goto failed_mount;
3695 } 3701 }
3696 3702
3697 brelse(bh); 3703 brelse(bh);
3698 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; 3704 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
3699 offset = do_div(logical_sb_block, blocksize); 3705 offset = do_div(logical_sb_block, blocksize);
3700 bh = sb_bread_unmovable(sb, logical_sb_block); 3706 bh = sb_bread_unmovable(sb, logical_sb_block);
3701 if (!bh) { 3707 if (!bh) {
3702 ext4_msg(sb, KERN_ERR, 3708 ext4_msg(sb, KERN_ERR,
3703 "Can't read superblock on 2nd try"); 3709 "Can't read superblock on 2nd try");
3704 goto failed_mount; 3710 goto failed_mount;
3705 } 3711 }
3706 es = (struct ext4_super_block *)(bh->b_data + offset); 3712 es = (struct ext4_super_block *)(bh->b_data + offset);
3707 sbi->s_es = es; 3713 sbi->s_es = es;
3708 if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) { 3714 if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
3709 ext4_msg(sb, KERN_ERR, 3715 ext4_msg(sb, KERN_ERR,
3710 "Magic mismatch, very weird!"); 3716 "Magic mismatch, very weird!");
3711 goto failed_mount; 3717 goto failed_mount;
3712 } 3718 }
3713 } 3719 }
3714 3720
3715 has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb, 3721 has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb,
3716 EXT4_FEATURE_RO_COMPAT_HUGE_FILE); 3722 EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
3717 sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits, 3723 sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
3718 has_huge_files); 3724 has_huge_files);
3719 sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files); 3725 sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
3720 3726
3721 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { 3727 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
3722 sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE; 3728 sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
3723 sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO; 3729 sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO;
3724 } else { 3730 } else {
3725 sbi->s_inode_size = le16_to_cpu(es->s_inode_size); 3731 sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
3726 sbi->s_first_ino = le32_to_cpu(es->s_first_ino); 3732 sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
3727 if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || 3733 if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
3728 (!is_power_of_2(sbi->s_inode_size)) || 3734 (!is_power_of_2(sbi->s_inode_size)) ||
3729 (sbi->s_inode_size > blocksize)) { 3735 (sbi->s_inode_size > blocksize)) {
3730 ext4_msg(sb, KERN_ERR, 3736 ext4_msg(sb, KERN_ERR,
3731 "unsupported inode size: %d", 3737 "unsupported inode size: %d",
3732 sbi->s_inode_size); 3738 sbi->s_inode_size);
3733 goto failed_mount; 3739 goto failed_mount;
3734 } 3740 }
3735 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) 3741 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
3736 sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2); 3742 sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2);
3737 } 3743 }
3738 3744
3739 sbi->s_desc_size = le16_to_cpu(es->s_desc_size); 3745 sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
3740 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) { 3746 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) {
3741 if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT || 3747 if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
3742 sbi->s_desc_size > EXT4_MAX_DESC_SIZE || 3748 sbi->s_desc_size > EXT4_MAX_DESC_SIZE ||
3743 !is_power_of_2(sbi->s_desc_size)) { 3749 !is_power_of_2(sbi->s_desc_size)) {
3744 ext4_msg(sb, KERN_ERR, 3750 ext4_msg(sb, KERN_ERR,
3745 "unsupported descriptor size %lu", 3751 "unsupported descriptor size %lu",
3746 sbi->s_desc_size); 3752 sbi->s_desc_size);
3747 goto failed_mount; 3753 goto failed_mount;
3748 } 3754 }
3749 } else 3755 } else
3750 sbi->s_desc_size = EXT4_MIN_DESC_SIZE; 3756 sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
3751 3757
3752 sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); 3758 sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
3753 sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); 3759 sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
3754 if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0) 3760 if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0)
3755 goto cantfind_ext4; 3761 goto cantfind_ext4;
3756 3762
3757 sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb); 3763 sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb);
3758 if (sbi->s_inodes_per_block == 0) 3764 if (sbi->s_inodes_per_block == 0)
3759 goto cantfind_ext4; 3765 goto cantfind_ext4;
3760 sbi->s_itb_per_group = sbi->s_inodes_per_group / 3766 sbi->s_itb_per_group = sbi->s_inodes_per_group /
3761 sbi->s_inodes_per_block; 3767 sbi->s_inodes_per_block;
3762 sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb); 3768 sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb);
3763 sbi->s_sbh = bh; 3769 sbi->s_sbh = bh;
3764 sbi->s_mount_state = le16_to_cpu(es->s_state); 3770 sbi->s_mount_state = le16_to_cpu(es->s_state);
3765 sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb)); 3771 sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
3766 sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); 3772 sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
3767 3773
3768 for (i = 0; i < 4; i++) 3774 for (i = 0; i < 4; i++)
3769 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); 3775 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
3770 sbi->s_def_hash_version = es->s_def_hash_version; 3776 sbi->s_def_hash_version = es->s_def_hash_version;
3771 if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) { 3777 if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) {
3772 i = le32_to_cpu(es->s_flags); 3778 i = le32_to_cpu(es->s_flags);
3773 if (i & EXT2_FLAGS_UNSIGNED_HASH) 3779 if (i & EXT2_FLAGS_UNSIGNED_HASH)
3774 sbi->s_hash_unsigned = 3; 3780 sbi->s_hash_unsigned = 3;
3775 else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) { 3781 else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
3776 #ifdef __CHAR_UNSIGNED__ 3782 #ifdef __CHAR_UNSIGNED__
3777 if (!(sb->s_flags & MS_RDONLY)) 3783 if (!(sb->s_flags & MS_RDONLY))
3778 es->s_flags |= 3784 es->s_flags |=
3779 cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH); 3785 cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
3780 sbi->s_hash_unsigned = 3; 3786 sbi->s_hash_unsigned = 3;
3781 #else 3787 #else
3782 if (!(sb->s_flags & MS_RDONLY)) 3788 if (!(sb->s_flags & MS_RDONLY))
3783 es->s_flags |= 3789 es->s_flags |=
3784 cpu_to_le32(EXT2_FLAGS_SIGNED_HASH); 3790 cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
3785 #endif 3791 #endif
3786 } 3792 }
3787 } 3793 }
3788 3794
3789 /* Handle clustersize */ 3795 /* Handle clustersize */
3790 clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size); 3796 clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size);
3791 has_bigalloc = EXT4_HAS_RO_COMPAT_FEATURE(sb, 3797 has_bigalloc = EXT4_HAS_RO_COMPAT_FEATURE(sb,
3792 EXT4_FEATURE_RO_COMPAT_BIGALLOC); 3798 EXT4_FEATURE_RO_COMPAT_BIGALLOC);
3793 if (has_bigalloc) { 3799 if (has_bigalloc) {
3794 if (clustersize < blocksize) { 3800 if (clustersize < blocksize) {
3795 ext4_msg(sb, KERN_ERR, 3801 ext4_msg(sb, KERN_ERR,
3796 "cluster size (%d) smaller than " 3802 "cluster size (%d) smaller than "
3797 "block size (%d)", clustersize, blocksize); 3803 "block size (%d)", clustersize, blocksize);
3798 goto failed_mount; 3804 goto failed_mount;
3799 } 3805 }
3800 sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) - 3806 sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) -
3801 le32_to_cpu(es->s_log_block_size); 3807 le32_to_cpu(es->s_log_block_size);
3802 sbi->s_clusters_per_group = 3808 sbi->s_clusters_per_group =
3803 le32_to_cpu(es->s_clusters_per_group); 3809 le32_to_cpu(es->s_clusters_per_group);
3804 if (sbi->s_clusters_per_group > blocksize * 8) { 3810 if (sbi->s_clusters_per_group > blocksize * 8) {
3805 ext4_msg(sb, KERN_ERR, 3811 ext4_msg(sb, KERN_ERR,
3806 "#clusters per group too big: %lu", 3812 "#clusters per group too big: %lu",
3807 sbi->s_clusters_per_group); 3813 sbi->s_clusters_per_group);
3808 goto failed_mount; 3814 goto failed_mount;
3809 } 3815 }
3810 if (sbi->s_blocks_per_group != 3816 if (sbi->s_blocks_per_group !=
3811 (sbi->s_clusters_per_group * (clustersize / blocksize))) { 3817 (sbi->s_clusters_per_group * (clustersize / blocksize))) {
3812 ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and " 3818 ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and "
3813 "clusters per group (%lu) inconsistent", 3819 "clusters per group (%lu) inconsistent",
3814 sbi->s_blocks_per_group, 3820 sbi->s_blocks_per_group,
3815 sbi->s_clusters_per_group); 3821 sbi->s_clusters_per_group);
3816 goto failed_mount; 3822 goto failed_mount;
3817 } 3823 }
3818 } else { 3824 } else {
3819 if (clustersize != blocksize) { 3825 if (clustersize != blocksize) {
3820 ext4_warning(sb, "fragment/cluster size (%d) != " 3826 ext4_warning(sb, "fragment/cluster size (%d) != "
3821 "block size (%d)", clustersize, 3827 "block size (%d)", clustersize,
3822 blocksize); 3828 blocksize);
3823 clustersize = blocksize; 3829 clustersize = blocksize;
3824 } 3830 }
3825 if (sbi->s_blocks_per_group > blocksize * 8) { 3831 if (sbi->s_blocks_per_group > blocksize * 8) {
3826 ext4_msg(sb, KERN_ERR, 3832 ext4_msg(sb, KERN_ERR,
3827 "#blocks per group too big: %lu", 3833 "#blocks per group too big: %lu",
3828 sbi->s_blocks_per_group); 3834 sbi->s_blocks_per_group);
3829 goto failed_mount; 3835 goto failed_mount;
3830 } 3836 }
3831 sbi->s_clusters_per_group = sbi->s_blocks_per_group; 3837 sbi->s_clusters_per_group = sbi->s_blocks_per_group;
3832 sbi->s_cluster_bits = 0; 3838 sbi->s_cluster_bits = 0;
3833 } 3839 }
3834 sbi->s_cluster_ratio = clustersize / blocksize; 3840 sbi->s_cluster_ratio = clustersize / blocksize;
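
For bigalloc filesystems the cluster math reduces to: s_cluster_bits is the difference of the two on-disk log fields, the cluster ratio is clustersize / blocksize, and blocks per group must equal clusters per group times that ratio. A self-contained sketch of those consistency checks; the 4 KiB block / 64 KiB cluster geometry is just an example:

#include <stdio.h>

/* Sanity-check bigalloc geometry the way the mount path does. */
static int check_bigalloc(unsigned log_block_size, unsigned log_cluster_size,
                          unsigned long blocks_per_group,
                          unsigned long clusters_per_group)
{
    unsigned blocksize = 1024U << log_block_size;   /* BLOCK_SIZE << log */
    unsigned clustersize = 1024U << log_cluster_size;
    unsigned cluster_bits = log_cluster_size - log_block_size;
    unsigned cluster_ratio = clustersize / blocksize;

    if (clustersize < blocksize)
        return -1;              /* cluster smaller than block: invalid */
    if (blocks_per_group != clusters_per_group * cluster_ratio)
        return -1;              /* inconsistent group geometry */
    printf("cluster_bits=%u ratio=%u\n", cluster_bits, cluster_ratio);
    return 0;
}

int main(void)
{
    /* 4 KiB blocks, 64 KiB clusters: ratio 16, 32768 blocks/group. */
    return check_bigalloc(2, 6, 32768, 2048) ? 1 : 0;
}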
3835 3841
3836 if (sbi->s_inodes_per_group > blocksize * 8) { 3842 if (sbi->s_inodes_per_group > blocksize * 8) {
3837 ext4_msg(sb, KERN_ERR, 3843 ext4_msg(sb, KERN_ERR,
3838 "#inodes per group too big: %lu", 3844 "#inodes per group too big: %lu",
3839 sbi->s_inodes_per_group); 3845 sbi->s_inodes_per_group);
3840 goto failed_mount; 3846 goto failed_mount;
3841 } 3847 }
3842 3848
3843 /* Do we have the standard group size of clustersize * 8 blocks? */ 3849 /* Do we have the standard group size of clustersize * 8 blocks? */
3844 if (sbi->s_blocks_per_group == clustersize << 3) 3850 if (sbi->s_blocks_per_group == clustersize << 3)
3845 set_opt2(sb, STD_GROUP_SIZE); 3851 set_opt2(sb, STD_GROUP_SIZE);
3846 3852
3847 /* 3853 /*
3848 * Test whether we have more sectors than will fit in sector_t, 3854 * Test whether we have more sectors than will fit in sector_t,
3849 * and whether the max offset is addressable by the page cache. 3855 * and whether the max offset is addressable by the page cache.
3850 */ 3856 */
3851 err = generic_check_addressable(sb->s_blocksize_bits, 3857 err = generic_check_addressable(sb->s_blocksize_bits,
3852 ext4_blocks_count(es)); 3858 ext4_blocks_count(es));
3853 if (err) { 3859 if (err) {
3854 ext4_msg(sb, KERN_ERR, "filesystem" 3860 ext4_msg(sb, KERN_ERR, "filesystem"
3855 " too large to mount safely on this system"); 3861 " too large to mount safely on this system");
3856 if (sizeof(sector_t) < 8) 3862 if (sizeof(sector_t) < 8)
3857 ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled"); 3863 ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled");
3858 goto failed_mount; 3864 goto failed_mount;
3859 } 3865 }
3860 3866
3861 if (EXT4_BLOCKS_PER_GROUP(sb) == 0) 3867 if (EXT4_BLOCKS_PER_GROUP(sb) == 0)
3862 goto cantfind_ext4; 3868 goto cantfind_ext4;
3863 3869
3864 /* check blocks count against device size */ 3870 /* check blocks count against device size */
3865 blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits; 3871 blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits;
3866 if (blocks_count && ext4_blocks_count(es) > blocks_count) { 3872 if (blocks_count && ext4_blocks_count(es) > blocks_count) {
3867 ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu " 3873 ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu "
3868 "exceeds size of device (%llu blocks)", 3874 "exceeds size of device (%llu blocks)",
3869 ext4_blocks_count(es), blocks_count); 3875 ext4_blocks_count(es), blocks_count);
3870 goto failed_mount; 3876 goto failed_mount;
3871 } 3877 }
3872 3878
3873 /* 3879 /*
3874 * It makes no sense for the first data block to be beyond the end 3880 * It makes no sense for the first data block to be beyond the end
3875 * of the filesystem. 3881 * of the filesystem.
3876 */ 3882 */
3877 if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) { 3883 if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
3878 ext4_msg(sb, KERN_WARNING, "bad geometry: first data " 3884 ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
3879 "block %u is beyond end of filesystem (%llu)", 3885 "block %u is beyond end of filesystem (%llu)",
3880 le32_to_cpu(es->s_first_data_block), 3886 le32_to_cpu(es->s_first_data_block),
3881 ext4_blocks_count(es)); 3887 ext4_blocks_count(es));
3882 goto failed_mount; 3888 goto failed_mount;
3883 } 3889 }
3884 blocks_count = (ext4_blocks_count(es) - 3890 blocks_count = (ext4_blocks_count(es) -
3885 le32_to_cpu(es->s_first_data_block) + 3891 le32_to_cpu(es->s_first_data_block) +
3886 EXT4_BLOCKS_PER_GROUP(sb) - 1); 3892 EXT4_BLOCKS_PER_GROUP(sb) - 1);
3887 do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb)); 3893 do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
3888 if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) { 3894 if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
3889 ext4_msg(sb, KERN_WARNING, "groups count too large: %u " 3895 ext4_msg(sb, KERN_WARNING, "groups count too large: %u "
3890 "(block count %llu, first data block %u, " 3896 "(block count %llu, first data block %u, "
3891 "blocks per group %lu)", sbi->s_groups_count, 3897 "blocks per group %lu)", sbi->s_groups_count,
3892 ext4_blocks_count(es), 3898 ext4_blocks_count(es),
3893 le32_to_cpu(es->s_first_data_block), 3899 le32_to_cpu(es->s_first_data_block),
3894 EXT4_BLOCKS_PER_GROUP(sb)); 3900 EXT4_BLOCKS_PER_GROUP(sb));
3895 goto failed_mount; 3901 goto failed_mount;
3896 } 3902 }
3897 sbi->s_groups_count = blocks_count; 3903 sbi->s_groups_count = blocks_count;
3898 sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count, 3904 sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count,
3899 (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); 3905 (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
3900 db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / 3906 db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
3901 EXT4_DESC_PER_BLOCK(sb); 3907 EXT4_DESC_PER_BLOCK(sb);
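
/*
 * A quick userspace model of the group-count arithmetic just computed:
 * round the block count up to whole groups, then round the group count up
 * to whole descriptor blocks. Constants assume a 4 KiB-block filesystem
 * with 32-byte group descriptors (no 64bit feature); illustrative only.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t total_blocks     = 1048576; /* 4 GiB of 4 KiB blocks */
    uint32_t first_data_block = 0;       /* 0 for 4 KiB blocks, 1 for 1 KiB */
    uint64_t blocks_per_group = 32768;   /* one 4 KiB bitmap = 8 * 4096 bits */
    uint64_t desc_per_block   = 128;     /* 4096 / 32-byte descriptor */

    uint64_t groups   = (total_blocks - first_data_block +
                         blocks_per_group - 1) / blocks_per_group;
    uint64_t db_count = (groups + desc_per_block - 1) / desc_per_block;

    printf("groups=%llu, descriptor blocks=%llu\n",
           (unsigned long long)groups, (unsigned long long)db_count);
    /* prints: groups=32, descriptor blocks=1 */
    return 0;
}
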
3902 sbi->s_group_desc = ext4_kvmalloc(db_count * 3908 sbi->s_group_desc = ext4_kvmalloc(db_count *
3903 sizeof(struct buffer_head *), 3909 sizeof(struct buffer_head *),
3904 GFP_KERNEL); 3910 GFP_KERNEL);
3905 if (sbi->s_group_desc == NULL) { 3911 if (sbi->s_group_desc == NULL) {
3906 ext4_msg(sb, KERN_ERR, "not enough memory"); 3912 ext4_msg(sb, KERN_ERR, "not enough memory");
3907 ret = -ENOMEM; 3913 ret = -ENOMEM;
3908 goto failed_mount; 3914 goto failed_mount;
3909 } 3915 }
3910 3916
3911 if (ext4_proc_root) 3917 if (ext4_proc_root)
3912 sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); 3918 sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
3913 3919
3914 if (sbi->s_proc) 3920 if (sbi->s_proc)
3915 proc_create_data("options", S_IRUGO, sbi->s_proc, 3921 proc_create_data("options", S_IRUGO, sbi->s_proc,
3916 &ext4_seq_options_fops, sb); 3922 &ext4_seq_options_fops, sb);
3917 3923
3918 bgl_lock_init(sbi->s_blockgroup_lock); 3924 bgl_lock_init(sbi->s_blockgroup_lock);
3919 3925
3920 for (i = 0; i < db_count; i++) { 3926 for (i = 0; i < db_count; i++) {
3921 block = descriptor_loc(sb, logical_sb_block, i); 3927 block = descriptor_loc(sb, logical_sb_block, i);
3922 sbi->s_group_desc[i] = sb_bread_unmovable(sb, block); 3928 sbi->s_group_desc[i] = sb_bread_unmovable(sb, block);
3923 if (!sbi->s_group_desc[i]) { 3929 if (!sbi->s_group_desc[i]) {
3924 ext4_msg(sb, KERN_ERR, 3930 ext4_msg(sb, KERN_ERR,
3925 "can't read group descriptor %d", i); 3931 "can't read group descriptor %d", i);
3926 db_count = i; 3932 db_count = i;
3927 goto failed_mount2; 3933 goto failed_mount2;
3928 } 3934 }
3929 } 3935 }
3930 if (!ext4_check_descriptors(sb, &first_not_zeroed)) { 3936 if (!ext4_check_descriptors(sb, &first_not_zeroed)) {
3931 ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); 3937 ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
3932 goto failed_mount2; 3938 goto failed_mount2;
3933 } 3939 }
3934 3940
3935 sbi->s_gdb_count = db_count; 3941 sbi->s_gdb_count = db_count;
3936 get_random_bytes(&sbi->s_next_generation, sizeof(u32)); 3942 get_random_bytes(&sbi->s_next_generation, sizeof(u32));
3937 spin_lock_init(&sbi->s_next_gen_lock); 3943 spin_lock_init(&sbi->s_next_gen_lock);
3938 3944
3939 init_timer(&sbi->s_err_report); 3945 setup_timer(&sbi->s_err_report, print_daily_error_info,
3940 sbi->s_err_report.function = print_daily_error_info; 3946 (unsigned long) sb);
3941 sbi->s_err_report.data = (unsigned long) sb;
3942 3947
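
/*
 * The hunk above is the setup_timer() conversion mentioned in the merge
 * summary. A minimal userspace model of what setup_timer() folds into one
 * call (not the real <linux/timer.h> code, which also initialises list
 * linkage and lockdep state):
 */
#include <stdio.h>

struct timer_list {
    void (*function)(unsigned long);
    unsigned long data;
};

static void setup_timer(struct timer_list *t,
                        void (*fn)(unsigned long), unsigned long data)
{
    t->function = fn;   /* replaces the two open-coded assignments */
    t->data = data;
}

static void print_daily_error_info(unsigned long data)
{
    printf("error-report timer fired, data=%#lx\n", data);
}

int main(void)
{
    struct timer_list err_report;

    setup_timer(&err_report, print_daily_error_info, 0x1234UL);
    err_report.function(err_report.data);  /* simulate the timer expiring */
    return 0;
}
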
3943 /* Register extent status tree shrinker */ 3948 /* Register extent status tree shrinker */
3944 if (ext4_es_register_shrinker(sbi)) 3949 if (ext4_es_register_shrinker(sbi))
3945 goto failed_mount3; 3950 goto failed_mount3;
3946 3951
3947 sbi->s_stripe = ext4_get_stripe_size(sbi); 3952 sbi->s_stripe = ext4_get_stripe_size(sbi);
3948 sbi->s_extent_max_zeroout_kb = 32; 3953 sbi->s_extent_max_zeroout_kb = 32;
3949 3954
3950 /* 3955 /*
3951 * set up enough so that it can read an inode 3956 * set up enough so that it can read an inode
3952 */ 3957 */
3953 sb->s_op = &ext4_sops; 3958 sb->s_op = &ext4_sops;
3954 sb->s_export_op = &ext4_export_ops; 3959 sb->s_export_op = &ext4_export_ops;
3955 sb->s_xattr = ext4_xattr_handlers; 3960 sb->s_xattr = ext4_xattr_handlers;
3956 #ifdef CONFIG_QUOTA 3961 #ifdef CONFIG_QUOTA
3957 sb->dq_op = &ext4_quota_operations; 3962 sb->dq_op = &ext4_quota_operations;
3958 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) 3963 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA))
3959 sb->s_qcop = &dquot_quotactl_sysfile_ops; 3964 sb->s_qcop = &dquot_quotactl_sysfile_ops;
3960 else 3965 else
3961 sb->s_qcop = &ext4_qctl_operations; 3966 sb->s_qcop = &ext4_qctl_operations;
3962 sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; 3967 sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
3963 #endif 3968 #endif
3964 memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid)); 3969 memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
3965 3970
3966 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 3971 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
3967 mutex_init(&sbi->s_orphan_lock); 3972 mutex_init(&sbi->s_orphan_lock);
3968 3973
3969 sb->s_root = NULL; 3974 sb->s_root = NULL;
3970 3975
3971 needs_recovery = (es->s_last_orphan != 0 || 3976 needs_recovery = (es->s_last_orphan != 0 ||
3972 EXT4_HAS_INCOMPAT_FEATURE(sb, 3977 EXT4_HAS_INCOMPAT_FEATURE(sb,
3973 EXT4_FEATURE_INCOMPAT_RECOVER)); 3978 EXT4_FEATURE_INCOMPAT_RECOVER));
3974 3979
3975 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_MMP) && 3980 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_MMP) &&
3976 !(sb->s_flags & MS_RDONLY)) 3981 !(sb->s_flags & MS_RDONLY))
3977 if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block))) 3982 if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block)))
3978 goto failed_mount3a; 3983 goto failed_mount3a;
3979 3984
3980 /* 3985 /*
3981 * The first inode we look at is the journal inode. Don't try 3986 * The first inode we look at is the journal inode. Don't try
3982 * root first: it may be modified in the journal! 3987 * root first: it may be modified in the journal!
3983 */ 3988 */
3984 if (!test_opt(sb, NOLOAD) && 3989 if (!test_opt(sb, NOLOAD) &&
3985 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { 3990 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
3986 if (ext4_load_journal(sb, es, journal_devnum)) 3991 if (ext4_load_journal(sb, es, journal_devnum))
3987 goto failed_mount3a; 3992 goto failed_mount3a;
3988 } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && 3993 } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) &&
3989 EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { 3994 EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
3990 ext4_msg(sb, KERN_ERR, "required journal recovery " 3995 ext4_msg(sb, KERN_ERR, "required journal recovery "
3991 "suppressed and not mounted read-only"); 3996 "suppressed and not mounted read-only");
3992 goto failed_mount_wq; 3997 goto failed_mount_wq;
3993 } else { 3998 } else {
3994 clear_opt(sb, DATA_FLAGS); 3999 clear_opt(sb, DATA_FLAGS);
3995 sbi->s_journal = NULL; 4000 sbi->s_journal = NULL;
3996 needs_recovery = 0; 4001 needs_recovery = 0;
3997 goto no_journal; 4002 goto no_journal;
3998 } 4003 }
3999 4004
4000 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT) && 4005 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT) &&
4001 !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, 4006 !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
4002 JBD2_FEATURE_INCOMPAT_64BIT)) { 4007 JBD2_FEATURE_INCOMPAT_64BIT)) {
4003 ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature"); 4008 ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature");
4004 goto failed_mount_wq; 4009 goto failed_mount_wq;
4005 } 4010 }
4006 4011
4007 if (!set_journal_csum_feature_set(sb)) { 4012 if (!set_journal_csum_feature_set(sb)) {
4008 ext4_msg(sb, KERN_ERR, "Failed to set journal checksum " 4013 ext4_msg(sb, KERN_ERR, "Failed to set journal checksum "
4009 "feature set"); 4014 "feature set");
4010 goto failed_mount_wq; 4015 goto failed_mount_wq;
4011 } 4016 }
4012 4017
4013 /* We have now updated the journal if required, so we can 4018 /* We have now updated the journal if required, so we can
4014 * validate the data journaling mode. */ 4019 * validate the data journaling mode. */
4015 switch (test_opt(sb, DATA_FLAGS)) { 4020 switch (test_opt(sb, DATA_FLAGS)) {
4016 case 0: 4021 case 0:
4017 /* No mode set, assume a default based on the journal 4022 /* No mode set, assume a default based on the journal
4018 * capabilities: ORDERED_DATA if the journal can 4023 * capabilities: ORDERED_DATA if the journal can
4019 * cope, else JOURNAL_DATA 4024 * cope, else JOURNAL_DATA
4020 */ 4025 */
4021 if (jbd2_journal_check_available_features 4026 if (jbd2_journal_check_available_features
4022 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) 4027 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE))
4023 set_opt(sb, ORDERED_DATA); 4028 set_opt(sb, ORDERED_DATA);
4024 else 4029 else
4025 set_opt(sb, JOURNAL_DATA); 4030 set_opt(sb, JOURNAL_DATA);
4026 break; 4031 break;
4027 4032
4028 case EXT4_MOUNT_ORDERED_DATA: 4033 case EXT4_MOUNT_ORDERED_DATA:
4029 case EXT4_MOUNT_WRITEBACK_DATA: 4034 case EXT4_MOUNT_WRITEBACK_DATA:
4030 if (!jbd2_journal_check_available_features 4035 if (!jbd2_journal_check_available_features
4031 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { 4036 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
4032 ext4_msg(sb, KERN_ERR, "Journal does not support " 4037 ext4_msg(sb, KERN_ERR, "Journal does not support "
4033 "requested data journaling mode"); 4038 "requested data journaling mode");
4034 goto failed_mount_wq; 4039 goto failed_mount_wq;
4035 } 4040 }
4036 default: 4041 default:
4037 break; 4042 break;
4038 } 4043 }
4039 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); 4044 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
4040 4045
4041 sbi->s_journal->j_commit_callback = ext4_journal_commit_callback; 4046 sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
4042 4047
4043 no_journal: 4048 no_journal:
4044 if (ext4_mballoc_ready) { 4049 if (ext4_mballoc_ready) {
4045 sbi->s_mb_cache = ext4_xattr_create_cache(sb->s_id); 4050 sbi->s_mb_cache = ext4_xattr_create_cache(sb->s_id);
4046 if (!sbi->s_mb_cache) { 4051 if (!sbi->s_mb_cache) {
4047 ext4_msg(sb, KERN_ERR, "Failed to create an mb_cache"); 4052 ext4_msg(sb, KERN_ERR, "Failed to create an mb_cache");
4048 goto failed_mount_wq; 4053 goto failed_mount_wq;
4049 } 4054 }
4050 } 4055 }
4051 4056
4052 /* 4057 /*
4053 * Get the # of file system overhead blocks from the 4058 * Get the # of file system overhead blocks from the
4054 * superblock if present. 4059 * superblock if present.
4055 */ 4060 */
4056 if (es->s_overhead_clusters) 4061 if (es->s_overhead_clusters)
4057 sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters); 4062 sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
4058 else { 4063 else {
4059 err = ext4_calculate_overhead(sb); 4064 err = ext4_calculate_overhead(sb);
4060 if (err) 4065 if (err)
4061 goto failed_mount_wq; 4066 goto failed_mount_wq;
4062 } 4067 }
4063 4068
4064 /* 4069 /*
4065 * The maximum number of concurrent works can be high and 4070 * The maximum number of concurrent works can be high and
4066 * concurrency isn't really necessary. Limit it to 1. 4071 * concurrency isn't really necessary. Limit it to 1.
4067 */ 4072 */
4068 EXT4_SB(sb)->rsv_conversion_wq = 4073 EXT4_SB(sb)->rsv_conversion_wq =
4069 alloc_workqueue("ext4-rsv-conversion", WQ_MEM_RECLAIM | WQ_UNBOUND, 1); 4074 alloc_workqueue("ext4-rsv-conversion", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
4070 if (!EXT4_SB(sb)->rsv_conversion_wq) { 4075 if (!EXT4_SB(sb)->rsv_conversion_wq) {
4071 printk(KERN_ERR "EXT4-fs: failed to create workqueue\n"); 4076 printk(KERN_ERR "EXT4-fs: failed to create workqueue\n");
4072 ret = -ENOMEM; 4077 ret = -ENOMEM;
4073 goto failed_mount4; 4078 goto failed_mount4;
4074 } 4079 }
4075 4080
4076 /* 4081 /*
4077 * The jbd2_journal_load will have done any necessary log recovery, 4082 * The jbd2_journal_load will have done any necessary log recovery,
4078 * so we can safely mount the rest of the filesystem now. 4083 * so we can safely mount the rest of the filesystem now.
4079 */ 4084 */
4080 4085
4081 root = ext4_iget(sb, EXT4_ROOT_INO); 4086 root = ext4_iget(sb, EXT4_ROOT_INO);
4082 if (IS_ERR(root)) { 4087 if (IS_ERR(root)) {
4083 ext4_msg(sb, KERN_ERR, "get root inode failed"); 4088 ext4_msg(sb, KERN_ERR, "get root inode failed");
4084 ret = PTR_ERR(root); 4089 ret = PTR_ERR(root);
4085 root = NULL; 4090 root = NULL;
4086 goto failed_mount4; 4091 goto failed_mount4;
4087 } 4092 }
4088 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { 4093 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
4089 ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck"); 4094 ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
4090 iput(root); 4095 iput(root);
4091 goto failed_mount4; 4096 goto failed_mount4;
4092 } 4097 }
4093 sb->s_root = d_make_root(root); 4098 sb->s_root = d_make_root(root);
4094 if (!sb->s_root) { 4099 if (!sb->s_root) {
4095 ext4_msg(sb, KERN_ERR, "get root dentry failed"); 4100 ext4_msg(sb, KERN_ERR, "get root dentry failed");
4096 ret = -ENOMEM; 4101 ret = -ENOMEM;
4097 goto failed_mount4; 4102 goto failed_mount4;
4098 } 4103 }
4099 4104
4100 if (ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY)) 4105 if (ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY))
4101 sb->s_flags |= MS_RDONLY; 4106 sb->s_flags |= MS_RDONLY;
4102 4107
4103 /* determine the minimum size of new large inodes, if present */ 4108 /* determine the minimum size of new large inodes, if present */
4104 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) { 4109 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
4105 sbi->s_want_extra_isize = sizeof(struct ext4_inode) - 4110 sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
4106 EXT4_GOOD_OLD_INODE_SIZE; 4111 EXT4_GOOD_OLD_INODE_SIZE;
4107 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 4112 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
4108 EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) { 4113 EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) {
4109 if (sbi->s_want_extra_isize < 4114 if (sbi->s_want_extra_isize <
4110 le16_to_cpu(es->s_want_extra_isize)) 4115 le16_to_cpu(es->s_want_extra_isize))
4111 sbi->s_want_extra_isize = 4116 sbi->s_want_extra_isize =
4112 le16_to_cpu(es->s_want_extra_isize); 4117 le16_to_cpu(es->s_want_extra_isize);
4113 if (sbi->s_want_extra_isize < 4118 if (sbi->s_want_extra_isize <
4114 le16_to_cpu(es->s_min_extra_isize)) 4119 le16_to_cpu(es->s_min_extra_isize))
4115 sbi->s_want_extra_isize = 4120 sbi->s_want_extra_isize =
4116 le16_to_cpu(es->s_min_extra_isize); 4121 le16_to_cpu(es->s_min_extra_isize);
4117 } 4122 }
4118 } 4123 }
4119 /* Check if enough inode space is available */ 4124 /* Check if enough inode space is available */
4120 if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize > 4125 if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize >
4121 sbi->s_inode_size) { 4126 sbi->s_inode_size) {
4122 sbi->s_want_extra_isize = sizeof(struct ext4_inode) - 4127 sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
4123 EXT4_GOOD_OLD_INODE_SIZE; 4128 EXT4_GOOD_OLD_INODE_SIZE;
4124 ext4_msg(sb, KERN_INFO, "required extra inode space not " 4129 ext4_msg(sb, KERN_INFO, "required extra inode space not "
4125 "available"); 4130 "available");
4126 } 4131 }
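
/*
 * A userspace sketch of the s_want_extra_isize clamping above. The sizes
 * are stand-ins (256-byte on-disk inodes, the 128-byte
 * EXT4_GOOD_OLD_INODE_SIZE, an assumed sizeof(struct ext4_inode)):
 */
#include <stdio.h>

int main(void)
{
    unsigned inode_size    = 256; /* sbi->s_inode_size */
    unsigned good_old      = 128; /* EXT4_GOOD_OLD_INODE_SIZE */
    unsigned sizeof_inode  = 160; /* sizeof(struct ext4_inode), assumed */
    unsigned sb_want_extra = 32;  /* es->s_want_extra_isize */
    unsigned sb_min_extra  = 28;  /* es->s_min_extra_isize */

    unsigned want = sizeof_inode - good_old;   /* kernel baseline */
    if (want < sb_want_extra)
        want = sb_want_extra;                  /* honour superblock hint */
    if (want < sb_min_extra)
        want = sb_min_extra;                   /* never below the minimum */
    if (good_old + want > inode_size)          /* doesn't fit: fall back */
        want = sizeof_inode - good_old;

    printf("s_want_extra_isize=%u\n", want);   /* prints 32 */
    return 0;
}
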
4127 4132
4128 err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sb)); 4133 err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sb));
4129 if (err) { 4134 if (err) {
4130 ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for " 4135 ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for "
4131 "reserved pool", ext4_calculate_resv_clusters(sb)); 4136 "reserved pool", ext4_calculate_resv_clusters(sb));
4132 goto failed_mount4a; 4137 goto failed_mount4a;
4133 } 4138 }
4134 4139
4135 err = ext4_setup_system_zone(sb); 4140 err = ext4_setup_system_zone(sb);
4136 if (err) { 4141 if (err) {
4137 ext4_msg(sb, KERN_ERR, "failed to initialize system " 4142 ext4_msg(sb, KERN_ERR, "failed to initialize system "
4138 "zone (%d)", err); 4143 "zone (%d)", err);
4139 goto failed_mount4a; 4144 goto failed_mount4a;
4140 } 4145 }
4141 4146
4142 ext4_ext_init(sb); 4147 ext4_ext_init(sb);
4143 err = ext4_mb_init(sb); 4148 err = ext4_mb_init(sb);
4144 if (err) { 4149 if (err) {
4145 ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)", 4150 ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
4146 err); 4151 err);
4147 goto failed_mount5; 4152 goto failed_mount5;
4148 } 4153 }
4149 4154
4150 block = ext4_count_free_clusters(sb); 4155 block = ext4_count_free_clusters(sb);
4151 ext4_free_blocks_count_set(sbi->s_es, 4156 ext4_free_blocks_count_set(sbi->s_es,
4152 EXT4_C2B(sbi, block)); 4157 EXT4_C2B(sbi, block));
4153 err = percpu_counter_init(&sbi->s_freeclusters_counter, block, 4158 err = percpu_counter_init(&sbi->s_freeclusters_counter, block,
4154 GFP_KERNEL); 4159 GFP_KERNEL);
4155 if (!err) { 4160 if (!err) {
4156 unsigned long freei = ext4_count_free_inodes(sb); 4161 unsigned long freei = ext4_count_free_inodes(sb);
4157 sbi->s_es->s_free_inodes_count = cpu_to_le32(freei); 4162 sbi->s_es->s_free_inodes_count = cpu_to_le32(freei);
4158 err = percpu_counter_init(&sbi->s_freeinodes_counter, freei, 4163 err = percpu_counter_init(&sbi->s_freeinodes_counter, freei,
4159 GFP_KERNEL); 4164 GFP_KERNEL);
4160 } 4165 }
4161 if (!err) 4166 if (!err)
4162 err = percpu_counter_init(&sbi->s_dirs_counter, 4167 err = percpu_counter_init(&sbi->s_dirs_counter,
4163 ext4_count_dirs(sb), GFP_KERNEL); 4168 ext4_count_dirs(sb), GFP_KERNEL);
4164 if (!err) 4169 if (!err)
4165 err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0, 4170 err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0,
4166 GFP_KERNEL); 4171 GFP_KERNEL);
4167 if (err) { 4172 if (err) {
4168 ext4_msg(sb, KERN_ERR, "insufficient memory"); 4173 ext4_msg(sb, KERN_ERR, "insufficient memory");
4169 goto failed_mount6; 4174 goto failed_mount6;
4170 } 4175 }
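
/*
 * The percpu counters above are initialised with an "if (!err)" ladder so
 * the first failure short-circuits the rest and funnels into one error
 * path. A toy model of that chaining (counter names are stand-ins):
 */
#include <stdio.h>

static int init_counter(const char *name, int should_fail)
{
    printf("init %s -> %s\n", name, should_fail ? "ENOMEM" : "ok");
    return should_fail ? -12 /* -ENOMEM */ : 0;
}

int main(void)
{
    int err;

    err = init_counter("freeclusters", 0);
    if (!err)
        err = init_counter("freeinodes", 0);
    if (!err)
        err = init_counter("dirs", 1);           /* simulated failure */
    if (!err)
        err = init_counter("dirtyclusters", 0);  /* never reached */
    if (err)
        printf("insufficient memory (err=%d)\n", err);
    return err ? 1 : 0;
}
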
4171 4176
4172 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) 4177 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
4173 if (!ext4_fill_flex_info(sb)) { 4178 if (!ext4_fill_flex_info(sb)) {
4174 ext4_msg(sb, KERN_ERR, 4179 ext4_msg(sb, KERN_ERR,
4175 "unable to initialize " 4180 "unable to initialize "
4176 "flex_bg meta info!"); 4181 "flex_bg meta info!");
4177 goto failed_mount6; 4182 goto failed_mount6;
4178 } 4183 }
4179 4184
4180 err = ext4_register_li_request(sb, first_not_zeroed); 4185 err = ext4_register_li_request(sb, first_not_zeroed);
4181 if (err) 4186 if (err)
4182 goto failed_mount6; 4187 goto failed_mount6;
4183 4188
4184 sbi->s_kobj.kset = ext4_kset; 4189 sbi->s_kobj.kset = ext4_kset;
4185 init_completion(&sbi->s_kobj_unregister); 4190 init_completion(&sbi->s_kobj_unregister);
4186 err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL, 4191 err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL,
4187 "%s", sb->s_id); 4192 "%s", sb->s_id);
4188 if (err) 4193 if (err)
4189 goto failed_mount7; 4194 goto failed_mount7;
4190 4195
4191 #ifdef CONFIG_QUOTA 4196 #ifdef CONFIG_QUOTA
4192 /* Enable quota usage during mount. */ 4197 /* Enable quota usage during mount. */
4193 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) && 4198 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) &&
4194 !(sb->s_flags & MS_RDONLY)) { 4199 !(sb->s_flags & MS_RDONLY)) {
4195 err = ext4_enable_quotas(sb); 4200 err = ext4_enable_quotas(sb);
4196 if (err) 4201 if (err)
4197 goto failed_mount8; 4202 goto failed_mount8;
4198 } 4203 }
4199 #endif /* CONFIG_QUOTA */ 4204 #endif /* CONFIG_QUOTA */
4200 4205
4201 EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; 4206 EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
4202 ext4_orphan_cleanup(sb, es); 4207 ext4_orphan_cleanup(sb, es);
4203 EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; 4208 EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
4204 if (needs_recovery) { 4209 if (needs_recovery) {
4205 ext4_msg(sb, KERN_INFO, "recovery complete"); 4210 ext4_msg(sb, KERN_INFO, "recovery complete");
4206 ext4_mark_recovery_complete(sb, es); 4211 ext4_mark_recovery_complete(sb, es);
4207 } 4212 }
4208 if (EXT4_SB(sb)->s_journal) { 4213 if (EXT4_SB(sb)->s_journal) {
4209 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) 4214 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
4210 descr = " journalled data mode"; 4215 descr = " journalled data mode";
4211 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) 4216 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
4212 descr = " ordered data mode"; 4217 descr = " ordered data mode";
4213 else 4218 else
4214 descr = " writeback data mode"; 4219 descr = " writeback data mode";
4215 } else 4220 } else
4216 descr = "out journal"; 4221 descr = "out journal";
4217 4222
4218 if (test_opt(sb, DISCARD)) { 4223 if (test_opt(sb, DISCARD)) {
4219 struct request_queue *q = bdev_get_queue(sb->s_bdev); 4224 struct request_queue *q = bdev_get_queue(sb->s_bdev);
4220 if (!blk_queue_discard(q)) 4225 if (!blk_queue_discard(q))
4221 ext4_msg(sb, KERN_WARNING, 4226 ext4_msg(sb, KERN_WARNING,
4222 "mounting with \"discard\" option, but " 4227 "mounting with \"discard\" option, but "
4223 "the device does not support discard"); 4228 "the device does not support discard");
4224 } 4229 }
4225 4230
4226 ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. " 4231 ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
4227 "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts, 4232 "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts,
4228 *sbi->s_es->s_mount_opts ? "; " : "", orig_data); 4233 *sbi->s_es->s_mount_opts ? "; " : "", orig_data);
4229 4234
4230 if (es->s_error_count) 4235 if (es->s_error_count)
4231 mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */ 4236 mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
4232 4237
4233 /* Enable message ratelimiting. Default is 10 messages per 5 secs. */ 4238 /* Enable message ratelimiting. Default is 10 messages per 5 secs. */
4234 ratelimit_state_init(&sbi->s_err_ratelimit_state, 5 * HZ, 10); 4239 ratelimit_state_init(&sbi->s_err_ratelimit_state, 5 * HZ, 10);
4235 ratelimit_state_init(&sbi->s_warning_ratelimit_state, 5 * HZ, 10); 4240 ratelimit_state_init(&sbi->s_warning_ratelimit_state, 5 * HZ, 10);
4236 ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10); 4241 ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10);
4237 4242
4238 kfree(orig_data); 4243 kfree(orig_data);
4239 return 0; 4244 return 0;
4240 4245
4241 cantfind_ext4: 4246 cantfind_ext4:
4242 if (!silent) 4247 if (!silent)
4243 ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem"); 4248 ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
4244 goto failed_mount; 4249 goto failed_mount;
4245 4250
4246 #ifdef CONFIG_QUOTA 4251 #ifdef CONFIG_QUOTA
4247 failed_mount8: 4252 failed_mount8:
4248 kobject_del(&sbi->s_kobj); 4253 kobject_del(&sbi->s_kobj);
4249 #endif 4254 #endif
4250 failed_mount7: 4255 failed_mount7:
4251 ext4_unregister_li_request(sb); 4256 ext4_unregister_li_request(sb);
4252 failed_mount6: 4257 failed_mount6:
4253 ext4_mb_release(sb); 4258 ext4_mb_release(sb);
4254 if (sbi->s_flex_groups) 4259 if (sbi->s_flex_groups)
4255 kvfree(sbi->s_flex_groups); 4260 kvfree(sbi->s_flex_groups);
4256 percpu_counter_destroy(&sbi->s_freeclusters_counter); 4261 percpu_counter_destroy(&sbi->s_freeclusters_counter);
4257 percpu_counter_destroy(&sbi->s_freeinodes_counter); 4262 percpu_counter_destroy(&sbi->s_freeinodes_counter);
4258 percpu_counter_destroy(&sbi->s_dirs_counter); 4263 percpu_counter_destroy(&sbi->s_dirs_counter);
4259 percpu_counter_destroy(&sbi->s_dirtyclusters_counter); 4264 percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
4260 failed_mount5: 4265 failed_mount5:
4261 ext4_ext_release(sb); 4266 ext4_ext_release(sb);
4262 ext4_release_system_zone(sb); 4267 ext4_release_system_zone(sb);
4263 failed_mount4a: 4268 failed_mount4a:
4264 dput(sb->s_root); 4269 dput(sb->s_root);
4265 sb->s_root = NULL; 4270 sb->s_root = NULL;
4266 failed_mount4: 4271 failed_mount4:
4267 ext4_msg(sb, KERN_ERR, "mount failed"); 4272 ext4_msg(sb, KERN_ERR, "mount failed");
4268 if (EXT4_SB(sb)->rsv_conversion_wq) 4273 if (EXT4_SB(sb)->rsv_conversion_wq)
4269 destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq); 4274 destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
4270 failed_mount_wq: 4275 failed_mount_wq:
4271 if (sbi->s_journal) { 4276 if (sbi->s_journal) {
4272 jbd2_journal_destroy(sbi->s_journal); 4277 jbd2_journal_destroy(sbi->s_journal);
4273 sbi->s_journal = NULL; 4278 sbi->s_journal = NULL;
4274 } 4279 }
4275 failed_mount3a: 4280 failed_mount3a:
4276 ext4_es_unregister_shrinker(sbi); 4281 ext4_es_unregister_shrinker(sbi);
4277 failed_mount3: 4282 failed_mount3:
4278 del_timer_sync(&sbi->s_err_report); 4283 del_timer_sync(&sbi->s_err_report);
4279 if (sbi->s_mmp_tsk) 4284 if (sbi->s_mmp_tsk)
4280 kthread_stop(sbi->s_mmp_tsk); 4285 kthread_stop(sbi->s_mmp_tsk);
4281 failed_mount2: 4286 failed_mount2:
4282 for (i = 0; i < db_count; i++) 4287 for (i = 0; i < db_count; i++)
4283 brelse(sbi->s_group_desc[i]); 4288 brelse(sbi->s_group_desc[i]);
4284 kvfree(sbi->s_group_desc); 4289 kvfree(sbi->s_group_desc);
4285 failed_mount: 4290 failed_mount:
4286 if (sbi->s_chksum_driver) 4291 if (sbi->s_chksum_driver)
4287 crypto_free_shash(sbi->s_chksum_driver); 4292 crypto_free_shash(sbi->s_chksum_driver);
4288 if (sbi->s_proc) { 4293 if (sbi->s_proc) {
4289 remove_proc_entry("options", sbi->s_proc); 4294 remove_proc_entry("options", sbi->s_proc);
4290 remove_proc_entry(sb->s_id, ext4_proc_root); 4295 remove_proc_entry(sb->s_id, ext4_proc_root);
4291 } 4296 }
4292 #ifdef CONFIG_QUOTA 4297 #ifdef CONFIG_QUOTA
4293 for (i = 0; i < EXT4_MAXQUOTAS; i++) 4298 for (i = 0; i < EXT4_MAXQUOTAS; i++)
4294 kfree(sbi->s_qf_names[i]); 4299 kfree(sbi->s_qf_names[i]);
4295 #endif 4300 #endif
4296 ext4_blkdev_remove(sbi); 4301 ext4_blkdev_remove(sbi);
4297 brelse(bh); 4302 brelse(bh);
4298 out_fail: 4303 out_fail:
4299 sb->s_fs_info = NULL; 4304 sb->s_fs_info = NULL;
4300 kfree(sbi->s_blockgroup_lock); 4305 kfree(sbi->s_blockgroup_lock);
4301 kfree(sbi); 4306 kfree(sbi);
4302 out_free_orig: 4307 out_free_orig:
4303 kfree(orig_data); 4308 kfree(orig_data);
4304 return err ? err : ret; 4309 return err ? err : ret;
4305 } 4310 }
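
/*
 * ext4_fill_super() above unwinds with a ladder of failed_mount* labels,
 * each label releasing exactly what was acquired after the previous one.
 * A distilled userspace sketch of that idiom, with made-up resources:
 */
#include <stdlib.h>
#include <stdio.h>

static int fill_super(void)
{
    void *groups, *journal, *root;

    groups = malloc(64);
    if (!groups)
        goto failed;
    journal = malloc(64);
    if (!journal)
        goto failed_groups;
    root = malloc(64);
    if (!root)
        goto failed_journal;

    printf("mounted\n");
    free(root);
    free(journal);
    free(groups);
    return 0;

failed_journal:
    free(journal);
failed_groups:
    free(groups);
failed:
    return -12; /* -ENOMEM */
}

int main(void) { return fill_super() ? 1 : 0; }
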
4306 4311
4307 /* 4312 /*
4308 * Setup any per-fs journal parameters now. We'll do this both on 4313 * Setup any per-fs journal parameters now. We'll do this both on
4309 * initial mount, once the journal has been initialised but before we've 4314 * initial mount, once the journal has been initialised but before we've
4310 * done any recovery; and again on any subsequent remount. 4315 * done any recovery; and again on any subsequent remount.
4311 */ 4316 */
4312 static void ext4_init_journal_params(struct super_block *sb, journal_t *journal) 4317 static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
4313 { 4318 {
4314 struct ext4_sb_info *sbi = EXT4_SB(sb); 4319 struct ext4_sb_info *sbi = EXT4_SB(sb);
4315 4320
4316 journal->j_commit_interval = sbi->s_commit_interval; 4321 journal->j_commit_interval = sbi->s_commit_interval;
4317 journal->j_min_batch_time = sbi->s_min_batch_time; 4322 journal->j_min_batch_time = sbi->s_min_batch_time;
4318 journal->j_max_batch_time = sbi->s_max_batch_time; 4323 journal->j_max_batch_time = sbi->s_max_batch_time;
4319 4324
4320 write_lock(&journal->j_state_lock); 4325 write_lock(&journal->j_state_lock);
4321 if (test_opt(sb, BARRIER)) 4326 if (test_opt(sb, BARRIER))
4322 journal->j_flags |= JBD2_BARRIER; 4327 journal->j_flags |= JBD2_BARRIER;
4323 else 4328 else
4324 journal->j_flags &= ~JBD2_BARRIER; 4329 journal->j_flags &= ~JBD2_BARRIER;
4325 if (test_opt(sb, DATA_ERR_ABORT)) 4330 if (test_opt(sb, DATA_ERR_ABORT))
4326 journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR; 4331 journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
4327 else 4332 else
4328 journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR; 4333 journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
4329 write_unlock(&journal->j_state_lock); 4334 write_unlock(&journal->j_state_lock);
4330 } 4335 }
4331 4336
4332 static journal_t *ext4_get_journal(struct super_block *sb, 4337 static journal_t *ext4_get_journal(struct super_block *sb,
4333 unsigned int journal_inum) 4338 unsigned int journal_inum)
4334 { 4339 {
4335 struct inode *journal_inode; 4340 struct inode *journal_inode;
4336 journal_t *journal; 4341 journal_t *journal;
4337 4342
4338 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); 4343 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
4339 4344
4340 /* First, test for the existence of a valid inode on disk. Bad 4345 /* First, test for the existence of a valid inode on disk. Bad
4341 * things happen if we iget() an unused inode, as the subsequent 4346 * things happen if we iget() an unused inode, as the subsequent
4342 * iput() will try to delete it. */ 4347 * iput() will try to delete it. */
4343 4348
4344 journal_inode = ext4_iget(sb, journal_inum); 4349 journal_inode = ext4_iget(sb, journal_inum);
4345 if (IS_ERR(journal_inode)) { 4350 if (IS_ERR(journal_inode)) {
4346 ext4_msg(sb, KERN_ERR, "no journal found"); 4351 ext4_msg(sb, KERN_ERR, "no journal found");
4347 return NULL; 4352 return NULL;
4348 } 4353 }
4349 if (!journal_inode->i_nlink) { 4354 if (!journal_inode->i_nlink) {
4350 make_bad_inode(journal_inode); 4355 make_bad_inode(journal_inode);
4351 iput(journal_inode); 4356 iput(journal_inode);
4352 ext4_msg(sb, KERN_ERR, "journal inode is deleted"); 4357 ext4_msg(sb, KERN_ERR, "journal inode is deleted");
4353 return NULL; 4358 return NULL;
4354 } 4359 }
4355 4360
4356 jbd_debug(2, "Journal inode found at %p: %lld bytes\n", 4361 jbd_debug(2, "Journal inode found at %p: %lld bytes\n",
4357 journal_inode, journal_inode->i_size); 4362 journal_inode, journal_inode->i_size);
4358 if (!S_ISREG(journal_inode->i_mode)) { 4363 if (!S_ISREG(journal_inode->i_mode)) {
4359 ext4_msg(sb, KERN_ERR, "invalid journal inode"); 4364 ext4_msg(sb, KERN_ERR, "invalid journal inode");
4360 iput(journal_inode); 4365 iput(journal_inode);
4361 return NULL; 4366 return NULL;
4362 } 4367 }
4363 4368
4364 journal = jbd2_journal_init_inode(journal_inode); 4369 journal = jbd2_journal_init_inode(journal_inode);
4365 if (!journal) { 4370 if (!journal) {
4366 ext4_msg(sb, KERN_ERR, "Could not load journal inode"); 4371 ext4_msg(sb, KERN_ERR, "Could not load journal inode");
4367 iput(journal_inode); 4372 iput(journal_inode);
4368 return NULL; 4373 return NULL;
4369 } 4374 }
4370 journal->j_private = sb; 4375 journal->j_private = sb;
4371 ext4_init_journal_params(sb, journal); 4376 ext4_init_journal_params(sb, journal);
4372 return journal; 4377 return journal;
4373 } 4378 }
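
/*
 * ext4_iget() above returns an ERR_PTR-encoded errno rather than NULL on
 * failure. A userspace imitation of the kernel's IS_ERR/PTR_ERR idiom,
 * which stores a small negative errno in the pointer value itself:
 */
#include <stdio.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
    return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

static void *iget_demo(int fail)
{
    static int inode = 42;
    return fail ? ERR_PTR(-2 /* -ENOENT */) : (void *)&inode;
}

int main(void)
{
    void *journal_inode = iget_demo(1);

    if (IS_ERR(journal_inode)) {
        printf("no journal found (err=%ld)\n", PTR_ERR(journal_inode));
        return 1;
    }
    return 0;
}
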
4374 4379
4375 static journal_t *ext4_get_dev_journal(struct super_block *sb, 4380 static journal_t *ext4_get_dev_journal(struct super_block *sb,
4376 dev_t j_dev) 4381 dev_t j_dev)
4377 { 4382 {
4378 struct buffer_head *bh; 4383 struct buffer_head *bh;
4379 journal_t *journal; 4384 journal_t *journal;
4380 ext4_fsblk_t start; 4385 ext4_fsblk_t start;
4381 ext4_fsblk_t len; 4386 ext4_fsblk_t len;
4382 int hblock, blocksize; 4387 int hblock, blocksize;
4383 ext4_fsblk_t sb_block; 4388 ext4_fsblk_t sb_block;
4384 unsigned long offset; 4389 unsigned long offset;
4385 struct ext4_super_block *es; 4390 struct ext4_super_block *es;
4386 struct block_device *bdev; 4391 struct block_device *bdev;
4387 4392
4388 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); 4393 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
4389 4394
4390 bdev = ext4_blkdev_get(j_dev, sb); 4395 bdev = ext4_blkdev_get(j_dev, sb);
4391 if (bdev == NULL) 4396 if (bdev == NULL)
4392 return NULL; 4397 return NULL;
4393 4398
4394 blocksize = sb->s_blocksize; 4399 blocksize = sb->s_blocksize;
4395 hblock = bdev_logical_block_size(bdev); 4400 hblock = bdev_logical_block_size(bdev);
4396 if (blocksize < hblock) { 4401 if (blocksize < hblock) {
4397 ext4_msg(sb, KERN_ERR, 4402 ext4_msg(sb, KERN_ERR,
4398 "blocksize too small for journal device"); 4403 "blocksize too small for journal device");
4399 goto out_bdev; 4404 goto out_bdev;
4400 } 4405 }
4401 4406
4402 sb_block = EXT4_MIN_BLOCK_SIZE / blocksize; 4407 sb_block = EXT4_MIN_BLOCK_SIZE / blocksize;
4403 offset = EXT4_MIN_BLOCK_SIZE % blocksize; 4408 offset = EXT4_MIN_BLOCK_SIZE % blocksize;
4404 set_blocksize(bdev, blocksize); 4409 set_blocksize(bdev, blocksize);
4405 if (!(bh = __bread(bdev, sb_block, blocksize))) { 4410 if (!(bh = __bread(bdev, sb_block, blocksize))) {
4406 ext4_msg(sb, KERN_ERR, "couldn't read superblock of " 4411 ext4_msg(sb, KERN_ERR, "couldn't read superblock of "
4407 "external journal"); 4412 "external journal");
4408 goto out_bdev; 4413 goto out_bdev;
4409 } 4414 }
4410 4415
4411 es = (struct ext4_super_block *) (bh->b_data + offset); 4416 es = (struct ext4_super_block *) (bh->b_data + offset);
4412 if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) || 4417 if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) ||
4413 !(le32_to_cpu(es->s_feature_incompat) & 4418 !(le32_to_cpu(es->s_feature_incompat) &
4414 EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) { 4419 EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
4415 ext4_msg(sb, KERN_ERR, "external journal has " 4420 ext4_msg(sb, KERN_ERR, "external journal has "
4416 "bad superblock"); 4421 "bad superblock");
4417 brelse(bh); 4422 brelse(bh);
4418 goto out_bdev; 4423 goto out_bdev;
4419 } 4424 }
4420 4425
4421 if ((le32_to_cpu(es->s_feature_ro_compat) & 4426 if ((le32_to_cpu(es->s_feature_ro_compat) &
4422 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) && 4427 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
4423 es->s_checksum != ext4_superblock_csum(sb, es)) { 4428 es->s_checksum != ext4_superblock_csum(sb, es)) {
4424 ext4_msg(sb, KERN_ERR, "external journal has " 4429 ext4_msg(sb, KERN_ERR, "external journal has "
4425 "corrupt superblock"); 4430 "corrupt superblock");
4426 brelse(bh); 4431 brelse(bh);
4427 goto out_bdev; 4432 goto out_bdev;
4428 } 4433 }
4429 4434
4430 if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { 4435 if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
4431 ext4_msg(sb, KERN_ERR, "journal UUID does not match"); 4436 ext4_msg(sb, KERN_ERR, "journal UUID does not match");
4432 brelse(bh); 4437 brelse(bh);
4433 goto out_bdev; 4438 goto out_bdev;
4434 } 4439 }
4435 4440
4436 len = ext4_blocks_count(es); 4441 len = ext4_blocks_count(es);
4437 start = sb_block + 1; 4442 start = sb_block + 1;
4438 brelse(bh); /* we're done with the superblock */ 4443 brelse(bh); /* we're done with the superblock */
4439 4444
4440 journal = jbd2_journal_init_dev(bdev, sb->s_bdev, 4445 journal = jbd2_journal_init_dev(bdev, sb->s_bdev,
4441 start, len, blocksize); 4446 start, len, blocksize);
4442 if (!journal) { 4447 if (!journal) {
4443 ext4_msg(sb, KERN_ERR, "failed to create device journal"); 4448 ext4_msg(sb, KERN_ERR, "failed to create device journal");
4444 goto out_bdev; 4449 goto out_bdev;
4445 } 4450 }
4446 journal->j_private = sb; 4451 journal->j_private = sb;
4447 ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &journal->j_sb_buffer); 4452 ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &journal->j_sb_buffer);
4448 wait_on_buffer(journal->j_sb_buffer); 4453 wait_on_buffer(journal->j_sb_buffer);
4449 if (!buffer_uptodate(journal->j_sb_buffer)) { 4454 if (!buffer_uptodate(journal->j_sb_buffer)) {
4450 ext4_msg(sb, KERN_ERR, "I/O error on journal device"); 4455 ext4_msg(sb, KERN_ERR, "I/O error on journal device");
4451 goto out_journal; 4456 goto out_journal;
4452 } 4457 }
4453 if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { 4458 if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
4454 ext4_msg(sb, KERN_ERR, "External journal has more than one " 4459 ext4_msg(sb, KERN_ERR, "External journal has more than one "
4455 "user (unsupported) - %d", 4460 "user (unsupported) - %d",
4456 be32_to_cpu(journal->j_superblock->s_nr_users)); 4461 be32_to_cpu(journal->j_superblock->s_nr_users));
4457 goto out_journal; 4462 goto out_journal;
4458 } 4463 }
4459 EXT4_SB(sb)->journal_bdev = bdev; 4464 EXT4_SB(sb)->journal_bdev = bdev;
4460 ext4_init_journal_params(sb, journal); 4465 ext4_init_journal_params(sb, journal);
4461 return journal; 4466 return journal;
4462 4467
4463 out_journal: 4468 out_journal:
4464 jbd2_journal_destroy(journal); 4469 jbd2_journal_destroy(journal);
4465 out_bdev: 4470 out_bdev:
4466 ext4_blkdev_put(bdev); 4471 ext4_blkdev_put(bdev);
4467 return NULL; 4472 return NULL;
4468 } 4473 }
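
/*
 * The sb_block/offset arithmetic in ext4_get_dev_journal() above locates
 * the external journal's superblock: it always lives at byte offset 1024
 * (EXT4_MIN_BLOCK_SIZE), whatever the block size. A quick check:
 */
#include <stdio.h>

int main(void)
{
    unsigned sizes[] = { 1024, 2048, 4096 };

    for (int i = 0; i < 3; i++) {
        unsigned blocksize = sizes[i];
        unsigned sb_block  = 1024 / blocksize;  /* which block to read */
        unsigned offset    = 1024 % blocksize;  /* where inside that block */

        printf("blocksize=%u -> sb_block=%u offset=%u\n",
               blocksize, sb_block, offset);
    }
    /* 1024 -> block 1 offset 0; 2048 and 4096 -> block 0 offset 1024 */
    return 0;
}
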
4469 4474
4470 static int ext4_load_journal(struct super_block *sb, 4475 static int ext4_load_journal(struct super_block *sb,
4471 struct ext4_super_block *es, 4476 struct ext4_super_block *es,
4472 unsigned long journal_devnum) 4477 unsigned long journal_devnum)
4473 { 4478 {
4474 journal_t *journal; 4479 journal_t *journal;
4475 unsigned int journal_inum = le32_to_cpu(es->s_journal_inum); 4480 unsigned int journal_inum = le32_to_cpu(es->s_journal_inum);
4476 dev_t journal_dev; 4481 dev_t journal_dev;
4477 int err = 0; 4482 int err = 0;
4478 int really_read_only; 4483 int really_read_only;
4479 4484
4480 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); 4485 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
4481 4486
4482 if (journal_devnum && 4487 if (journal_devnum &&
4483 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 4488 journal_devnum != le32_to_cpu(es->s_journal_dev)) {
4484 ext4_msg(sb, KERN_INFO, "external journal device major/minor " 4489 ext4_msg(sb, KERN_INFO, "external journal device major/minor "
4485 "numbers have changed"); 4490 "numbers have changed");
4486 journal_dev = new_decode_dev(journal_devnum); 4491 journal_dev = new_decode_dev(journal_devnum);
4487 } else 4492 } else
4488 journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); 4493 journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
4489 4494
4490 really_read_only = bdev_read_only(sb->s_bdev); 4495 really_read_only = bdev_read_only(sb->s_bdev);
4491 4496
4492 /* 4497 /*
4493 * Are we loading a blank journal or performing recovery after a 4498 * Are we loading a blank journal or performing recovery after a
4494 * crash? For recovery, we need to check in advance whether we 4499 * crash? For recovery, we need to check in advance whether we
4495 * can get read-write access to the device. 4500 * can get read-write access to the device.
4496 */ 4501 */
4497 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { 4502 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
4498 if (sb->s_flags & MS_RDONLY) { 4503 if (sb->s_flags & MS_RDONLY) {
4499 ext4_msg(sb, KERN_INFO, "recovery " 4504 ext4_msg(sb, KERN_INFO, "recovery "
4500 "required on readonly filesystem"); 4505 "required on readonly filesystem");
4501 if (really_read_only) { 4506 if (really_read_only) {
4502 ext4_msg(sb, KERN_ERR, "write access " 4507 ext4_msg(sb, KERN_ERR, "write access "
4503 "unavailable, cannot proceed"); 4508 "unavailable, cannot proceed");
4504 return -EROFS; 4509 return -EROFS;
4505 } 4510 }
4506 ext4_msg(sb, KERN_INFO, "write access will " 4511 ext4_msg(sb, KERN_INFO, "write access will "
4507 "be enabled during recovery"); 4512 "be enabled during recovery");
4508 } 4513 }
4509 } 4514 }
4510 4515
4511 if (journal_inum && journal_dev) { 4516 if (journal_inum && journal_dev) {
4512 ext4_msg(sb, KERN_ERR, "filesystem has both journal " 4517 ext4_msg(sb, KERN_ERR, "filesystem has both journal "
4513 "and inode journals!"); 4518 "and inode journals!");
4514 return -EINVAL; 4519 return -EINVAL;
4515 } 4520 }
4516 4521
4517 if (journal_inum) { 4522 if (journal_inum) {
4518 if (!(journal = ext4_get_journal(sb, journal_inum))) 4523 if (!(journal = ext4_get_journal(sb, journal_inum)))
4519 return -EINVAL; 4524 return -EINVAL;
4520 } else { 4525 } else {
4521 if (!(journal = ext4_get_dev_journal(sb, journal_dev))) 4526 if (!(journal = ext4_get_dev_journal(sb, journal_dev)))
4522 return -EINVAL; 4527 return -EINVAL;
4523 } 4528 }
4524 4529
4525 if (!(journal->j_flags & JBD2_BARRIER)) 4530 if (!(journal->j_flags & JBD2_BARRIER))
4526 ext4_msg(sb, KERN_INFO, "barriers disabled"); 4531 ext4_msg(sb, KERN_INFO, "barriers disabled");
4527 4532
4528 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) 4533 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER))
4529 err = jbd2_journal_wipe(journal, !really_read_only); 4534 err = jbd2_journal_wipe(journal, !really_read_only);
4530 if (!err) { 4535 if (!err) {
4531 char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL); 4536 char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL);
4532 if (save) 4537 if (save)
4533 memcpy(save, ((char *) es) + 4538 memcpy(save, ((char *) es) +
4534 EXT4_S_ERR_START, EXT4_S_ERR_LEN); 4539 EXT4_S_ERR_START, EXT4_S_ERR_LEN);
4535 err = jbd2_journal_load(journal); 4540 err = jbd2_journal_load(journal);
4536 if (save) 4541 if (save)
4537 memcpy(((char *) es) + EXT4_S_ERR_START, 4542 memcpy(((char *) es) + EXT4_S_ERR_START,
4538 save, EXT4_S_ERR_LEN); 4543 save, EXT4_S_ERR_LEN);
4539 kfree(save); 4544 kfree(save);
4540 } 4545 }
4541 4546
4542 if (err) { 4547 if (err) {
4543 ext4_msg(sb, KERN_ERR, "error loading journal"); 4548 ext4_msg(sb, KERN_ERR, "error loading journal");
4544 jbd2_journal_destroy(journal); 4549 jbd2_journal_destroy(journal);
4545 return err; 4550 return err;
4546 } 4551 }
4547 4552
4548 EXT4_SB(sb)->s_journal = journal; 4553 EXT4_SB(sb)->s_journal = journal;
4549 ext4_clear_journal_err(sb, es); 4554 ext4_clear_journal_err(sb, es);
4550 4555
4551 if (!really_read_only && journal_devnum && 4556 if (!really_read_only && journal_devnum &&
4552 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 4557 journal_devnum != le32_to_cpu(es->s_journal_dev)) {
4553 es->s_journal_dev = cpu_to_le32(journal_devnum); 4558 es->s_journal_dev = cpu_to_le32(journal_devnum);
4554 4559
4555 /* Make sure we flush the recovery flag to disk. */ 4560 /* Make sure we flush the recovery flag to disk. */
4556 ext4_commit_super(sb, 1); 4561 ext4_commit_super(sb, 1);
4557 } 4562 }
4558 4563
4559 return 0; 4564 return 0;
4560 } 4565 }
4561 4566
4562 static int ext4_commit_super(struct super_block *sb, int sync) 4567 static int ext4_commit_super(struct super_block *sb, int sync)
4563 { 4568 {
4564 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 4569 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
4565 struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; 4570 struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
4566 int error = 0; 4571 int error = 0;
4567 4572
4568 if (!sbh || block_device_ejected(sb)) 4573 if (!sbh || block_device_ejected(sb))
4569 return error; 4574 return error;
4570 if (buffer_write_io_error(sbh)) { 4575 if (buffer_write_io_error(sbh)) {
4571 /* 4576 /*
4572 * Oh, dear. A previous attempt to write the 4577 * Oh, dear. A previous attempt to write the
4573 * superblock failed. This could happen because the 4578 * superblock failed. This could happen because the
4574 * USB device was yanked out. Or it could happen to 4579 * USB device was yanked out. Or it could happen to
4575 * be a transient write error and maybe the block will 4580 * be a transient write error and maybe the block will
4576 * be remapped. Nothing we can do but to retry the 4581 * be remapped. Nothing we can do but to retry the
4577 * write and hope for the best. 4582 * write and hope for the best.
4578 */ 4583 */
4579 ext4_msg(sb, KERN_ERR, "previous I/O error to " 4584 ext4_msg(sb, KERN_ERR, "previous I/O error to "
4580 "superblock detected"); 4585 "superblock detected");
4581 clear_buffer_write_io_error(sbh); 4586 clear_buffer_write_io_error(sbh);
4582 set_buffer_uptodate(sbh); 4587 set_buffer_uptodate(sbh);
4583 } 4588 }
4584 /* 4589 /*
4585 * If the file system is mounted read-only, don't update the 4590 * If the file system is mounted read-only, don't update the
4586 * superblock write time. This avoids updating the superblock 4591 * superblock write time. This avoids updating the superblock
4587 * write time when we are mounting the root file system 4592 * write time when we are mounting the root file system
4588 * read/only but we need to replay the journal; at that point, 4593 * read/only but we need to replay the journal; at that point,
4589 * for people who are east of GMT and who make their clock 4594 * for people who are east of GMT and who make their clock
4590 * tick in localtime for Windows bug-for-bug compatibility, 4595 * tick in localtime for Windows bug-for-bug compatibility,
4591 * the clock is set in the future, and this will cause e2fsck 4596 * the clock is set in the future, and this will cause e2fsck
4592 * to complain and force a full file system check. 4597 * to complain and force a full file system check.
4593 */ 4598 */
4594 if (!(sb->s_flags & MS_RDONLY)) 4599 if (!(sb->s_flags & MS_RDONLY))
4595 es->s_wtime = cpu_to_le32(get_seconds()); 4600 es->s_wtime = cpu_to_le32(get_seconds());
4596 if (sb->s_bdev->bd_part) 4601 if (sb->s_bdev->bd_part)
4597 es->s_kbytes_written = 4602 es->s_kbytes_written =
4598 cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + 4603 cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
4599 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - 4604 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
4600 EXT4_SB(sb)->s_sectors_written_start) >> 1)); 4605 EXT4_SB(sb)->s_sectors_written_start) >> 1));
4601 else 4606 else
4602 es->s_kbytes_written = 4607 es->s_kbytes_written =
4603 cpu_to_le64(EXT4_SB(sb)->s_kbytes_written); 4608 cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
4604 if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeclusters_counter)) 4609 if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeclusters_counter))
4605 ext4_free_blocks_count_set(es, 4610 ext4_free_blocks_count_set(es,
4606 EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive( 4611 EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive(
4607 &EXT4_SB(sb)->s_freeclusters_counter))); 4612 &EXT4_SB(sb)->s_freeclusters_counter)));
4608 if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter)) 4613 if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter))
4609 es->s_free_inodes_count = 4614 es->s_free_inodes_count =
4610 cpu_to_le32(percpu_counter_sum_positive( 4615 cpu_to_le32(percpu_counter_sum_positive(
4611 &EXT4_SB(sb)->s_freeinodes_counter)); 4616 &EXT4_SB(sb)->s_freeinodes_counter));
4612 BUFFER_TRACE(sbh, "marking dirty"); 4617 BUFFER_TRACE(sbh, "marking dirty");
4613 ext4_superblock_csum_set(sb); 4618 ext4_superblock_csum_set(sb);
4614 mark_buffer_dirty(sbh); 4619 mark_buffer_dirty(sbh);
4615 if (sync) { 4620 if (sync) {
4616 error = sync_dirty_buffer(sbh); 4621 error = sync_dirty_buffer(sbh);
4617 if (error) 4622 if (error)
4618 return error; 4623 return error;
4619 4624
4620 error = buffer_write_io_error(sbh); 4625 error = buffer_write_io_error(sbh);
4621 if (error) { 4626 if (error) {
4622 ext4_msg(sb, KERN_ERR, "I/O error while writing " 4627 ext4_msg(sb, KERN_ERR, "I/O error while writing "
4623 "superblock"); 4628 "superblock");
4624 clear_buffer_write_io_error(sbh); 4629 clear_buffer_write_io_error(sbh);
4625 set_buffer_uptodate(sbh); 4630 set_buffer_uptodate(sbh);
4626 } 4631 }
4627 } 4632 }
4628 return error; 4633 return error;
4629 } 4634 }
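
/*
 * The s_kbytes_written update in ext4_commit_super() converts a delta of
 * 512-byte sectors to kibibytes with ">> 1". A quick check of that
 * arithmetic with made-up numbers:
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t lifetime_kb   = 123456; /* s_kbytes_written at mount time */
    uint64_t sectors_start = 1000;   /* s_sectors_written_start */
    uint64_t sectors_now   = 5096;   /* part_stat_read(..., sectors[1]) */

    /* 4096 sectors written this mount = 2048 KiB */
    uint64_t kb = lifetime_kb + ((sectors_now - sectors_start) >> 1);

    printf("s_kbytes_written=%llu\n", (unsigned long long)kb); /* 125504 */
    return 0;
}
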
4630 4635
4631 /* 4636 /*
4632 * Have we just finished recovery? If so, and if we are mounting (or 4637 * Have we just finished recovery? If so, and if we are mounting (or
4633 * remounting) the filesystem readonly, then we will end up with a 4638 * remounting) the filesystem readonly, then we will end up with a
4634 * consistent fs on disk. Record that fact. 4639 * consistent fs on disk. Record that fact.
4635 */ 4640 */
4636 static void ext4_mark_recovery_complete(struct super_block *sb, 4641 static void ext4_mark_recovery_complete(struct super_block *sb,
4637 struct ext4_super_block *es) 4642 struct ext4_super_block *es)
4638 { 4643 {
4639 journal_t *journal = EXT4_SB(sb)->s_journal; 4644 journal_t *journal = EXT4_SB(sb)->s_journal;
4640 4645
4641 if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { 4646 if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
4642 BUG_ON(journal != NULL); 4647 BUG_ON(journal != NULL);
4643 return; 4648 return;
4644 } 4649 }
4645 jbd2_journal_lock_updates(journal); 4650 jbd2_journal_lock_updates(journal);
4646 if (jbd2_journal_flush(journal) < 0) 4651 if (jbd2_journal_flush(journal) < 0)
4647 goto out; 4652 goto out;
4648 4653
4649 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && 4654 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) &&
4650 sb->s_flags & MS_RDONLY) { 4655 sb->s_flags & MS_RDONLY) {
4651 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 4656 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
4652 ext4_commit_super(sb, 1); 4657 ext4_commit_super(sb, 1);
4653 } 4658 }
4654 4659
4655 out: 4660 out:
4656 jbd2_journal_unlock_updates(journal); 4661 jbd2_journal_unlock_updates(journal);
4657 } 4662 }
4658 4663
4659 /* 4664 /*
4660 * If we are mounting (or read-write remounting) a filesystem whose journal 4665 * If we are mounting (or read-write remounting) a filesystem whose journal
4661 * has recorded an error from a previous lifetime, move that error to the 4666 * has recorded an error from a previous lifetime, move that error to the
4662 * main filesystem now. 4667 * main filesystem now.
4663 */ 4668 */
4664 static void ext4_clear_journal_err(struct super_block *sb, 4669 static void ext4_clear_journal_err(struct super_block *sb,
4665 struct ext4_super_block *es) 4670 struct ext4_super_block *es)
4666 { 4671 {
4667 journal_t *journal; 4672 journal_t *journal;
4668 int j_errno; 4673 int j_errno;
4669 const char *errstr; 4674 const char *errstr;
4670 4675
4671 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); 4676 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
4672 4677
4673 journal = EXT4_SB(sb)->s_journal; 4678 journal = EXT4_SB(sb)->s_journal;
4674 4679
4675 /* 4680 /*
4676 * Now check for any error status which may have been recorded in the 4681 * Now check for any error status which may have been recorded in the
4677 * journal by a prior ext4_error() or ext4_abort() 4682 * journal by a prior ext4_error() or ext4_abort()
4678 */ 4683 */
4679 4684
4680 j_errno = jbd2_journal_errno(journal); 4685 j_errno = jbd2_journal_errno(journal);
4681 if (j_errno) { 4686 if (j_errno) {
4682 char nbuf[16]; 4687 char nbuf[16];
4683 4688
4684 errstr = ext4_decode_error(sb, j_errno, nbuf); 4689 errstr = ext4_decode_error(sb, j_errno, nbuf);
4685 ext4_warning(sb, "Filesystem error recorded " 4690 ext4_warning(sb, "Filesystem error recorded "
4686 "from previous mount: %s", errstr); 4691 "from previous mount: %s", errstr);
4687 ext4_warning(sb, "Marking fs in need of filesystem check."); 4692 ext4_warning(sb, "Marking fs in need of filesystem check.");
4688 4693
4689 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 4694 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
4690 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 4695 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
4691 ext4_commit_super(sb, 1); 4696 ext4_commit_super(sb, 1);
4692 4697
4693 jbd2_journal_clear_err(journal); 4698 jbd2_journal_clear_err(journal);
4694 jbd2_journal_update_sb_errno(journal); 4699 jbd2_journal_update_sb_errno(journal);
4695 } 4700 }
4696 } 4701 }
4697 4702
4698 /* 4703 /*
4699 * Force the running and committing transactions to commit, 4704 * Force the running and committing transactions to commit,
4700 * and wait on the commit. 4705 * and wait on the commit.
4701 */ 4706 */
4702 int ext4_force_commit(struct super_block *sb) 4707 int ext4_force_commit(struct super_block *sb)
4703 { 4708 {
4704 journal_t *journal; 4709 journal_t *journal;
4705 4710
4706 if (sb->s_flags & MS_RDONLY) 4711 if (sb->s_flags & MS_RDONLY)
4707 return 0; 4712 return 0;
4708 4713
4709 journal = EXT4_SB(sb)->s_journal; 4714 journal = EXT4_SB(sb)->s_journal;
4710 return ext4_journal_force_commit(journal); 4715 return ext4_journal_force_commit(journal);
4711 } 4716 }
4712 4717
4713 static int ext4_sync_fs(struct super_block *sb, int wait) 4718 static int ext4_sync_fs(struct super_block *sb, int wait)
4714 { 4719 {
4715 int ret = 0; 4720 int ret = 0;
4716 tid_t target; 4721 tid_t target;
4717 bool needs_barrier = false; 4722 bool needs_barrier = false;
4718 struct ext4_sb_info *sbi = EXT4_SB(sb); 4723 struct ext4_sb_info *sbi = EXT4_SB(sb);
4719 4724
4720 trace_ext4_sync_fs(sb, wait); 4725 trace_ext4_sync_fs(sb, wait);
4721 flush_workqueue(sbi->rsv_conversion_wq); 4726 flush_workqueue(sbi->rsv_conversion_wq);
4722 /* 4727 /*
4723 * Writeback quota in non-journalled quota case - journalled quota has 4728 * Writeback quota in non-journalled quota case - journalled quota has
4724 * no dirty dquots 4729 * no dirty dquots
4725 */ 4730 */
4726 dquot_writeback_dquots(sb, -1); 4731 dquot_writeback_dquots(sb, -1);
4727 /* 4732 /*
4728 * Data writeback is possible w/o journal transaction, so a barrier must 4733 * Data writeback is possible w/o journal transaction, so a barrier must
4729 * be sent at the end of the function. But we can skip it if 4734 * be sent at the end of the function. But we can skip it if
4730 * transaction_commit will do it for us. 4735 * transaction_commit will do it for us.
4731 */ 4736 */
4732 if (sbi->s_journal) { 4737 if (sbi->s_journal) {
4733 target = jbd2_get_latest_transaction(sbi->s_journal); 4738 target = jbd2_get_latest_transaction(sbi->s_journal);
4734 if (wait && sbi->s_journal->j_flags & JBD2_BARRIER && 4739 if (wait && sbi->s_journal->j_flags & JBD2_BARRIER &&
4735 !jbd2_trans_will_send_data_barrier(sbi->s_journal, target)) 4740 !jbd2_trans_will_send_data_barrier(sbi->s_journal, target))
4736 needs_barrier = true; 4741 needs_barrier = true;
4737 4742
4738 if (jbd2_journal_start_commit(sbi->s_journal, &target)) { 4743 if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
4739 if (wait) 4744 if (wait)
4740 ret = jbd2_log_wait_commit(sbi->s_journal, 4745 ret = jbd2_log_wait_commit(sbi->s_journal,
4741 target); 4746 target);
4742 } 4747 }
4743 } else if (wait && test_opt(sb, BARRIER)) 4748 } else if (wait && test_opt(sb, BARRIER))
4744 needs_barrier = true; 4749 needs_barrier = true;
4745 if (needs_barrier) { 4750 if (needs_barrier) {
4746 int err; 4751 int err;
4747 err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL); 4752 err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);
4748 if (!ret) 4753 if (!ret)
4749 ret = err; 4754 ret = err;
4750 } 4755 }
4751 4756
4752 return ret; 4757 return ret;
4753 } 4758 }
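
/*
 * The flush decision in ext4_sync_fs() above, reduced to its boolean core:
 * with a journal, issue our own flush only if the commit will not already
 * send one; without a journal, honour the "barrier" mount option. Inputs
 * are stand-ins for the journal and mount state.
 */
#include <stdbool.h>
#include <stdio.h>

static bool needs_barrier(bool wait, bool have_journal, bool journal_barrier,
                          bool commit_sends_barrier, bool barrier_mount_opt)
{
    if (have_journal)
        return wait && journal_barrier && !commit_sends_barrier;
    return wait && barrier_mount_opt;
}

int main(void)
{
    /* journalled, sync wait, commit already flushes: skip the extra flush */
    printf("%d\n", needs_barrier(true, true, true, true, true));    /* 0 */
    /* no journal, barriers on: call blkdev_issue_flush() ourselves */
    printf("%d\n", needs_barrier(true, false, false, false, true)); /* 1 */
    return 0;
}
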
4754 4759
4755 /* 4760 /*
4756 * LVM calls this function before a (read-only) snapshot is created. This 4761 * LVM calls this function before a (read-only) snapshot is created. This
4757 * gives us a chance to flush the journal completely and mark the fs clean. 4762 * gives us a chance to flush the journal completely and mark the fs clean.
4758 * 4763 *
4759 * Note that this function alone cannot bring the filesystem to a clean 4764 * Note that this function alone cannot bring the filesystem to a clean
4760 * state; it relies on the upper layer to stop all data & metadata 4765 * state; it relies on the upper layer to stop all data & metadata
4761 * modifications. 4766 * modifications.
4762 */ 4767 */
4763 static int ext4_freeze(struct super_block *sb) 4768 static int ext4_freeze(struct super_block *sb)
4764 { 4769 {
4765 int error = 0; 4770 int error = 0;
4766 journal_t *journal; 4771 journal_t *journal;
4767 4772
4768 if (sb->s_flags & MS_RDONLY) 4773 if (sb->s_flags & MS_RDONLY)
4769 return 0; 4774 return 0;
4770 4775
4771 journal = EXT4_SB(sb)->s_journal; 4776 journal = EXT4_SB(sb)->s_journal;
4772 4777
4773 if (journal) { 4778 if (journal) {
4774 /* Now we set up the journal barrier. */ 4779 /* Now we set up the journal barrier. */
4775 jbd2_journal_lock_updates(journal); 4780 jbd2_journal_lock_updates(journal);
4776 4781
4777 /* 4782 /*
4778 * Don't clear the needs_recovery flag if we failed to 4783 * Don't clear the needs_recovery flag if we failed to
4779 * flush the journal. 4784 * flush the journal.
4780 */ 4785 */
4781 error = jbd2_journal_flush(journal); 4786 error = jbd2_journal_flush(journal);
4782 if (error < 0) 4787 if (error < 0)
4783 goto out; 4788 goto out;
4784 } 4789 }
4785 4790
4786 /* Journal blocked and flushed, clear needs_recovery flag. */ 4791 /* Journal blocked and flushed, clear needs_recovery flag. */
4787 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 4792 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
4788 error = ext4_commit_super(sb, 1); 4793 error = ext4_commit_super(sb, 1);
4789 out: 4794 out:
4790 if (journal) 4795 if (journal)
4791 /* we rely on upper layer to stop further updates */ 4796 /* we rely on upper layer to stop further updates */
4792 jbd2_journal_unlock_updates(journal); 4797 jbd2_journal_unlock_updates(journal);
4793 return error; 4798 return error;
4794 } 4799 }
4795 4800
4796 /* 4801 /*
4797 * Called by LVM after the snapshot is done. We need to reset the RECOVER 4802 * Called by LVM after the snapshot is done. We need to reset the RECOVER
4798 * flag here, even though the filesystem is not technically dirty yet. 4803 * flag here, even though the filesystem is not technically dirty yet.
4799 */ 4804 */
4800 static int ext4_unfreeze(struct super_block *sb) 4805 static int ext4_unfreeze(struct super_block *sb)
4801 { 4806 {
4802 if (sb->s_flags & MS_RDONLY) 4807 if (sb->s_flags & MS_RDONLY)
4803 return 0; 4808 return 0;
4804 4809
4805 /* Reset the needs_recovery flag before the fs is unlocked. */ 4810 /* Reset the needs_recovery flag before the fs is unlocked. */
4806 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 4811 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
4807 ext4_commit_super(sb, 1); 4812 ext4_commit_super(sb, 1);
4808 return 0; 4813 return 0;
4809 } 4814 }
4810 4815
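Taken together, ext4_freeze() and ext4_unfreeze() bracket the snapshot window: the needs-recovery bit is cleared only once the journal is fully flushed, so the snapshot image mounts clean, and the bit is set again before writes can resume. A toy model of that handshake, under the stated assumption that the upper layer has quiesced all updates (every name below is invented for illustration):

#include <stdbool.h>
#include <stdio.h>

struct fs_state { bool needs_recovery; bool frozen; };

static void freeze(struct fs_state *fs)
{
        fs->frozen = true;          /* upper layer has stopped updates */
        fs->needs_recovery = false; /* journal flushed, image is clean */
}

static void unfreeze(struct fs_state *fs)
{
        fs->needs_recovery = true;  /* writes may dirty the fs again */
        fs->frozen = false;
}

int main(void)
{
        struct fs_state fs = { .needs_recovery = true, .frozen = false };
        freeze(&fs);
        printf("snapshot sees needs_recovery=%d\n", fs.needs_recovery);
        unfreeze(&fs);
        printf("after unfreeze needs_recovery=%d\n", fs.needs_recovery);
        return 0;
}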
4811 /* 4816 /*
4812 * Structure to save mount options for ext4_remount's benefit 4817 * Structure to save mount options for ext4_remount's benefit
4813 */ 4818 */
4814 struct ext4_mount_options { 4819 struct ext4_mount_options {
4815 unsigned long s_mount_opt; 4820 unsigned long s_mount_opt;
4816 unsigned long s_mount_opt2; 4821 unsigned long s_mount_opt2;
4817 kuid_t s_resuid; 4822 kuid_t s_resuid;
4818 kgid_t s_resgid; 4823 kgid_t s_resgid;
4819 unsigned long s_commit_interval; 4824 unsigned long s_commit_interval;
4820 u32 s_min_batch_time, s_max_batch_time; 4825 u32 s_min_batch_time, s_max_batch_time;
4821 #ifdef CONFIG_QUOTA 4826 #ifdef CONFIG_QUOTA
4822 int s_jquota_fmt; 4827 int s_jquota_fmt;
4823 char *s_qf_names[EXT4_MAXQUOTAS]; 4828 char *s_qf_names[EXT4_MAXQUOTAS];
4824 #endif 4829 #endif
4825 }; 4830 };
4826 4831
4827 static int ext4_remount(struct super_block *sb, int *flags, char *data) 4832 static int ext4_remount(struct super_block *sb, int *flags, char *data)
4828 { 4833 {
4829 struct ext4_super_block *es; 4834 struct ext4_super_block *es;
4830 struct ext4_sb_info *sbi = EXT4_SB(sb); 4835 struct ext4_sb_info *sbi = EXT4_SB(sb);
4831 unsigned long old_sb_flags; 4836 unsigned long old_sb_flags;
4832 struct ext4_mount_options old_opts; 4837 struct ext4_mount_options old_opts;
4833 int enable_quota = 0; 4838 int enable_quota = 0;
4834 ext4_group_t g; 4839 ext4_group_t g;
4835 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; 4840 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
4836 int err = 0; 4841 int err = 0;
4837 #ifdef CONFIG_QUOTA 4842 #ifdef CONFIG_QUOTA
4838 int i, j; 4843 int i, j;
4839 #endif 4844 #endif
4840 char *orig_data = kstrdup(data, GFP_KERNEL); 4845 char *orig_data = kstrdup(data, GFP_KERNEL);
4841 4846
4842 /* Store the original options */ 4847 /* Store the original options */
4843 old_sb_flags = sb->s_flags; 4848 old_sb_flags = sb->s_flags;
4844 old_opts.s_mount_opt = sbi->s_mount_opt; 4849 old_opts.s_mount_opt = sbi->s_mount_opt;
4845 old_opts.s_mount_opt2 = sbi->s_mount_opt2; 4850 old_opts.s_mount_opt2 = sbi->s_mount_opt2;
4846 old_opts.s_resuid = sbi->s_resuid; 4851 old_opts.s_resuid = sbi->s_resuid;
4847 old_opts.s_resgid = sbi->s_resgid; 4852 old_opts.s_resgid = sbi->s_resgid;
4848 old_opts.s_commit_interval = sbi->s_commit_interval; 4853 old_opts.s_commit_interval = sbi->s_commit_interval;
4849 old_opts.s_min_batch_time = sbi->s_min_batch_time; 4854 old_opts.s_min_batch_time = sbi->s_min_batch_time;
4850 old_opts.s_max_batch_time = sbi->s_max_batch_time; 4855 old_opts.s_max_batch_time = sbi->s_max_batch_time;
4851 #ifdef CONFIG_QUOTA 4856 #ifdef CONFIG_QUOTA
4852 old_opts.s_jquota_fmt = sbi->s_jquota_fmt; 4857 old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
4853 for (i = 0; i < EXT4_MAXQUOTAS; i++) 4858 for (i = 0; i < EXT4_MAXQUOTAS; i++)
4854 if (sbi->s_qf_names[i]) { 4859 if (sbi->s_qf_names[i]) {
4855 old_opts.s_qf_names[i] = kstrdup(sbi->s_qf_names[i], 4860 old_opts.s_qf_names[i] = kstrdup(sbi->s_qf_names[i],
4856 GFP_KERNEL); 4861 GFP_KERNEL);
4857 if (!old_opts.s_qf_names[i]) { 4862 if (!old_opts.s_qf_names[i]) {
4858 for (j = 0; j < i; j++) 4863 for (j = 0; j < i; j++)
4859 kfree(old_opts.s_qf_names[j]); 4864 kfree(old_opts.s_qf_names[j]);
4860 kfree(orig_data); 4865 kfree(orig_data);
4861 return -ENOMEM; 4866 return -ENOMEM;
4862 } 4867 }
4863 } else 4868 } else
4864 old_opts.s_qf_names[i] = NULL; 4869 old_opts.s_qf_names[i] = NULL;
4865 #endif 4870 #endif
4866 if (sbi->s_journal && sbi->s_journal->j_task->io_context) 4871 if (sbi->s_journal && sbi->s_journal->j_task->io_context)
4867 journal_ioprio = sbi->s_journal->j_task->io_context->ioprio; 4872 journal_ioprio = sbi->s_journal->j_task->io_context->ioprio;
4868 4873
4869 /*
4870 * Allow the "check" option to be passed as a remount option.
4871 */
4872 if (!parse_options(data, sb, NULL, &journal_ioprio, 1)) { 4874 if (!parse_options(data, sb, NULL, &journal_ioprio, 1)) {
4873 err = -EINVAL; 4875 err = -EINVAL;
4874 goto restore_opts; 4876 goto restore_opts;
4875 } 4877 }
4876 4878
4877 if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^ 4879 if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
4878 test_opt(sb, JOURNAL_CHECKSUM)) { 4880 test_opt(sb, JOURNAL_CHECKSUM)) {
4879 ext4_msg(sb, KERN_ERR, "changing journal_checksum " 4881 ext4_msg(sb, KERN_ERR, "changing journal_checksum "
4880 "during remount not supported"); 4882 "during remount not supported; ignoring");
4881 err = -EINVAL; 4883 sbi->s_mount_opt ^= EXT4_MOUNT_JOURNAL_CHECKSUM;
4882 goto restore_opts;
4883 } 4884 }
4884 4885
4885 if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
4886 test_opt(sb, JOURNAL_CHECKSUM)) {
4887 ext4_msg(sb, KERN_ERR, "changing journal_checksum "
4888 "during remount not supported");
4889 err = -EINVAL;
4890 goto restore_opts;
4891 }
4892
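The replacement hunk above no longer fails the remount; it warns and flips the journal_checksum bit back with an XOR. That works because the enclosing test already established that exactly one side of old^new has the bit set, so XOR-ing it into the live options restores the old value. A standalone sketch of the bit-revert trick (the bit value is made up, not ext4's real flag):

#include <stdio.h>

#define OPT_JOURNAL_CHECKSUM 0x0400u  /* illustrative bit value */

/* If old and new disagree on this option, flip it back to the old value. */
static unsigned revert_option(unsigned old_opts, unsigned new_opts,
                              unsigned bit)
{
        if ((old_opts ^ new_opts) & bit)
                new_opts ^= bit;
        return new_opts;
}

int main(void)
{
        unsigned old_opts = 0;                    /* checksum was off */
        unsigned new_opts = OPT_JOURNAL_CHECKSUM; /* remount asked for on */

        printf("0x%x\n", revert_option(old_opts, new_opts,
                                       OPT_JOURNAL_CHECKSUM)); /* 0x0 */
        return 0;
}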
4893 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { 4886 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
4894 if (test_opt2(sb, EXPLICIT_DELALLOC)) { 4887 if (test_opt2(sb, EXPLICIT_DELALLOC)) {
4895 ext4_msg(sb, KERN_ERR, "can't mount with " 4888 ext4_msg(sb, KERN_ERR, "can't mount with "
4896 "both data=journal and delalloc"); 4889 "both data=journal and delalloc");
4897 err = -EINVAL; 4890 err = -EINVAL;
4898 goto restore_opts; 4891 goto restore_opts;
4899 } 4892 }
4900 if (test_opt(sb, DIOREAD_NOLOCK)) { 4893 if (test_opt(sb, DIOREAD_NOLOCK)) {
4901 ext4_msg(sb, KERN_ERR, "can't mount with " 4894 ext4_msg(sb, KERN_ERR, "can't mount with "
4902 "both data=journal and dioread_nolock"); 4895 "both data=journal and dioread_nolock");
4903 err = -EINVAL; 4896 err = -EINVAL;
4904 goto restore_opts; 4897 goto restore_opts;
4905 } 4898 }
4906 if (test_opt(sb, DAX)) { 4899 if (test_opt(sb, DAX)) {
4907 ext4_msg(sb, KERN_ERR, "can't mount with " 4900 ext4_msg(sb, KERN_ERR, "can't mount with "
4908 "both data=journal and dax"); 4901 "both data=journal and dax");
4909 err = -EINVAL; 4902 err = -EINVAL;
4910 goto restore_opts; 4903 goto restore_opts;
4911 } 4904 }
4912 } 4905 }
4913 4906
4914 if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX) { 4907 if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX) {
4915 ext4_msg(sb, KERN_WARNING, "warning: refusing change of " 4908 ext4_msg(sb, KERN_WARNING, "warning: refusing change of "
4916 "dax flag with busy inodes while remounting"); 4909 "dax flag with busy inodes while remounting");
4917 sbi->s_mount_opt ^= EXT4_MOUNT_DAX; 4910 sbi->s_mount_opt ^= EXT4_MOUNT_DAX;
4918 } 4911 }
4919 4912
4920 if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) 4913 if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
4921 ext4_abort(sb, "Abort forced by user"); 4914 ext4_abort(sb, "Abort forced by user");
4922 4915
4923 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 4916 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
4924 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); 4917 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
4925 4918
4926 es = sbi->s_es; 4919 es = sbi->s_es;
4927 4920
4928 if (sbi->s_journal) { 4921 if (sbi->s_journal) {
4929 ext4_init_journal_params(sb, sbi->s_journal); 4922 ext4_init_journal_params(sb, sbi->s_journal);
4930 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); 4923 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
4931 } 4924 }
4932 4925
4933 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) { 4926 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
4934 if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) { 4927 if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) {
4935 err = -EROFS; 4928 err = -EROFS;
4936 goto restore_opts; 4929 goto restore_opts;
4937 } 4930 }
4938 4931
4939 if (*flags & MS_RDONLY) { 4932 if (*flags & MS_RDONLY) {
4940 err = sync_filesystem(sb); 4933 err = sync_filesystem(sb);
4941 if (err < 0) 4934 if (err < 0)
4942 goto restore_opts; 4935 goto restore_opts;
4943 err = dquot_suspend(sb, -1); 4936 err = dquot_suspend(sb, -1);
4944 if (err < 0) 4937 if (err < 0)
4945 goto restore_opts; 4938 goto restore_opts;
4946 4939
4947 /* 4940 /*
4948 * First, the unconditional steps we have to take 4941 * First, the unconditional steps we have to take
4949 * to disable replay of the journal when we next remount 4942 * to disable replay of the journal when we next remount
4950 */ 4943 */
4951 sb->s_flags |= MS_RDONLY; 4944 sb->s_flags |= MS_RDONLY;
4952 4945
4953 /* 4946 /*
4954 * OK, test if we are remounting a valid rw partition 4947 * OK, test if we are remounting a valid rw partition
4955 * readonly, and if so set the rdonly flag and then 4948 * readonly, and if so set the rdonly flag and then
4956 * mark the partition as valid again. 4949 * mark the partition as valid again.
4957 */ 4950 */
4958 if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) && 4951 if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) &&
4959 (sbi->s_mount_state & EXT4_VALID_FS)) 4952 (sbi->s_mount_state & EXT4_VALID_FS))
4960 es->s_state = cpu_to_le16(sbi->s_mount_state); 4953 es->s_state = cpu_to_le16(sbi->s_mount_state);
4961 4954
4962 if (sbi->s_journal) 4955 if (sbi->s_journal)
4963 ext4_mark_recovery_complete(sb, es); 4956 ext4_mark_recovery_complete(sb, es);
4964 } else { 4957 } else {
4965 /* Make sure we can mount this feature set readwrite */ 4958 /* Make sure we can mount this feature set readwrite */
4966 if (!ext4_feature_set_ok(sb, 0)) { 4959 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
4960 EXT4_FEATURE_RO_COMPAT_READONLY) ||
4961 !ext4_feature_set_ok(sb, 0)) {
4967 err = -EROFS; 4962 err = -EROFS;
4968 goto restore_opts; 4963 goto restore_opts;
4969 } 4964 }
4970 /* 4965 /*
4971 * Make sure the group descriptor checksums 4966 * Make sure the group descriptor checksums
4972 * are sane. If they aren't, refuse to remount r/w. 4967 * are sane. If they aren't, refuse to remount r/w.
4973 */ 4968 */
4974 for (g = 0; g < sbi->s_groups_count; g++) { 4969 for (g = 0; g < sbi->s_groups_count; g++) {
4975 struct ext4_group_desc *gdp = 4970 struct ext4_group_desc *gdp =
4976 ext4_get_group_desc(sb, g, NULL); 4971 ext4_get_group_desc(sb, g, NULL);
4977 4972
4978 if (!ext4_group_desc_csum_verify(sb, g, gdp)) { 4973 if (!ext4_group_desc_csum_verify(sb, g, gdp)) {
4979 ext4_msg(sb, KERN_ERR, 4974 ext4_msg(sb, KERN_ERR,
4980 "ext4_remount: Checksum for group %u failed (%u!=%u)", 4975 "ext4_remount: Checksum for group %u failed (%u!=%u)",
4981 g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)), 4976 g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)),
4982 le16_to_cpu(gdp->bg_checksum)); 4977 le16_to_cpu(gdp->bg_checksum));
4983 err = -EINVAL; 4978 err = -EINVAL;
4984 goto restore_opts; 4979 goto restore_opts;
4985 } 4980 }
4986 } 4981 }
4987 4982
4988 /* 4983 /*
4989 * If we have an unprocessed orphan list hanging 4984 * If we have an unprocessed orphan list hanging
4990 * around from a previously readonly bdev mount, 4985 * around from a previously readonly bdev mount,
4991 * require a full umount/remount for now. 4986 * require a full umount/remount for now.
4992 */ 4987 */
4993 if (es->s_last_orphan) { 4988 if (es->s_last_orphan) {
4994 ext4_msg(sb, KERN_WARNING, "Couldn't " 4989 ext4_msg(sb, KERN_WARNING, "Couldn't "
4995 "remount RDWR because of unprocessed " 4990 "remount RDWR because of unprocessed "
4996 "orphan inode list. Please " 4991 "orphan inode list. Please "
4997 "umount/remount instead"); 4992 "umount/remount instead");
4998 err = -EINVAL; 4993 err = -EINVAL;
4999 goto restore_opts; 4994 goto restore_opts;
5000 } 4995 }
5001 4996
5002 /* 4997 /*
5003 * Mounting a RDONLY partition read-write, so reread 4998 * Mounting a RDONLY partition read-write, so reread
5004 * and store the current valid flag. (It may have 4999 * and store the current valid flag. (It may have
5005 * been changed by e2fsck since we originally mounted 5000 * been changed by e2fsck since we originally mounted
5006 * the partition.) 5001 * the partition.)
5007 */ 5002 */
5008 if (sbi->s_journal) 5003 if (sbi->s_journal)
5009 ext4_clear_journal_err(sb, es); 5004 ext4_clear_journal_err(sb, es);
5010 sbi->s_mount_state = le16_to_cpu(es->s_state); 5005 sbi->s_mount_state = le16_to_cpu(es->s_state);
5011 if (!ext4_setup_super(sb, es, 0)) 5006 if (!ext4_setup_super(sb, es, 0))
5012 sb->s_flags &= ~MS_RDONLY; 5007 sb->s_flags &= ~MS_RDONLY;
5013 if (EXT4_HAS_INCOMPAT_FEATURE(sb, 5008 if (EXT4_HAS_INCOMPAT_FEATURE(sb,
5014 EXT4_FEATURE_INCOMPAT_MMP)) 5009 EXT4_FEATURE_INCOMPAT_MMP))
5015 if (ext4_multi_mount_protect(sb, 5010 if (ext4_multi_mount_protect(sb,
5016 le64_to_cpu(es->s_mmp_block))) { 5011 le64_to_cpu(es->s_mmp_block))) {
5017 err = -EROFS; 5012 err = -EROFS;
5018 goto restore_opts; 5013 goto restore_opts;
5019 } 5014 }
5020 enable_quota = 1; 5015 enable_quota = 1;
5021 } 5016 }
5022 } 5017 }
5023 5018
5024 /* 5019 /*
5025 * Reinitialize lazy itable initialization thread based on 5020 * Reinitialize lazy itable initialization thread based on
5026 * current settings 5021 * current settings
5027 */ 5022 */
5028 if ((sb->s_flags & MS_RDONLY) || !test_opt(sb, INIT_INODE_TABLE)) 5023 if ((sb->s_flags & MS_RDONLY) || !test_opt(sb, INIT_INODE_TABLE))
5029 ext4_unregister_li_request(sb); 5024 ext4_unregister_li_request(sb);
5030 else { 5025 else {
5031 ext4_group_t first_not_zeroed; 5026 ext4_group_t first_not_zeroed;
5032 first_not_zeroed = ext4_has_uninit_itable(sb); 5027 first_not_zeroed = ext4_has_uninit_itable(sb);
5033 ext4_register_li_request(sb, first_not_zeroed); 5028 ext4_register_li_request(sb, first_not_zeroed);
5034 } 5029 }
5035 5030
5036 ext4_setup_system_zone(sb); 5031 ext4_setup_system_zone(sb);
5037 if (sbi->s_journal == NULL && !(old_sb_flags & MS_RDONLY)) 5032 if (sbi->s_journal == NULL && !(old_sb_flags & MS_RDONLY))
5038 ext4_commit_super(sb, 1); 5033 ext4_commit_super(sb, 1);
5039 5034
5040 #ifdef CONFIG_QUOTA 5035 #ifdef CONFIG_QUOTA
5041 /* Release old quota file names */ 5036 /* Release old quota file names */
5042 for (i = 0; i < EXT4_MAXQUOTAS; i++) 5037 for (i = 0; i < EXT4_MAXQUOTAS; i++)
5043 kfree(old_opts.s_qf_names[i]); 5038 kfree(old_opts.s_qf_names[i]);
5044 if (enable_quota) { 5039 if (enable_quota) {
5045 if (sb_any_quota_suspended(sb)) 5040 if (sb_any_quota_suspended(sb))
5046 dquot_resume(sb, -1); 5041 dquot_resume(sb, -1);
5047 else if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 5042 else if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
5048 EXT4_FEATURE_RO_COMPAT_QUOTA)) { 5043 EXT4_FEATURE_RO_COMPAT_QUOTA)) {
5049 err = ext4_enable_quotas(sb); 5044 err = ext4_enable_quotas(sb);
5050 if (err) 5045 if (err)
5051 goto restore_opts; 5046 goto restore_opts;
5052 } 5047 }
5053 } 5048 }
5054 #endif 5049 #endif
5055 5050
5056 *flags = (*flags & ~MS_LAZYTIME) | (sb->s_flags & MS_LAZYTIME); 5051 *flags = (*flags & ~MS_LAZYTIME) | (sb->s_flags & MS_LAZYTIME);
5057 ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data); 5052 ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data);
5058 kfree(orig_data); 5053 kfree(orig_data);
5059 return 0; 5054 return 0;
5060 5055
5061 restore_opts: 5056 restore_opts:
5062 sb->s_flags = old_sb_flags; 5057 sb->s_flags = old_sb_flags;
5063 sbi->s_mount_opt = old_opts.s_mount_opt; 5058 sbi->s_mount_opt = old_opts.s_mount_opt;
5064 sbi->s_mount_opt2 = old_opts.s_mount_opt2; 5059 sbi->s_mount_opt2 = old_opts.s_mount_opt2;
5065 sbi->s_resuid = old_opts.s_resuid; 5060 sbi->s_resuid = old_opts.s_resuid;
5066 sbi->s_resgid = old_opts.s_resgid; 5061 sbi->s_resgid = old_opts.s_resgid;
5067 sbi->s_commit_interval = old_opts.s_commit_interval; 5062 sbi->s_commit_interval = old_opts.s_commit_interval;
5068 sbi->s_min_batch_time = old_opts.s_min_batch_time; 5063 sbi->s_min_batch_time = old_opts.s_min_batch_time;
5069 sbi->s_max_batch_time = old_opts.s_max_batch_time; 5064 sbi->s_max_batch_time = old_opts.s_max_batch_time;
5070 #ifdef CONFIG_QUOTA 5065 #ifdef CONFIG_QUOTA
5071 sbi->s_jquota_fmt = old_opts.s_jquota_fmt; 5066 sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
5072 for (i = 0; i < EXT4_MAXQUOTAS; i++) { 5067 for (i = 0; i < EXT4_MAXQUOTAS; i++) {
5073 kfree(sbi->s_qf_names[i]); 5068 kfree(sbi->s_qf_names[i]);
5074 sbi->s_qf_names[i] = old_opts.s_qf_names[i]; 5069 sbi->s_qf_names[i] = old_opts.s_qf_names[i];
5075 } 5070 }
5076 #endif 5071 #endif
5077 kfree(orig_data); 5072 kfree(orig_data);
5078 return err; 5073 return err;
5079 } 5074 }
5080 5075
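ext4_remount() as a whole follows a snapshot-and-rollback shape: every tunable is copied into old_opts up front, and every error path funnels through restore_opts so a rejected remount leaves the mount exactly as it was. The same shape in a few lines of standalone C (the struct fields and the -22/-EINVAL stand-in are illustrative):

#include <stdio.h>

struct opts { unsigned mount_opt; unsigned commit_interval; };

/* Hypothetical option application that may fail partway through. */
static int apply_new_opts(struct opts *o, int should_fail)
{
        o->mount_opt = 0xabcd;
        o->commit_interval = 1;
        return should_fail ? -22 : 0;   /* -EINVAL stand-in */
}

static int remount_sketch(struct opts *live, int should_fail)
{
        struct opts saved = *live;      /* "Store the original options" */
        int err = apply_new_opts(live, should_fail);

        if (err)
                *live = saved;          /* restore_opts: roll back */
        return err;
}

int main(void)
{
        struct opts live = { 0x1, 5 };
        remount_sketch(&live, 1);
        printf("opt=0x%x interval=%u\n", live.mount_opt,
               live.commit_interval);   /* unchanged: 0x1, 5 */
        return 0;
}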
5081 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) 5076 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
5082 { 5077 {
5083 struct super_block *sb = dentry->d_sb; 5078 struct super_block *sb = dentry->d_sb;
5084 struct ext4_sb_info *sbi = EXT4_SB(sb); 5079 struct ext4_sb_info *sbi = EXT4_SB(sb);
5085 struct ext4_super_block *es = sbi->s_es; 5080 struct ext4_super_block *es = sbi->s_es;
5086 ext4_fsblk_t overhead = 0, resv_blocks; 5081 ext4_fsblk_t overhead = 0, resv_blocks;
5087 u64 fsid; 5082 u64 fsid;
5088 s64 bfree; 5083 s64 bfree;
5089 resv_blocks = EXT4_C2B(sbi, atomic64_read(&sbi->s_resv_clusters)); 5084 resv_blocks = EXT4_C2B(sbi, atomic64_read(&sbi->s_resv_clusters));
5090 5085
5091 if (!test_opt(sb, MINIX_DF)) 5086 if (!test_opt(sb, MINIX_DF))
5092 overhead = sbi->s_overhead; 5087 overhead = sbi->s_overhead;
5093 5088
5094 buf->f_type = EXT4_SUPER_MAGIC; 5089 buf->f_type = EXT4_SUPER_MAGIC;
5095 buf->f_bsize = sb->s_blocksize; 5090 buf->f_bsize = sb->s_blocksize;
5096 buf->f_blocks = ext4_blocks_count(es) - EXT4_C2B(sbi, overhead); 5091 buf->f_blocks = ext4_blocks_count(es) - EXT4_C2B(sbi, overhead);
5097 bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) - 5092 bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) -
5098 percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter); 5093 percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter);
5099 /* prevent underflow in case little free space is available */ 5094 /* prevent underflow in case little free space is available */
5100 buf->f_bfree = EXT4_C2B(sbi, max_t(s64, bfree, 0)); 5095 buf->f_bfree = EXT4_C2B(sbi, max_t(s64, bfree, 0));
5101 buf->f_bavail = buf->f_bfree - 5096 buf->f_bavail = buf->f_bfree -
5102 (ext4_r_blocks_count(es) + resv_blocks); 5097 (ext4_r_blocks_count(es) + resv_blocks);
5103 if (buf->f_bfree < (ext4_r_blocks_count(es) + resv_blocks)) 5098 if (buf->f_bfree < (ext4_r_blocks_count(es) + resv_blocks))
5104 buf->f_bavail = 0; 5099 buf->f_bavail = 0;
5105 buf->f_files = le32_to_cpu(es->s_inodes_count); 5100 buf->f_files = le32_to_cpu(es->s_inodes_count);
5106 buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter); 5101 buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
5107 buf->f_namelen = EXT4_NAME_LEN; 5102 buf->f_namelen = EXT4_NAME_LEN;
5108 fsid = le64_to_cpup((void *)es->s_uuid) ^ 5103 fsid = le64_to_cpup((void *)es->s_uuid) ^
5109 le64_to_cpup((void *)es->s_uuid + sizeof(u64)); 5104 le64_to_cpup((void *)es->s_uuid + sizeof(u64));
5110 buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; 5105 buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
5111 buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; 5106 buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
5112 5107
5113 return 0; 5108 return 0;
5114 } 5109 }
5115 5110
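Two clamps in ext4_statfs() guard the unsigned statfs fields: free clusters are summed as a signed value and floored at zero, and f_bavail is zeroed when the reserved pool exceeds what is actually free. A self-contained sketch of both guards, with made-up numbers:

#include <stdint.h>
#include <stdio.h>

/* Free-minus-reserved, clamped at zero so unsigned math cannot wrap. */
static uint64_t bavail(int64_t bfree_signed, uint64_t reserved)
{
        uint64_t bfree = bfree_signed > 0 ? (uint64_t)bfree_signed : 0;

        if (bfree < reserved)
                return 0;
        return bfree - reserved;
}

int main(void)
{
        printf("%llu\n", (unsigned long long)bavail(1000, 50)); /* 950 */
        printf("%llu\n", (unsigned long long)bavail(30, 50));   /* 0 */
        printf("%llu\n", (unsigned long long)bavail(-4, 50));   /* 0 */
        return 0;
}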
5116 /* Helper function for writing quotas on sync - we need to start a transaction 5111 /* Helper function for writing quotas on sync - we need to start a transaction
5117 * before the quota file is locked for write. Otherwise these deadlocks are possible: 5112 * before the quota file is locked for write. Otherwise these deadlocks are possible:
5118 * Process 1 Process 2 5113 * Process 1 Process 2
5119 * ext4_create() quota_sync() 5114 * ext4_create() quota_sync()
5120 * jbd2_journal_start() write_dquot() 5115 * jbd2_journal_start() write_dquot()
5121 * dquot_initialize() down(dqio_mutex) 5116 * dquot_initialize() down(dqio_mutex)
5122 * down(dqio_mutex) jbd2_journal_start() 5117 * down(dqio_mutex) jbd2_journal_start()
5123 * 5118 *
5124 */ 5119 */
5125 5120
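The two-column diagram above is a classic ABBA deadlock: each process holds one lock while waiting for the other. The cure used by the quota hooks below is a fixed acquisition order: the journal handle is always started before dqio_mutex is taken. Illustrated with two plain pthread mutexes standing in for the journal and dqio_mutex:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t journal = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t dqio    = PTHREAD_MUTEX_INITIALIZER;

/* Every path takes journal first, dqio second: no ABBA cycle can form. */
static void write_dquot_ordered(void)
{
        pthread_mutex_lock(&journal);   /* jbd2_journal_start() first ... */
        pthread_mutex_lock(&dqio);      /* ... then down(dqio_mutex) */
        /* ... update the quota file ... */
        pthread_mutex_unlock(&dqio);
        pthread_mutex_unlock(&journal);
}

int main(void)
{
        write_dquot_ordered();
        puts("both paths honour journal -> dqio ordering; no deadlock");
        return 0;
}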
5126 #ifdef CONFIG_QUOTA 5121 #ifdef CONFIG_QUOTA
5127 5122
5128 static inline struct inode *dquot_to_inode(struct dquot *dquot) 5123 static inline struct inode *dquot_to_inode(struct dquot *dquot)
5129 { 5124 {
5130 return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type]; 5125 return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type];
5131 } 5126 }
5132 5127
5133 static int ext4_write_dquot(struct dquot *dquot) 5128 static int ext4_write_dquot(struct dquot *dquot)
5134 { 5129 {
5135 int ret, err; 5130 int ret, err;
5136 handle_t *handle; 5131 handle_t *handle;
5137 struct inode *inode; 5132 struct inode *inode;
5138 5133
5139 inode = dquot_to_inode(dquot); 5134 inode = dquot_to_inode(dquot);
5140 handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 5135 handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
5141 EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); 5136 EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
5142 if (IS_ERR(handle)) 5137 if (IS_ERR(handle))
5143 return PTR_ERR(handle); 5138 return PTR_ERR(handle);
5144 ret = dquot_commit(dquot); 5139 ret = dquot_commit(dquot);
5145 err = ext4_journal_stop(handle); 5140 err = ext4_journal_stop(handle);
5146 if (!ret) 5141 if (!ret)
5147 ret = err; 5142 ret = err;
5148 return ret; 5143 return ret;
5149 } 5144 }
5150 5145
5151 static int ext4_acquire_dquot(struct dquot *dquot) 5146 static int ext4_acquire_dquot(struct dquot *dquot)
5152 { 5147 {
5153 int ret, err; 5148 int ret, err;
5154 handle_t *handle; 5149 handle_t *handle;
5155 5150
5156 handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA, 5151 handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
5157 EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb)); 5152 EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
5158 if (IS_ERR(handle)) 5153 if (IS_ERR(handle))
5159 return PTR_ERR(handle); 5154 return PTR_ERR(handle);
5160 ret = dquot_acquire(dquot); 5155 ret = dquot_acquire(dquot);
5161 err = ext4_journal_stop(handle); 5156 err = ext4_journal_stop(handle);
5162 if (!ret) 5157 if (!ret)
5163 ret = err; 5158 ret = err;
5164 return ret; 5159 return ret;
5165 } 5160 }
5166 5161
5167 static int ext4_release_dquot(struct dquot *dquot) 5162 static int ext4_release_dquot(struct dquot *dquot)
5168 { 5163 {
5169 int ret, err; 5164 int ret, err;
5170 handle_t *handle; 5165 handle_t *handle;
5171 5166
5172 handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA, 5167 handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
5173 EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb)); 5168 EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
5174 if (IS_ERR(handle)) { 5169 if (IS_ERR(handle)) {
5175 /* Release dquot anyway to avoid endless cycle in dqput() */ 5170 /* Release dquot anyway to avoid endless cycle in dqput() */
5176 dquot_release(dquot); 5171 dquot_release(dquot);
5177 return PTR_ERR(handle); 5172 return PTR_ERR(handle);
5178 } 5173 }
5179 ret = dquot_release(dquot); 5174 ret = dquot_release(dquot);
5180 err = ext4_journal_stop(handle); 5175 err = ext4_journal_stop(handle);
5181 if (!ret) 5176 if (!ret)
5182 ret = err; 5177 ret = err;
5183 return ret; 5178 return ret;
5184 } 5179 }
5185 5180
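ext4_write_dquot(), ext4_acquire_dquot() and ext4_release_dquot() share one wrapper shape: start a journal handle, run the quota operation, stop the handle, merge the two errors. The release path adds a twist: if the handle cannot be started, the dquot is released anyway so dqput() does not spin forever. A userspace sketch of that shape — handle_t and the helpers below are stand-ins, not the jbd2 API:

#include <stdio.h>

typedef struct { int unused; } handle_t;

static handle_t *journal_start(int fail)
{
        static handle_t h;
        return fail ? NULL : &h;
}

static int journal_stop(handle_t *h) { (void)h; return 0; }
static int quota_op(void) { return 0; }

static int wrapped_quota_op(int start_fails)
{
        handle_t *handle = journal_start(start_fails);
        int ret, err;

        if (!handle) {
                quota_op();     /* run the op anyway, as in ext4_release_dquot() */
                return -12;     /* stand-in for PTR_ERR(handle) */
        }
        ret = quota_op();
        err = journal_stop(handle);
        if (!ret)
                ret = err;      /* first error wins, as elsewhere in this file */
        return ret;
}

int main(void)
{
        printf("%d %d\n", wrapped_quota_op(0), wrapped_quota_op(1));
        return 0;
}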
5186 static int ext4_mark_dquot_dirty(struct dquot *dquot) 5181 static int ext4_mark_dquot_dirty(struct dquot *dquot)
5187 { 5182 {
5188 struct super_block *sb = dquot->dq_sb; 5183 struct super_block *sb = dquot->dq_sb;
5189 struct ext4_sb_info *sbi = EXT4_SB(sb); 5184 struct ext4_sb_info *sbi = EXT4_SB(sb);
5190 5185
5191 /* Are we journaling quotas? */ 5186 /* Are we journaling quotas? */
5192 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) || 5187 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) ||
5193 sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { 5188 sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
5194 dquot_mark_dquot_dirty(dquot); 5189 dquot_mark_dquot_dirty(dquot);
5195 return ext4_write_dquot(dquot); 5190 return ext4_write_dquot(dquot);
5196 } else { 5191 } else {
5197 return dquot_mark_dquot_dirty(dquot); 5192 return dquot_mark_dquot_dirty(dquot);
5198 } 5193 }
5199 } 5194 }
5200 5195
5201 static int ext4_write_info(struct super_block *sb, int type) 5196 static int ext4_write_info(struct super_block *sb, int type)
5202 { 5197 {
5203 int ret, err; 5198 int ret, err;
5204 handle_t *handle; 5199 handle_t *handle;
5205 5200
5206 /* Data block + inode block */ 5201 /* Data block + inode block */
5207 handle = ext4_journal_start(sb->s_root->d_inode, EXT4_HT_QUOTA, 2); 5202 handle = ext4_journal_start(sb->s_root->d_inode, EXT4_HT_QUOTA, 2);
5208 if (IS_ERR(handle)) 5203 if (IS_ERR(handle))
5209 return PTR_ERR(handle); 5204 return PTR_ERR(handle);
5210 ret = dquot_commit_info(sb, type); 5205 ret = dquot_commit_info(sb, type);
5211 err = ext4_journal_stop(handle); 5206 err = ext4_journal_stop(handle);
5212 if (!ret) 5207 if (!ret)
5213 ret = err; 5208 ret = err;
5214 return ret; 5209 return ret;
5215 } 5210 }
5216 5211
5217 /* 5212 /*
5218 * Turn on quotas during mount time - we need to find 5213 * Turn on quotas during mount time - we need to find
5219 * the quota file and such... 5214 * the quota file and such...
5220 */ 5215 */
5221 static int ext4_quota_on_mount(struct super_block *sb, int type) 5216 static int ext4_quota_on_mount(struct super_block *sb, int type)
5222 { 5217 {
5223 return dquot_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type], 5218 return dquot_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type],
5224 EXT4_SB(sb)->s_jquota_fmt, type); 5219 EXT4_SB(sb)->s_jquota_fmt, type);
5225 } 5220 }
5226 5221
5227 /* 5222 /*
5228 * Standard function to be called on quota_on 5223 * Standard function to be called on quota_on
5229 */ 5224 */
5230 static int ext4_quota_on(struct super_block *sb, int type, int format_id, 5225 static int ext4_quota_on(struct super_block *sb, int type, int format_id,
5231 struct path *path) 5226 struct path *path)
5232 { 5227 {
5233 int err; 5228 int err;
5234 5229
5235 if (!test_opt(sb, QUOTA)) 5230 if (!test_opt(sb, QUOTA))
5236 return -EINVAL; 5231 return -EINVAL;
5237 5232
5238 /* Quotafile not on the same filesystem? */ 5233 /* Quotafile not on the same filesystem? */
5239 if (path->dentry->d_sb != sb) 5234 if (path->dentry->d_sb != sb)
5240 return -EXDEV; 5235 return -EXDEV;
5241 /* Journaling quota? */ 5236 /* Journaling quota? */
5242 if (EXT4_SB(sb)->s_qf_names[type]) { 5237 if (EXT4_SB(sb)->s_qf_names[type]) {
5243 /* Quotafile not in fs root? */ 5238 /* Quotafile not in fs root? */
5244 if (path->dentry->d_parent != sb->s_root) 5239 if (path->dentry->d_parent != sb->s_root)
5245 ext4_msg(sb, KERN_WARNING, 5240 ext4_msg(sb, KERN_WARNING,
5246 "Quota file not on filesystem root. " 5241 "Quota file not on filesystem root. "
5247 "Journaled quota will not work"); 5242 "Journaled quota will not work");
5248 } 5243 }
5249 5244
5250 /* 5245 /*
5251 * When we journal data on quota file, we have to flush journal to see 5246 * When we journal data on quota file, we have to flush journal to see
5252 * all updates to the file when we bypass pagecache... 5247 * all updates to the file when we bypass pagecache...
5253 */ 5248 */
5254 if (EXT4_SB(sb)->s_journal && 5249 if (EXT4_SB(sb)->s_journal &&
5255 ext4_should_journal_data(path->dentry->d_inode)) { 5250 ext4_should_journal_data(path->dentry->d_inode)) {
5256 /* 5251 /*
5257 * We don't need to lock updates but journal_flush() could 5252 * We don't need to lock updates but journal_flush() could
5258 * otherwise be livelocked... 5253 * otherwise be livelocked...
5259 */ 5254 */
5260 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); 5255 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
5261 err = jbd2_journal_flush(EXT4_SB(sb)->s_journal); 5256 err = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
5262 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 5257 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
5263 if (err) 5258 if (err)
5264 return err; 5259 return err;
5265 } 5260 }
5266 5261
5267 return dquot_quota_on(sb, type, format_id, path); 5262 return dquot_quota_on(sb, type, format_id, path);
5268 } 5263 }
5269 5264
5270 static int ext4_quota_enable(struct super_block *sb, int type, int format_id, 5265 static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
5271 unsigned int flags) 5266 unsigned int flags)
5272 { 5267 {
5273 int err; 5268 int err;
5274 struct inode *qf_inode; 5269 struct inode *qf_inode;
5275 unsigned long qf_inums[EXT4_MAXQUOTAS] = { 5270 unsigned long qf_inums[EXT4_MAXQUOTAS] = {
5276 le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum), 5271 le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
5277 le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum) 5272 le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
5278 }; 5273 };
5279 5274
5280 BUG_ON(!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)); 5275 BUG_ON(!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA));
5281 5276
5282 if (!qf_inums[type]) 5277 if (!qf_inums[type])
5283 return -EPERM; 5278 return -EPERM;
5284 5279
5285 qf_inode = ext4_iget(sb, qf_inums[type]); 5280 qf_inode = ext4_iget(sb, qf_inums[type]);
5286 if (IS_ERR(qf_inode)) { 5281 if (IS_ERR(qf_inode)) {
5287 ext4_error(sb, "Bad quota inode # %lu", qf_inums[type]); 5282 ext4_error(sb, "Bad quota inode # %lu", qf_inums[type]);
5288 return PTR_ERR(qf_inode); 5283 return PTR_ERR(qf_inode);
5289 } 5284 }
5290 5285
5291 /* Don't account quota for quota files to avoid recursion */ 5286 /* Don't account quota for quota files to avoid recursion */
5292 qf_inode->i_flags |= S_NOQUOTA; 5287 qf_inode->i_flags |= S_NOQUOTA;
5293 err = dquot_enable(qf_inode, type, format_id, flags); 5288 err = dquot_enable(qf_inode, type, format_id, flags);
5294 iput(qf_inode); 5289 iput(qf_inode);
5295 5290
5296 return err; 5291 return err;
5297 } 5292 }
5298 5293
5299 /* Enable usage tracking for all quota types. */ 5294 /* Enable usage tracking for all quota types. */
5300 static int ext4_enable_quotas(struct super_block *sb) 5295 static int ext4_enable_quotas(struct super_block *sb)
5301 { 5296 {
5302 int type, err = 0; 5297 int type, err = 0;
5303 unsigned long qf_inums[EXT4_MAXQUOTAS] = { 5298 unsigned long qf_inums[EXT4_MAXQUOTAS] = {
5304 le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum), 5299 le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
5305 le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum) 5300 le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
5306 }; 5301 };
5307 5302
5308 sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE; 5303 sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
5309 for (type = 0; type < EXT4_MAXQUOTAS; type++) { 5304 for (type = 0; type < EXT4_MAXQUOTAS; type++) {
5310 if (qf_inums[type]) { 5305 if (qf_inums[type]) {
5311 err = ext4_quota_enable(sb, type, QFMT_VFS_V1, 5306 err = ext4_quota_enable(sb, type, QFMT_VFS_V1,
5312 DQUOT_USAGE_ENABLED); 5307 DQUOT_USAGE_ENABLED);
5313 if (err) { 5308 if (err) {
5314 ext4_warning(sb, 5309 ext4_warning(sb,
5315 "Failed to enable quota tracking " 5310 "Failed to enable quota tracking "
5316 "(type=%d, err=%d). Please run " 5311 "(type=%d, err=%d). Please run "
5317 "e2fsck to fix.", type, err); 5312 "e2fsck to fix.", type, err);
5318 return err; 5313 return err;
5319 } 5314 }
5320 } 5315 }
5321 } 5316 }
5322 return 0; 5317 return 0;
5323 } 5318 }
5324 5319
5325 static int ext4_quota_off(struct super_block *sb, int type) 5320 static int ext4_quota_off(struct super_block *sb, int type)
5326 { 5321 {
5327 struct inode *inode = sb_dqopt(sb)->files[type]; 5322 struct inode *inode = sb_dqopt(sb)->files[type];
5328 handle_t *handle; 5323 handle_t *handle;
5329 5324
5330 /* Force all delayed allocation blocks to be allocated. 5325 /* Force all delayed allocation blocks to be allocated.
5331 * Caller already holds s_umount sem */ 5326 * Caller already holds s_umount sem */
5332 if (test_opt(sb, DELALLOC)) 5327 if (test_opt(sb, DELALLOC))
5333 sync_filesystem(sb); 5328 sync_filesystem(sb);
5334 5329
5335 if (!inode) 5330 if (!inode)
5336 goto out; 5331 goto out;
5337 5332
5338 /* Update modification times of quota files when userspace can 5333 /* Update modification times of quota files when userspace can
5339 * start looking at them */ 5334 * start looking at them */
5340 handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1); 5335 handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
5341 if (IS_ERR(handle)) 5336 if (IS_ERR(handle))
5342 goto out; 5337 goto out;
5343 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 5338 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
5344 ext4_mark_inode_dirty(handle, inode); 5339 ext4_mark_inode_dirty(handle, inode);
5345 ext4_journal_stop(handle); 5340 ext4_journal_stop(handle);
5346 5341
5347 out: 5342 out:
5348 return dquot_quota_off(sb, type); 5343 return dquot_quota_off(sb, type);
5349 } 5344 }
5350 5345
5351 /* Read data from the quotafile - avoid the pagecache and such because we cannot 5346 /* Read data from the quotafile - avoid the pagecache and such because we cannot
5352 * afford to acquire the locks... As quota files are never truncated and the quota 5347 * afford to acquire the locks... As quota files are never truncated and the quota
5353 * code itself serializes the operations (and no one else should touch the files), 5348 * code itself serializes the operations (and no one else should touch the files),
5354 * we don't have to be afraid of races */ 5349 * we don't have to be afraid of races */
5355 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, 5350 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
5356 size_t len, loff_t off) 5351 size_t len, loff_t off)
5357 { 5352 {
5358 struct inode *inode = sb_dqopt(sb)->files[type]; 5353 struct inode *inode = sb_dqopt(sb)->files[type];
5359 ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); 5354 ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
5360 int offset = off & (sb->s_blocksize - 1); 5355 int offset = off & (sb->s_blocksize - 1);
5361 int tocopy; 5356 int tocopy;
5362 size_t toread; 5357 size_t toread;
5363 struct buffer_head *bh; 5358 struct buffer_head *bh;
5364 loff_t i_size = i_size_read(inode); 5359 loff_t i_size = i_size_read(inode);
5365 5360
5366 if (off > i_size) 5361 if (off > i_size)
5367 return 0; 5362 return 0;
5368 if (off+len > i_size) 5363 if (off+len > i_size)
5369 len = i_size-off; 5364 len = i_size-off;
5370 toread = len; 5365 toread = len;
5371 while (toread > 0) { 5366 while (toread > 0) {
5372 tocopy = sb->s_blocksize - offset < toread ? 5367 tocopy = sb->s_blocksize - offset < toread ?
5373 sb->s_blocksize - offset : toread; 5368 sb->s_blocksize - offset : toread;
5374 bh = ext4_bread(NULL, inode, blk, 0); 5369 bh = ext4_bread(NULL, inode, blk, 0);
5375 if (IS_ERR(bh)) 5370 if (IS_ERR(bh))
5376 return PTR_ERR(bh); 5371 return PTR_ERR(bh);
5377 if (!bh) /* A hole? */ 5372 if (!bh) /* A hole? */
5378 memset(data, 0, tocopy); 5373 memset(data, 0, tocopy);
5379 else 5374 else
5380 memcpy(data, bh->b_data+offset, tocopy); 5375 memcpy(data, bh->b_data+offset, tocopy);
5381 brelse(bh); 5376 brelse(bh);
5382 offset = 0; 5377 offset = 0;
5383 toread -= tocopy; 5378 toread -= tocopy;
5384 data += tocopy; 5379 data += tocopy;
5385 blk++; 5380 blk++;
5386 } 5381 }
5387 return len; 5382 return len;
5388 } 5383 }
5389 5384
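ext4_quota_read() is a hand-rolled block-wise copy: split the byte range on block boundaries, fetch each block, and treat a missing block (a hole) as zeros. The same loop against a fake two-block "disk", with sizes shrunk so the trace is easy to follow — everything here is invented for illustration:

#include <stdio.h>
#include <string.h>

#define BLOCKSIZE 8     /* tiny block size so the loop is easy to trace */

/* Fake bread: block 0 holds data, block 1 is a hole (NULL). */
static const char *fake_bread(unsigned blk)
{
        static const char block0[BLOCKSIZE] = "AAAAAAA";
        return blk == 0 ? block0 : NULL;
}

static size_t quota_read_sketch(char *data, size_t len, size_t off)
{
        unsigned blk = off / BLOCKSIZE;
        size_t offset = off % BLOCKSIZE, toread = len;

        while (toread > 0) {
                size_t tocopy = BLOCKSIZE - offset < toread ?
                                BLOCKSIZE - offset : toread;
                const char *bh = fake_bread(blk);

                if (!bh)                        /* a hole reads as zeros */
                        memset(data, 0, tocopy);
                else
                        memcpy(data, bh + offset, tocopy);
                offset = 0;
                toread -= tocopy;
                data += tocopy;
                blk++;
        }
        return len;
}

int main(void)
{
        unsigned char buf[10];

        quota_read_sketch((char *)buf, sizeof(buf), 4);
        for (size_t i = 0; i < sizeof(buf); i++)
                printf("%02x ", buf[i]);        /* 41 41 41 00 00 ... */
        printf("\n");
        return 0;
}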
5390 /* Write to quotafile (we know the transaction is already started and has 5385 /* Write to quotafile (we know the transaction is already started and has
5391 * enough credits) */ 5386 * enough credits) */
5392 static ssize_t ext4_quota_write(struct super_block *sb, int type, 5387 static ssize_t ext4_quota_write(struct super_block *sb, int type,
5393 const char *data, size_t len, loff_t off) 5388 const char *data, size_t len, loff_t off)
5394 { 5389 {
5395 struct inode *inode = sb_dqopt(sb)->files[type]; 5390 struct inode *inode = sb_dqopt(sb)->files[type];
5396 ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); 5391 ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
5397 int err, offset = off & (sb->s_blocksize - 1); 5392 int err, offset = off & (sb->s_blocksize - 1);
5398 struct buffer_head *bh; 5393 struct buffer_head *bh;
5399 handle_t *handle = journal_current_handle(); 5394 handle_t *handle = journal_current_handle();
5400 5395
5401 if (EXT4_SB(sb)->s_journal && !handle) { 5396 if (EXT4_SB(sb)->s_journal && !handle) {
5402 ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)" 5397 ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
5403 " cancelled because transaction is not started", 5398 " cancelled because transaction is not started",
5404 (unsigned long long)off, (unsigned long long)len); 5399 (unsigned long long)off, (unsigned long long)len);
5405 return -EIO; 5400 return -EIO;
5406 } 5401 }
5407 /* 5402 /*
5408 * Since we account for only one data block in the transaction credits, 5403 * Since we account for only one data block in the transaction credits,
5409 * it is impossible to cross a block boundary. 5404 * it is impossible to cross a block boundary.
5410 */ 5405 */
5411 if (sb->s_blocksize - offset < len) { 5406 if (sb->s_blocksize - offset < len) {
5412 ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)" 5407 ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
5413 " cancelled because not block aligned", 5408 " cancelled because not block aligned",
5414 (unsigned long long)off, (unsigned long long)len); 5409 (unsigned long long)off, (unsigned long long)len);
5415 return -EIO; 5410 return -EIO;
5416 } 5411 }
5417 5412
5418 bh = ext4_bread(handle, inode, blk, 1); 5413 bh = ext4_bread(handle, inode, blk, 1);
5419 if (IS_ERR(bh)) 5414 if (IS_ERR(bh))
5420 return PTR_ERR(bh); 5415 return PTR_ERR(bh);
5421 if (!bh) 5416 if (!bh)
5422 goto out; 5417 goto out;
5423 BUFFER_TRACE(bh, "get write access"); 5418 BUFFER_TRACE(bh, "get write access");
5424 err = ext4_journal_get_write_access(handle, bh); 5419 err = ext4_journal_get_write_access(handle, bh);
5425 if (err) { 5420 if (err) {
5426 brelse(bh); 5421 brelse(bh);
5427 return err; 5422 return err;
5428 } 5423 }
5429 lock_buffer(bh); 5424 lock_buffer(bh);
5430 memcpy(bh->b_data+offset, data, len); 5425 memcpy(bh->b_data+offset, data, len);
5431 flush_dcache_page(bh->b_page); 5426 flush_dcache_page(bh->b_page);
5432 unlock_buffer(bh); 5427 unlock_buffer(bh);
5433 err = ext4_handle_dirty_metadata(handle, NULL, bh); 5428 err = ext4_handle_dirty_metadata(handle, NULL, bh);
5434 brelse(bh); 5429 brelse(bh);
5435 out: 5430 out:
5436 if (inode->i_size < off + len) { 5431 if (inode->i_size < off + len) {
5437 i_size_write(inode, off + len); 5432 i_size_write(inode, off + len);
5438 EXT4_I(inode)->i_disksize = inode->i_size; 5433 EXT4_I(inode)->i_disksize = inode->i_size;
5439 ext4_mark_inode_dirty(handle, inode); 5434 ext4_mark_inode_dirty(handle, inode);
5440 } 5435 }
5441 return len; 5436 return len;
5442 } 5437 }
5443 5438
5444 #endif 5439 #endif
5445 5440
5446 static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, 5441 static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
5447 const char *dev_name, void *data) 5442 const char *dev_name, void *data)
5448 { 5443 {
5449 return mount_bdev(fs_type, flags, dev_name, data, ext4_fill_super); 5444 return mount_bdev(fs_type, flags, dev_name, data, ext4_fill_super);
5450 } 5445 }
5451 5446
5452 #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) 5447 #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
5453 static inline void register_as_ext2(void) 5448 static inline void register_as_ext2(void)
5454 { 5449 {
5455 int err = register_filesystem(&ext2_fs_type); 5450 int err = register_filesystem(&ext2_fs_type);
5456 if (err) 5451 if (err)
5457 printk(KERN_WARNING 5452 printk(KERN_WARNING
5458 "EXT4-fs: Unable to register as ext2 (%d)\n", err); 5453 "EXT4-fs: Unable to register as ext2 (%d)\n", err);
5459 } 5454 }
5460 5455
5461 static inline void unregister_as_ext2(void) 5456 static inline void unregister_as_ext2(void)
5462 { 5457 {
5463 unregister_filesystem(&ext2_fs_type); 5458 unregister_filesystem(&ext2_fs_type);
5464 } 5459 }
5465 5460
5466 static inline int ext2_feature_set_ok(struct super_block *sb) 5461 static inline int ext2_feature_set_ok(struct super_block *sb)
5467 { 5462 {
5468 if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP)) 5463 if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP))
5469 return 0; 5464 return 0;
5470 if (sb->s_flags & MS_RDONLY) 5465 if (sb->s_flags & MS_RDONLY)
5471 return 1; 5466 return 1;
5472 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP)) 5467 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP))
5473 return 0; 5468 return 0;
5474 return 1; 5469 return 1;
5475 } 5470 }
5476 #else 5471 #else
5477 static inline void register_as_ext2(void) { } 5472 static inline void register_as_ext2(void) { }
5478 static inline void unregister_as_ext2(void) { } 5473 static inline void unregister_as_ext2(void) { }
5479 static inline int ext2_feature_set_ok(struct super_block *sb) { return 0; } 5474 static inline int ext2_feature_set_ok(struct super_block *sb) { return 0; }
5480 #endif 5475 #endif
5481 5476
5482 #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) 5477 #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
5483 static inline void register_as_ext3(void) 5478 static inline void register_as_ext3(void)
5484 { 5479 {
5485 int err = register_filesystem(&ext3_fs_type); 5480 int err = register_filesystem(&ext3_fs_type);
5486 if (err) 5481 if (err)
5487 printk(KERN_WARNING 5482 printk(KERN_WARNING
5488 "EXT4-fs: Unable to register as ext3 (%d)\n", err); 5483 "EXT4-fs: Unable to register as ext3 (%d)\n", err);
5489 } 5484 }
5490 5485
5491 static inline void unregister_as_ext3(void) 5486 static inline void unregister_as_ext3(void)
5492 { 5487 {
5493 unregister_filesystem(&ext3_fs_type); 5488 unregister_filesystem(&ext3_fs_type);
5494 } 5489 }
5495 5490
5496 static inline int ext3_feature_set_ok(struct super_block *sb) 5491 static inline int ext3_feature_set_ok(struct super_block *sb)
5497 { 5492 {
5498 if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP)) 5493 if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP))
5499 return 0; 5494 return 0;
5500 if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) 5495 if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
5501 return 0; 5496 return 0;
5502 if (sb->s_flags & MS_RDONLY) 5497 if (sb->s_flags & MS_RDONLY)
5503 return 1; 5498 return 1;
5504 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP)) 5499 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP))
5505 return 0; 5500 return 0;
5506 return 1; 5501 return 1;
5507 } 5502 }
5508 #else 5503 #else
5509 static inline void register_as_ext3(void) { } 5504 static inline void register_as_ext3(void) { }
5510 static inline void unregister_as_ext3(void) { } 5505 static inline void unregister_as_ext3(void) { }
5511 static inline int ext3_feature_set_ok(struct super_block *sb) { return 0; } 5506 static inline int ext3_feature_set_ok(struct super_block *sb) { return 0; }
5512 #endif 5507 #endif
5513 5508
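ext2_feature_set_ok() and ext3_feature_set_ok() are both mask tests: any incompat bit outside the supported set blocks the mount entirely, while an unknown ro-compat bit only blocks read-write mounts — the same mechanism the reserved EXT4_FEATURE_RO_COMPAT_READONLY code point leans on in ext4_remount() above. A compact sketch with invented bit values:

#include <stdbool.h>
#include <stdio.h>

#define SUPP_INCOMPAT  0x0003u   /* illustrative supported-feature masks */
#define SUPP_RO_COMPAT 0x0007u

static bool feature_set_ok(unsigned incompat, unsigned ro_compat, bool rdonly)
{
        if (incompat & ~SUPP_INCOMPAT)
                return false;   /* cannot mount at all */
        if (rdonly)
                return true;    /* unknown ro-compat bits are fine read-only */
        return !(ro_compat & ~SUPP_RO_COMPAT);
}

int main(void)
{
        printf("%d\n", feature_set_ok(0x1, 0x10, true));  /* 1: ro mount ok */
        printf("%d\n", feature_set_ok(0x1, 0x10, false)); /* 0: rw refused */
        printf("%d\n", feature_set_ok(0x8, 0x0, true));   /* 0: incompat */
        return 0;
}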
5514 static struct file_system_type ext4_fs_type = { 5509 static struct file_system_type ext4_fs_type = {
5515 .owner = THIS_MODULE, 5510 .owner = THIS_MODULE,
5516 .name = "ext4", 5511 .name = "ext4",
5517 .mount = ext4_mount, 5512 .mount = ext4_mount,
5518 .kill_sb = kill_block_super, 5513 .kill_sb = kill_block_super,
5519 .fs_flags = FS_REQUIRES_DEV, 5514 .fs_flags = FS_REQUIRES_DEV,
5520 }; 5515 };
5521 MODULE_ALIAS_FS("ext4"); 5516 MODULE_ALIAS_FS("ext4");
5522 5517
5523 static int __init ext4_init_feat_adverts(void) 5518 static int __init ext4_init_feat_adverts(void)
5524 { 5519 {
5525 struct ext4_features *ef; 5520 struct ext4_features *ef;
5526 int ret = -ENOMEM; 5521 int ret = -ENOMEM;
5527 5522
5528 ef = kzalloc(sizeof(struct ext4_features), GFP_KERNEL); 5523 ef = kzalloc(sizeof(struct ext4_features), GFP_KERNEL);
5529 if (!ef) 5524 if (!ef)
5530 goto out; 5525 goto out;
5531 5526
5532 ef->f_kobj.kset = ext4_kset; 5527 ef->f_kobj.kset = ext4_kset;
5533 init_completion(&ef->f_kobj_unregister); 5528 init_completion(&ef->f_kobj_unregister);
5534 ret = kobject_init_and_add(&ef->f_kobj, &ext4_feat_ktype, NULL, 5529 ret = kobject_init_and_add(&ef->f_kobj, &ext4_feat_ktype, NULL,
5535 "features"); 5530 "features");
5536 if (ret) { 5531 if (ret) {
5537 kfree(ef); 5532 kfree(ef);
5538 goto out; 5533 goto out;
5539 } 5534 }
5540 5535
5541 ext4_feat = ef; 5536 ext4_feat = ef;
5542 ret = 0; 5537 ret = 0;
5543 out: 5538 out:
5544 return ret; 5539 return ret;
5545 } 5540 }
5546 5541
5547 static void ext4_exit_feat_adverts(void) 5542 static void ext4_exit_feat_adverts(void)
5548 { 5543 {
5549 kobject_put(&ext4_feat->f_kobj); 5544 kobject_put(&ext4_feat->f_kobj);
5550 wait_for_completion(&ext4_feat->f_kobj_unregister); 5545 wait_for_completion(&ext4_feat->f_kobj_unregister);
5551 kfree(ext4_feat); 5546 kfree(ext4_feat);
5552 } 5547 }
5553 5548
5554 /* Shared across all ext4 file systems */ 5549 /* Shared across all ext4 file systems */
5555 wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ]; 5550 wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
5556 struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ]; 5551 struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
5557 5552
5558 static int __init ext4_init_fs(void) 5553 static int __init ext4_init_fs(void)
5559 { 5554 {
5560 int i, err; 5555 int i, err;
5561 5556
5562 ext4_li_info = NULL; 5557 ext4_li_info = NULL;
5563 mutex_init(&ext4_li_mtx); 5558 mutex_init(&ext4_li_mtx);
5564 5559
5565 /* Build-time check for flags consistency */ 5560 /* Build-time check for flags consistency */
5566 ext4_check_flag_values(); 5561 ext4_check_flag_values();
5567 5562
5568 for (i = 0; i < EXT4_WQ_HASH_SZ; i++) { 5563 for (i = 0; i < EXT4_WQ_HASH_SZ; i++) {
5569 mutex_init(&ext4__aio_mutex[i]); 5564 mutex_init(&ext4__aio_mutex[i]);
5570 init_waitqueue_head(&ext4__ioend_wq[i]); 5565 init_waitqueue_head(&ext4__ioend_wq[i]);
5571 } 5566 }
5572 5567
5573 err = ext4_init_es(); 5568 err = ext4_init_es();
5574 if (err) 5569 if (err)
5575 return err; 5570 return err;
5576 5571
5577 err = ext4_init_pageio(); 5572 err = ext4_init_pageio();
5578 if (err) 5573 if (err)
5579 goto out7; 5574 goto out7;
5580 5575
5581 err = ext4_init_system_zone(); 5576 err = ext4_init_system_zone();
5582 if (err) 5577 if (err)
5583 goto out6; 5578 goto out6;
5584 ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); 5579 ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
5585 if (!ext4_kset) { 5580 if (!ext4_kset) {
5586 err = -ENOMEM; 5581 err = -ENOMEM;
5587 goto out5; 5582 goto out5;
5588 } 5583 }
5589 ext4_proc_root = proc_mkdir("fs/ext4", NULL); 5584 ext4_proc_root = proc_mkdir("fs/ext4", NULL);
5590 5585
5591 err = ext4_init_feat_adverts(); 5586 err = ext4_init_feat_adverts();
5592 if (err) 5587 if (err)
5593 goto out4; 5588 goto out4;
5594 5589
5595 err = ext4_init_mballoc(); 5590 err = ext4_init_mballoc();
5596 if (err) 5591 if (err)
5597 goto out2; 5592 goto out2;
5598 else 5593 else
5599 ext4_mballoc_ready = 1; 5594 ext4_mballoc_ready = 1;
5600 err = init_inodecache(); 5595 err = init_inodecache();
5601 if (err) 5596 if (err)
5602 goto out1; 5597 goto out1;
5603 register_as_ext3(); 5598 register_as_ext3();
5604 register_as_ext2(); 5599 register_as_ext2();
5605 err = register_filesystem(&ext4_fs_type); 5600 err = register_filesystem(&ext4_fs_type);
5606 if (err) 5601 if (err)
5607 goto out; 5602 goto out;
5608 5603
5609 return 0; 5604 return 0;
5610 out: 5605 out:
5611 unregister_as_ext2(); 5606 unregister_as_ext2();
5612 unregister_as_ext3(); 5607 unregister_as_ext3();
5613 destroy_inodecache(); 5608 destroy_inodecache();
5614 out1: 5609 out1:
5615 ext4_mballoc_ready = 0; 5610 ext4_mballoc_ready = 0;
5616 ext4_exit_mballoc(); 5611 ext4_exit_mballoc();
5617 out2: 5612 out2:
5618 ext4_exit_feat_adverts(); 5613 ext4_exit_feat_adverts();
5619 out4: 5614 out4:
5620 if (ext4_proc_root) 5615 if (ext4_proc_root)
5621 remove_proc_entry("fs/ext4", NULL); 5616 remove_proc_entry("fs/ext4", NULL);
5622 kset_unregister(ext4_kset); 5617 kset_unregister(ext4_kset);
5623 out5: 5618 out5:
5624 ext4_exit_system_zone(); 5619 ext4_exit_system_zone();
5625 out6: 5620 out6:
5626 ext4_exit_pageio(); 5621 ext4_exit_pageio();
5627 out7: 5622 out7:
5628 ext4_exit_es(); 5623 ext4_exit_es();
5629 5624
5630 return err; 5625 return err;
5631 } 5626 }
5632 5627
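The out:/out1:/…/out7: ladder in ext4_init_fs() is the standard kernel unwind idiom: each failure jumps to the label that tears down exactly what has been initialised so far, in reverse order, with the labels falling through into one another. Reduced to two stages in standalone C:

#include <stdio.h>

static int  init_a(int fail) { return fail ? -1 : 0; }
static void exit_a(void)     { puts("undo a"); }
static int  init_b(int fail) { return fail ? -1 : 0; }

static int init_fs_sketch(int fail_a, int fail_b)
{
        int err;

        err = init_a(fail_a);
        if (err)
                goto out2;              /* nothing to undo yet */
        err = init_b(fail_b);
        if (err)
                goto out1;              /* only a succeeded */
        return 0;
out1:
        exit_a();                       /* falls through to out2 */
out2:
        return err;
}

int main(void)
{
        printf("err=%d\n", init_fs_sketch(0, 1));   /* prints "undo a" */
        return 0;
}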
5633 static void __exit ext4_exit_fs(void) 5628 static void __exit ext4_exit_fs(void)
5634 { 5629 {
5635 ext4_destroy_lazyinit_thread(); 5630 ext4_destroy_lazyinit_thread();
5636 unregister_as_ext2(); 5631 unregister_as_ext2();
5637 unregister_as_ext3(); 5632 unregister_as_ext3();
5638 unregister_filesystem(&ext4_fs_type); 5633 unregister_filesystem(&ext4_fs_type);
5639 destroy_inodecache(); 5634 destroy_inodecache();
5640 ext4_exit_mballoc(); 5635 ext4_exit_mballoc();
5641 ext4_exit_feat_adverts(); 5636 ext4_exit_feat_adverts();
5642 remove_proc_entry("fs/ext4", NULL); 5637 remove_proc_entry("fs/ext4", NULL);
5643 kset_unregister(ext4_kset); 5638 kset_unregister(ext4_kset);
5644 ext4_exit_system_zone(); 5639 ext4_exit_system_zone();
5645 ext4_exit_pageio(); 5640 ext4_exit_pageio();
5646 ext4_exit_es(); 5641 ext4_exit_es();
1 /* 1 /*
2 * linux/fs/jbd2/recovery.c 2 * linux/fs/jbd2/recovery.c
3 * 3 *
4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999 4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
5 * 5 *
6 * Copyright 1999-2000 Red Hat Software --- All Rights Reserved 6 * Copyright 1999-2000 Red Hat Software --- All Rights Reserved
7 * 7 *
8 * This file is part of the Linux kernel and is made available under 8 * This file is part of the Linux kernel and is made available under
9 * the terms of the GNU General Public License, version 2, or at your 9 * the terms of the GNU General Public License, version 2, or at your
10 * option, any later version, incorporated herein by reference. 10 * option, any later version, incorporated herein by reference.
11 * 11 *
12 * Journal recovery routines for the generic filesystem journaling code; 12 * Journal recovery routines for the generic filesystem journaling code;
13 * part of the ext2fs journaling system. 13 * part of the ext2fs journaling system.
14 */ 14 */
15 15
16 #ifndef __KERNEL__ 16 #ifndef __KERNEL__
17 #include "jfs_user.h" 17 #include "jfs_user.h"
18 #else 18 #else
19 #include <linux/time.h> 19 #include <linux/time.h>
20 #include <linux/fs.h> 20 #include <linux/fs.h>
21 #include <linux/jbd2.h> 21 #include <linux/jbd2.h>
22 #include <linux/errno.h> 22 #include <linux/errno.h>
23 #include <linux/crc32.h> 23 #include <linux/crc32.h>
24 #include <linux/blkdev.h> 24 #include <linux/blkdev.h>
25 #endif 25 #endif
26 26
27 /* 27 /*
28 * Maintain information about the progress of the recovery job, so that 28 * Maintain information about the progress of the recovery job, so that
29 * the different passes can carry information between them. 29 * the different passes can carry information between them.
30 */ 30 */
31 struct recovery_info 31 struct recovery_info
32 { 32 {
33 tid_t start_transaction; 33 tid_t start_transaction;
34 tid_t end_transaction; 34 tid_t end_transaction;
35 35
36 int nr_replays; 36 int nr_replays;
37 int nr_revokes; 37 int nr_revokes;
38 int nr_revoke_hits; 38 int nr_revoke_hits;
39 }; 39 };
40 40
41 enum passtype {PASS_SCAN, PASS_REVOKE, PASS_REPLAY}; 41 enum passtype {PASS_SCAN, PASS_REVOKE, PASS_REPLAY};
42 static int do_one_pass(journal_t *journal, 42 static int do_one_pass(journal_t *journal,
43 struct recovery_info *info, enum passtype pass); 43 struct recovery_info *info, enum passtype pass);
44 static int scan_revoke_records(journal_t *, struct buffer_head *, 44 static int scan_revoke_records(journal_t *, struct buffer_head *,
45 tid_t, struct recovery_info *); 45 tid_t, struct recovery_info *);
46 46
47 #ifdef __KERNEL__ 47 #ifdef __KERNEL__
48 48
49 /* Release readahead buffers after use */ 49 /* Release readahead buffers after use */
50 static void journal_brelse_array(struct buffer_head *b[], int n) 50 static void journal_brelse_array(struct buffer_head *b[], int n)
51 { 51 {
52 while (--n >= 0) 52 while (--n >= 0)
53 brelse (b[n]); 53 brelse (b[n]);
54 } 54 }
55 55
56 56
57 /* 57 /*
58 * When reading from the journal, we are going through the block device 58 * When reading from the journal, we are going through the block device
59 * layer directly and so there is no readahead being done for us. We 59 * layer directly and so there is no readahead being done for us. We
60 * need to implement any readahead ourselves if we want it to happen at 60 * need to implement any readahead ourselves if we want it to happen at
61 * all. Recovery is basically one long sequential read, so make sure we 61 * all. Recovery is basically one long sequential read, so make sure we
62 * do the IO in reasonably large chunks. 62 * do the IO in reasonably large chunks.
63 * 63 *
64 * This is not so critical that we need to be enormously clever about 64 * This is not so critical that we need to be enormously clever about
65 * the readahead size, though. 128K is a purely arbitrary, good-enough 65 * the readahead size, though. 128K is a purely arbitrary, good-enough
66 * fixed value. 66 * fixed value.
67 */ 67 */
68 68
69 #define MAXBUF 8 69 #define MAXBUF 8
70 static int do_readahead(journal_t *journal, unsigned int start) 70 static int do_readahead(journal_t *journal, unsigned int start)
71 { 71 {
72 int err; 72 int err;
73 unsigned int max, nbufs, next; 73 unsigned int max, nbufs, next;
74 unsigned long long blocknr; 74 unsigned long long blocknr;
75 struct buffer_head *bh; 75 struct buffer_head *bh;
76 76
77 struct buffer_head * bufs[MAXBUF]; 77 struct buffer_head * bufs[MAXBUF];
78 78
79 /* Do up to 128K of readahead */ 79 /* Do up to 128K of readahead */
80 max = start + (128 * 1024 / journal->j_blocksize); 80 max = start + (128 * 1024 / journal->j_blocksize);
81 if (max > journal->j_maxlen) 81 if (max > journal->j_maxlen)
82 max = journal->j_maxlen; 82 max = journal->j_maxlen;
83 83
84 /* Do the readahead itself. We'll submit MAXBUF buffer_heads at 84 /* Do the readahead itself. We'll submit MAXBUF buffer_heads at
85 * a time to the block device IO layer. */ 85 * a time to the block device IO layer. */
86 86
87 nbufs = 0; 87 nbufs = 0;
88 88
89 for (next = start; next < max; next++) { 89 for (next = start; next < max; next++) {
90 err = jbd2_journal_bmap(journal, next, &blocknr); 90 err = jbd2_journal_bmap(journal, next, &blocknr);
91 91
92 if (err) { 92 if (err) {
93 printk(KERN_ERR "JBD2: bad block at offset %u\n", 93 printk(KERN_ERR "JBD2: bad block at offset %u\n",
94 next); 94 next);
95 goto failed; 95 goto failed;
96 } 96 }
97 97
98 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); 98 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
99 if (!bh) { 99 if (!bh) {
100 err = -ENOMEM; 100 err = -ENOMEM;
101 goto failed; 101 goto failed;
102 } 102 }
103 103
104 if (!buffer_uptodate(bh) && !buffer_locked(bh)) { 104 if (!buffer_uptodate(bh) && !buffer_locked(bh)) {
105 bufs[nbufs++] = bh; 105 bufs[nbufs++] = bh;
106 if (nbufs == MAXBUF) { 106 if (nbufs == MAXBUF) {
107 ll_rw_block(READ, nbufs, bufs); 107 ll_rw_block(READ, nbufs, bufs);
108 journal_brelse_array(bufs, nbufs); 108 journal_brelse_array(bufs, nbufs);
109 nbufs = 0; 109 nbufs = 0;
110 } 110 }
111 } else 111 } else
112 brelse(bh); 112 brelse(bh);
113 } 113 }
114 114
115 if (nbufs) 115 if (nbufs)
116 ll_rw_block(READ, nbufs, bufs); 116 ll_rw_block(READ, nbufs, bufs);
117 err = 0; 117 err = 0;
118 118
119 failed: 119 failed:
120 if (nbufs) 120 if (nbufs)
121 journal_brelse_array(bufs, nbufs); 121 journal_brelse_array(bufs, nbufs);
122 return err; 122 return err;
123 } 123 }
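
The batching pattern above can be sketched in userspace, assuming a plain file descriptor in place of the journal's buffer_heads; prefetch_window, BATCH, and the posix_fadvise-based submission are illustrative stand-ins for this editor's sketch, not the kernel path:

#define _POSIX_C_SOURCE 200112L
#include <fcntl.h>
#include <sys/types.h>

#define BATCH 8                          /* counterpart of MAXBUF */

/* Hint the kernel about an upcoming sequential read of up to 128K,
 * submitting the hints in BATCH-sized chunks the way do_readahead()
 * batches buffer_heads. */
static int prefetch_window(int fd, off_t start, off_t region_end,
                           size_t blocksize)
{
        off_t max = start + (off_t)(128 * 1024 / blocksize);
        off_t next, first = start;
        int pending = 0;

        if (max > region_end)
                max = region_end;        /* clamp, like j_maxlen */

        for (next = start; next < max; next++) {
                if (pending == 0)
                        first = next;
                if (++pending == BATCH) {
                        (void)posix_fadvise(fd, first * blocksize,
                                            (off_t)pending * blocksize,
                                            POSIX_FADV_WILLNEED);
                        pending = 0;
                }
        }
        if (pending)                     /* flush the final partial batch */
                (void)posix_fadvise(fd, first * blocksize,
                                    (off_t)pending * blocksize,
                                    POSIX_FADV_WILLNEED);
        return 0;
}
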
124 124
125 #endif /* __KERNEL__ */ 125 #endif /* __KERNEL__ */
126 126
127 127
128 /* 128 /*
129 * Read a block from the journal 129 * Read a block from the journal
130 */ 130 */
131 131
132 static int jread(struct buffer_head **bhp, journal_t *journal, 132 static int jread(struct buffer_head **bhp, journal_t *journal,
133 unsigned int offset) 133 unsigned int offset)
134 { 134 {
135 int err; 135 int err;
136 unsigned long long blocknr; 136 unsigned long long blocknr;
137 struct buffer_head *bh; 137 struct buffer_head *bh;
138 138
139 *bhp = NULL; 139 *bhp = NULL;
140 140
141 if (offset >= journal->j_maxlen) { 141 if (offset >= journal->j_maxlen) {
142 printk(KERN_ERR "JBD2: corrupted journal superblock\n"); 142 printk(KERN_ERR "JBD2: corrupted journal superblock\n");
143 return -EIO; 143 return -EIO;
144 } 144 }
145 145
146 err = jbd2_journal_bmap(journal, offset, &blocknr); 146 err = jbd2_journal_bmap(journal, offset, &blocknr);
147 147
148 if (err) { 148 if (err) {
149 printk(KERN_ERR "JBD2: bad block at offset %u\n", 149 printk(KERN_ERR "JBD2: bad block at offset %u\n",
150 offset); 150 offset);
151 return err; 151 return err;
152 } 152 }
153 153
154 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); 154 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
155 if (!bh) 155 if (!bh)
156 return -ENOMEM; 156 return -ENOMEM;
157 157
158 if (!buffer_uptodate(bh)) { 158 if (!buffer_uptodate(bh)) {
159 /* If this is a brand new buffer, start readahead. 159 /* If this is a brand new buffer, start readahead.
160 Otherwise, we assume we are already reading it. */ 160 Otherwise, we assume we are already reading it. */
161 if (!buffer_req(bh)) 161 if (!buffer_req(bh))
162 do_readahead(journal, offset); 162 do_readahead(journal, offset);
163 wait_on_buffer(bh); 163 wait_on_buffer(bh);
164 } 164 }
165 165
166 if (!buffer_uptodate(bh)) { 166 if (!buffer_uptodate(bh)) {
167 printk(KERN_ERR "JBD2: Failed to read block at offset %u\n", 167 printk(KERN_ERR "JBD2: Failed to read block at offset %u\n",
168 offset); 168 offset);
169 brelse(bh); 169 brelse(bh);
170 return -EIO; 170 return -EIO;
171 } 171 }
172 172
173 *bhp = bh; 173 *bhp = bh;
174 return 0; 174 return 0;
175 } 175 }
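
jread's shape -- validate the logical offset, map it to a physical block, only then read -- is worth isolating. A minimal userspace sketch of the same guard-map-read pattern, where jmap is a hypothetical stand-in for jbd2_journal_bmap (reduced to an identity mapping here):

#define _POSIX_C_SOURCE 200809L
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

/* Hypothetical logical-to-physical mapping; identity for the sketch. */
static int jmap(uint64_t logical, uint64_t *physical)
{
        *physical = logical;
        return 0;
}

/* Guard, map, then read one block -- the same shape as jread(). */
static int read_journal_block(int fd, uint64_t offset, uint64_t maxlen,
                              void *buf, size_t blocksize)
{
        uint64_t blocknr;
        int err;

        if (offset >= maxlen) {          /* offset past the journal end */
                fprintf(stderr, "corrupted journal superblock\n");
                return -EIO;
        }

        err = jmap(offset, &blocknr);
        if (err) {
                fprintf(stderr, "bad block at offset %llu\n",
                        (unsigned long long)offset);
                return err;
        }

        if (pread(fd, buf, blocksize, (off_t)(blocknr * blocksize)) !=
            (ssize_t)blocksize)
                return -EIO;
        return 0;
}
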
176 176
177 static int jbd2_descr_block_csum_verify(journal_t *j, 177 static int jbd2_descr_block_csum_verify(journal_t *j,
178 void *buf) 178 void *buf)
179 { 179 {
180 struct jbd2_journal_block_tail *tail; 180 struct jbd2_journal_block_tail *tail;
181 __be32 provided; 181 __be32 provided;
182 __u32 calculated; 182 __u32 calculated;
183 183
184 if (!jbd2_journal_has_csum_v2or3(j)) 184 if (!jbd2_journal_has_csum_v2or3(j))
185 return 1; 185 return 1;
186 186
187 tail = (struct jbd2_journal_block_tail *)(buf + j->j_blocksize - 187 tail = (struct jbd2_journal_block_tail *)(buf + j->j_blocksize -
188 sizeof(struct jbd2_journal_block_tail)); 188 sizeof(struct jbd2_journal_block_tail));
189 provided = tail->t_checksum; 189 provided = tail->t_checksum;
190 tail->t_checksum = 0; 190 tail->t_checksum = 0;
191 calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); 191 calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
192 tail->t_checksum = provided; 192 tail->t_checksum = provided;
193 193
194 return provided == cpu_to_be32(calculated); 194 return provided == cpu_to_be32(calculated);
195 } 195 }
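
All of the *_csum_verify helpers in this file share one trick: save the stored checksum, zero the field, checksum the whole block, then restore the field so the buffer is left untouched. A self-contained sketch of that pattern, with a toy checksum standing in for jbd2_chksum and the cpu_to_be32 byte-order conversion elided:

#include <stdint.h>
#include <stddef.h>

/* Toy stand-in for jbd2_chksum(); real code uses crc32c.  Only the
 * zero-then-restore pattern matters here. */
static uint32_t toy_csum(uint32_t seed, const void *buf, size_t len)
{
        const uint8_t *p = buf;

        while (len--)
                seed = (seed << 5) + seed + *p++;
        return seed;
}

/* Verify a checksum stored in the last 4 bytes of the block itself:
 * zero the field, checksum the whole block, then put it back. */
static int block_csum_verify(void *block, size_t blocksize, uint32_t seed)
{
        uint32_t *field = (uint32_t *)((char *)block + blocksize - 4);
        uint32_t provided = *field;
        uint32_t calculated;

        *field = 0;
        calculated = toy_csum(seed, block, blocksize);
        *field = provided;               /* leave the buffer unmodified */

        return provided == calculated;
}
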
196 196
197 /* 197 /*
198 * Count the number of in-use tags in a journal descriptor block. 198 * Count the number of in-use tags in a journal descriptor block.
199 */ 199 */
200 200
201 static int count_tags(journal_t *journal, struct buffer_head *bh) 201 static int count_tags(journal_t *journal, struct buffer_head *bh)
202 { 202 {
203 char * tagp; 203 char * tagp;
204 journal_block_tag_t * tag; 204 journal_block_tag_t * tag;
205 int nr = 0, size = journal->j_blocksize; 205 int nr = 0, size = journal->j_blocksize;
206 int tag_bytes = journal_tag_bytes(journal); 206 int tag_bytes = journal_tag_bytes(journal);
207 207
208 if (jbd2_journal_has_csum_v2or3(journal)) 208 if (jbd2_journal_has_csum_v2or3(journal))
209 size -= sizeof(struct jbd2_journal_block_tail); 209 size -= sizeof(struct jbd2_journal_block_tail);
210 210
211 tagp = &bh->b_data[sizeof(journal_header_t)]; 211 tagp = &bh->b_data[sizeof(journal_header_t)];
212 212
213 while ((tagp - bh->b_data + tag_bytes) <= size) { 213 while ((tagp - bh->b_data + tag_bytes) <= size) {
214 tag = (journal_block_tag_t *) tagp; 214 tag = (journal_block_tag_t *) tagp;
215 215
216 nr++; 216 nr++;
217 tagp += tag_bytes; 217 tagp += tag_bytes;
218 if (!(tag->t_flags & cpu_to_be16(JBD2_FLAG_SAME_UUID))) 218 if (!(tag->t_flags & cpu_to_be16(JBD2_FLAG_SAME_UUID)))
219 tagp += 16; 219 tagp += 16;
220 220
221 if (tag->t_flags & cpu_to_be16(JBD2_FLAG_LAST_TAG)) 221 if (tag->t_flags & cpu_to_be16(JBD2_FLAG_LAST_TAG))
222 break; 222 break;
223 } 223 }
224 224
225 return nr; 225 return nr;
226 } 226 }
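
The tag walk is governed by two flag bits: SAME_UUID decides whether a 16-byte UUID follows the tag, and LAST_TAG terminates the list early. A simplified sketch with a fixed-size tag and host-endian flags (the real tags use variable sizes and big-endian flags; the constants below are illustrative):

#include <stdint.h>
#include <stddef.h>

#define FLAG_SAME_UUID 0x2               /* illustrative values */
#define FLAG_LAST_TAG  0x8

struct tag {                             /* simplified fixed-size tag */
        uint32_t blocknr;
        uint16_t checksum;
        uint16_t flags;
};

/* Count tags in a descriptor-like block: each tag may be followed by
 * a 16-byte UUID unless SAME_UUID is set; LAST_TAG ends the list. */
static int count_tags_sketch(const char *data, size_t size)
{
        const char *tagp = data;
        int nr = 0;

        while ((size_t)(tagp - data) + sizeof(struct tag) <= size) {
                const struct tag *t = (const struct tag *)tagp;

                nr++;
                tagp += sizeof(struct tag);
                if (!(t->flags & FLAG_SAME_UUID))
                        tagp += 16;      /* skip the per-tag UUID */
                if (t->flags & FLAG_LAST_TAG)
                        break;
        }
        return nr;
}
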
227 227
228 228
229 /* Make sure we wrap around the log correctly! */ 229 /* Make sure we wrap around the log correctly! */
230 #define wrap(journal, var) \ 230 #define wrap(journal, var) \
231 do { \ 231 do { \
232 if (var >= (journal)->j_last) \ 232 if (var >= (journal)->j_last) \
233 var -= ((journal)->j_last - (journal)->j_first); \ 233 var -= ((journal)->j_last - (journal)->j_first); \
234 } while (0) 234 } while (0)
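
A worked example of the wraparound arithmetic, written as a function; j_first and j_last delimit the circular log area, and a cursor stepped past j_last lands back at j_first rather than at block 0:

#include <assert.h>

/* Same arithmetic as the wrap() macro, as a function. */
static unsigned long wrap_block(unsigned long var,
                                unsigned long j_first, unsigned long j_last)
{
        if (var >= j_last)
                var -= j_last - j_first;
        return var;
}

int main(void)
{
        /* A log spanning blocks [2, 1024). */
        assert(wrap_block(1024, 2, 1024) == 2);     /* wraps to j_first */
        assert(wrap_block(1023, 2, 1024) == 1023);  /* still in range  */
        return 0;
}
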
235 235
236 /** 236 /**
237 * jbd2_journal_recover - recovers an on-disk journal 237 * jbd2_journal_recover - recovers an on-disk journal
238 * @journal: the journal to recover 238 * @journal: the journal to recover
239 * 239 *
240 * The primary function for recovering the log contents when mounting a 240 * The primary function for recovering the log contents when mounting a
241 * journaled device. 241 * journaled device.
242 * 242 *
243 * Recovery is done in three passes. In the first pass, we look for the 243 * Recovery is done in three passes. In the first pass, we look for the
244 * end of the log. In the second, we assemble the list of revoke 244 * end of the log. In the second, we assemble the list of revoke
245 * blocks. In the third and final pass, we replay any un-revoked blocks 245 * blocks. In the third and final pass, we replay any un-revoked blocks
246 * in the log. 246 * in the log.
247 */ 247 */
248 int jbd2_journal_recover(journal_t *journal) 248 int jbd2_journal_recover(journal_t *journal)
249 { 249 {
250 int err, err2; 250 int err, err2;
251 journal_superblock_t * sb; 251 journal_superblock_t * sb;
252 252
253 struct recovery_info info; 253 struct recovery_info info;
254 254
255 memset(&info, 0, sizeof(info)); 255 memset(&info, 0, sizeof(info));
256 sb = journal->j_superblock; 256 sb = journal->j_superblock;
257 257
258 /* 258 /*
259 * The journal superblock's s_start field (the current log head) 259 * The journal superblock's s_start field (the current log head)
260 * is always zero if, and only if, the journal was cleanly 260 * is always zero if, and only if, the journal was cleanly
261 * unmounted. 261 * unmounted.
262 */ 262 */
263 263
264 if (!sb->s_start) { 264 if (!sb->s_start) {
265 jbd_debug(1, "No recovery required, last transaction %d\n", 265 jbd_debug(1, "No recovery required, last transaction %d\n",
266 be32_to_cpu(sb->s_sequence)); 266 be32_to_cpu(sb->s_sequence));
267 journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1; 267 journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1;
268 return 0; 268 return 0;
269 } 269 }
270 270
271 err = do_one_pass(journal, &info, PASS_SCAN); 271 err = do_one_pass(journal, &info, PASS_SCAN);
272 if (!err) 272 if (!err)
273 err = do_one_pass(journal, &info, PASS_REVOKE); 273 err = do_one_pass(journal, &info, PASS_REVOKE);
274 if (!err) 274 if (!err)
275 err = do_one_pass(journal, &info, PASS_REPLAY); 275 err = do_one_pass(journal, &info, PASS_REPLAY);
276 276
277 jbd_debug(1, "JBD2: recovery, exit status %d, " 277 jbd_debug(1, "JBD2: recovery, exit status %d, "
278 "recovered transactions %u to %u\n", 278 "recovered transactions %u to %u\n",
279 err, info.start_transaction, info.end_transaction); 279 err, info.start_transaction, info.end_transaction);
280 jbd_debug(1, "JBD2: Replayed %d and revoked %d/%d blocks\n", 280 jbd_debug(1, "JBD2: Replayed %d and revoked %d/%d blocks\n",
281 info.nr_replays, info.nr_revoke_hits, info.nr_revokes); 281 info.nr_replays, info.nr_revoke_hits, info.nr_revokes);
282 282
283 /* Restart the log at the next transaction ID, thus invalidating 283 /* Restart the log at the next transaction ID, thus invalidating
284 * any existing commit records in the log. */ 284 * any existing commit records in the log. */
285 journal->j_transaction_sequence = ++info.end_transaction; 285 journal->j_transaction_sequence = ++info.end_transaction;
286 286
287 jbd2_journal_clear_revoke(journal); 287 jbd2_journal_clear_revoke(journal);
288 err2 = sync_blockdev(journal->j_fs_dev); 288 err2 = sync_blockdev(journal->j_fs_dev);
289 if (!err) 289 if (!err)
290 err = err2; 290 err = err2;
291 /* Make sure all replayed data is on permanent storage */ 291 /* Make sure all replayed data is on permanent storage */
292 if (journal->j_flags & JBD2_BARRIER) { 292 if (journal->j_flags & JBD2_BARRIER) {
293 err2 = blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); 293 err2 = blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
294 if (!err) 294 if (!err)
295 err = err2; 295 err = err2;
296 } 296 }
297 return err; 297 return err;
298 } 298 }
299 299
300 /** 300 /**
301 * jbd2_journal_skip_recovery - Start journal and wipe existing records 301 * jbd2_journal_skip_recovery - Start journal and wipe existing records
302 * @journal: journal to startup 302 * @journal: journal to startup
303 * 303 *
304 * Locate any valid recovery information from the journal and set up the 304 * Locate any valid recovery information from the journal and set up the
305 * journal structures in memory to ignore it (presumably because the 305 * journal structures in memory to ignore it (presumably because the
306 * caller has evidence that it is out of date). 306 * caller has evidence that it is out of date).
307 * This function doesn't appear to be exported. 307 * This function doesn't appear to be exported.
308 * 308 *
309 * We perform one pass over the journal to allow us to tell the user how 309 * We perform one pass over the journal to allow us to tell the user how
310 * much recovery information is being erased, and to let us initialise 310 * much recovery information is being erased, and to let us initialise
311 * the journal transaction sequence numbers to the next unused ID. 311 * the journal transaction sequence numbers to the next unused ID.
312 */ 312 */
313 int jbd2_journal_skip_recovery(journal_t *journal) 313 int jbd2_journal_skip_recovery(journal_t *journal)
314 { 314 {
315 int err; 315 int err;
316 316
317 struct recovery_info info; 317 struct recovery_info info;
318 318
319 memset(&info, 0, sizeof(info)); 319 memset(&info, 0, sizeof(info));
320 320
321 err = do_one_pass(journal, &info, PASS_SCAN); 321 err = do_one_pass(journal, &info, PASS_SCAN);
322 322
323 if (err) { 323 if (err) {
324 printk(KERN_ERR "JBD2: error %d scanning journal\n", err); 324 printk(KERN_ERR "JBD2: error %d scanning journal\n", err);
325 ++journal->j_transaction_sequence; 325 ++journal->j_transaction_sequence;
326 } else { 326 } else {
327 #ifdef CONFIG_JBD2_DEBUG 327 #ifdef CONFIG_JBD2_DEBUG
328 int dropped = info.end_transaction - 328 int dropped = info.end_transaction -
329 be32_to_cpu(journal->j_superblock->s_sequence); 329 be32_to_cpu(journal->j_superblock->s_sequence);
330 jbd_debug(1, 330 jbd_debug(1,
331 "JBD2: ignoring %d transaction%s from the journal.\n", 331 "JBD2: ignoring %d transaction%s from the journal.\n",
332 dropped, (dropped == 1) ? "" : "s"); 332 dropped, (dropped == 1) ? "" : "s");
333 #endif 333 #endif
334 journal->j_transaction_sequence = ++info.end_transaction; 334 journal->j_transaction_sequence = ++info.end_transaction;
335 } 335 }
336 336
337 journal->j_tail = 0; 337 journal->j_tail = 0;
338 return err; 338 return err;
339 } 339 }
340 340
341 static inline unsigned long long read_tag_block(journal_t *journal, 341 static inline unsigned long long read_tag_block(journal_t *journal,
342 journal_block_tag_t *tag) 342 journal_block_tag_t *tag)
343 { 343 {
344 unsigned long long block = be32_to_cpu(tag->t_blocknr); 344 unsigned long long block = be32_to_cpu(tag->t_blocknr);
345 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) 345 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
346 block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32; 346 block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32;
347 return block; 347 return block;
348 } 348 }
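
Assembling the 64-bit block number from the two big-endian halves can be checked in isolation; in this sketch ntohl/htonl stand in for be32_to_cpu/cpu_to_be32, and has_64bit plays the role of the JBD2_FEATURE_INCOMPAT_64BIT test:

#include <arpa/inet.h>                   /* ntohl(), htonl() */
#include <assert.h>
#include <stdint.h>

/* Combine the two big-endian 32-bit halves of a 64-bit block number,
 * as read_tag_block() does when the 64BIT feature is set. */
static uint64_t tag_block(uint32_t be_low, uint32_t be_high, int has_64bit)
{
        uint64_t block = ntohl(be_low);

        if (has_64bit)
                block |= (uint64_t)ntohl(be_high) << 32;
        return block;
}

int main(void)
{
        /* 0x00000001_00000002 stored as two big-endian words. */
        assert(tag_block(htonl(2), htonl(1), 1) == 0x100000002ULL);
        assert(tag_block(htonl(2), htonl(1), 0) == 2);   /* high half ignored */
        return 0;
}
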
349 349
350 /* 350 /*
351 * calc_chksums calculates the checksums for the blocks described in the 351 * calc_chksums calculates the checksums for the blocks described in the
352 * descriptor block. 352 * descriptor block.
353 */ 353 */
354 static int calc_chksums(journal_t *journal, struct buffer_head *bh, 354 static int calc_chksums(journal_t *journal, struct buffer_head *bh,
355 unsigned long *next_log_block, __u32 *crc32_sum) 355 unsigned long *next_log_block, __u32 *crc32_sum)
356 { 356 {
357 int i, num_blks, err; 357 int i, num_blks, err;
358 unsigned long io_block; 358 unsigned long io_block;
359 struct buffer_head *obh; 359 struct buffer_head *obh;
360 360
361 num_blks = count_tags(journal, bh); 361 num_blks = count_tags(journal, bh);
362 /* Calculate checksum of the descriptor block. */ 362 /* Calculate checksum of the descriptor block. */
363 *crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size); 363 *crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size);
364 364
365 for (i = 0; i < num_blks; i++) { 365 for (i = 0; i < num_blks; i++) {
366 io_block = (*next_log_block)++; 366 io_block = (*next_log_block)++;
367 wrap(journal, *next_log_block); 367 wrap(journal, *next_log_block);
368 err = jread(&obh, journal, io_block); 368 err = jread(&obh, journal, io_block);
369 if (err) { 369 if (err) {
370 printk(KERN_ERR "JBD2: IO error %d recovering block " 370 printk(KERN_ERR "JBD2: IO error %d recovering block "
371 "%lu in log\n", err, io_block); 371 "%lu in log\n", err, io_block);
372 return 1; 372 return 1;
373 } else { 373 } else {
374 *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data, 374 *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data,
375 obh->b_size); 375 obh->b_size);
376 } 376 }
377 put_bh(obh); 377 put_bh(obh);
378 } 378 }
379 return 0; 379 return 0;
380 } 380 }
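
The transaction checksum is one running CRC over the descriptor block followed by every block it describes, in log order. A sketch of that accumulation, using a bit-at-a-time equivalent of the kernel's crc32_be and a fetch callback standing in for jread; the names here are illustrative:

#include <stddef.h>
#include <stdint.h>

/* Bit-at-a-time MSB-first CRC-32, matching crc32_be's polynomial. */
static uint32_t crc32_be_bitwise(uint32_t crc, const void *buf, size_t len)
{
        const uint8_t *p = buf;

        while (len--) {
                crc ^= (uint32_t)*p++ << 24;
                for (int i = 0; i < 8; i++)
                        crc = (crc & 0x80000000U)
                                ? (crc << 1) ^ 0x04C11DB7U
                                : crc << 1;
        }
        return crc;
}

typedef int (*fetch_fn)(unsigned long block, void *buf);

/* Fold the descriptor block, then each described block, into one
 * running checksum -- the same accumulation calc_chksums() performs. */
static int checksum_transaction(const void *descr, size_t blocksize,
                                int num_blks, unsigned long *next_block,
                                fetch_fn fetch, void *scratch,
                                uint32_t *crc)
{
        *crc = crc32_be_bitwise(*crc, descr, blocksize);

        for (int i = 0; i < num_blks; i++) {
                if (fetch((*next_block)++, scratch))
                        return 1;        /* IO error: caller stops the scan */
                *crc = crc32_be_bitwise(*crc, scratch, blocksize);
        }
        return 0;
}
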
381 381
382 static int jbd2_commit_block_csum_verify(journal_t *j, void *buf) 382 static int jbd2_commit_block_csum_verify(journal_t *j, void *buf)
383 { 383 {
384 struct commit_header *h; 384 struct commit_header *h;
385 __be32 provided; 385 __be32 provided;
386 __u32 calculated; 386 __u32 calculated;
387 387
388 if (!jbd2_journal_has_csum_v2or3(j)) 388 if (!jbd2_journal_has_csum_v2or3(j))
389 return 1; 389 return 1;
390 390
391 h = buf; 391 h = buf;
392 provided = h->h_chksum[0]; 392 provided = h->h_chksum[0];
393 h->h_chksum[0] = 0; 393 h->h_chksum[0] = 0;
394 calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); 394 calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
395 h->h_chksum[0] = provided; 395 h->h_chksum[0] = provided;
396 396
397 return provided == cpu_to_be32(calculated); 397 return provided == cpu_to_be32(calculated);
398 } 398 }
399 399
400 static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag, 400 static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag,
401 void *buf, __u32 sequence) 401 void *buf, __u32 sequence)
402 { 402 {
403 journal_block_tag3_t *tag3 = (journal_block_tag3_t *)tag; 403 journal_block_tag3_t *tag3 = (journal_block_tag3_t *)tag;
404 __u32 csum32; 404 __u32 csum32;
405 __be32 seq; 405 __be32 seq;
406 406
407 if (!jbd2_journal_has_csum_v2or3(j)) 407 if (!jbd2_journal_has_csum_v2or3(j))
408 return 1; 408 return 1;
409 409
410 seq = cpu_to_be32(sequence); 410 seq = cpu_to_be32(sequence);
411 csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq)); 411 csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq));
412 csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize); 412 csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize);
413 413
414 if (JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V3)) 414 if (JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V3))
415 return tag3->t_checksum == cpu_to_be32(csum32); 415 return tag3->t_checksum == cpu_to_be32(csum32);
416 else 416 else
417 return tag->t_checksum == cpu_to_be16(csum32); 417 return tag->t_checksum == cpu_to_be16(csum32);
418 } 418 }
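
Note how the per-block tag checksum chains the commit sequence number in front of the block data: a stale copy of a block left over from an earlier trip around the log fails verification even if its payload is intact. A sketch of that chaining with a toy checksum (the real code uses crc32c via jbd2_chksum; tag_csum and toy_chain are hypothetical names):

#include <arpa/inet.h>
#include <stddef.h>
#include <stdint.h>

static uint32_t toy_chain(uint32_t seed, const void *buf, size_t len)
{
        const uint8_t *p = buf;

        while (len--)
                seed = (seed << 5) + seed + *p++;
        return seed;
}

/* csum = H(H(seed, be32(sequence)), block): binding the sequence
 * number means a block written for an older transaction cannot pass
 * as part of the current one. */
static uint32_t tag_csum(uint32_t seed, uint32_t sequence,
                         const void *block, size_t blocksize)
{
        uint32_t be_seq = htonl(sequence);
        uint32_t csum = toy_chain(seed, &be_seq, sizeof(be_seq));

        return toy_chain(csum, block, blocksize);
}
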
419 419
420 static int do_one_pass(journal_t *journal, 420 static int do_one_pass(journal_t *journal,
421 struct recovery_info *info, enum passtype pass) 421 struct recovery_info *info, enum passtype pass)
422 { 422 {
423 unsigned int first_commit_ID, next_commit_ID; 423 unsigned int first_commit_ID, next_commit_ID;
424 unsigned long next_log_block; 424 unsigned long next_log_block;
425 int err, success = 0; 425 int err, success = 0;
426 journal_superblock_t * sb; 426 journal_superblock_t * sb;
427 journal_header_t * tmp; 427 journal_header_t * tmp;
428 struct buffer_head * bh; 428 struct buffer_head * bh;
429 unsigned int sequence; 429 unsigned int sequence;
430 int blocktype; 430 int blocktype;
431 int tag_bytes = journal_tag_bytes(journal); 431 int tag_bytes = journal_tag_bytes(journal);
432 __u32 crc32_sum = ~0; /* Transactional Checksums */ 432 __u32 crc32_sum = ~0; /* Transactional Checksums */
433 int descr_csum_size = 0; 433 int descr_csum_size = 0;
434 int block_error = 0; 434 int block_error = 0;
435 435
436 /* 436 /*
437 * First thing is to establish what we expect to find in the log 437 * First thing is to establish what we expect to find in the log
438 * (in terms of transaction IDs), and where (in terms of log 438 * (in terms of transaction IDs), and where (in terms of log
439 * block offsets): query the superblock. 439 * block offsets): query the superblock.
440 */ 440 */
441 441
442 sb = journal->j_superblock; 442 sb = journal->j_superblock;
443 next_commit_ID = be32_to_cpu(sb->s_sequence); 443 next_commit_ID = be32_to_cpu(sb->s_sequence);
444 next_log_block = be32_to_cpu(sb->s_start); 444 next_log_block = be32_to_cpu(sb->s_start);
445 445
446 first_commit_ID = next_commit_ID; 446 first_commit_ID = next_commit_ID;
447 if (pass == PASS_SCAN) 447 if (pass == PASS_SCAN)
448 info->start_transaction = first_commit_ID; 448 info->start_transaction = first_commit_ID;
449 449
450 jbd_debug(1, "Starting recovery pass %d\n", pass); 450 jbd_debug(1, "Starting recovery pass %d\n", pass);
451 451
452 /* 452 /*
453 * Now we walk through the log, transaction by transaction, 453 * Now we walk through the log, transaction by transaction,
454 * making sure that each transaction has a commit block in the 454 * making sure that each transaction has a commit block in the
455 * expected place. Each complete transaction gets replayed back 455 * expected place. Each complete transaction gets replayed back
456 * into the main filesystem. 456 * into the main filesystem.
457 */ 457 */
458 458
459 while (1) { 459 while (1) {
460 int flags; 460 int flags;
461 char * tagp; 461 char * tagp;
462 journal_block_tag_t * tag; 462 journal_block_tag_t * tag;
463 struct buffer_head * obh; 463 struct buffer_head * obh;
464 struct buffer_head * nbh; 464 struct buffer_head * nbh;
465 465
466 cond_resched(); 466 cond_resched();
467 467
468 /* If we already know where to stop the log traversal, 468 /* If we already know where to stop the log traversal,
469 * check right now that we haven't gone past the end of 469 * check right now that we haven't gone past the end of
470 * the log. */ 470 * the log. */
471 471
472 if (pass != PASS_SCAN) 472 if (pass != PASS_SCAN)
473 if (tid_geq(next_commit_ID, info->end_transaction)) 473 if (tid_geq(next_commit_ID, info->end_transaction))
474 break; 474 break;
475 475
476 jbd_debug(2, "Scanning for sequence ID %u at %lu/%lu\n", 476 jbd_debug(2, "Scanning for sequence ID %u at %lu/%lu\n",
477 next_commit_ID, next_log_block, journal->j_last); 477 next_commit_ID, next_log_block, journal->j_last);
478 478
479 /* Skip over each chunk of the transaction looking for 479 /* Skip over each chunk of the transaction looking for
480 * either the next descriptor block or the final commit 480 * either the next descriptor block or the final commit
481 * record. */ 481 * record. */
482 482
483 jbd_debug(3, "JBD2: checking block %ld\n", next_log_block); 483 jbd_debug(3, "JBD2: checking block %ld\n", next_log_block);
484 err = jread(&bh, journal, next_log_block); 484 err = jread(&bh, journal, next_log_block);
485 if (err) 485 if (err)
486 goto failed; 486 goto failed;
487 487
488 next_log_block++; 488 next_log_block++;
489 wrap(journal, next_log_block); 489 wrap(journal, next_log_block);
490 490
491 /* What kind of buffer is it? 491 /* What kind of buffer is it?
492 * 492 *
493 * If it is a descriptor block, check that it has the 493 * If it is a descriptor block, check that it has the
494 * expected sequence number. Otherwise, we're all done 494 * expected sequence number. Otherwise, we're all done
495 * here. */ 495 * here. */
496 496
497 tmp = (journal_header_t *)bh->b_data; 497 tmp = (journal_header_t *)bh->b_data;
498 498
499 if (tmp->h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER)) { 499 if (tmp->h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER)) {
500 brelse(bh); 500 brelse(bh);
501 break; 501 break;
502 } 502 }
503 503
504 blocktype = be32_to_cpu(tmp->h_blocktype); 504 blocktype = be32_to_cpu(tmp->h_blocktype);
505 sequence = be32_to_cpu(tmp->h_sequence); 505 sequence = be32_to_cpu(tmp->h_sequence);
506 jbd_debug(3, "Found magic %d, sequence %d\n", 506 jbd_debug(3, "Found magic %d, sequence %d\n",
507 blocktype, sequence); 507 blocktype, sequence);
508 508
509 if (sequence != next_commit_ID) { 509 if (sequence != next_commit_ID) {
510 brelse(bh); 510 brelse(bh);
511 break; 511 break;
512 } 512 }
513 513
514 /* OK, we have a valid descriptor block which matches 514 /* OK, we have a valid descriptor block which matches
515 * all of the sequence number checks. What are we going 515 * all of the sequence number checks. What are we going
516 * to do with it? That depends on the pass... */ 516 * to do with it? That depends on the pass... */
517 517
518 switch(blocktype) { 518 switch(blocktype) {
519 case JBD2_DESCRIPTOR_BLOCK: 519 case JBD2_DESCRIPTOR_BLOCK:
520 /* Verify checksum first */ 520 /* Verify checksum first */
521 if (jbd2_journal_has_csum_v2or3(journal)) 521 if (jbd2_journal_has_csum_v2or3(journal))
522 descr_csum_size = 522 descr_csum_size =
523 sizeof(struct jbd2_journal_block_tail); 523 sizeof(struct jbd2_journal_block_tail);
524 if (descr_csum_size > 0 && 524 if (descr_csum_size > 0 &&
525 !jbd2_descr_block_csum_verify(journal, 525 !jbd2_descr_block_csum_verify(journal,
526 bh->b_data)) { 526 bh->b_data)) {
527 printk(KERN_ERR "JBD2: Invalid checksum "
528 "recovering block %lu in log\n",
529 next_log_block);
527 err = -EIO; 530 err = -EIO;
528 brelse(bh); 531 brelse(bh);
529 goto failed; 532 goto failed;
530 } 533 }
531 534
532 /* If it is a valid descriptor block, replay it 535 /* If it is a valid descriptor block, replay it
533 * in PASS_REPLAY; if journal checksums are 536 * in PASS_REPLAY; if journal checksums are
534 * enabled, calculate them in PASS_SCAN; otherwise 537 * enabled, calculate them in PASS_SCAN; otherwise
535 * just skip over the blocks it describes. */ 538 * just skip over the blocks it describes. */
536 if (pass != PASS_REPLAY) { 539 if (pass != PASS_REPLAY) {
537 if (pass == PASS_SCAN && 540 if (pass == PASS_SCAN &&
538 JBD2_HAS_COMPAT_FEATURE(journal, 541 JBD2_HAS_COMPAT_FEATURE(journal,
539 JBD2_FEATURE_COMPAT_CHECKSUM) && 542 JBD2_FEATURE_COMPAT_CHECKSUM) &&
540 !info->end_transaction) { 543 !info->end_transaction) {
541 if (calc_chksums(journal, bh, 544 if (calc_chksums(journal, bh,
542 &next_log_block, 545 &next_log_block,
543 &crc32_sum)) { 546 &crc32_sum)) {
544 put_bh(bh); 547 put_bh(bh);
545 break; 548 break;
546 } 549 }
547 put_bh(bh); 550 put_bh(bh);
548 continue; 551 continue;
549 } 552 }
550 next_log_block += count_tags(journal, bh); 553 next_log_block += count_tags(journal, bh);
551 wrap(journal, next_log_block); 554 wrap(journal, next_log_block);
552 put_bh(bh); 555 put_bh(bh);
553 continue; 556 continue;
554 } 557 }
555 558
556 /* A descriptor block: we can now write all of 559 /* A descriptor block: we can now write all of
557 * the data blocks. Yay, useful work is finally 560 * the data blocks. Yay, useful work is finally
558 * getting done here! */ 561 * getting done here! */
559 562
560 tagp = &bh->b_data[sizeof(journal_header_t)]; 563 tagp = &bh->b_data[sizeof(journal_header_t)];
561 while ((tagp - bh->b_data + tag_bytes) 564 while ((tagp - bh->b_data + tag_bytes)
562 <= journal->j_blocksize - descr_csum_size) { 565 <= journal->j_blocksize - descr_csum_size) {
563 unsigned long io_block; 566 unsigned long io_block;
564 567
565 tag = (journal_block_tag_t *) tagp; 568 tag = (journal_block_tag_t *) tagp;
566 flags = be16_to_cpu(tag->t_flags); 569 flags = be16_to_cpu(tag->t_flags);
567 570
568 io_block = next_log_block++; 571 io_block = next_log_block++;
569 wrap(journal, next_log_block); 572 wrap(journal, next_log_block);
570 err = jread(&obh, journal, io_block); 573 err = jread(&obh, journal, io_block);
571 if (err) { 574 if (err) {
572 /* Recover what we can, but 575 /* Recover what we can, but
573 * report failure at the end. */ 576 * report failure at the end. */
574 success = err; 577 success = err;
575 printk(KERN_ERR 578 printk(KERN_ERR
576 "JBD2: IO error %d recovering " 579 "JBD2: IO error %d recovering "
577 "block %ld in log\n", 580 "block %ld in log\n",
578 err, io_block); 581 err, io_block);
579 } else { 582 } else {
580 unsigned long long blocknr; 583 unsigned long long blocknr;
581 584
582 J_ASSERT(obh != NULL); 585 J_ASSERT(obh != NULL);
583 blocknr = read_tag_block(journal, 586 blocknr = read_tag_block(journal,
584 tag); 587 tag);
585 588
586 /* If the block has been 589 /* If the block has been
587 * revoked, then we're all done 590 * revoked, then we're all done
588 * here. */ 591 * here. */
589 if (jbd2_journal_test_revoke 592 if (jbd2_journal_test_revoke
590 (journal, blocknr, 593 (journal, blocknr,
591 next_commit_ID)) { 594 next_commit_ID)) {
592 brelse(obh); 595 brelse(obh);
593 ++info->nr_revoke_hits; 596 ++info->nr_revoke_hits;
594 goto skip_write; 597 goto skip_write;
595 } 598 }
596 599
597 /* Look for block corruption */ 600 /* Look for block corruption */
598 if (!jbd2_block_tag_csum_verify( 601 if (!jbd2_block_tag_csum_verify(
599 journal, tag, obh->b_data, 602 journal, tag, obh->b_data,
600 be32_to_cpu(tmp->h_sequence))) { 603 be32_to_cpu(tmp->h_sequence))) {
601 brelse(obh); 604 brelse(obh);
602 success = -EIO; 605 success = -EIO;
603 printk(KERN_ERR "JBD2: Invalid " 606 printk(KERN_ERR "JBD2: Invalid "
604 "checksum recovering " 607 "checksum recovering "
605 "block %llu in log\n", 608 "block %llu in log\n",
606 blocknr); 609 blocknr);
607 block_error = 1; 610 block_error = 1;
608 goto skip_write; 611 goto skip_write;
609 } 612 }
610 613
611 /* Find a buffer for the new 614 /* Find a buffer for the new
612 * data being restored */ 615 * data being restored */
613 nbh = __getblk(journal->j_fs_dev, 616 nbh = __getblk(journal->j_fs_dev,
614 blocknr, 617 blocknr,
615 journal->j_blocksize); 618 journal->j_blocksize);
616 if (nbh == NULL) { 619 if (nbh == NULL) {
617 printk(KERN_ERR 620 printk(KERN_ERR
618 "JBD2: Out of memory " 621 "JBD2: Out of memory "
619 "during recovery.\n"); 622 "during recovery.\n");
620 err = -ENOMEM; 623 err = -ENOMEM;
621 brelse(bh); 624 brelse(bh);
622 brelse(obh); 625 brelse(obh);
623 goto failed; 626 goto failed;
624 } 627 }
625 628
626 lock_buffer(nbh); 629 lock_buffer(nbh);
627 memcpy(nbh->b_data, obh->b_data, 630 memcpy(nbh->b_data, obh->b_data,
628 journal->j_blocksize); 631 journal->j_blocksize);
629 if (flags & JBD2_FLAG_ESCAPE) { 632 if (flags & JBD2_FLAG_ESCAPE) {
630 *((__be32 *)nbh->b_data) = 633 *((__be32 *)nbh->b_data) =
631 cpu_to_be32(JBD2_MAGIC_NUMBER); 634 cpu_to_be32(JBD2_MAGIC_NUMBER);
632 } 635 }
633 636
634 BUFFER_TRACE(nbh, "marking dirty"); 637 BUFFER_TRACE(nbh, "marking dirty");
635 set_buffer_uptodate(nbh); 638 set_buffer_uptodate(nbh);
636 mark_buffer_dirty(nbh); 639 mark_buffer_dirty(nbh);
637 BUFFER_TRACE(nbh, "marking uptodate"); 640 BUFFER_TRACE(nbh, "marking uptodate");
638 ++info->nr_replays; 641 ++info->nr_replays;
639 /* ll_rw_block(WRITE, 1, &nbh); */ 642 /* ll_rw_block(WRITE, 1, &nbh); */
640 unlock_buffer(nbh); 643 unlock_buffer(nbh);
641 brelse(obh); 644 brelse(obh);
642 brelse(nbh); 645 brelse(nbh);
643 } 646 }
644 647
645 skip_write: 648 skip_write:
646 tagp += tag_bytes; 649 tagp += tag_bytes;
647 if (!(flags & JBD2_FLAG_SAME_UUID)) 650 if (!(flags & JBD2_FLAG_SAME_UUID))
648 tagp += 16; 651 tagp += 16;
649 652
650 if (flags & JBD2_FLAG_LAST_TAG) 653 if (flags & JBD2_FLAG_LAST_TAG)
651 break; 654 break;
652 } 655 }
653 656
654 brelse(bh); 657 brelse(bh);
655 continue; 658 continue;
656 659
657 case JBD2_COMMIT_BLOCK: 660 case JBD2_COMMIT_BLOCK:
658 /* How to differentiate between interrupted commit 661 /* How to differentiate between interrupted commit
659 * and journal corruption? 662 * and journal corruption?
660 * 663 *
661 * {nth transaction} 664 * {nth transaction}
662 * Checksum Verification Failed 665 * Checksum Verification Failed
663 * | 666 * |
664 * ____________________ 667 * ____________________
665 * | | 668 * | |
666 * async_commit sync_commit 669 * async_commit sync_commit
667 * | | 670 * | |
668 * | GO TO NEXT "Journal Corruption" 671 * | GO TO NEXT "Journal Corruption"
669 * | TRANSACTION 672 * | TRANSACTION
670 * | 673 * |
671 * {(n+1)th transaction} 674 * {(n+1)th transaction}
672 * | 675 * |
673 * _______|______________ 676 * _______|______________
674 * | | 677 * | |
675 * Commit block found Commit block not found 678 * Commit block found Commit block not found
676 * | | 679 * | |
677 * "Journal Corruption" | 680 * "Journal Corruption" |
678 * _____________|_________ 681 * _____________|_________
679 * | | 682 * | |
680 * nth trans corrupt OR nth trans 683 * nth trans corrupt OR nth trans
681 * and (n+1)th interrupted interrupted 684 * and (n+1)th interrupted interrupted
682 * before commit block 685 * before commit block
683 * could reach the disk. 686 * could reach the disk.
684 * (Cannot find the difference in above 687 * (Cannot find the difference in above
685 * mentioned conditions. Hence assume 688 * mentioned conditions. Hence assume
686 * "Interrupted Commit".) 689 * "Interrupted Commit".)
687 */ 690 */
688 691
689 /* Found an expected commit block: if checksums 692 /* Found an expected commit block: if checksums
690 * are present verify them in PASS_SCAN; else not 693 * are present verify them in PASS_SCAN; else not
691 * much to do other than move on to the next sequence 694 * much to do other than move on to the next sequence
692 * number. */ 695 * number. */
693 if (pass == PASS_SCAN && 696 if (pass == PASS_SCAN &&
694 JBD2_HAS_COMPAT_FEATURE(journal, 697 JBD2_HAS_COMPAT_FEATURE(journal,
695 JBD2_FEATURE_COMPAT_CHECKSUM)) { 698 JBD2_FEATURE_COMPAT_CHECKSUM)) {
696 int chksum_err, chksum_seen; 699 int chksum_err, chksum_seen;
697 struct commit_header *cbh = 700 struct commit_header *cbh =
698 (struct commit_header *)bh->b_data; 701 (struct commit_header *)bh->b_data;
699 unsigned found_chksum = 702 unsigned found_chksum =
700 be32_to_cpu(cbh->h_chksum[0]); 703 be32_to_cpu(cbh->h_chksum[0]);
701 704
702 chksum_err = chksum_seen = 0; 705 chksum_err = chksum_seen = 0;
703 706
704 if (info->end_transaction) { 707 if (info->end_transaction) {
705 journal->j_failed_commit = 708 journal->j_failed_commit =
706 info->end_transaction; 709 info->end_transaction;
707 brelse(bh); 710 brelse(bh);
708 break; 711 break;
709 } 712 }
710 713
711 if (crc32_sum == found_chksum && 714 if (crc32_sum == found_chksum &&
712 cbh->h_chksum_type == JBD2_CRC32_CHKSUM && 715 cbh->h_chksum_type == JBD2_CRC32_CHKSUM &&
713 cbh->h_chksum_size == 716 cbh->h_chksum_size ==
714 JBD2_CRC32_CHKSUM_SIZE) 717 JBD2_CRC32_CHKSUM_SIZE)
715 chksum_seen = 1; 718 chksum_seen = 1;
716 else if (!(cbh->h_chksum_type == 0 && 719 else if (!(cbh->h_chksum_type == 0 &&
717 cbh->h_chksum_size == 0 && 720 cbh->h_chksum_size == 0 &&
718 found_chksum == 0 && 721 found_chksum == 0 &&
719 !chksum_seen)) 722 !chksum_seen))
720 /* 723 /*
721 * If the fs is first mounted with an old kernel 724 * If the fs is first mounted with an old kernel
722 * and later with a kernel that has journal 725 * and later with a kernel that has journal
723 * checksums enabled, the journal feature flag 726 * checksums enabled, the journal feature flag
724 * can be set while the individual commit 727 * can be set while the individual commit
725 * blocks carry no checksum (i.e. chksum == 0). 728 * blocks carry no checksum (i.e. chksum == 0).
726 * Hence, to avoid spurious checksum failures 729 * Hence, to avoid spurious checksum failures
727 * in this situation, this extra check is 730 * in this situation, this extra check is
728 * added. 731 * added.
729 */ 732 */
730 chksum_err = 1; 733 chksum_err = 1;
731 734
732 if (chksum_err) { 735 if (chksum_err) {
733 info->end_transaction = next_commit_ID; 736 info->end_transaction = next_commit_ID;
734 737
735 if (!JBD2_HAS_INCOMPAT_FEATURE(journal, 738 if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
736 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)){ 739 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)){
737 journal->j_failed_commit = 740 journal->j_failed_commit =
738 next_commit_ID; 741 next_commit_ID;
739 brelse(bh); 742 brelse(bh);
740 break; 743 break;
741 } 744 }
742 } 745 }
743 crc32_sum = ~0; 746 crc32_sum = ~0;
744 } 747 }
745 if (pass == PASS_SCAN && 748 if (pass == PASS_SCAN &&
746 !jbd2_commit_block_csum_verify(journal, 749 !jbd2_commit_block_csum_verify(journal,
747 bh->b_data)) { 750 bh->b_data)) {
748 info->end_transaction = next_commit_ID; 751 info->end_transaction = next_commit_ID;
749 752
750 if (!JBD2_HAS_INCOMPAT_FEATURE(journal, 753 if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
751 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { 754 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
752 journal->j_failed_commit = 755 journal->j_failed_commit =
753 next_commit_ID; 756 next_commit_ID;
754 brelse(bh); 757 brelse(bh);
755 break; 758 break;
756 } 759 }
757 } 760 }
758 brelse(bh); 761 brelse(bh);
759 next_commit_ID++; 762 next_commit_ID++;
760 continue; 763 continue;
761 764
762 case JBD2_REVOKE_BLOCK: 765 case JBD2_REVOKE_BLOCK:
763 /* If we aren't in the REVOKE pass, then we can 766 /* If we aren't in the REVOKE pass, then we can
764 * just skip over this block. */ 767 * just skip over this block. */
765 if (pass != PASS_REVOKE) { 768 if (pass != PASS_REVOKE) {
766 brelse(bh); 769 brelse(bh);
767 continue; 770 continue;
768 } 771 }
769 772
770 err = scan_revoke_records(journal, bh, 773 err = scan_revoke_records(journal, bh,
771 next_commit_ID, info); 774 next_commit_ID, info);
772 brelse(bh); 775 brelse(bh);
773 if (err) 776 if (err)
774 goto failed; 777 goto failed;
775 continue; 778 continue;
776 779
777 default: 780 default:
778 jbd_debug(3, "Unrecognised magic %d, end of scan.\n", 781 jbd_debug(3, "Unrecognised magic %d, end of scan.\n",
779 blocktype); 782 blocktype);
780 brelse(bh); 783 brelse(bh);
781 goto done; 784 goto done;
782 } 785 }
783 } 786 }
784 787
785 done: 788 done:
786 /* 789 /*
787 * We broke out of the log scan loop: either we came to the 790 * We broke out of the log scan loop: either we came to the
788 * known end of the log or we found an unexpected block in the 791 * known end of the log or we found an unexpected block in the
789 * log. If the latter happened, then we know that the "current" 792 * log. If the latter happened, then we know that the "current"
790 * transaction marks the end of the valid log. 793 * transaction marks the end of the valid log.
791 */ 794 */
792 795
793 if (pass == PASS_SCAN) { 796 if (pass == PASS_SCAN) {
794 if (!info->end_transaction) 797 if (!info->end_transaction)
795 info->end_transaction = next_commit_ID; 798 info->end_transaction = next_commit_ID;
796 } else { 799 } else {
797 /* It's really bad news if different passes end up at 800 /* It's really bad news if different passes end up at
798 * different places (but possible due to IO errors). */ 801 * different places (but possible due to IO errors). */
799 if (info->end_transaction != next_commit_ID) { 802 if (info->end_transaction != next_commit_ID) {
800 printk(KERN_ERR "JBD2: recovery pass %d ended at " 803 printk(KERN_ERR "JBD2: recovery pass %d ended at "
801 "transaction %u, expected %u\n", 804 "transaction %u, expected %u\n",
802 pass, next_commit_ID, info->end_transaction); 805 pass, next_commit_ID, info->end_transaction);
803 if (!success) 806 if (!success)
804 success = -EIO; 807 success = -EIO;
805 } 808 }
806 } 809 }
807 if (block_error && success == 0) 810 if (block_error && success == 0)
808 success = -EIO; 811 success = -EIO;
809 return success; 812 return success;
810 813
811 failed: 814 failed:
812 return err; 815 return err;
813 } 816 }
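
Stripped of checksumming, replay, and the revoke bookkeeping, the scan loop above reduces to a small state machine: read the next log block, stop on bad magic or an unexpected sequence number, skip a descriptor's data blocks, and bump the expected sequence at each commit. A compile-only skeleton of that shape; read_block, classify, and tags_in are stubbed stand-ins for jread and the header parsing, and wraparound is omitted:

#include <stdint.h>

enum kind { KIND_DESCRIPTOR, KIND_COMMIT, KIND_REVOKE, KIND_OTHER };

/* Dummy stubs so the sketch compiles standalone; a real caller
 * supplies journal-backed versions. */
static int read_block(unsigned long blk, void *buf)
{ (void)blk; (void)buf; return -1; }
static enum kind classify(const void *buf, uint32_t *seq)
{ (void)buf; *seq = 0; return KIND_OTHER; }
static int tags_in(const void *buf) { (void)buf; return 0; }

static int scan_log(unsigned long next_block, uint32_t next_commit_id,
                    void *buf, uint32_t *end_transaction)
{
        for (;;) {
                uint32_t seq;
                enum kind k;

                if (read_block(next_block++, buf))
                        return -1;       /* IO error aborts the pass */

                k = classify(buf, &seq);
                /* Bad magic or an unexpected sequence number marks
                 * the end of the valid log. */
                if (k == KIND_OTHER || seq != next_commit_id)
                        break;

                switch (k) {
                case KIND_DESCRIPTOR:
                        next_block += tags_in(buf); /* skip described data */
                        break;
                case KIND_COMMIT:
                        next_commit_id++;           /* transaction complete */
                        break;
                case KIND_REVOKE:                   /* parsed in PASS_REVOKE */
                default:
                        break;
                }
        }
        *end_transaction = next_commit_id;
        return 0;
}
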
814 817
815 static int jbd2_revoke_block_csum_verify(journal_t *j, 818 static int jbd2_revoke_block_csum_verify(journal_t *j,
816 void *buf) 819 void *buf)
817 { 820 {
818 struct jbd2_journal_revoke_tail *tail; 821 struct jbd2_journal_revoke_tail *tail;
819 __be32 provided; 822 __be32 provided;
820 __u32 calculated; 823 __u32 calculated;
821 824
822 if (!jbd2_journal_has_csum_v2or3(j)) 825 if (!jbd2_journal_has_csum_v2or3(j))
823 return 1; 826 return 1;
824 827
825 tail = (struct jbd2_journal_revoke_tail *)(buf + j->j_blocksize - 828 tail = (struct jbd2_journal_revoke_tail *)(buf + j->j_blocksize -
826 sizeof(struct jbd2_journal_revoke_tail)); 829 sizeof(struct jbd2_journal_revoke_tail));
827 provided = tail->r_checksum; 830 provided = tail->r_checksum;
828 tail->r_checksum = 0; 831 tail->r_checksum = 0;
829 calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); 832 calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
830 tail->r_checksum = provided; 833 tail->r_checksum = provided;
831 834
832 return provided == cpu_to_be32(calculated); 835 return provided == cpu_to_be32(calculated);
833 } 836 }
834 837
835 /* Scan a revoke record, marking all blocks mentioned as revoked. */ 838 /* Scan a revoke record, marking all blocks mentioned as revoked. */
836 839
837 static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, 840 static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
838 tid_t sequence, struct recovery_info *info) 841 tid_t sequence, struct recovery_info *info)
839 { 842 {
840 jbd2_journal_revoke_header_t *header; 843 jbd2_journal_revoke_header_t *header;
841 int offset, max; 844 int offset, max;
842 int record_len = 4; 845 int record_len = 4;
843 846
844 header = (jbd2_journal_revoke_header_t *) bh->b_data; 847 header = (jbd2_journal_revoke_header_t *) bh->b_data;
845 offset = sizeof(jbd2_journal_revoke_header_t); 848 offset = sizeof(jbd2_journal_revoke_header_t);
846 max = be32_to_cpu(header->r_count); 849 max = be32_to_cpu(header->r_count);
847 850
848 if (!jbd2_revoke_block_csum_verify(journal, header)) 851 if (!jbd2_revoke_block_csum_verify(journal, header))
849 return -EINVAL; 852 return -EINVAL;
850 853
851 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) 854 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
852 record_len = 8; 855 record_len = 8;
853 856
854 while (offset + record_len <= max) { 857 while (offset + record_len <= max) {
855 unsigned long long blocknr; 858 unsigned long long blocknr;
856 int err; 859 int err;
857 860
858 if (record_len == 4) 861 if (record_len == 4)
859 blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset))); 862 blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset)));
860 else 863 else
861 blocknr = be64_to_cpu(* ((__be64 *) (bh->b_data+offset))); 864 blocknr = be64_to_cpu(* ((__be64 *) (bh->b_data+offset)));
862 offset += record_len; 865 offset += record_len;
863 err = jbd2_journal_set_revoke(journal, blocknr, sequence); 866 err = jbd2_journal_set_revoke(journal, blocknr, sequence);
864 if (err) 867 if (err)
865 return err; 868 return err;
866 ++info->nr_revokes; 869 ++info->nr_revokes;
867 } 870 }
868 return 0; 871 return 0;
869 } 872 }
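
The record walk depends only on the on-disk width of each entry: 4 bytes, or 8 when the 64BIT feature is set. A standalone sketch of the same loop; memcpy replaces the kernel's direct casts (safe there because the buffer's alignment is known) so this version works on any alignment, and walk_records is an illustrative name:

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Walk a packed run of big-endian block numbers whose width depends
 * on a feature bit, as scan_revoke_records() does.  The caller has
 * already skipped the header, and max counts payload bytes. */
static int walk_records(const char *data, int max, int record_len)
{
        int offset = 0;
        int nr = 0;

        while (offset + record_len <= max) {
                uint64_t blocknr;

                if (record_len == 4) {
                        uint32_t v;

                        memcpy(&v, data + offset, sizeof(v));
                        blocknr = ntohl(v);
                } else {
                        uint32_t hi, lo;

                        memcpy(&hi, data + offset, sizeof(hi));
                        memcpy(&lo, data + offset + 4, sizeof(lo));
                        blocknr = ((uint64_t)ntohl(hi) << 32) | ntohl(lo);
                }
                offset += record_len;
                printf("revoked block %llu\n",
                       (unsigned long long)blocknr);
                nr++;
        }
        return nr;
}
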
870 873